In [2]:
#import all the needed functions
import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
import glob
import os
import numpy as np
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt

In [3]:
class ArioneoDataset(Dataset):
    
    def file_extraction(self, path):
        directories = glob.glob(path + '\*')
        tfiles = []
        files = []
        for d in directories:
            tfiles.extend(glob.glob(d + '\*'))

        for f in tfiles:
            if(target_file in f):
                files.append(f)
        return files

    def file_to_panda(self, files):
        data = []
        for f in files:
            x = pd.read_json(f)
            name = os.path.splitext(f)[0]
            name += '_MIX.json'
            x.name = name
            data.append(x)
        return data
    
    def select_features(self, d):
        y = d.drop(undesired_features, axis=1)
        y.name = d.name
        return y
    
    def longest_data_finder(self, data):
        maxi = 0
        for d in data:
            if(d.shape[0]>maxi):
                maxi = d.shape[0]
        return maxi
    
    def adapt_size_with_mean(self, d, maxi):
        name = d.name
        size = ((maxi - d.shape[0]), d.shape[1])
        df = pd.DataFrame(np.zeros(size), columns=d.columns)
        df=df.mask(df==0).fillna(d.mean())
        d = d.append(df, ignore_index=True)
        d.name = name
        return d
    
    def split_test_train(self, ratio=0.2, shuffle=True):
        
        if shuffle:
            random_index = torch.randperm(self.data.shape[0])
            self.data = self.data[random_index]
            self.targets = self.targets[random_index]
            
        test_size = int(self.data.shape[0]*ratio)
        train_size = int(self.data.shape[0] - test_size)
        
        self.data, test_data = torch.split(self.data, [train_size, test_size])
        self.targets, test_targets = torch.split(self.targets, [train_size, test_size])
        
        self.data = torch.tensor(self.data)
        self.targets = torch.tensor(self.targets)
        
        return ArioneoDataset(data=test_data, targets=test_targets)

    def __getitem__(self, index):
        
        img, target = self.data[index], self.targets[index]        
        return img, target
    
    def __len__(self):
        return len(self.data)


    def __init__(self, path=None, target_file=None, undesired_features=None, label=None, data=None, 
                 targets=None, transform=None, target_transform=None):
        
        if path is not None:
            files = self.file_extraction(path)
            data = self.file_to_panda(files)
            maxi = self.longest_data_finder(data)
            XdataFrames = []
            ydataFrames = []

            for d in data:
                d = self.adapt_size_with_mean(d, maxi)
                f = d.loc[:, label]
                d = self.select_features(d)
                XdataFrames.append(d)
                ydataFrames.append(f)
                
            self.data = torch.empty(len(XdataFrames), XdataFrames[0].shape[0], XdataFrames[0].shape[1])
            self.targets = torch.empty(len(ydataFrames), ydataFrames[0].shape[0])

            for i in range(len(XdataFrames)):
                self.data[i] = torch.tensor(XdataFrames[i].to_numpy())
                self.targets[i] = torch.tensor(ydataFrames[i].to_numpy())

            self.data = torch.tensor(self.data)
            self.targets = torch.tensor(self.targets)
        else:
            self.data = torch.tensor(data)
            self.targets = torch.tensor(targets)

In [4]:
path = r'C:\Users\Ithan Velarde\Desktop\Stage 2020\Horse Data\Small_dataset'
target_file = 'moments.json'
undesired_features = ['timestamp', 'latitude', 'longitude', 'run', 'propulsion', 'alert', 'heart_rate_variability', 'lo_cardio', 'altitude', 'bpm']
label = 'bpm'
train_dataset = ArioneoDataset(path=path, target_file=target_file, undesired_features=undesired_features, label=label)
test_dataset = train_dataset.split_test_train()

In [5]:
print(train_dataset.data.shape)
print(train_dataset.targets.shape)

torch.Size([252, 6223, 9])
torch.Size([252, 6223])


In [6]:
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size = 5, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size = 5, shuffle=True)

In [26]:
#Convolutional NN classifier with it's hyperparameters
class NeuralNet(nn.Module):
    def __init__(self, output_size):
        super(NeuralNet, self).__init__()
        #convolutional layers
        self.layer1 = nn.Sequential(
                                    nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2),
                                    nn.ReLU(),
                                    nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
                                    nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
                                    nn.ReLU(),
                                    nn.MaxPool2d(kernel_size=5, stride=2))
        self.drop_out = nn.Dropout()
        #linear layers
        self.layer3 = nn.Linear(16000, 10000)
        self.layer4 = nn.Linear(10000,output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        output = self.layer1(x)
        output = self.layer2(output)
        output = output.reshape(output.size(0), -1)
        output = self.drop_out(output)
        output = self.layer3(output)
        output = self.relu(output)
        output = self.layer4(output)
        return output
    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [27]:
num_epochs = 5

learning_rate = 0.001

model = NeuralNet(train_dataset.targets.shape[1]) #train_dataset.data.shape[1]*train_dataset.data.shape[2],

lossFunction = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [28]:
#fitting process
total_step = train_dataset.data.shape[0]
lo = []
for epoch in range(num_epochs):
    for i, (images,labels) in enumerate(train_loader):
        images = images.unsqueeze(1)
        print(images.shape)
        out = model(images)
        loss = lossFunction(out,labels)
        print(i)
        lo.append(loss.item())
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 50 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
            plt.plot(list(range(len(lo))), lo)
            plt.show()                

torch.Size([5, 1, 6223, 9])


RuntimeError: Given input size: (64x3111x4). Calculated output size: (64x1554x0). Output size is too small at c:\programdata\miniconda3\conda-bld\pytorch_1532509700152\work\aten\src\thnn\generic/SpatialDilatedMaxPooling.c:67