In [1]:
# this is a classic usage for google colab

import sys
import os.path as osp
import os
import gc
from google.colab import drive
drive.mount('/content/drive')
ROOT = osp.join('/content', 'drive', 'My Drive', 'OpportunityUCIDataset')
os.chdir(ROOT)

Mounted at /content/drive


In [None]:
# this is used when running the code on the pc

import sys
import os
import gc

ROOT = os.path.join(os.getcwd(), 'OpportunityUCIDataset')
os.chdir(ROOT)
print(ROOT)

In [2]:
import torch
import pandas as pd
import numpy as np
import glob

if torch.cuda.is_available():
    device = torch.device("cuda:0")  # you can continue going on here, like cuda:1 cuda:2....etc. 
    print("Running on the GPU")
else:
    device = torch.device("cpu")
    print("Running on the CPU")

Running on the GPU


In [3]:
from torch.utils.data import Dataset, DataLoader

class OpportunityDatasetParsed(Dataset):
    def __init__(self, filenames):
        self.filenames = filenames
        
    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, index):
        filename = self.filenames[index]
        #print(filename)
        df = pd.read_csv(filename, header = 0)
        data = torch.FloatTensor(
            df.iloc[:, 0:15].values.astype('float'))
        label = torch.FloatTensor(
            df.iloc[99, 15:18].values.astype('float'))

        # data_row = self.data[index]
        # label_row = self.label[index]

        # # assign the data
        # self.data = torch.FloatTensor(
        #     df.iloc[:, 0:15].values.astype('float'))
        
        # # assign the label. Only one label for now
        # self.label = torch.FloatTensor(
        #     df.iloc[0, 15:18].values.astype('float'))

        return data, label

In [4]:
import torch.nn as nn
import torch.nn.functional as F
import torch

In [8]:
num_epochs = 20
batch_size = 32
learning_rate = 0.01

In [34]:
class Net(nn.Module):
    def __init__(self, batch_size):
        super(Net, self).__init__()
        self.conv1 = nn.Conv1d(1, 3, 3)
        self.conv2 = nn.Conv1d(3, 6, 3)
        self.conv3 = nn.Conv1d(6, 12, 3)
        self.conv4 = nn.Conv1d(12, 24, 3)
        self.pool = nn.MaxPool1d(2)
        self.lstm1 = nn.LSTM(168, 64)
        self.lstm2 = nn.LSTM(96, 96)
        self.lstm3 = nn.LSTM(96, 96)
        self.fc1 = nn.Linear(64, 16)
        self.fc2 = nn.Linear(16, 3)
        self.fc3 = nn.Linear(96, 64)
        
        self.lstmtemp = nn.LSTM(15, 8)
        self.fctemp = nn.Linear(8*100, 3)  # the output is only 3 anw lul
        self.batch_size = batch_size


#     def forward(self, x):
#         x = F.relu(self.conv1(x), inplace=False)
#         #print(x.shape)
#         x = F.relu(self.conv2(x), inplace=False)
#         #print(x.shape)
#         x = F.relu(self.conv3(x), inplace=False)
#         #print(x.shape)
#         x = F.relu(self.conv4(x), inplace=False)
#         #print(x.shape)
#         x = x.view(-1, 1, 24 * 7)
#         #print(x.shape)

#         lstm_out1, h1 = self.lstm1(x)
#         #print(lstm_out1.shape)
#         x_out = self.fc1(lstm_out1[:, -1, :])
#         #print(x_out[:, -1, :].shape)
#         out1 = self.fc2(x_out)
#         #print(lstm_out1.shape)

#         # lstm_out2, h2 = self.lstm2(x - lstm_out1)
#         # out2 = self.fc2(lstm_out2[:, -1, :])

#         # lstm_out3, h3 = self.lstm3(x - lstm_out1 - lstm_out2)
#         # out3 = self.fc3(lstm_out3[:, -1, :])

#         return out1#, out2, out3

    def forward(self, x, h):
        #print(x.shape)
        cur_len = x.shape[0]

        x, _ = self.lstmtemp(x, h)
        #print(x.shape)
        x = x.contiguous().view(cur_len, -1)
        #print(x.shape)
        out = self.fctemp(x)
        #print(out.shape)
        return out

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = (weight.new(1, 100, 8).zero_().to(device),
                      weight.new(1, 100, 8).zero_().to(device))
        return hidden
            
loss_function = nn.CrossEntropyLoss()

net = Net(batch_size).to(device)
print(net)

optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate)

Net(
  (conv1): Conv1d(1, 3, kernel_size=(3,), stride=(1,))
  (conv2): Conv1d(3, 6, kernel_size=(3,), stride=(1,))
  (conv3): Conv1d(6, 12, kernel_size=(3,), stride=(1,))
  (conv4): Conv1d(12, 24, kernel_size=(3,), stride=(1,))
  (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (lstm1): LSTM(168, 64)
  (lstm2): LSTM(96, 96)
  (lstm3): LSTM(96, 96)
  (fc1): Linear(in_features=64, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=3, bias=True)
  (fc3): Linear(in_features=96, out_features=64, bias=True)
  (lstmtemp): LSTM(15, 8)
  (fctemp): Linear(in_features=800, out_features=3, bias=True)
)


In [None]:
# to refresh

drive.flush_and_unmount()
drive.mount('/content/drive')
ROOT = osp.join('/content', 'drive', 'My Drive', 'OpportunityUCIDataset')
os.chdir(ROOT)

Mounted at /content/drive


In [22]:
label_1.shape

torch.Size([32])

In [35]:
data_train_files = glob.glob("dataset_parsed/train/*.csv")
data_test_files = glob.glob("dataset_parsed/test/*.csv")

net.train()
h = net.init_hidden(batch_size)

for epoch in range(num_epochs):
    print("Epoch " + str(epoch + 1))

    data_train = OpportunityDatasetParsed(data_train_files)

    dataloader_train = DataLoader(data_train, batch_size=batch_size,
                    shuffle=False)

    for data, labels in dataloader_train:
        optimizer.zero_grad()
        data, labels = data.to(device), labels.to(device)

        data = torch.reshape(data, (len(labels), -1, 15))
        #print(data.shape)

        label_1 = torch.reshape(labels[:, 0:1].long(), (-1,))
        
        output_1 = net(data, h)
        #print(output_1.shape)

        loss = loss_function(output_1, label_1)
        loss.backward()
        optimizer.step()

        print("Loss: {}".format(loss.item()))

    # Calculate Accuracy

Epoch 1
torch.Size([32, 100, 15])
Loss: 1.0831167697906494
torch.Size([32, 100, 15])
Loss: 1.0830488204956055
torch.Size([32, 100, 15])
Loss: 1.0439990758895874
torch.Size([32, 100, 15])
Loss: 1.0184584856033325
torch.Size([32, 100, 15])
Loss: 0.9628299474716187
torch.Size([32, 100, 15])
Loss: 1.119839072227478
torch.Size([32, 100, 15])
Loss: 0.9517092108726501
torch.Size([32, 100, 15])
Loss: 0.869767963886261
torch.Size([32, 100, 15])
Loss: 0.8347958922386169
torch.Size([32, 100, 15])
Loss: 0.8330819010734558
torch.Size([32, 100, 15])
Loss: 0.8465489149093628
torch.Size([32, 100, 15])
Loss: 0.9827572703361511
torch.Size([32, 100, 15])
Loss: 0.8389487862586975
torch.Size([32, 100, 15])
Loss: 0.7232874631881714
torch.Size([32, 100, 15])
Loss: 0.7909082770347595
torch.Size([26, 100, 15])
Loss: 0.7836095094680786
Epoch 2
torch.Size([32, 100, 15])
Loss: 0.8731740713119507
torch.Size([32, 100, 15])
Loss: 0.9919978976249695
torch.Size([32, 100, 15])
Loss: 0.8388593196868896
torch.Size([32, 1

In [None]:
output_1

tensor([ 0.1042, -0.1701,  0.0515], device='cuda:0', grad_fn=<AddBackward0>)

In [38]:
h = net.init_hidden(batch_size)
with torch.no_grad():
    net.eval()
    data_test = OpportunityDatasetParsed(data_test_files)

    dataloader_test = DataLoader(data_test, batch_size=batch_size,
                    shuffle=False)

    correct_1 = 0

    total_1 = 0

    # Iterate through test dataset
    for data, labels in dataloader_test:

        data, labels = data.to(device), labels.to(device)
        data = torch.reshape(data, (len(labels), -1, 15))
        #print(data.shape)

        label_1 = torch.reshape(labels[:, 0:1].long(), (-1,))



        output_1 = net(data, h)
        #print(output_1)

        _, predicted_1 = torch.max(output_1, 1)
        print(predicted_1)


        total_1 += label_1.size(0)


        correct_1 += (predicted_1 == label_1).sum()


    accuracy_1 = 100 * torch.true_divide(correct_1, total_1)

    print('Accuracy_1: {}'.format(accuracy_1))

torch.Size([32, 100, 15])
tensor([2, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1,
        1, 1, 1, 1, 0, 1, 0, 1], device='cuda:0')
torch.Size([32, 100, 15])
tensor([0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0,
        1, 1, 1, 0, 0, 1, 0, 0], device='cuda:0')
torch.Size([32, 100, 15])
tensor([1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
        0, 0, 1, 0, 1, 1, 0, 1], device='cuda:0')
torch.Size([32, 100, 15])
tensor([0, 1, 1, 0, 0, 0, 0, 1, 2, 2, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 2, 2,
        2, 2, 2, 2, 1, 0, 1, 1], device='cuda:0')
torch.Size([32, 100, 15])
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0,
        1, 0, 0, 1, 1, 0, 1, 1], device='cuda:0')
torch.Size([32, 100, 15])
tensor([1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 1,
        0, 1, 1, 1, 1, 1, 0, 0], device='cuda:0')
torch.Size([9, 100, 15])
tensor([2, 2, 1, 1, 1, 0, 0, 1, 0], dev

In [None]:
label_1

tensor([ 0,  0,  0,  0,  0,  0,  0,  0, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0])

In [None]:
# clean the data
data_train_files = glob.glob("dataset/train/*.dat")
data_test_files = glob.glob("dataset/test/*.dat")

for filename in data_train_files:
#for filename in data_test_files:
    print(filename)
    data = pd.read_table(filename, sep = "\s+", header = None)

    # clean the data
    df = data[col_all].copy()
    del data
    gc.collect()
    df.columns = col_all_name

    df.dropna(inplace=True)
    df = df[(df["Locomotion"] != 0) &
            (df["LL_Right_Arm"] != 0) &
            (df["ML_Both_Arms"] != 0)]

    # remap the output as 0-indexing to make learning possible
    df["Locomotion"] = df["Locomotion"].map({1: 0, 2: 1, 4: 2, 5: 3})
    df["LL_Right_Arm"] = df["LL_Right_Arm"].map(
        {401: 0, 402: 1, 403: 2, 404: 3, 405: 4, 406: 5, 407: 6, 408: 7,
         409: 8, 410: 9, 411: 10, 412: 11, 413: 12})
    df["ML_Both_Arms"] = df["ML_Both_Arms"].map(
        {406516: 0, 406517: 1, 404516: 2, 404517: 3, 406520: 4, 404520: 5,
         406505: 6, 404505: 7, 406519: 8, 404519: 9, 406511: 10, 404511: 11,
         406508: 12, 404508: 13, 408512: 14, 407521: 15, 405506: 16})
    
    df[col_feature_name] = df[col_feature_name].apply(normalize, axis=1)
    
    file = filename.split("\\")[1][:-4]
    df.to_csv("dataset_cleaned/train_temp/" + file + ".csv")
    #df.to_csv("dataset_cleaned/test/" + file + ".csv", index=False)

dataset/train\S2-ADL2.dat


  


dataset/train\S2-ADL3.dat
dataset/train\S2-ADL4.dat
dataset/train\S3-ADL1.dat
dataset/train\S3-ADL2.dat
dataset/train\S3-ADL3.dat
dataset/train\S3-ADL4.dat
dataset/train\S4-ADL1.dat
dataset/train\S4-ADL2.dat
