In [1]:
import sys
import os
import numpy as np
import torch
from torch import device, nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from os import mkdir
import torch.optim as optim
import scipy
import matplotlib
import plotly.express as px
import pandas as pdfr
from sklearn.metrics import f1_score
import pandas as pd
from sklearn.utils import resample

KeyboardInterrupt: ignored

Only run the following cell if you are using Google Colab:

In [None]:
# Mount Google Drive when running on Google Colab. Outside Colab the
# `google.colab` package cannot be imported; in that case the mount is skipped
# so that the notebook can still be executed top to bottom.
try:
  from google.colab import drive
except ImportError:
  drive = None
  print('google.colab is not available - skipping Google Drive mount.')
else:
  drive.mount('/content/drive')

Specify the directory of the folder 'Required_Functions':

In [None]:
# Root folder of the IHDA project on the mounted Google Drive.
dir_IHDA = '/content/drive/MyDrive/IHDA/'
# Folder holding the project's helper modules (added to sys.path further below).
dir_required_functions = f'{dir_IHDA}Required_Functions/'

In [None]:
import sys

# Put the helper-module folder itself ('') and each of its sub-folders on the
# import path, in the same order as before, so the project modules below resolve.
for _subfolder in ('', 'VAE', 'FC', 'Potential', 'Load_Process_Data', 'Hyperparametersearch'):
  sys.path.append(dir_required_functions + _subfolder)

# Project-local modules found via the paths registered above.
import VAE
import basic_functions
import load_process_data as lpd
import train_VAE
import losses
import eval_VAE
import FC
import eval_FC
import potential
import eval_potential
import retrain_FC
import hyperparameter_search as hps

In this notebook the baseline classifier is trained.

# **Load training, validation and testing data:**

In the following, the different datasets are loaded. Here is a short definition of each dataset:

*   **Training Dataset:** The sample of data used to fit the model.
*  **Validation Dataset:** The sample of data used to provide an unbiased evaluation of a model fit on the training dataset while tuning model hyperparameters. The evaluation becomes more biased as skill on the validation dataset is incorporated into the model configuration.
*   **Test Dataset:** The sample of data used to provide an unbiased evaluation of a final model fit on the training dataset.


For training and testing, the data from the SHL challenge 2018 is used (http://www.shl-dataset.org/activity-recognition-challenge/). The data was recorded by a single user, with the recording device worn at the hip.



In [2]:
# Directory of the non-processed data.
dir_data_np = f'{dir_IHDA}Data/'
# Directory of the preprocessed datasets.
dir_data_pp = f'{dir_IHDA}Baseline_classifier/Classifier/Data/'
# Batch size for the dataloaders.
batch_size = 500

Specify whether you want to sample a new dataset or whether you would like to retrain your classifier on an already split and stored dataset. If you run this notebook for the first time, new_dataset_state needs to be set to 1.

In [3]:
# Switch between building the datasets anew (1) and reloading the datasets
# that were stored on disk by an earlier run (any other value).
new_dataset_state = 1   # If new_dataset_state == 1, a new dataset will be generated.
# Passed to lpd.train_validate_split below; presumably the share of the loaded
# samples that is held out for validation - confirm against lpd.
frac_validate     = 0.1 # Fraction of the data used for validation

Load training, validation and testing data:

In [4]:
#@title

# Either build the datasets from the raw data and store them on disk
# (new_dataset_state == 1), or reload the datasets stored by an earlier run.
if new_dataset_state == 1:
  x, y, x_test, y_test = lpd.load_and_normalize_data(dir_data_np)
  # Split data in train and validation data:
  x_train, y_train, x_validate, y_validate = lpd.train_validate_split(x, y, frac_validate)

  # Upsample train and validation data:
  x_train_upsample, y_train_upsample       = lpd.upsample(x_train, y_train)
  x_validate_upsample, y_validate_upsample = lpd.upsample(x_validate, y_validate)

  # torch.save does not create missing folders, so make sure the target
  # directory exists before the first dataset is written.
  os.makedirs(dir_data_pp, exist_ok=True)

  # File stem -> dataset; every entry is stored as '<stem>.pth' in dir_data_pp.
  datasets_to_store = {
      'x_validate':          x_validate,
      'y_validate':          y_validate,
      'x_train':             x_train,
      'y_train':             y_train,
      'x_test':              x_test,
      'y_test':              y_test,
      'x_validate_upsample': x_validate_upsample,
      'y_validate_upsample': y_validate_upsample,
      'x_train_upsample':    x_train_upsample,
      'y_train_upsample':    y_train_upsample,
  }
  for file_stem, dataset in datasets_to_store.items():
    torch.save(dataset, dir_data_pp + file_stem + '.pth')
else:
  # Reload exactly the files written by the branch above.
  x_test               = torch.load(dir_data_pp+'x_test.pth')
  y_test               = torch.load(dir_data_pp+'y_test.pth')
  x_train              = torch.load(dir_data_pp+'x_train.pth')
  y_train              = torch.load(dir_data_pp+'y_train.pth')
  x_validate           = torch.load(dir_data_pp+'x_validate.pth')
  y_validate           = torch.load(dir_data_pp+'y_validate.pth')
  x_train_upsample     = torch.load(dir_data_pp+'x_train_upsample.pth')
  y_train_upsample     = torch.load(dir_data_pp+'y_train_upsample.pth')
  x_validate_upsample  = torch.load(dir_data_pp+'x_validate_upsample.pth')
  y_validate_upsample  = torch.load(dir_data_pp+'y_validate_upsample.pth')

Specify whether you would like to train on the upsampled dataset or not. The upsampled dataset contains an equal number of samples for every class, which is achieved by upsampling.

In [5]:
# Switch: with upsample_state == 1 the upsampled train and validation sets
# replace the original ones for every cell that follows.
upsample_state = 1

if upsample_state == 1:
  x_train, y_train       = x_train_upsample, y_train_upsample
  x_validate, y_validate = x_validate_upsample, y_validate_upsample

Use the loaded datasets to generate the dataloaders:

In [6]:
# Shuffled loaders for the train, validation and test data.
# NOTE(review): the validation and test loaders are shuffled as well - confirm
# that this is intended for evaluation.
trainloader = lpd.generate_dataloader(x_train, y_train, batch_size, shuffle=True)
validationloader = lpd.generate_dataloader(x_validate, y_validate, batch_size, shuffle=True)
testloader = lpd.generate_dataloader(x_test, y_test, batch_size, shuffle=True)

# Second loader over the training data, created with shuffle=False.
trainloader_no_shuffle = lpd.generate_dataloader(x_train, y_train, batch_size, shuffle=False)

# **Train classifier:**

In [7]:
retrain_state = 0   # If retrain_state == 1, the baseline classifier is retrained below
epochs        = 120 # Number of training epochs
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook also runs on machines (or Colab runtimes) without CUDA.
device        = 'cuda' if torch.cuda.is_available() else 'cpu'

# Folder of the baseline classifier; the best model ('best_model.pth') is read from here.
dir_baseline_classifier = dir_IHDA + 'Baseline_classifier/Classifier/'
# Passed to the FC constructor (presumably the dropout probability - confirm in FC).
fc_base_dropout  = 0.25

# Definition of loss function:
criterion  = nn.CrossEntropyLoss()

Initialize baseline classifier:

In [8]:
# Construct the baseline classifier; fc_base_dropout is handed to the FC constructor.
fc_base  = FC.FC(fc_base_dropout)

Define the optimizer and, if retrain_state == 1, retrain the baseline classifier:

In [9]:
# Adam optimizer over the parameters of the baseline classifier.
# (An SGD variant with lr = 0.05 existed here as commented-out code.)
optimizer = optim.Adam(
    fc_base.parameters(),
    lr=1e-5,
    betas=(0.9, 0.999),
)

# Retraining only happens when it was explicitly requested via retrain_state.
if retrain_state == 1:
  retrain_FC.main_retrain_classifier(
      trainloader,
      validationloader,
      fc_base,
      device,
      criterion,
      optimizer,
      epochs,
      dir_baseline_classifier,
  )

Load the best stored baseline classifier and evaluate it on the training, validation and test data:

In [10]:
# Hand a freshly constructed classifier plus the path of 'best_model.pth' to
# initialize_model (presumably this loads the stored weights - confirm in basic_functions).
dir_best = dir_baseline_classifier + 'best_model.pth'
fc_best = basic_functions.initialize_model(FC.FC(fc_base_dropout), dir_best)

# Loss, accuracy and F1 score of that model on each of the three splits.
train_loss, train_acc, train_f1 = eval_FC.test_classifier(trainloader, fc_best, device, criterion)
valid_loss, valid_acc, valid_f1 = eval_FC.test_classifier(validationloader, fc_best, device, criterion)
test_loss, test_acc, test_f1 = eval_FC.test_classifier(testloader, fc_best, device, criterion)

Print train, validation and test loss, accuracy and f1 score:

In [11]:
# One table with a row per split (Train / Valid / Test) and a column per metric.
d = {
    'Loss':     [train_loss, valid_loss, test_loss],
    'Acc':      [train_acc, valid_acc, test_acc],
    'F1 score': [train_f1, valid_f1, test_f1],
}
df = pd.DataFrame(data=d, index=['Train', 'Valid', 'Test'])
df

Unnamed: 0,Loss,Acc,F1 score
Train,1.42906,0.847523,0.847073
Valid,1.437958,0.836898,0.836389
Test,1.51142,0.760793,0.760542


Print confusion matrix:

In [12]:
# Predict on the test inputs with the evaluated model and build the confusion
# matrix from the predictions and the test labels.
y_pred = eval_FC.classifier_predict(fc_best,x_test,device)
confusion_matrix = eval_FC.calc_confusion_matrix(y_pred,y_test)
# Last expression of the cell: display the confusion matrix.
confusion_matrix 

Unnamed: 0,Still,Walking,Run,Bike,Car,Bus,Train,Subway
Still,10493,157,0,97,210,234,165,220
Walking,263,8197,14,79,1,28,2,143
Run,3,44,3976,32,1,0,0,1
Bike,104,499,125,5292,27,82,3,16
Car,607,7,1,45,9457,4677,214,313
Bus,416,166,1,167,1277,8162,243,346
Train,631,103,0,63,225,484,3669,2603
Subway,266,25,1,9,31,41,844,2773


In [13]:
# Keep a NumPy copy of the confusion-matrix values (via the table's to_numpy()).
confusion_matrix_numpy = confusion_matrix.to_numpy()

In [14]:
# Labels of the eight activity classes; the class count follows from the list.
index = ['Still', 'Walking', 'Run', 'Bike', 'Car', 'Bus', 'Train', 'Subway']
n_classes = len(index)

# Display the confusion matrix converted to percentages.
eval_FC.confusion_matrix_to_per(confusion_matrix, n_classes, index)

Unnamed: 0,Still,Walking,Run,Bike,Car,Bus,Train,Subway
Still,90.64,1.36,0.0,0.84,1.81,2.02,1.43,1.9
Walking,3.01,93.93,0.16,0.91,0.01,0.32,0.02,1.64
Run,0.07,1.08,98.0,0.79,0.02,0.0,0.0,0.02
Bike,1.69,8.12,2.03,86.08,0.44,1.33,0.05,0.26
Car,3.96,0.05,0.01,0.29,61.73,30.53,1.4,2.04
Bus,3.86,1.54,0.01,1.55,11.85,75.73,2.25,3.21
Train,8.11,1.32,0.0,0.81,2.89,6.22,47.17,33.47
Subway,6.67,0.63,0.03,0.23,0.78,1.03,21.15,69.5
