In [1]:
import torch
import torch.nn.functional as F
import pandas as pd
import numpy as np
import glob
from enum import Enum
from torch.utils.data import TensorDataset, DataLoader
import math
import pickle
import datetime
import os 
# CUDA debugging switches, set through the process environment.
# NOTE(review): PyTorch's error text describes TORCH_USE_CUDA_DSA as a compile-time
# option - confirm that setting it as an environment variable has any effect.
os.environ.update({
    "CUDA_LAUNCH_BLOCKING": "1",
    "TORCH_USE_CUDA_DSA": "1",
})
# Number of samples per mini-batch used by every DataLoader in this notebook.
BATCH_SIZE = 224
#TODO: build a validation/test split

In [2]:
#Check if GPU is available and select if available
# Use the GPU when one is present, otherwise fall back to the CPU. The choice must not
# be overwritten afterwards, or the notebook stops working on CPU-only machines.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
print(torch.cuda.is_available())

cuda
True


In [3]:
#importing user data
# Merge each subject's physiological recording with its annotations, fill the gaps,
# and save one processed CSV per subject.

# Build paths with os.path.join so the notebook is not tied to Windows separators.
RAW_DATA_DIR = os.path.join('.', 'CASE_full', 'data', 'non-interpolated')
FILLED_DATA_DIR = os.path.join('.', 'Processed-Data', 'Filled-Data')
# to_csv does not create missing folders, so make sure the output folder exists.
os.makedirs(FILLED_DATA_DIR, exist_ok=True)

subjects = []
for i in range(1, 31):
    physio = pd.read_csv(os.path.join(RAW_DATA_DIR, 'physiological', f'sub_{i}.csv'))
    physio = physio.drop(columns=['video'])
    # Cast the timestamp to int32 so it can serve as a join key.
    physio['daqtime'] = physio['daqtime'].astype('int32')

    annotations = pd.read_csv(os.path.join(RAW_DATA_DIR, 'annotations', f'sub_{i}.csv'))
    annotations = annotations.drop(columns=['video'])
    annotations['jstime'] = annotations['jstime'].astype('int32')

    # Outer join keeps every physiological row and every annotation row.
    final = physio.merge(annotations, left_on='daqtime', right_on='jstime', how='outer')
    # Both time keys are dropped once the rows are aligned.
    final = final.drop(columns=['jstime', 'daqtime'])
    # Forward-fill first, then back-fill whatever is still missing at the start.
    final = final.ffill().bfill()
    # The index is written as well (to_csv default); it comes back as the
    # 'Unnamed: 0' column that later cells drop.
    final.to_csv(os.path.join(FILLED_DATA_DIR, f'sub_{i}_processed.csv'))
    subjects.append(final)
subjects[0]



In [None]:

def createStressLevel(Dataframe):
    """Derive an integer stress level for every row of ``Dataframe``.

    The level is ``round((arousal - 0.5) * cos((pi / 18) * (valence - 0.5)))``,
    computed for the whole frame at once.

    Parameters:
        Dataframe: pandas DataFrame with numeric 'valence' and 'arousal' columns.
            Any index is accepted; rows are processed by position.

    Returns:
        list[int]: one stress level per row, in row order (empty for an empty frame).

    Raises:
        KeyError: if 'valence' or 'arousal' is missing.
        ValueError: if any computed value is NaN or infinite.
    """
    # Work on positional NumPy arrays so a non-default index (e.g. after concat or
    # filtering) cannot cause label-lookup errors.
    valence = Dataframe['valence'].to_numpy(dtype=float)
    arousal = Dataframe['arousal'].to_numpy(dtype=float)
    stress = (arousal - 0.5) * np.cos((np.pi / 18) * (valence - 0.5))
    if not np.isfinite(stress).all():
        raise ValueError("valence/arousal contain NaN or infinite values; cannot derive stress levels")
    # np.rint rounds halves to the nearest even integer, the same rule as Python's round().
    return np.rint(stress).astype(int).tolist()


In [None]:
#loading the processed per-subject datasets and adding the StressLevel label
def _load_subject(subject_id):
    """Read one processed subject CSV and append its 'StressLevel' column."""
    path = os.path.join('.', 'Processed-Data', 'Filled-Data', f'sub_{subject_id}_processed.csv')
    frame = pd.read_csv(path)
    frame['StressLevel'] = createStressLevel(frame)
    return frame

# Split by subject: 1-28 for training, 29 held out for testing, 30 for validation.
subjects = [_load_subject(i) for i in range(1, 29)]
testSubjects = _load_subject(29)
validationSubjects = _load_subject(30)


   

In [None]:

# Preview the second training frame (list index 1), including the derived StressLevel column.
subjects[1]

Unnamed: 0.1,Unnamed: 0,ecg,bvp,gsr,rsp,skt,emg_zygo,emg_coru,emg_trap,valence,arousal,StressLevel
0,0.0,0.856,36.545,24.949,27.649,31.675,8.438,7.781,-52.548,5.0,5.0,3
1,1.0,0.869,36.671,24.953,27.668,31.689,8.602,7.863,-52.466,5.0,5.0,3
2,2.0,0.859,36.661,24.945,27.649,31.679,8.561,7.781,-52.507,5.0,5.0,3
3,3.0,0.866,36.410,24.945,27.668,31.682,8.684,7.945,-52.548,5.0,5.0,3
4,4.0,0.866,36.526,24.953,27.649,31.686,8.644,7.904,-52.466,5.0,5.0,3
...,...,...,...,...,...,...,...,...,...,...,...,...
2358253,2451578.0,1.750,35.799,24.432,27.049,28.233,7.534,7.575,-80.228,5.0,5.0,3
2358254,2451579.0,1.799,35.896,24.452,27.058,28.236,7.617,7.575,-80.433,5.0,5.0,3
2358255,2451580.0,1.815,35.974,24.417,27.068,28.240,7.534,7.575,-80.515,5.0,5.0,3
2358256,2451581.0,1.828,35.828,24.421,27.049,28.229,7.534,7.452,-80.556,5.0,5.0,3


In [None]:
#combining Dataset and train set frames
#It may be required that timestamps be dropped from the prediction model entirely, or at least switched to datetime, as the model
#may try to learn emotions based on when measuring starts and finishes. This could be problematic,
#as sessions in the field could go longer than any training session, or disconnects could restart sessions.
PROCESSED_DIR = os.path.join('.', 'Processed-Data')


def _split_features_labels(frame):
    """Split a labelled subject frame into (feature DataFrame, label tensor).

    Drops the saved-index column 'Unnamed: 0', turns 'StressLevel' into a tensor via
    torch.Tensor (which yields the default float dtype), and removes the label plus
    the 'valence'/'arousal' columns it was derived from. The input frame is not
    modified (drop returns a new frame).
    """
    frame = frame.drop(columns=['Unnamed: 0'])
    labels = torch.Tensor(frame['StressLevel'].values.astype(int))
    features = frame.drop(columns=['StressLevel', 'valence', 'arousal'])
    return features, labels


def _dump_pickle(obj, filename):
    """Pickle ``obj`` into PROCESSED_DIR, closing the file handle deterministically."""
    with open(os.path.join(PROCESSED_DIR, filename), 'wb') as handle:
        pickle.dump(obj, handle)


datasetFrame, trainLabels = _split_features_labels(pd.concat(subjects))
testFrame, testLabels = _split_features_labels(testSubjects)
validationFrame, validationLabels = _split_features_labels(validationSubjects)

datasetFrame.to_csv(os.path.join(PROCESSED_DIR, 'trainingData.csv'), index=False)
testFrame.to_csv(os.path.join(PROCESSED_DIR, 'testData.csv'), index=False)
validationFrame.to_csv(os.path.join(PROCESSED_DIR, 'validationData.csv'), index=False)
_dump_pickle(trainLabels, 'trainLabels.pkl')
_dump_pickle(testLabels, 'testLabels.pkl')
_dump_pickle(validationLabels, 'validationLabels.pkl')

  


In [3]:
# Reload the persisted features and labels so the notebook can resume from here.
PROCESSED_DIR = os.path.join('.', 'Processed-Data')


def _load_labels(filename):
    """Unpickle a label tensor from PROCESSED_DIR and move it to ``device``."""
    # NOTE: pickle can execute arbitrary code while loading; only read files that
    # this notebook wrote itself.
    with open(os.path.join(PROCESSED_DIR, filename), 'rb') as handle:
        return pickle.load(handle).to(device)


trainLabels = _load_labels('trainLabels.pkl')
testLabels = _load_labels('testLabels.pkl')
validationLabels = _load_labels('validationLabels.pkl')
datasetFrame = pd.read_csv(os.path.join(PROCESSED_DIR, 'trainingData.csv'))
testFrame = pd.read_csv(os.path.join(PROCESSED_DIR, 'testData.csv'))
validationFrame = pd.read_csv(os.path.join(PROCESSED_DIR, 'validationData.csv'))

In [4]:
def _to_device_tensor(frame):
    """Convert a DataFrame's values into a float32 tensor placed on `device`."""
    values = frame.values.astype(float)
    return torch.tensor(values, dtype=torch.float32).to(device)


# Feature tensors for the three splits.
trainTensor = _to_device_tensor(datasetFrame)
testTensor = _to_device_tensor(testFrame)
validationTensor = _to_device_tensor(validationFrame)

In [5]:
# Sanity check: report the shape and dtype of each feature tensor.
shape_report = f'trainTensor shape: {trainTensor.shape} testTensor shape: {testTensor.shape} validationTensor shape: {validationTensor.shape}'
dtype_report = f'Train type: {trainTensor.dtype} Test type: {testTensor.dtype} Validation type: {validationTensor.dtype}'
print(shape_report, dtype_report, sep='\n')

trainTensor shape: torch.Size([68511001, 8]) testTensor shape: torch.Size([2451501, 8]) validationTensor shape: torch.Size([2451501, 8])
Train type: torch.float32 Test type: torch.float32 Validation type: torch.float32


In [14]:
# Pair every feature tensor with its label tensor.
TrainSet, ValidationSet, TestSet = (
    TensorDataset(features, targets)
    for features, targets in (
        (trainTensor, trainLabels),
        (validationTensor, validationLabels),
        (testTensor, testLabels),
    )
)

# Batched iterators over the three datasets; none of them shuffles.
# NOTE(review): the training rows are concatenated subject by subject, so unshuffled
# batches are drawn from one subject at a time - confirm this is intended.
TrainLoader = DataLoader(TrainSet, batch_size=BATCH_SIZE, shuffle=False)
TestLoader = DataLoader(TestSet, batch_size=BATCH_SIZE)
ValidationLoader = DataLoader(ValidationSet, batch_size=BATCH_SIZE)

print(f'Trainset Length: {len(TrainSet)} Testset Length: {len(TestSet)} Validationset Length: {len(ValidationSet)}')

Trainset Length: 68511001 Testset Length: 2451501 Validationset Length: 2451501


In [7]:
#defining the model
#proposed function for stress level: S = A/V, where S is stress, A is arousal, and V is valence
#(note: createStressLevel above actually computes S = round((A - 0.5) * cos((pi/18) * (V - 0.5))))
#since valence represents the positivity of the emotion, it is inversely proportional to stress: a higher valence means a better emotion
#since higher arousal generally translates to higher stress, stress is taken to be proportional to arousal
class StressScanner(torch.nn.Module):
    """1-D convolutional classifier mapping 8 input channels to 10 class logits.

    Three Conv1d layers (8 -> 16 -> 32 -> 64 channels, kernel 3, padding 1) are
    followed by three Linear layers (64 -> 41 -> 23 -> 10). ReLU is applied after
    every layer except the last, so the output is raw logits.
    """

    def __init__(self):
        super().__init__()
        # padding=1 with kernel_size=3 keeps the sequence length unchanged.
        self.ConvolutionIn = torch.nn.Conv1d(in_channels=8, out_channels=16, kernel_size=3, padding=1)
        self.ConvHidden1 = torch.nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.ConvHidden2 = torch.nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.linear1 = torch.nn.Linear(64, 41)
        self.linear2 = torch.nn.Linear(41, 23)
        self.linearOut = torch.nn.Linear(23, 10)

    def forward(self, x):
        """Return class logits for ``x``.

        Parameters:
            x: tensor of shape (batch, 8, 1), as produced by the notebook's
               ``unsqueeze(2)`` calls; an unbatched (8, 1) tensor also works.

        Returns:
            Tensor of shape (batch, 10), or (10,) for unbatched input.
        """
        x = F.relu(self.ConvolutionIn(x))
        x = F.relu(self.ConvHidden1(x))
        x = F.relu(self.ConvHidden2(x))
        # Drop only the trailing length-1 sequence axis. A bare squeeze() would also
        # remove the batch axis when the batch holds a single sample.
        x = x.squeeze(-1)
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = self.linearOut(x)
        return x

In [8]:
# Build the network and place its parameters on the selected device.
model = StressScanner().to(device)

In [31]:

# Cross-entropy over the model's class logits.
loss_fn = torch.nn.CrossEntropyLoss().to(device)

#We need to experiment with momentum and learning rate to optimize training time and accuracy.
#Accuracy should not be expected on the first run, as the model is relying on daqtime and bvp to learn.
#daqtime will need to be removed, as in the field it may introduce far too many inconsistencies.
#NOTE(review): the data-import cell already drops 'daqtime', so the two lines above may be outdated - confirm.

optimizer = torch.optim.ASGD(model.parameters(), lr=0.001)
# The CUDA debug flags are set through os.environ in the first cell. Plain Python
# variables with the same names have no effect, so they are not assigned here.

In [10]:
#Train loop
#Note: tb_writer is just a tensorboard writer for statistics
def train(epoch, tb_writer, log_every=1000):
    """Run one pass over ``TrainLoader``, updating ``model`` with ``optimizer``.

    Parameters:
        epoch: epoch number, used only in the progress message.
        tb_writer: reserved for a TensorBoard writer; currently unused.
        log_every: report the running loss every ``log_every`` batches (default 1000).

    Returns:
        float: mean per-batch loss of the most recently reported window
        (0.0 if the loader produced no batches).

    Raises:
        ValueError: if ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError("log_every must be at least 1")

    window_loss = 0.0     # sum of batch losses since the last report
    window_batches = 0    # number of batches in that sum
    last_loss = 0.0
    for i, (features, labels) in enumerate(TrainLoader):
        optimizer.zero_grad()
        # Each row is one sample of 8 channels; add a length-1 sequence axis for Conv1d.
        output = model(features.unsqueeze(2))

        # Add a matching trailing axis to logits and targets so the loss receives
        # (N, C, 1) logits with (N, 1) integer targets.
        output = output.unsqueeze(2)
        labels = labels.long().unsqueeze(1)

        loss = loss_fn(output, labels)
        loss.backward()
        optimizer.step()

        window_loss += loss.item()
        window_batches += 1
        if i % log_every == 0:
            # Report a mean, not a sum: the first window holds a single batch and the
            # later ones hold log_every batches, so sums would not be comparable.
            last_loss = window_loss / window_batches
            print(f'Epoch: {epoch} Loss: {last_loss}')
            window_loss = 0.0
            window_batches = 0
    return last_loss

In [None]:
# Train for EPOCHS epochs, validating after each one and keeping the best checkpoint.
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
EPOCHS = 10

best_loss = float('inf')

# torch.save does not create missing folders.
os.makedirs('./models', exist_ok=True)

for epoch in range(EPOCHS):
    print(f'Epoch: {epoch}')
    model.train(True)
    train_loss = train(epoch, None)

    # Validation pass: no gradient tracking, model in evaluation mode.
    running_loss = 0.0
    model.eval()
    with torch.no_grad():
        for validation_inputs, validation_labels in ValidationLoader:
            # Same reshaping as in training: (N, 8) -> (N, 8, 1) inputs,
            # (N, 10) -> (N, 10, 1) logits, (N,) -> (N, 1) integer targets.
            validation_outputs = model(validation_inputs.unsqueeze(2)).unsqueeze(2)
            validation_labels = validation_labels.long().unsqueeze(1)
            # .item() keeps the running total a plain Python float.
            running_loss += loss_fn(validation_outputs, validation_labels).item()
    # Mean loss per batch. The parentheses-free form `running_loss / i+1` would
    # compute (running_loss / i) + 1 instead.
    avg_validation_loss = running_loss / len(ValidationLoader)
    print(f'Train Loss: {train_loss} Validation Loss: {avg_validation_loss}')

    #track performance
    if avg_validation_loss < best_loss:
        best_loss = avg_validation_loss
        model_path = f'./models/StressPredictor-{timestamp}.pth'
        torch.save(model.state_dict(), model_path)
    
    
    

In [34]:
# Reloading a previously saved checkpoint into a fresh model instance
saved_model = StressScanner()
# os.path.join avoids the "\m" / "\S" sequences of a hand-written Windows path, which
# are invalid string escapes, and also works on non-Windows systems.
checkpoint_path = os.path.join(".", "models", "StressPredictor-2023-08-13_11-01-12.pth")
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
saved_model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model = saved_model.to(device)
print(model)


StressScanner(
  (ConvolutionIn): Conv1d(8, 16, kernel_size=(3,), stride=(1,), padding=(1,))
  (ConvHidden1): Conv1d(16, 32, kernel_size=(3,), stride=(1,), padding=(1,))
  (ConvHidden2): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (linear1): Linear(in_features=64, out_features=41, bias=True)
  (linear2): Linear(in_features=41, out_features=23, bias=True)
  (linearOut): Linear(in_features=23, out_features=10, bias=True)
)


In [43]:
#testing
def test():
    """Print the mean per-batch loss of ``model`` over ``TestLoader``.

    Gradients are disabled for the pass. The caller is responsible for putting the
    model in evaluation mode beforehand. Returns None.
    """
    total_loss = 0.0
    with torch.no_grad():
        for features, label in TestLoader:
            # (N, 8) -> (N, 8, 1) inputs; (N, 10) -> (N, 10, 1) logits; (N,) -> (N, 1) targets.
            output = model(features.unsqueeze(2)).unsqueeze(2)
            label = label.long().unsqueeze(1)
            # .item() accumulates a plain float, so the message shows a number
            # rather than a tensor repr.
            total_loss += loss_fn(output, label).item()
    print(f"Test Loss: {total_loss/len(TestLoader)}")



In [44]:
# Switch the model to evaluation mode, then print the average loss over TestLoader.
model.eval()
test()

Test Loss: 0.9593360424041748
