In [None]:
import torch
import torch.nn.functional as F
import pandas as pd
import numpy as np
import glob
from enum import Enum
from torch.utils.data import TensorDataset, DataLoader
import math
import pickle
import datetime
import os 
# CUDA debugging flags, exported through the process environment before any
# CUDA work is done further down in the notebook.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["TORCH_USE_CUDA_DSA"] = "1"
# Mini-batch size used by the train, test and validation DataLoaders.
BATCH_SIZE = 1024
#TODO: build a validation/test split

In [None]:
# Select the compute device: use the GPU when CUDA is available, otherwise
# fall back to the CPU. The device must not be overwritten with a hard-coded
# torch.device("cuda") afterwards, or every later .to(device) call fails on
# machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
print(torch.cuda.is_available())

In [None]:
# Import the raw per-subject recordings (subjects 1..30), align the
# physiological samples with the annotations and write one gap-free CSV
# per subject.

subjects = []
for i in range(1, 31):
    # Physiological signals: drop the video id, cast the timestamp to int32.
    interimPysch = (
        pd.read_csv(f'.\\CASE_full\\data\\non-interpolated\\physiological\\sub_{i}.csv')
        .drop(columns=['video'])
        .astype({'daqtime': 'int32'})
    )
    # Annotations: same treatment, keyed on their own timestamp column.
    interimAnote = (
        pd.read_csv(f'.\\CASE_full\\data\\non-interpolated\\annotations\\sub_{i}.csv')
        .drop(columns=['video'])
        .astype({'jstime': 'int32'})
    )
    final = (
        interimPysch.merge(interimAnote, left_on='daqtime', right_on='jstime', how='outer')
        # both timestamp columns are removed once the rows are aligned
        .drop(columns=['jstime', 'daqtime'])
        # fill gaps from the previous row first, then from the next row
        .ffill()
        .bfill()
    )
    final.to_csv(f'.\\Processed-Data\\Filled-Data\\sub_{i}_processed.csv')
    subjects.append(final)
subjects[0]



In [None]:

def createStressLevel(Dataframe):
    """Derive a per-row stress score from the 'valence' and 'arousal' columns.

    The score is computed as

        stress = (arousal - 0.5) * cos((pi / 18) * (valence - 0.5))

    so it grows with arousal and is damped as valence increases.
    NOTE(review): the constants look tailored to a 0.5..9.5 rating scale
    (the cosine argument would then span 0..pi/2) -- confirm against the
    dataset description.

    The computation is vectorised and positional, so it works for any
    DataFrame index (e.g. after filtering or concatenation), not only for
    a default 0..n-1 index.

    Args:
        Dataframe: pandas DataFrame with numeric 'valence' and 'arousal'
            columns.

    Returns:
        list of float: one stress value per row, in row order.

    Raises:
        KeyError: if either column is missing.
    """
    valence = Dataframe['valence'].to_numpy(dtype=float)
    arousal = Dataframe['arousal'].to_numpy(dtype=float)
    stress = (arousal - 0.5) * np.cos((np.pi / 18) * (valence - 0.5))
    # Return a plain list, as before, so callers can keep assigning it to a column.
    return stress.tolist()


In [None]:
# Reload the filled per-subject CSVs, attach the stress label and split by
# subject: 1-28 -> training list, 29 -> test frame, 30 -> validation frame.
subjects = []

for i in range(1, 31):
    temp = pd.read_csv(f'.\\Processed-Data\\Filled-Data\\sub_{i}_processed.csv')
    temp['StressLevel'] = createStressLevel(temp)
    if i == 29:
        testSubjects = temp
    elif i == 30:
        validationSubjects = temp
    else:
        subjects.append(temp)

In [None]:
# Combine the per-subject frames into train / test / validation features and labels.
# It may be required that timestamps be dropped from the prediction model entirely, or at
# least switched to date-time, as the model may try to learn emotions based on when
# measuring starts and finishes. This could be problematic, as sessions in the field could
# run longer than any training session, or disconnects could restart sessions.
def _split_features_and_labels(frame):
    """Split a labelled subject frame into (feature frame, label tensor).

    Drops the CSV index column ('Unnamed: 0'), takes 'StressLevel' as the
    label, and removes the label together with the 'valence' and 'arousal'
    columns it was derived from. The input frame is not modified.
    """
    frame = frame.drop(columns=['Unnamed: 0'])
    labels = torch.Tensor(frame['StressLevel'].values)
    features = frame.drop(columns=['StressLevel', 'valence', 'arousal'])
    return features, labels


def _dump_pickle(obj, path):
    """Pickle obj to path, closing the file handle even if dumping fails."""
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)


datasetFrame, trainLabels = _split_features_and_labels(pd.concat(subjects))
testFrame, testLabels = _split_features_and_labels(testSubjects)
validationFrame, validationLabels = _split_features_and_labels(validationSubjects)

datasetFrame.to_csv('.\\Processed-Data\\STRESSPRED2\\trainingData.csv',index=False)
testFrame.to_csv('.\\Processed-Data\\STRESSPRED2\\testData.csv',index=False)
validationFrame.to_csv('.\\Processed-Data\\STRESSPRED2\\validationData.csv',index=False)
_dump_pickle(trainLabels, '.\\Processed-Data\\STRESSPRED2\\trainLabels.pkl')
_dump_pickle(testLabels, '.\\Processed-Data\\STRESSPRED2\\testLabels.pkl')
_dump_pickle(validationLabels, '.\\Processed-Data\\STRESSPRED2\\validationLabels.pkl')

  


In [None]:
# Reload the persisted labels (moved to the selected device) and feature frames.
def _load_pickle(path):
    """Unpickle the object stored at path, closing the file handle afterwards.

    pickle can execute arbitrary code while loading: only use this on files
    written by this notebook, never on untrusted input.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


trainLabels = _load_pickle('.\\Processed-Data\\STRESSPRED2\\trainLabels.pkl').to(device)
testLabels = _load_pickle('.\\Processed-Data\\STRESSPRED2\\testLabels.pkl').to(device)
validationLabels = _load_pickle('.\\Processed-Data\\STRESSPRED2\\validationLabels.pkl').to(device)
datasetFrame = pd.read_csv('.\\Processed-Data\\STRESSPRED2\\trainingData.csv')
testFrame = pd.read_csv('.\\Processed-Data\\STRESSPRED2\\testData.csv')
validationFrame = pd.read_csv('.\\Processed-Data\\STRESSPRED2\\validationData.csv')

In [None]:
# Show the reloaded training-label tensor as the cell output.
trainLabels

In [None]:
# Turn each feature frame into a float32 tensor placed on the selected device.
trainTensor, testTensor, validationTensor = (
    torch.tensor(frame.values.astype(float), dtype=torch.float32).to(device)
    for frame in (datasetFrame, testFrame, validationFrame)
)

In [None]:
# Report the shape and dtype of the three feature tensors (one line each).
print(
    f'trainTensor shape: {trainTensor.shape} testTensor shape: {testTensor.shape} validationTensor shape: {validationTensor.shape}',
    f'Train type: {trainTensor.dtype} Test type: {testTensor.dtype} Validation type: {validationTensor.dtype}',
    sep='\n',
)

In [None]:
# Pair every feature tensor with its labels and batch the three splits.
TrainSet = TensorDataset(trainTensor,trainLabels)
ValidationSet = TensorDataset(validationTensor,validationLabels)
TestSet = TensorDataset(testTensor,testLabels)
# The training rows are the subjects' recordings concatenated one after the
# other, so unshuffled batches come from a single subject at a time and are
# strongly correlated. Reshuffle every epoch so each SGD step sees a mix.
TrainLoader = DataLoader(TrainSet, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation loaders keep the original row order.
TestLoader = DataLoader(TestSet, batch_size=BATCH_SIZE)
ValidationLoader = DataLoader(ValidationSet, batch_size=BATCH_SIZE)
print(f'Trainset Length: {len(TrainSet)} Testset Length: {len(TestSet)} Validationset Length: {len(ValidationSet)}')

In [None]:
#Defining the model
#Proposed function for stress level: S = A/V, where S is stress, A is arousal, and V is valence.
#Since valence represents the positivity of the emotion, it is inversely proportional to stress: higher valence means a better emotion.
#Since higher arousal can generally be translated to ... (TODO: this sentence was left unfinished)
class StressScanner(torch.nn.Module):
    """Small Conv1d + MLP regressor producing one stress value per sample.

    Three Conv1d layers (kernel 3, padding 1) widen the 8 input channels to
    16, 32 and 64 channels; three linear layers then reduce 64 -> 41 -> 23 -> 1.

    Input:  tensor of shape (batch, 8, 1) -- 8 feature channels and a length
            axis of size 1 (the length axis must be 1 for the first linear
            layer to see 64 features).
    Output: tensor of shape (batch, 1).
    """

    def __init__(self):
        super().__init__()
        self.ConvolutionIn = torch.nn.Conv1d(in_channels=8, out_channels=16,kernel_size=3,padding=1)
        self.ConvHidden1 = torch.nn.Conv1d(in_channels=16, out_channels=32,kernel_size=3,padding=1)
        self.ConvHidden2 = torch.nn.Conv1d(in_channels=32, out_channels=64,kernel_size=3,padding=1)
        self.linear1 = torch.nn.Linear(64, 41)
        self.linear2 = torch.nn.Linear(41, 23)
        self.linearOut = torch.nn.Linear(23, 1)

    def forward(self, x):
        """Run the network on x of shape (batch, 8, 1); returns (batch, 1)."""
        x = F.relu(self.ConvolutionIn(x))
        x = F.relu(self.ConvHidden1(x))
        x = F.relu(self.ConvHidden2(x))
        # Drop only the trailing length axis. A bare squeeze() would also
        # remove the batch axis when the batch holds a single sample, which
        # silently changes the output shape from (1, 1) to (1,).
        x = x.squeeze(-1)
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = self.linearOut(x)
        return x

In [None]:
# Build the network and place its parameters on the selected device.
model = StressScanner().to(device)

In [None]:

# Mean-squared-error loss for the scalar regression target.
loss_fn = torch.nn.MSELoss().to(device)

#We need to experiment with momentum and learning rate to optimize training time and accuracy.
#Accuracy should not be expected on the first run, as the model is relying on daq time and bvp to learn.
#daqtime will need to be removed, as in the field it may introduce far too many inconsistencies.

optimizer = torch.optim.SGD(model.parameters(), lr=0.001,momentum=0.1)
# Note: assigning plain Python variables named CUDA_LAUNCH_BLOCKING /
# TORCH_USE_CUDA_DSA here has no effect on CUDA; those flags are environment
# variables and are already exported via os.environ in the first cell.

In [None]:
#Train loop
#Note: tb_writer is just a tensorboard writer for statistics
def train(epoch, tb_writer, log_every=1000):
    """Run one optimisation pass over ``TrainLoader``.

    Uses the notebook-level ``model``, ``optimizer``, ``loss_fn`` and
    ``TrainLoader``.

    Args:
        epoch: epoch number, used only in the progress message.
        tb_writer: tensorboard writer placeholder; currently unused.
        log_every: print the mean loss of the last ``log_every`` batches
            each time that many batches have been processed (0/None
            disables progress output).

    Returns:
        float: mean batch loss over the whole epoch (0.0 if the loader is
        empty). Every batch contributes, including those after the last
        progress message.
    """
    total_loss = 0.0    # sum of batch losses over the epoch
    window_loss = 0.0   # sum of batch losses since the last progress message
    num_batches = 0
    for features, labels in TrainLoader:
        optimizer.zero_grad()
        # Conv1d wants (batch, channels, length): add a length axis of size 1.
        output = model(features.unsqueeze(2))
        # Give the labels a trailing axis so they line up with the model output.
        loss = loss_fn(output, labels.unsqueeze(1))
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        total_loss += batch_loss
        window_loss += batch_loss
        num_batches += 1
        if log_every and num_batches % log_every == 0:
            print(f'Epoch: {epoch} Loss: {window_loss / log_every}')
            window_loss = 0.0
    if num_batches == 0:
        return 0.0
    return total_loss / num_batches

In [None]:
# Train for EPOCHS epochs; after each epoch evaluate on the validation split
# and checkpoint the weights whenever the validation loss improves.
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
EPOCHS = 10

# torch.save does not create missing directories.
os.makedirs('./models', exist_ok=True)

best_loss = float('inf')

for epoch in range(EPOCHS):
    print(f'Epoch: {epoch}')
    model.train(True)
    train_loss = train(epoch, None)

    model.eval()
    running_loss = 0.0
    num_batches = 0
    with torch.no_grad():
        for validation_inputs, validation_labels in ValidationLoader:
            # Same reshaping as in training: (batch, channels, 1) inputs, (batch, 1) labels.
            validation_outputs = model(validation_inputs.unsqueeze(2))
            validation_loss = loss_fn(validation_outputs, validation_labels.unsqueeze(1))
            # .item() keeps the running total a plain float instead of a device tensor.
            running_loss += validation_loss.item()
            num_batches += 1
    # Mean over the number of batches. The previous `running_loss / i+1`
    # evaluated as (running_loss / i) + 1 and divided by zero for one batch.
    avg_validation_loss = running_loss / max(num_batches, 1)
    print(f'Train Loss: {train_loss} Validation Loss: {avg_validation_loss}')

    #track performance
    if avg_validation_loss < best_loss:
        best_loss = avg_validation_loss
        model_path = f'./models/StressPredictor-{timestamp}.pth'
        torch.save(model.state_dict(), model_path)
    
    
    

In [None]:
#Testing
# Evaluate on the test split: per-batch mean absolute error (best / worst)
# and the overall MAE; predictions, labels and one input column are kept
# per batch for plotting.
worst = 0
best = 99999
model.eval()
total_err = 0      # sum of absolute errors over all test samples
total_samples = 0  # number of test samples seen so far
outs_to_plot = []
labels_to_plot = []
inputs_to_plot = []
with torch.no_grad():  # inference only: no autograd graph needed
    for i, (features, labels) in enumerate(TestLoader):
        # (batch, channels) -> (batch, channels, 1) for Conv1d; flatten the
        # prediction to (batch,) so it lines up element-wise with the labels
        # and stays a list per batch even when the batch holds one sample.
        output = model(features.unsqueeze(2)).reshape(-1)
        outs_to_plot.append(output.tolist())
        labels_to_plot.append(labels.tolist())
        inputs_to_plot.append(features[:, 1].tolist())

        # Sum of per-sample absolute errors. Taking abs() of the difference
        # of the sums would let positive and negative errors cancel out.
        error = torch.sum(torch.abs(output - labels)).item()
        batch_len = labels.shape[0]
        total_err = total_err + error
        total_samples += batch_len

        # Divide by the real batch length; the last batch may be shorter than BATCH_SIZE.
        MAE = error / batch_len
        if MAE > worst:
            worst = MAE
        if MAE < best:
            best = MAE
        if(i % 1000 == 0):
            print(f"BEST: {best}\nWORST: {worst} \nTOTAL: {total_err / total_samples}\n")
print("STATS FOR TEST:\n")
print(f"BEST: {best}\nWORST: {worst} \nTOTAL: {total_err / max(total_samples, 1)}\n")
        
        

In [None]:
import matplotlib.pyplot as plt

# Flatten the per-batch lists collected during testing into 1-D arrays.
x = np.array([item for subset in inputs_to_plot for item in subset])
y_pred = np.array([item for subset in outs_to_plot for item in subset])
y_true = np.array([item for subset in labels_to_plot for item in subset])

plt.figure()
# Label each series where it is drawn so the legend cannot get out of sync
# with the plotting order (the labels were previously swapped).
plt.scatter(x, y_true, label="True Values")
plt.scatter(x, y_pred, label="Predicted Values")
plt.xlabel("Input feature (column 1)")
plt.ylabel("Stress level")
plt.title("Test set: predicted vs. true stress level")
plt.legend()
plt.show()



In [None]:
# Show the per-batch label lists collected during testing as the cell output.
labels_to_plot