## Imports and Hyper-Parameter

In [1]:
# Notebook Imports
import import_ipynb
from Datasets.RECOLA.Code import labelRECOLAWindowed
from SharedCode import supervisedModelTraining
from SharedCode import semiSupervisedModelTraining

importing Jupyter notebook from c:\Users\User\Documents\University\Final Year Project\Code\Datasets\RECOLA\Code\labelRECOLAWindowed.ipynb
importing Jupyter notebook from c:\Users\User\Documents\University\Final Year Project\Code\SharedCode\supervisedModelTraining.ipynb
importing Jupyter notebook from c:\Users\User\Documents\University\Final Year Project\Code\SharedCode\semiSupervisedModelTraining.ipynb


In [2]:
# Code Imports
import warnings
warnings.filterwarnings("ignore")

import os
import pandas as pd
from pandas.core.frame import DataFrame
import time
import numpy as np

In [3]:
RERUN_CODE = False

## Create Dataset

In [4]:
if "RecolaWindowed.csv" not in os.listdir("Datasets/RECOLA") or RERUN_CODE is True:
  # Dataset Location
  recolaLoc = "Datasets/RECOLA/Dataset/"

  # Get participant dataframes
  participantCSV = []
  for fileName in list(os.listdir(recolaLoc)):
    if fileName[3:] == ".csv":
      temp = pd.read_csv(recolaLoc + fileName)
      nullChecker = temp.isna().any().any()
      if not nullChecker:
        participantCSV.append(temp)

  # Big list with dictionaries for each participants data
  participantDataListMessy = []

  # Get participant data
  for participantDF in participantCSV:
    print("Participant Begun")
    listOfDict = labelRECOLAWindowed.getParticipantData(DataFrame.copy(participantDF), len(participantDataListMessy)+1)
    participantDataListMessy.append(listOfDict)
    print("Participant Completed [" + str(len(participantDataListMessy)) + "/" + str(len(participantCSV)) + "]\n")

  # Flatten the participant data list
  participantDataListFlattened = labelRECOLAWindowed.flattenList(participantDataListMessy)

  # Convert to dataframe
  windowedDataframe = labelRECOLAWindowed.convertToFrame(participantDataListFlattened)

  # Create class labels
  compltededDataFrame = labelRECOLAWindowed.createClassLabels(windowedDataframe)

  # Saving dataframe to csv
  compltededDataFrame.to_csv("Datasets/RECOLA/RecolaWindowed.csv", index=False)

else:
  print("Windowed Dataset has already been created, will not re-execute code.")

Windowed Dataset has already been created, will not re-execute code.


## Supervised Training

### Setup

In [5]:
# Make folder for results
if "Supervised Models" not in os.listdir("Datasets/RECOLA"): os.makedirs("Datasets/RECOLA/Supervised Models") 

# Loading dataset
dataset = pd.read_csv("Datasets/RECOLA/RecolaWindowed.csv")

# Groups
groups = list(dataset["participantNumber"])

# Features
audioFeatures = dataset.filter(regex=f'^{"ComPar"}|{"audio_speech"}', axis=1)
visualFeatures = dataset.filter(regex=f'^{"VIDEO"}|{"Face_detection"}', axis=1)
physiologyFeatures = dataset.filter(regex=f'^{"ECG"}|{"EDA"}', axis=1)
allFeatures = dataset.filter(regex=f'^{"ComPar"}|{"audio_speech"}|{"VIDEO"}|{"Face_detection"}|{"ECG"}|{"EDA"}', axis=1)

# Labels
targetArousal = dataset["classLabelArousal"]
targetValence = dataset["classLabelValence"]

# Number of Folds
folds = 9

# Grouping data for easy running
featureList = ["Audio", "Visual", "Physiology", "All"]
targetList = ["Arousal", "Valence"]
valueDict = {"Audio": audioFeatures, "Visual": visualFeatures, "Physiology": physiologyFeatures, "All": allFeatures, "Arousal": targetArousal, "Valence": targetValence,}

### Binary Logistic Regression

In [6]:
if "Binary Logistic Regression" not in os.listdir("Datasets/RECOLA/Supervised Models"):
  os.makedirs("Datasets/RECOLA/Supervised Models/Binary Logistic Regression")

if "Per Fold Results" not in os.listdir("Datasets/RECOLA/Supervised Models/Binary Logistic Regression"):
  os.makedirs("Datasets/RECOLA/Supervised Models/Binary Logistic Regression/Per Fold Results")

if "ConfusionMatrices.csv" not in os.listdir("Datasets/RECOLA/Supervised Models/Binary Logistic Regression") or RERUN_CODE == True:
  open("Datasets/RECOLA/Supervised Models/Binary Logistic Regression/ConfusionMatrices.csv", "w")

if "NoCopyChecker.txt" in os.listdir("Datasets/RECOLA/Supervised Models/Binary Logistic Regression") and RERUN_CODE == False:
  print("Binary Logistic Regression has already started training, will not start again from scratch.")
else:
  with open("Datasets/RECOLA/Supervised Models/Binary Logistic Regression/NoCopyChecker.txt", "w") as file:
    file.close

Binary Logistic Regression has already started training, will not start again from scratch.


In [7]:
completed  = []
with open("Datasets/RECOLA/Supervised Models/Binary Logistic Regression/NoCopyChecker.txt", "r+") as file:
  completed  = file.read()
  file.close()

completed = completed.split()
print("Completed Tests: " + str(len(completed)) + "/" + str(len(featureList) * len(targetList)))

for i in featureList:
  for j in targetList:
    fileName = i + j
    if fileName not in completed:
      start = time.time()
      print(fileName)
      supervisedModelTraining.binaryLogisticRegression(valueDict[i], valueDict[j], folds, groups, fileName, "Datasets/RECOLA/Supervised Models/")
      end = time.time()
      print("Done: " + str(end-start) + "\n")

      with open("Datasets/RECOLA/Supervised Models/Binary Logistic Regression/NoCopyChecker.txt", "a") as file:
        file.write(fileName + " ")
        file.close

Completed Tests: 8/8


### Random Forest

In [8]:
if "Random Forest" not in os.listdir("Datasets/RECOLA/Supervised Models"):
    os.makedirs("Datasets/RECOLA/Supervised Models/Random Forest")

if "Per Fold Results" not in os.listdir("Datasets/RECOLA/Supervised Models/Random Forest"):
  os.makedirs("Datasets/RECOLA/Supervised Models/Random Forest/Per Fold Results")

if "ConfusionMatrices.csv" not in os.listdir("Datasets/RECOLA/Supervised Models/Random Forest") or RERUN_CODE == True:
  open("Datasets/RECOLA/Supervised Models/Random Forest/ConfusionMatrices.csv", "w")

if "NoCopyChecker.txt" in os.listdir("Datasets/RECOLA/Supervised Models/Random Forest") and RERUN_CODE == False:
  print("Random Forest has already started training, will not start again from scratch.")
else:
  with open("Datasets/RECOLA/Supervised Models/Random Forest/NoCopyChecker.txt", "w") as file:
    file.close

Random Forest has already started training, will not start again from scratch.


In [9]:
completed  = []
with open("Datasets/RECOLA/Supervised Models/Random Forest/NoCopyChecker.txt", "r+") as file:
  completed  = file.read()
  file.close()

completed = completed.split()
print("Completed Tests: " + str(len(completed)) + "/" + str(len(featureList) * len(targetList)))

for i in featureList:
  for j in targetList:
    fileName = i + j
    if fileName not in completed:
      start = time.time()
      print(fileName)
      supervisedModelTraining.randomForest(valueDict[i], valueDict[j], folds, groups, fileName, "Datasets/RECOLA/Supervised Models/")
      end = time.time()
      print("Done: " + str(end-start) + "\n")

      with open("Datasets/RECOLA/Supervised Models/Random Forest/NoCopyChecker.txt", "a") as file:
        file.write(fileName + " ")
        file.close

Completed Tests: 8/8


### Neural Networks

In [10]:
if "Neural Network" not in os.listdir("Datasets/RECOLA/Supervised Models"):
    os.makedirs("Datasets/RECOLA/Supervised Models/Neural Network")

if "Per Fold Results" not in os.listdir("Datasets/RECOLA/Supervised Models/Neural Network"):
  os.makedirs("Datasets/RECOLA/Supervised Models/Neural Network/Per Fold Results")

if "ConfusionMatrices.csv" not in os.listdir("Datasets/RECOLA/Supervised Models/Neural Network") or RERUN_CODE == True:
  open("Datasets/RECOLA/Supervised Models/Neural Network/ConfusionMatrices.csv", "w")

if "NoCopyChecker.txt" in os.listdir("Datasets/RECOLA/Supervised Models/Neural Network") and RERUN_CODE == False:
  print("Neural Network has already started training, will not start again from scratch.")
else:
  with open("Datasets/RECOLA/Supervised Models/Neural Network/NoCopyChecker.txt", "w") as file:
    file.close

Neural Network has already started training, will not start again from scratch.


In [11]:
completed  = []
with open("Datasets/RECOLA/Supervised Models/Neural Network/NoCopyChecker.txt", "r+") as file:
  completed  = file.read()
  file.close()

completed = completed.split()
print("Completed Tests: " + str(len(completed)) + "/" + str(len(featureList) * len(targetList)))

for i in featureList:
  for j in targetList:
    fileName = i + j
    if fileName not in completed:
      start = time.time()
      print(fileName)
      supervisedModelTraining.neuralNetwork(valueDict[i], valueDict[j], folds, groups, fileName, "Datasets/RECOLA/Supervised Models/")
      end = time.time()
      print("Done: " + str(end-start) + "\n")

      with open("Datasets/RECOLA/Supervised Models/Neural Network/NoCopyChecker.txt", "a") as file:
        file.write(fileName + " ")
        file.close

Completed Tests: 8/8


## Semi-Supervised Training

### Setup

In [12]:
if "Semi-Supervised Models" not in os.listdir("Datasets/RECOLA"):
    os.makedirs("Datasets/RECOLA/Semi-Supervised Models") 

# Grouping data for easy running
featureList = ["Audio", "Visual", "Phys", "All"]
targetList = ["Arousal", "Valence"]
baseEstList = ["BLR", "RF", "NN"]
labelledCount = ["4", "8", "12"]
valueDict = {"Audio": audioFeatures, "Visual": visualFeatures, "Phys": physiologyFeatures, "All": allFeatures, "Arousal": targetArousal, "Valence": targetValence}

if not hasattr(np, 'float'):
    np.float = float

### Co-Training (Disagreement)

In [13]:
if "Co-Training" not in os.listdir("Datasets/RECOLA/Semi-Supervised Models"):
  os.makedirs("Datasets/RECOLA/Semi-Supervised Models/Co-Training")

if "Per Fold Results" not in os.listdir("Datasets/RECOLA/Semi-Supervised Models/Co-Training"):
  os.makedirs("Datasets/RECOLA/Semi-Supervised Models/Co-Training/Per Fold Results")

if "ConfusionMatrices.csv" not in os.listdir("Datasets/RECOLA/Semi-Supervised Models/Co-Training") or RERUN_CODE == True:
  open("Datasets/RECOLA/Semi-Supervised Models/Co-Training/ConfusionMatrices.csv", "w")

if "NoCopyChecker.txt" in os.listdir("Datasets/RECOLA/Semi-Supervised Models/Co-Training") and RERUN_CODE == False:
  print("Co-Training has already started training, will not start again from scratch.")
else:
  with open("Datasets/RECOLA/Semi-Supervised Models/Co-Training/NoCopyChecker.txt", "w") as file:
    file.close

Co-Training has already started training, will not start again from scratch.


In [14]:
completed  = []
with open("Datasets/RECOLA/Semi-Supervised Models/Co-Training/NoCopyChecker.txt", "r+") as file:
  completed  = file.read()
  file.close()

completed = completed.split()
print("Completed Tests: " + str(len(completed)) + "/" + str(len(featureList)*len(targetList)*len(baseEstList)*len(labelledCount)))

for i in featureList:
  for j in targetList:
    for k in baseEstList:
      for l in labelledCount:
        fileName = i + j + k + l
        if fileName not in completed:
          start = time.time()

          print(fileName)
          semiSupervisedModelTraining.coTraining (k, valueDict[i], valueDict[j], folds, groups, int(l), fileName, "Datasets/RECOLA/Semi-Supervised Models/")

          end = time.time()
          print("Done: " + str(end-start) + "\n")

          with open("Datasets/RECOLA/Semi-Supervised Models/Co-Training/NoCopyChecker.txt", "a") as file:
            file.write(fileName + " ")
            file.close

Completed Tests: 72/72


### Tri-Training (Disagreement)

In [15]:
if "Tri-Training" not in os.listdir("Datasets/RECOLA/Semi-Supervised Models"):
  os.makedirs("Datasets/RECOLA/Semi-Supervised Models/Tri-Training")

if "Per Fold Results" not in os.listdir("Datasets/RECOLA/Semi-Supervised Models/Tri-Training"):
  os.makedirs("Datasets/RECOLA/Semi-Supervised Models/Tri-Training/Per Fold Results")

if "ConfusionMatrices.csv" not in os.listdir("Datasets/RECOLA/Semi-Supervised Models/Tri-Training") or RERUN_CODE == True:
  open("Datasets/RECOLA/Semi-Supervised Models/Tri-Training/ConfusionMatrices.csv", "w")

if "NoCopyChecker.txt" in os.listdir("Datasets/RECOLA/Semi-Supervised Models/Tri-Training") and RERUN_CODE == False:
  print("Tri-Training has already started training, will not start again from scratch.")
else:
  with open("Datasets/RECOLA/Semi-Supervised Models/Tri-Training/NoCopyChecker.txt", "w") as file:
    file.close

Tri-Training has already started training, will not start again from scratch.


In [16]:
completed  = []
with open("Datasets/RECOLA/Semi-Supervised Models/Tri-Training/NoCopyChecker.txt", "r+") as file:
  completed  = file.read()
  file.close()

completed = completed.split()
print("Completed Tests: " + str(len(completed)) + "/" + str(len(featureList)*len(targetList)*len(baseEstList)*len(labelledCount)))

for i in featureList:
  for j in targetList:
    for k in baseEstList:
      for l in labelledCount:
        fileName = i + j + k + l
        if fileName not in completed:
          start = time.time()

          print(fileName)
          semiSupervisedModelTraining.triTraining(k, valueDict[i], valueDict[j], folds, groups, int(l), fileName, "Datasets/RECOLA/Semi-Supervised Models/")

          end = time.time()
          print("Done: " + str(end-start) + "\n")

          with open("Datasets/RECOLA/Semi-Supervised Models/Tri-Training/NoCopyChecker.txt", "a") as file:
            file.write(fileName + " ")
            file.close

Completed Tests: 72/72


### SSGMM (Generative)

In [19]:
if "SSGMM" not in os.listdir("Datasets/RECOLA/Semi-Supervised Models"):
  os.makedirs("Datasets/RECOLA/Semi-Supervised Models/SSGMM")

if "Per Fold Results" not in os.listdir("Datasets/RECOLA/Semi-Supervised Models/SSGMM"):
  os.makedirs("Datasets/RECOLA/Semi-Supervised Models/SSGMM/Per Fold Results")

if "ConfusionMatrices.csv" not in os.listdir("Datasets/RECOLA/Semi-Supervised Models/SSGMM") or RERUN_CODE == True:
  open("Datasets/RECOLA/Semi-Supervised Models/SSGMM/ConfusionMatrices.csv", "w")

if "NoCopyChecker.txt" in os.listdir("Datasets/RECOLA/Semi-Supervised Models/SSGMM") and RERUN_CODE == False:
  print("SSGMM has already started training, will not start again from scratch.")
else:
  with open("Datasets/RECOLA/Semi-Supervised Models/SSGMM/NoCopyChecker.txt", "w") as file:
    file.close

In [20]:
completed  = []
with open("Datasets/RECOLA/Semi-Supervised Models/SSGMM/NoCopyChecker.txt", "r+") as file:
  completed  = file.read()
  file.close()

completed = completed.split()
print("Completed Tests: " + str(len(completed)) + "/" + str(len(featureList)*len(targetList)*len(labelledCount)))

for i in featureList:
  for j in targetList:
    for l in labelledCount:
      fileName = i + j + l
      if fileName not in completed:
        start = time.time()
        print(fileName)
        semiSupervisedModelTraining.SSGMMModel(valueDict[i], valueDict[j], folds, groups, int(l), fileName, "Datasets/RECOLA/Semi-Supervised Models/")
        end = time.time()
        print("Done: " + str(end-start) + "\n")

        with open("Datasets/RECOLA/Semi-Supervised Models/SSGMM/NoCopyChecker.txt", "a") as file:
          file.write(fileName + " ")
          file.close

Completed Tests: 24/24


### Assemble (Ensemble)

In [None]:
if "Assemble" not in os.listdir("Datasets/RECOLA/Semi-Supervised Models"):
  os.makedirs("Datasets/RECOLA/Semi-Supervised Models/Assemble")

if "Per Fold Results" not in os.listdir("Datasets/RECOLA/Semi-Supervised Models/Assemble"):
  os.makedirs("Datasets/RECOLA/Semi-Supervised Models/Assemble/Per Fold Results")

if "ConfusionMatrices.csv" not in os.listdir("Datasets/RECOLA/Semi-Supervised Models/Assemble") or RERUN_CODE == True:
  open("Datasets/RECOLA/Semi-Supervised Models/Assemble/ConfusionMatrices.csv", "w")

if "NoCopyChecker.txt" in os.listdir("Datasets/RECOLA/Semi-Supervised Models/Assemble") and RERUN_CODE == False:
  print("Assemble has already started training, will not start again from scratch.")
else:
  with open("Datasets/RECOLA/Semi-Supervised Models/Assemble/NoCopyChecker.txt", "w") as file:
    file.close

Assemble has already started training, will not start again from scratch.


In [None]:
completed = []
with open("Datasets/RECOLA/Semi-Supervised Models/Assemble/NoCopyChecker.txt", "r+") as file:
  completed = file.read()
  file.close()

completed = completed.split()
print("Completed Tests: " + str(len(completed)) + "/" + str(len(featureList)*len(targetList)*len(baseEstList)*len(labelledCount)))

for i in featureList:
  for j in targetList:
    for k in baseEstList:
      if k is not "NN":
        for l in labelledCount:
          fileName = i + j + k + l
          if fileName not in completed:
            start = time.time()
            print(fileName)
            semiSupervisedModelTraining.assemble(k, valueDict[i], valueDict[j], folds, groups, int(l), fileName, "Datasets/RECOLA/Semi-Supervised Models/")
            end = time.time()
            print("Done: " + str(end-start) + "\n")

            with open("Datasets/RECOLA/Semi-Supervised Models/Assemble/NoCopyChecker.txt", "a") as file:
              file.write(fileName + " ")
              file.close

Completed Tests: 48/72


### SemiBoost (Ensemble)

In [None]:
if "SemiBoost" not in os.listdir("Datasets/RECOLA/Semi-Supervised Models"):
  os.makedirs("Datasets/RECOLA/Semi-Supervised Models/SemiBoost")

if "Per Fold Results" not in os.listdir("Datasets/RECOLA/Semi-Supervised Models/SemiBoost"):
  os.makedirs("Datasets/RECOLA/Semi-Supervised Models/SemiBoost/Per Fold Results")

if "ConfusionMatrices.csv" not in os.listdir("Datasets/RECOLA/Semi-Supervised Models/SemiBoost") or RERUN_CODE == True:
  open("Datasets/RECOLA/Semi-Supervised Models/SemiBoost/ConfusionMatrices.csv", "w")

if "NoCopyChecker.txt" in os.listdir("Datasets/RECOLA/Semi-Supervised Models/SemiBoost") and RERUN_CODE == False:
  print("SemiBoost has already started training, will not start again from scratch.")
else:
  with open("Datasets/RECOLA/Semi-Supervised Models/SemiBoost/NoCopyChecker.txt", "w") as file:
    file.close

SemiBoost has already started training, will not start again from scratch.


In [None]:
completed = []
with open("Datasets/RECOLA/Semi-Supervised Models/SemiBoost/NoCopyChecker.txt", "r+") as file:
  completed = file.read()
  file.close()

completed = completed.split()
print("Completed Tests: " + str(len(completed)) + "/" + str(len(featureList)*len(targetList)*len(baseEstList)*len(labelledCount)))

for i in featureList:
  for j in targetList:
    for k in baseEstList:
      for l in labelledCount:
        fileName = i + j + k + l
        if fileName not in completed:
          start = time.time()
          print(fileName)
          semiSupervisedModelTraining.semiBoost(k, valueDict[i], valueDict[j], folds, groups, int(l), fileName, "Datasets/RECOLA/Semi-Supervised Models/")
          end = time.time()
          print("Done: " + str(end-start) + "\n")

          with open("Datasets/RECOLA/Semi-Supervised Models/SemiBoost/NoCopyChecker.txt", "a") as file:
            file.write(fileName + " ")
            file.close

Completed Tests: 72/72
