In [None]:
# Importing Libraries
from google.colab import drive
import pandas as pd
import os
import time

# Loading Drive Folder
# Mount Google Drive into the Colab runtime, then make the project folder the
# working directory so the relative "Datasets/..." paths used below resolve.
drive.mount('/content/drive')
%cd /content/drive/MyDrive/Final Year Project/

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Final Year Project


In [None]:
# Dataset Location
recolaDataLoc = "Datasets/RECOLA/"

# List of CSV Files
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the participant order (and therefore the row order of the merged data)
# the same on every run.
csvFiles = sorted(
    file for file in os.listdir(recolaDataLoc) if file.endswith(".csv")
)

In [None]:
# Function to add new participant to the dataframe
def addNewParticipant(df, recolaDataLoc, csv):
    """Append one participant's CSV to ``df`` and return the result.

    Parameters:
        df: frame holding the participants loaded so far.
        recolaDataLoc: directory prefix, concatenated directly with ``csv``.
        csv: file name of the participant CSV.

    Returns:
        A new frame with the file's rows appended (index renumbered), or ``df``
        itself, unchanged, when the file contains at least one missing value.
    """
    participantFrame = pd.read_csv(recolaDataLoc + csv)

    # Participant id = file name without its first character and last four
    # characters (the ".csv" extension).
    participantFrame["Participant"] = csv[1:-4]

    # A single empty entry anywhere rejects the whole file.
    if participantFrame.isna().any().any():
        return df

    return pd.concat([df, participantFrame], ignore_index=True)

In [None]:
# Full data set (RecolaLabelled.csv)
print(csvFiles)

# Adding entries to dataframes
# Each participant is loaded on its own and everything is concatenated once at
# the end; growing the frame inside the loop would re-copy all previously
# loaded rows on every iteration.
participantFrames = []
for csv in csvFiles:
    loaded = addNewParticipant(pd.DataFrame(), recolaDataLoc, csv)
    # addNewParticipant returns its (column-less) input frame untouched when it
    # rejects a file, so a frame without columns means "skipped".
    if len(loaded.columns) > 0:
        participantFrames.append(loaded)

if participantFrames:
    recolaLabelled = pd.concat(participantFrames, ignore_index=True)
else:
    # No usable file: keep the original behaviour of an empty frame.
    recolaLabelled = pd.DataFrame()

# Pointer
# Index of the dataset variant to generate (0-7); it selects the epsilon pair
# and the output file used by createCSV.
pointer = 7

# Choosing the annotators for arousal and valence
annotatorArousal = "FM1 _x"
annotatorValence = "FM1 _y"

# Variant 7 (RecolaLabelledFullAlt1) is built from the FM2 columns instead.
if pointer == 7:
    annotatorArousal = "FM2 _x"
    annotatorValence = "FM2 _y"

In [None]:
# Function to normalize the data
def normalizeColumn(df, ann, min_range=-1, max_range=1):
    """Min-max rescale column ``ann`` of ``df`` in place.

    Parameters:
        df: frame to modify; column ``ann`` is overwritten.
        ann: name of the column to rescale.
        min_range: value the column minimum is mapped to (default -1).
        max_range: value the column maximum is mapped to (default 1).

    Returns:
        None. The frame is updated in place.

    A constant column has no spread to rescale; instead of dividing by zero
    (which would turn every entry into NaN) it is set to the midpoint of the
    target range.
    """
    min_value = df[ann].min()
    max_value = df[ann].max()
    span = max_value - min_value

    if span == 0:
        # Degenerate case: every value is identical.
        df[ann] = (min_range + max_range) / 2
        return

    # Normalize the column using the formula
    df[ann] = ((df[ann] - min_value) / span) * (max_range - min_range) + min_range

# Normalizing the columns
# Both annotator columns are rescaled in place, arousal first.
for annotatorColumn in (annotatorArousal, annotatorValence):
    normalizeColumn(recolaLabelled, annotatorColumn)

# Finding the median
# Medians of the rescaled columns; createCSV uses them as class thresholds.
medianArousal, medianValence = (
    recolaLabelled[column].median()
    for column in (annotatorArousal, annotatorValence)
)

In [None]:
# One independent copy of the labelled data per output dataset, indexed as:
#   0    RecolaLabelledFull
#   1-3  RecolaArousalEpsilon05, RecolaArousalEpsilon1, RecolaArousalEpsilon15
#   4-6  RecolaValenceEpsilon05, RecolaValenceEpsilon1, RecolaValenceEpsilon15
#   7    RecolaLabelledFullAlt1
dataFrames = [recolaLabelled.copy() for _ in range(8)]

# Epsilons for Arousal (non-zero only for variants 1-3)
arousalEp = [0, 0.05, 0.1, 0.15] + [0] * 4

# Epsilons for Valence (non-zero only for variants 4-6)
valenceEp = [0] * 4 + [0.05, 0.1, 0.15, 0]

In [None]:
def _labelAgainstMedian(df, column, labelColumn, median, epsilon):
    """Drop rows inside the band around ``median`` and label the remaining ones.

    Rows whose ``column`` value lies strictly between ``median - epsilon`` and
    ``median + epsilon`` are removed. In the returned copy, ``labelColumn`` is
    1 where ``column`` is greater than ``median`` and 0 otherwise. The input
    frame is not modified.
    """
    values = df[column]
    # Both bounds are strict, so with epsilon == 0 the band is empty and no row
    # is dropped (values equal to the median are kept and labelled 0).
    insideBand = (values < median + epsilon) & (values > median - epsilon)
    kept = df.loc[~insideBand].copy()

    kept[labelColumn] = 0
    kept.loc[kept[column] > median, labelColumn] = 1
    return kept


def createCSV(pointer, dataFrames, arousalEp, valenceEp, annAro, annVal,
              medArousal=None, medValence=None):
    """Build the binary-labelled dataset variant ``pointer`` and save it as CSV.

    Parameters:
        pointer: index of the variant; selects the frame, the epsilon pair and
            the output file.
        dataFrames: list of frames, indexed by ``pointer``.
        arousalEp: arousal epsilons, indexed by ``pointer``.
        valenceEp: valence epsilons, indexed by ``pointer``.
        annAro: name of the arousal annotation column.
        annVal: name of the valence annotation column.
        medArousal: arousal threshold; defaults to the notebook-level
            ``medianArousal`` when omitted.
        medValence: valence threshold; defaults to the notebook-level
            ``medianValence`` when omitted.

    Returns:
        None. Progress, timings and the final row count are printed, and the
        result is written under ``Datasets/`` for pointers 0-7 (any other
        pointer writes nothing).

    The arousal filter runs first; the valence filter is applied to the rows
    that survive it. Filtering and labelling use boolean masks rather than a
    row-by-row loop with ``drop``, which copied the whole frame for every
    removed row. The frame stored in ``dataFrames`` is left untouched.
    """
    print("Pointer " + str(pointer))

    # Fall back to the notebook-level medians when none are passed explicitly.
    if medArousal is None:
        medArousal = medianArousal
    if medValence is None:
        medValence = medianValence

    df = dataFrames[pointer]
    epsilonA = arousalEp[pointer]
    epsilonV = valenceEp[pointer]

    # Arousal
    start = time.time()
    df = _labelAgainstMedian(df, annAro, "classLabelArousal", medArousal, epsilonA)
    end = time.time()
    print("Arousal Done: ", end-start)

    # Valence
    start = time.time()
    df = _labelAgainstMedian(df, annVal, "classLabelValence", medValence, epsilonV)
    end = time.time()
    print("Valence Done: ", end-start)

    # Saving
    outputFiles = {
        0: 'Datasets/RecolaLabelledFull.csv',
        1: 'Datasets/RecolaArousalEpsilon05.csv',
        2: 'Datasets/RecolaArousalEpsilon1.csv',
        3: 'Datasets/RecolaArousalEpsilon15.csv',
        4: 'Datasets/RecolaValenceEpsilon05.csv',
        5: 'Datasets/RecolaValenceEpsilon1.csv',
        6: 'Datasets/RecolaValenceEpsilon15.csv',
        7: 'Datasets/RecolaLabelledFullAlt1.csv',
    }

    start = time.time()
    outputPath = outputFiles.get(pointer)
    if outputPath is not None:
        df.to_csv(outputPath, index=False)
    end = time.time()
    print("Save Done: ", end-start)

    print("Rows: " + str(len(df)) + "\n")

In [None]:
# Generates a single dataset variant per run: to produce another one, change
# `pointer` (0-7) where it is defined and re-run the notebook.
createCSV(
    pointer,
    dataFrames,
    arousalEp,
    valenceEp,
    annotatorArousal,
    annotatorValence,
)

Pointer 7
Arousal Done:  13.481858253479004
Valence Done:  14.4138822555542
Save Done:  47.73934745788574
Rows: 130593

