In [1]:
import numpy as np
import pickle as pickle
import pandas as pd
from scipy.stats import skew,kurtosis
import matplotlib.pyplot as plt



from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold,RepeatedKFold
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.metrics import classification_report,confusion_matrix,roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_validate,cross_val_score
from sklearn.utils import shuffle

import os
import tensorflow as tf
import time
import sys
import torch
sys.path.append("..")
# import autosklearn.classification

2023-06-17 14:53:59.536268: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Real Data

In [53]:
# Dataset dimensions.
# NOTE(review): only num_testsub is read by the code visible in this notebook
# section; the remaining values look like descriptive metadata -- confirm.
num_testsub = 32  # number of per-participant files read by loadfiles_normalized()
num_classes = 4  # presumably the four label sets (valence, arousal, dominance, liking) -- TODO confirm
num_channel = 40  # presumably the size of axis 1 of each trial array -- TODO confirm
num_datapoints = 8064  # presumably raw samples per channel before feature extraction -- TODO confirm
num_trials = 40  # presumably trials (videos) per participant; 32 * 40 = 1280 rows overall
sampling_rate = 128 # 128Hz as given in the data

def loadfiles_normalized(n_subjects=None, data_dir='data/data_prepared/data_norm_bhat'):
    """Load the normalized per-participant arrays into a dictionary.

    Participants are numbered from 1 and zero-padded to two digits, so the
    files read are ``<data_dir>/noramlized_datasub01.npy``,
    ``<data_dir>/noramlized_datasub02.npy`` and so on. The misspelling
    "noramlized" is part of the on-disk file names and is kept on purpose.

    Parameters
    ----------
    n_subjects : int, optional
        Number of participant files to load. When omitted, the module-level
        ``num_testsub`` is used, which matches the previous behaviour.
    data_dir : str, optional
        Directory containing the ``.npy`` files.

    Returns
    -------
    dict
        Maps ``"subNN"`` (``NN`` = two-digit participant number) to the array
        returned by ``np.load`` for that participant, in loading order.
    """
    if n_subjects is None:
        n_subjects = num_testsub

    data_dict = {}
    print("Loading files into data_dict .................")
    for subject_no in range(1, n_subjects + 1):
        # '%02d' pads single-digit numbers and leaves larger ones untouched,
        # so no separate branch is needed for participants 10 and above.
        name = '%02d' % subject_no
        fname = data_dir + '/noramlized_datasub' + name + '.npy'
        data_dict["sub%s" % name] = np.load(fname)
    print("Loaded!!!!!")
    return data_dict
data_dict = loadfiles_normalized()

# Subject-wise split: the first 22 participants (22 * 40 trials = 880 rows)
# form the training set; the remaining participants form the test set.
n_train_subjects = 22

# data_dict preserves loading order, so slicing the list of per-participant
# arrays gives the same split as counting participants inside a loop.
subject_arrays = list(data_dict.values())

# Zero-length float64 seed. It contributes no rows, but keeps the result dtype
# promotion and the (40, 99) trailing-shape check of the earlier
# incremental-concatenate version, and lets an empty split still concatenate.
empty_seed = np.zeros((0, 40, 99))

# One concatenate per output instead of re-copying the growing array for
# every participant.
data_R = np.concatenate([empty_seed] + subject_arrays, axis=0)
train_R = np.concatenate([empty_seed] + subject_arrays[:n_train_subjects], axis=0)
test_R = np.concatenate([empty_seed] + subject_arrays[n_train_subjects:], axis=0)

Loading files into data_dict .................
Loaded!!!!!


In [54]:
data_R.shape

(1280, 40, 99)

In [15]:
train_R.shape

(880, 40, 99)

In [16]:
test_R.shape

(400, 40, 99)

## Synthetic Data

In [26]:
# Read the ten sampled batches (suffixes 0..9), mapping them onto the CPU.
# NOTE(review): torch.load unpickles its input -- only point it at trusted files.
arrays = [
    torch.load("data/sampled/0xx/1_sample/sampled_03_128_"+str(part), map_location=torch.device('cpu'))
    for part in range(10)
]
generated_data = np.concatenate(arrays, axis=0)

# NaN-initialised target so any row that was not filled would be easy to spot.
data_S = np.full((1280, 40, 99), np.nan)

# Keep entry 0 along the second axis of every sample and the first 99 columns.
for i in range(1280):
    data_S[i] = generated_data[i, 0][:, :99]

# Same 880 / remainder row split that is used for the real data.
train_S, test_S = data_S[:880], data_S[880:]

## Labels

In [22]:
# One label file per target, loaded in the order valence, arousal, dominance, liking.
valence, arousal, dominance, liking = (
    np.load('data/data_prepared/labels/label_class_0.npy'),
    np.load('data/data_prepared/labels/label_class_1.npy'),
    np.load('data/data_prepared/labels/label_class_2.npy'),
    np.load('data/data_prepared/labels/label_class_3.npy'),
)

# Rows [0, 880) go to the "*22" arrays and the remaining rows to the "*10"
# arrays, mirroring the 880-row train/test split of the feature arrays.
val22, val10 = valence[:880], valence[880:]
aro22, aro10 = arousal[:880], arousal[880:]
dom22, dom10 = dominance[:880], dominance[880:]
lik22, lik10 = liking[:880], liking[880:]

## K fold based 

### Actual data

## Train R and Test R

In [58]:
def main_k_fold(X_R, X_S, kval=20, nsplit=32):
    """Print K-fold KNN accuracy per label set: train on real, test on real.

    For each of the module-level label arrays ``valence``, ``arousal``,
    ``dominance`` and ``liking`` (in that order) the rows of ``X_R`` are split
    into ``nsplit`` consecutive, unshuffled folds. For every fold a
    ``KNeighborsClassifier`` with ``kval`` neighbours is fitted on the training
    rows of ``X_R`` and scored on the held-out rows of ``X_R``. The mean and
    standard deviation of the per-fold accuracies (in percent, each rounded to
    4 decimals) are printed.

    Parameters
    ----------
    X_R : ndarray
        Real-data feature matrix, one row per sample. It is indexed with the
        same row indices as the label arrays.
    X_S : ndarray
        Synthetic-data feature matrix. Not used by this variant; it is accepted
        so that all ``main_k_fold`` variants in the notebook share one call
        signature.
    kval : int, default 20
        Number of neighbours for the KNN classifier.
    nsplit : int, default 32
        Number of folds.

    Returns
    -------
    None
        Results are only printed.
    """
    # Verbosity toggles, all disabled; set to a truthy value while debugging.
    dispresults = 0  # accuracy after every fold
    dispfoldres = 0  # per-fold accuracy table after each label set
    conf_matrix = 0  # confusion matrix for every fold
    cls_report = 0   # classification report for every fold

    # Named shuffle_folds so the imported sklearn.utils.shuffle is not shadowed.
    shuffle_folds = False
    rule = '------------------------------------------------------------------------'

    # Banner text is kept exactly as printed before (the Liking banner has no
    # inner spaces).
    targets = (
        ("################# Valence #################", valence),
        ("################# Arousal #################", arousal),
        ("################# Dominance #################", dominance),
        ("#################Liking#################", liking),
    )

    print()
    for header, y in targets:
        print(header)
        acc_per_fold = []
        kfold = KFold(n_splits=nsplit, shuffle=shuffle_folds)  # 4,8,32
        for fold_no, (train, test) in enumerate(kfold.split(X_R, y), start=1):
            X_train, y_train = X_R[train], y[train]
            X_test, y_test = X_R[test], y[test]

            # KNN (RandomForest / DecisionTree were earlier alternatives here).
            clf = KNeighborsClassifier(n_neighbors=kval)
            clf.fit(X_train, y_train)
            y_predict = clf.predict(X_test)
            acc = round(accuracy_score(y_test, y_predict) * 100, 4)
            if dispresults:
                print("Accuracy score for fold", fold_no)
                print(acc)
                print('\n')
            acc_per_fold.append(acc)
            if conf_matrix:
                print(confusion_matrix(y_test, y_predict))
            if cls_report:
                print(classification_report(y_test, y_predict))

        if dispfoldres:
            print(rule)
            print('Score per fold')
            for i, fold_acc in enumerate(acc_per_fold, start=1):
                print(rule)
                print(f'> Fold {i} Accuracy: {fold_acc}%')
            print(rule)
        print("For K value -", kval, "and nsplits", nsplit, "shuffle -", shuffle_folds)
        print('Average scores for all folds:')
        print(f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
        print(rule)



In [59]:
# Flatten every (40, 99) trial into one 3960-value feature row before classification.
main_k_fold(
    np.reshape(data_R, (32*40, 40*99)),
    np.reshape(data_S, (32*40, 40*99)),
)


################# Valence #################
For K value - 20 and nsplits 32 shuffle - False
Average scores for all folds:
> Accuracy: 52.65625 (+- 8.476472493761777)
------------------------------------------------------------------------
################# Arousal #################
For K value - 20 and nsplits 32 shuffle - False
Average scores for all folds:
> Accuracy: 53.90625 (+- 9.247835473098556)
------------------------------------------------------------------------
################# Dominance #################
For K value - 20 and nsplits 32 shuffle - False
Average scores for all folds:
> Accuracy: 56.015625 (+- 11.351269460257518)
------------------------------------------------------------------------
#################Liking#################
For K value - 20 and nsplits 32 shuffle - False
Average scores for all folds:
> Accuracy: 60.390625 (+- 10.195536994654818)
------------------------------------------------------------------------


## Train R and Test S

In [61]:
def main_k_fold(X_R, X_S, kval=20, nsplit=32):
    """Print K-fold KNN accuracy per label set: train on real, test on synthetic.

    For each of the module-level label arrays ``valence``, ``arousal``,
    ``dominance`` and ``liking`` (in that order) fold indices are generated
    from ``X_R`` as ``nsplit`` consecutive, unshuffled folds. For every fold a
    ``KNeighborsClassifier`` with ``kval`` neighbours is fitted on the training
    rows of ``X_R`` and scored on the held-out rows of ``X_S``. The mean and
    standard deviation of the per-fold accuracies (in percent, each rounded to
    4 decimals) are printed.

    Parameters
    ----------
    X_R : ndarray
        Real-data feature matrix, one row per sample; supplies the training rows
        and defines the fold indices.
    X_S : ndarray
        Synthetic-data feature matrix; supplies the test rows.
        NOTE(review): the same indices address ``X_R``, ``X_S`` and the labels,
        so this assumes synthetic row ``i`` corresponds to real row ``i`` and
        to label ``i`` -- confirm against how the samples were generated.
    kval : int, default 20
        Number of neighbours for the KNN classifier.
    nsplit : int, default 32
        Number of folds.

    Returns
    -------
    None
        Results are only printed.
    """
    # Verbosity toggles, all disabled; set to a truthy value while debugging.
    dispresults = 0  # accuracy after every fold
    dispfoldres = 0  # per-fold accuracy table after each label set
    conf_matrix = 0  # confusion matrix for every fold
    cls_report = 0   # classification report for every fold

    # Named shuffle_folds so the imported sklearn.utils.shuffle is not shadowed.
    shuffle_folds = False
    rule = '------------------------------------------------------------------------'

    # Banner text is kept exactly as printed before (the Liking banner has no
    # inner spaces).
    targets = (
        ("################# Valence #################", valence),
        ("################# Arousal #################", arousal),
        ("################# Dominance #################", dominance),
        ("#################Liking#################", liking),
    )

    print()
    for header, y in targets:
        print(header)
        acc_per_fold = []
        kfold = KFold(n_splits=nsplit, shuffle=shuffle_folds)  # 4,8,32
        for fold_no, (train, test) in enumerate(kfold.split(X_R, y), start=1):
            # Train on real rows, evaluate on the synthetic rows of the held-out fold.
            X_train, y_train = X_R[train], y[train]
            X_test, y_test = X_S[test], y[test]

            # KNN (RandomForest / DecisionTree were earlier alternatives here).
            clf = KNeighborsClassifier(n_neighbors=kval)
            clf.fit(X_train, y_train)
            y_predict = clf.predict(X_test)
            acc = round(accuracy_score(y_test, y_predict) * 100, 4)
            if dispresults:
                print("Accuracy score for fold", fold_no)
                print(acc)
                print('\n')
            acc_per_fold.append(acc)
            if conf_matrix:
                print(confusion_matrix(y_test, y_predict))
            if cls_report:
                print(classification_report(y_test, y_predict))

        if dispfoldres:
            print(rule)
            print('Score per fold')
            for i, fold_acc in enumerate(acc_per_fold, start=1):
                print(rule)
                print(f'> Fold {i} Accuracy: {fold_acc}%')
            print(rule)
        print("For K value -", kval, "and nsplits", nsplit, "shuffle -", shuffle_folds)
        print('Average scores for all folds:')
        print(f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
        print(rule)



In [62]:
# Flatten every (40, 99) trial into one 3960-value feature row before classification.
main_k_fold(
    np.reshape(data_R, (32*40, 40*99)),
    np.reshape(data_S, (32*40, 40*99)),
)


################# Valence #################
For K value - 20 and nsplits 32 shuffle - False
Average scores for all folds:
> Accuracy: 47.1875 (+- 8.309209875192707)
------------------------------------------------------------------------
################# Arousal #################
For K value - 20 and nsplits 32 shuffle - False
Average scores for all folds:
> Accuracy: 54.6875 (+- 9.636964965693297)
------------------------------------------------------------------------
################# Dominance #################
For K value - 20 and nsplits 32 shuffle - False
Average scores for all folds:
> Accuracy: 51.40625 (+- 7.7039331472631565)
------------------------------------------------------------------------
#################Liking#################
For K value - 20 and nsplits 32 shuffle - False
Average scores for all folds:
> Accuracy: 60.546875 (+- 8.563512143646145)
------------------------------------------------------------------------


In [None]:
# Reference only: Train R / Test R results copied from the run above for
# side-by-side comparison. Kept as comments so this cell is valid Python.
# ################# Valence #################
# For K value - 20 and nsplits 32 shuffle - False
# Average scores for all folds:
# > Accuracy: 52.65625 (+- 8.476472493761777)
# ------------------------------------------------------------------------
# ################# Arousal #################
# For K value - 20 and nsplits 32 shuffle - False
# Average scores for all folds:
# > Accuracy: 53.90625 (+- 9.247835473098556)
# ------------------------------------------------------------------------
# ################# Dominance #################
# For K value - 20 and nsplits 32 shuffle - False
# Average scores for all folds:
# > Accuracy: 56.015625 (+- 11.351269460257518)
# ------------------------------------------------------------------------
# #################Liking#################
# For K value - 20 and nsplits 32 shuffle - False
# Average scores for all folds:
# > Accuracy: 60.390625 (+- 10.195536994654818)
# ------------------------------------------------------------------------

## Train S and Test R

In [67]:
def main_k_fold(X_R, X_S, kval=20, nsplit=32):
    """Print K-fold KNN accuracy per label set: train on synthetic, test on real.

    For each of the module-level label arrays ``valence``, ``arousal``,
    ``dominance`` and ``liking`` (in that order) fold indices are generated
    from ``X_R`` as ``nsplit`` consecutive, unshuffled folds. For every fold a
    ``KNeighborsClassifier`` with ``kval`` neighbours is fitted on the training
    rows of ``X_S`` and scored on the held-out rows of ``X_R``. The mean and
    standard deviation of the per-fold accuracies (in percent, each rounded to
    4 decimals) are printed.

    Parameters
    ----------
    X_R : ndarray
        Real-data feature matrix, one row per sample; supplies the test rows and
        defines the fold indices.
    X_S : ndarray
        Synthetic-data feature matrix; supplies the training rows.
        NOTE(review): the same indices address ``X_R``, ``X_S`` and the labels,
        so this assumes synthetic row ``i`` corresponds to real row ``i`` and
        to label ``i`` -- confirm against how the samples were generated.
    kval : int, default 20
        Number of neighbours for the KNN classifier.
    nsplit : int, default 32
        Number of folds.

    Returns
    -------
    None
        Results are only printed.
    """
    # Verbosity toggles, all disabled; set to a truthy value while debugging.
    dispresults = 0  # accuracy after every fold
    dispfoldres = 0  # per-fold accuracy table after each label set
    conf_matrix = 0  # confusion matrix for every fold
    cls_report = 0   # classification report for every fold

    # Named shuffle_folds so the imported sklearn.utils.shuffle is not shadowed.
    shuffle_folds = False
    rule = '------------------------------------------------------------------------'

    # Banner text is kept exactly as printed before (the Liking banner has no
    # inner spaces).
    targets = (
        ("################# Valence #################", valence),
        ("################# Arousal #################", arousal),
        ("################# Dominance #################", dominance),
        ("#################Liking#################", liking),
    )

    print()
    for header, y in targets:
        print(header)
        acc_per_fold = []
        kfold = KFold(n_splits=nsplit, shuffle=shuffle_folds)  # 4,8,32
        for fold_no, (train, test) in enumerate(kfold.split(X_R, y), start=1):
            # Train on synthetic rows, evaluate on the real rows of the held-out fold.
            X_train, y_train = X_S[train], y[train]
            X_test, y_test = X_R[test], y[test]

            # KNN (RandomForest / DecisionTree were earlier alternatives here).
            clf = KNeighborsClassifier(n_neighbors=kval)
            clf.fit(X_train, y_train)
            y_predict = clf.predict(X_test)
            acc = round(accuracy_score(y_test, y_predict) * 100, 4)
            if dispresults:
                print("Accuracy score for fold", fold_no)
                print(acc)
                print('\n')
            acc_per_fold.append(acc)
            if conf_matrix:
                print(confusion_matrix(y_test, y_predict))
            if cls_report:
                print(classification_report(y_test, y_predict))

        if dispfoldres:
            print(rule)
            print('Score per fold')
            for i, fold_acc in enumerate(acc_per_fold, start=1):
                print(rule)
                print(f'> Fold {i} Accuracy: {fold_acc}%')
            print(rule)
        print("For K value -", kval, "and nsplits", nsplit, "shuffle -", shuffle_folds)
        print('Average scores for all folds:')
        print(f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
        print(rule)



In [68]:
# Flatten every (40, 99) trial into one 3960-value feature row before classification.
main_k_fold(
    np.reshape(data_R, (32*40, 40*99)),
    np.reshape(data_S, (32*40, 40*99)),
)


################# Valence #################
For K value - 20 and nsplits 32 shuffle - False
Average scores for all folds:
> Accuracy: 47.890625 (+- 6.906444425996273)
------------------------------------------------------------------------
################# Arousal #################
For K value - 20 and nsplits 32 shuffle - False
Average scores for all folds:
> Accuracy: 51.25 (+- 11.659223816361019)
------------------------------------------------------------------------
################# Dominance #################
For K value - 20 and nsplits 32 shuffle - False
Average scores for all folds:
> Accuracy: 49.84375 (+- 10.842218912081604)
------------------------------------------------------------------------
#################Liking#################
For K value - 20 and nsplits 32 shuffle - False
Average scores for all folds:
> Accuracy: 64.375 (+- 12.005857945186591)
------------------------------------------------------------------------


## Train S and Test S

In [70]:
def _knn_kfold_accuracies(X, y, split_on, kval, nsplit, shuffle,
                          dispresults=False, conf_matrix=False, cls_report=False):
    """Run K-fold cross-validation of a KNN classifier and return per-fold accuracy.

    Parameters
    ----------
    X : indexable by integer index arrays
        Feature rows used for BOTH fitting and prediction.
    y : indexable by integer index arrays
        Labels aligned row-for-row with ``X``.
    split_on : array-like
        Only handed to ``KFold.split`` to generate the train/test indices;
        no classifier is fit or scored on it.
    kval : int
        ``n_neighbors`` for ``KNeighborsClassifier``.
    nsplit : int
        ``n_splits`` for ``KFold``.
    shuffle : bool
        ``shuffle`` flag for ``KFold``.
    dispresults, conf_matrix, cls_report : bool
        When truthy, print the fold accuracy / confusion matrix /
        classification report for every fold.

    Returns
    -------
    list of float
        Accuracy of each fold in percent, rounded to 4 decimals.
    """
    kfold = KFold(n_splits=nsplit, shuffle=shuffle)  # 4,8,32
    acc_per_fold = []
    for fold_no, (train, test) in enumerate(kfold.split(split_on, y), start=1):
        y_test = y[test]

        # KNN
        clf = KNeighborsClassifier(n_neighbors=kval)
        clf.fit(X[train], y[train])
        y_predict = clf.predict(X[test])

        acc = round(accuracy_score(y_test, y_predict) * 100, 4)
        if dispresults:
            print("Accuracy score for fold", fold_no)
            print(acc)
            print('\n')
        acc_per_fold.append(acc)

        if conf_matrix:
            print(confusion_matrix(y_test, y_predict))
        if cls_report:
            print(classification_report(y_test, y_predict))
    return acc_per_fold


def main_k_fold(X_R, X_S, kval=20, nsplit=32, shuffle=False,
                dispresults=False, dispfoldres=False,
                conf_matrix=False, cls_report=False):
    """Cross-validate a KNN classifier on ``X_S`` for each of the four label sets.

    For every label array (the notebook-level globals ``valence``, ``arousal``,
    ``dominance`` and ``liking``, in that order) a K-fold split is generated and
    a ``KNeighborsClassifier`` is trained and tested on rows of ``X_S``. The mean
    and standard deviation of the per-fold accuracy are printed per label set.

    Parameters
    ----------
    X_R : array-like
        Only used to generate the fold indices (passed to ``KFold.split``).
        It must have as many rows as ``X_S``.
    X_S : indexable by integer index arrays
        Feature matrix used for both training and testing.
    kval : int, default 20
        Number of neighbours for the KNN classifier.
    nsplit : int, default 32
        Number of folds.
    shuffle : bool, default False
        Whether ``KFold`` shuffles before splitting. Inside this function the
        name hides the ``sklearn.utils.shuffle`` import; the name is kept
        because it is part of the printed summary line.
    dispresults : bool, default False
        Print the accuracy of every fold as it is computed.
    dispfoldres : bool, default False
        Print a per-fold accuracy table after each label set.
    conf_matrix, cls_report : bool, default False
        Print the confusion matrix / classification report of every fold.

    Returns
    -------
    None
        Results are only printed.

    Raises
    ------
    ValueError
        If ``X_R`` and ``X_S`` do not have the same number of rows.
    """
    # NOTE(review): an earlier cell appears to define a function with this same name;
    # running this cell rebinds it. Consider distinct names for the two variants.

    # Fold indices come from X_R but are applied to X_S, so the row counts must agree.
    if len(X_R) != len(X_S):
        raise ValueError(
            f"X_R and X_S must have the same number of rows, got {len(X_R)} and {len(X_S)}"
        )

    rule = '------------------------------------------------------------------------'
    # Banner strings are kept byte-for-byte so that printed output is unchanged.
    targets = (
        ("################# Valence #################", valence),
        ("################# Arousal #################", arousal),
        ("################# Dominance #################", dominance),
        ("#################Liking#################", liking),
    )

    print()
    for banner, y in targets:
        print(banner)
        acc_per_fold = _knn_kfold_accuracies(
            X_S, y, X_R, kval, nsplit, shuffle,
            dispresults=dispresults, conf_matrix=conf_matrix, cls_report=cls_report,
        )

        if dispfoldres:
            print(rule)
            print('Score per fold')
            for fold_idx, fold_acc in enumerate(acc_per_fold, start=1):
                print(rule)
                print(f'> Fold {fold_idx} Accuracy: {fold_acc}%')
            print(rule)
        print("For K value -", kval, "and nsplits", nsplit, "shuffle -", shuffle)
        print('Average scores for all folds:')
        print(f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
        print(rule)



In [71]:
# Flatten data_R and data_S to 2-D arrays of shape (32*40, 40*99) and run the
# "Train S and Test S" cross-validation defined in the previous cell.
# NOTE(review): 32*40 presumably means subjects x trials (cf. num_testsub / num_trials in the
# config cell) and 40*99 channels x per-channel features -- TODO confirm and replace the
# literals with named constants.
main_k_fold(data_R.reshape(32*40,40*99),data_S.reshape(32*40,40*99))


################# Valence #################
For K value - 20 and nsplits 32 shuffle - False
Average scores for all folds:
> Accuracy: 50.859375 (+- 8.18748509063528)
------------------------------------------------------------------------
################# Arousal #################
For K value - 20 and nsplits 32 shuffle - False
Average scores for all folds:
> Accuracy: 53.984375 (+- 6.698279134178793)
------------------------------------------------------------------------
################# Dominance #################
For K value - 20 and nsplits 32 shuffle - False
Average scores for all folds:
> Accuracy: 55.078125 (+- 11.206240849828948)
------------------------------------------------------------------------
#################Liking#################
For K value - 20 and nsplits 32 shuffle - False
Average scores for all folds:
> Accuracy: 62.5 (+- 9.921567416492215)
------------------------------------------------------------------------


In [None]:
# Pasted copy of the In [68] output above (identical numbers), presumably kept for
# comparison with the In [71] results. Commented out so that this cell does not raise
# a SyntaxError under Restart & Run All; consider moving it to a markdown cell.
#
# ################# Valence #################
# For K value - 20 and nsplits 32 shuffle - False
# Average scores for all folds:
# > Accuracy: 47.890625 (+- 6.906444425996273)
# ------------------------------------------------------------------------
# ################# Arousal #################
# For K value - 20 and nsplits 32 shuffle - False
# Average scores for all folds:
# > Accuracy: 51.25 (+- 11.659223816361019)
# ------------------------------------------------------------------------
# ################# Dominance #################
# For K value - 20 and nsplits 32 shuffle - False
# Average scores for all folds:
# > Accuracy: 49.84375 (+- 10.842218912081604)
# ------------------------------------------------------------------------
# #################Liking#################
# For K value - 20 and nsplits 32 shuffle - False
# Average scores for all folds:
# > Accuracy: 64.375 (+- 12.005857945186591)
# ------------------------------------------------------------------------
