In [1]:
# import the usual libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.metrics import confusion_matrix, classification_report
from sktime.classification.interval_based import CanonicalIntervalForest
from sktime.classification.kernel_based import RocketClassifier

In [2]:
#ignore warnings
import warnings
warnings.simplefilter(action='ignore')

In [3]:
# import the .csv files in merged_opensmile_out as dataframes in a dictionary
import os
import glob

# get the current directory
path = os.getcwd()

print(path)
# get the path to the directory with the csv files
path = path + '/merged_opensmile_out_cut'
# get the list of files in the directory
all_files = glob.glob(path + "/*.csv")

# create an empty dictionary to store the dataframes
data = {}
# loop through the list of files
for filename in all_files:
    # get the name of the file
    name = os.path.basename(filename)
    # delete the .csv extension
    name = name[:-4]
    # read the file into a dataframe
    df = pd.read_csv(filename, index_col='Unnamed: 0', header=0)
    # drop the columns starting with timestamp
    df = df.drop(df.filter(regex='timestamp').columns, axis=1)
    # store the dataframe in the dictionary
    data[name] = df

# get the path to the directory with the csv files
path = os.getcwd()

path = path + '/opensmile_out_A_cut'
# get the list of files in the directory
all_files = glob.glob(path + "/*.csv")
print(all_files)
# create an empty dictionary to store the dataframes
data_A = {}
# loop through the list of files
for filename in all_files:
    # get the name of the file
    name = os.path.basename(filename)
    # delete the .csv extension
    name = name[:-4]
    # read the file into a dataframe
    df = pd.read_csv(filename, index_col='Unnamed: 0', header=0)
    # drop the columns starting with timestamp
    df = df.drop(df.filter(regex='timestamp').columns, axis=1)
    # store the dataframe in the dictionary
    data_A[name] = df

/home/h20/frru0901/deception_experiment
['/home/h20/frru0901/deception_experiment/opensmile_out_A_cut/12_5.csv', '/home/h20/frru0901/deception_experiment/opensmile_out_A_cut/17_0.csv', '/home/h20/frru0901/deception_experiment/opensmile_out_A_cut/13_0.csv', '/home/h20/frru0901/deception_experiment/opensmile_out_A_cut/15_2.csv', '/home/h20/frru0901/deception_experiment/opensmile_out_A_cut/19_0.csv', '/home/h20/frru0901/deception_experiment/opensmile_out_A_cut/4_3.csv', '/home/h20/frru0901/deception_experiment/opensmile_out_A_cut/2_3.csv', '/home/h20/frru0901/deception_experiment/opensmile_out_A_cut/18_4.csv', '/home/h20/frru0901/deception_experiment/opensmile_out_A_cut/21_2.csv', '/home/h20/frru0901/deception_experiment/opensmile_out_A_cut/9_4.csv', '/home/h20/frru0901/deception_experiment/opensmile_out_A_cut/16_2.csv', '/home/h20/frru0901/deception_experiment/opensmile_out_A_cut/12_1.csv', '/home/h20/frru0901/deception_experiment/opensmile_out_A_cut/5_0.csv', '/home/h20/frru0901/decepti

In [4]:
# check the number of missing values in data and in data_A
missing_values = {}
for key in data.keys():
    missing_values[key] = data[key].isnull().sum().sum()
missing_values_A = {}
for key in data_A.keys():
    missing_values_A[key] = data_A[key].isnull().sum().sum()

print(missing_values)
print(missing_values_A)

{'12_5': 0, '17_0': 0, '13_0': 0, '15_2': 0, '19_0': 0, '4_3': 0, '2_3': 0, '18_4': 0, '21_2': 0, '9_4': 0, '16_2': 0, '12_1': 0, '5_0': 0, '20_2': 0, '3_3': 0, '17_1': 0, '6_3': 0, '3_1': 0, '14_0': 0, '4_0': 0, '6_1': 0, '10_0': 0, '19_4': 0, '14_1': 0, '20_1': 0, '6_0': 0, '16_1': 0, '11_4': 0, '10_1': 0, '14_3': 0, '3_2': 0, '2_5': 0, '12_3': 0, '15_0': 0, '22_2': 0, '12_0': 0, '11_3': 0, '18_3': 0, '21_1': 0, '5_1': 0, '5_3': 0, '8_0': 0, '4_2': 0, '2_6': 0, '22_1': 0, '18_2': 0, '8_1': 0, '10_2': 0, '19_1': 0, '15_3': 0, '1_1': 0, '12_6': 0, '2_0': 0, '9_1': 0, '7_3': 0, '7_1': 0, '15_4': 0, '21_0': 0, '13_1': 0, '1_0': 0, '5_2': 0, '19_3': 0, '10_3': 0, '2_1': 0, '22_0': 0, '1_3': 0, '13_2': 0, '20_0': 0, '1_4': 0, '7_4': 0, '17_3': 0, '15_1': 0, '2_4': 0, '8_2': 0, '9_3': 0, '12_4': 0, '21_4': 0, '11_1': 0, '17_2': 0, '9_2': 0, '8_4': 0, '11_0': 0, '19_2': 0, '7_2': 0, '12_2': 0, '3_0': 0, '18_1': 0, '16_3': 0, '14_2': 0, '8_3': 0, '13_3': 0, '18_0': 0, '11_2': 0, '9_0': 0, '16

In [5]:
sorted_keys = sorted(list(data.keys()))
print(sorted_keys)
# create a list of groups, where each group is given by the elements of sorted_keys, except the last two characters
groups = list(set([key[:-2] for key in sorted_keys]))
print(sorted(groups))

group_dict = {}
for group in groups:
    # create a list of keys for the current group
    keys = [key for key in sorted_keys if key[:-2] == group]
    # create a list of dataframes for the current group
    dfs = [data[key] for key in keys]

    # append the list of dataframes to the dictionary
    group_dict[group] = dfs

['10_0', '10_1', '10_2', '10_3', '11_0', '11_1', '11_2', '11_3', '11_4', '12_0', '12_1', '12_2', '12_3', '12_4', '12_5', '12_6', '13_0', '13_1', '13_2', '13_3', '14_0', '14_1', '14_2', '14_3', '15_0', '15_1', '15_2', '15_3', '15_4', '16_0', '16_1', '16_2', '16_3', '17_0', '17_1', '17_2', '17_3', '18_0', '18_1', '18_2', '18_3', '18_4', '19_0', '19_1', '19_2', '19_3', '19_4', '1_0', '1_1', '1_2', '1_3', '1_4', '20_0', '20_1', '20_2', '21_0', '21_1', '21_2', '21_3', '21_4', '22_0', '22_1', '22_2', '22_3', '2_0', '2_1', '2_2', '2_3', '2_4', '2_5', '2_6', '3_0', '3_1', '3_2', '3_3', '3_4', '4_0', '4_1', '4_2', '4_3', '5_0', '5_1', '5_2', '5_3', '6_0', '6_1', '6_2', '6_3', '7_0', '7_1', '7_2', '7_3', '7_4', '8_0', '8_1', '8_2', '8_3', '8_4', '9_0', '9_1', '9_2', '9_3', '9_4']
['1', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '2', '20', '21', '22', '3', '4', '5', '6', '7', '8', '9']


In [6]:
print(len(group_dict['7']))

5


In [7]:
# read the full_dataset.csv file into a dataframe. Keep only the 'Dyad Number' and 'Truth/Lie' columns
full_dataset = pd.read_csv('full_dataset.csv', usecols=['Dyad Number', 'Truth/Lie'])
# delete the duplicates in the full_dataset dataframe based on the 'Dyad Number' column
full_dataset = full_dataset.drop_duplicates(subset='Dyad Number')

In [8]:
print(type(data['1_1']))

<class 'pandas.core.frame.DataFrame'>


In [9]:
# Create a function to transform the dataframes in a dictionary into a single 3d numpy array, structured as (n_samples, n_features, n_timepoints).
# Use the keys of the dictionary, as integers, from the smallest to the largest, as the first dimension of the numpy array.
#Use the columns of the dataframes as the second dimension of the numpy array.
# Use the rows of the dataframes as the third dimension of the numpy array.

def dict_to_array(data):
    # get the number of keys
    n_keys = len(list(data.keys()))
    # get the number of columns
    n_columns = data[list(data.keys())[0]].shape[1]
    # get the number of rows
    n_rows = data[list(data.keys())[0]].shape[0]
    # create an empty numpy array
    array = np.zeros((n_keys, n_columns, n_rows))
    # create an empty list to store the groups from the keys
    groups = np.array([])
    # loop through the keys
    for i in range(n_keys):
        # get the key
        key = list(data.keys())[i]
        # get the group: the key except the last two characters
        group = key[:-2]
        # append the group to the list
        groups = np.append(groups, int(group))
        df = data[key]
        # get the values of the dataframe
        values = df.values
        # store the values in the numpy array
        array[i, :, :] = values.T
    for element in groups:
        element = int(element)
    return array, groups

In [10]:
# transform the dataframes in the dictionary into a single 3d numpy array
print('working on X')
X, groups_X = dict_to_array(data)
print('working on X_A')
X_A, groups_XA = dict_to_array(data_A)

# create a label array, there 'Lie' is 0 and 'Truth' is 1
y = full_dataset['Truth/Lie'].values
y = np.where(y == 'Lie', 0, 1)

working on X
working on X_A


In [11]:
for element in groups_X:
    element = int(element)

In [12]:
# create a dictionary using the "Dyad Number" column of the full_dataset dataframe as keys and the "Truth/Lie" column as values, where 'Lie' is 0 and 'Truth' is 1
map = full_dataset.set_index('Dyad Number').to_dict()['Truth/Lie']
# change each truth value in map to 1 and each lie value to 0
for key in map.keys():
    map[key] = 1 if map[key] == 'Truth' else 0

# create a numpy array mapping each value in groups to the corresponding value in map
y = np.array([map[group] for group in groups_X])
y_a = np.array([map[group] for group in groups_XA])

In [13]:
# create a canonical interval forest model
cif = CanonicalIntervalForest(n_estimators=100, random_state=47, n_jobs=-1)

# create a rocket model
rocket = RocketClassifier(num_kernels=1000, random_state=47, n_jobs=-1)

In [14]:
# create a function to perform the training using leave one out cross validation and to create the confusion matrix and the classification report
def train_one_out(X, y, model):
    # create a leave one out cross validation object
    logo = LeaveOneGroupOut()
    # create an empty list to store the predictions
    predictions = []
    # loop through the training and test sets
    for i, (train_index, test_index) in enumerate(logo.split(X, y, groups=groups_X)):
        print(f"Fold {i}:")
        print(f"  Train: index={train_index}")
        print(f"  Test:  index={test_index}")
        # train the cif model
        model.fit(X[train_index], y[train_index])
        # the test set contains multiple samples. Produce a prediction for each sample
        for j in range(len(test_index)):
            # get the test sample
            X_test = X[test_index][j]
            # reshape X_test to 1, dimnsion 0, dimension 1
            X_test = X_test.reshape(1, X_test.shape[0], X_test.shape[1])
            y_pred = model.predict(X_test)
            # store the prediction
            predictions.append(y_pred)
        print(len(predictions))
    print('for loop done')
    # create the confusion matrix
    cm = confusion_matrix(y, predictions)
    # create the classification report
    cr = classification_report(y, predictions)
    return cm, cr, predictions

In [15]:
cm_rocket, cr_rocket, predictions_rocket = train_one_out(X, y, rocket)

Fold 0:
  Train: index=[  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  51  52  53  54
  55  56  57  58  60  61  62  63  64  66  67  69  70  71  72  73  74  75
  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91  92  93
  94  95  96  98  99 100 101 102]
  Test:  index=[50 59 65 68 97]
5
Fold 1:
  Train: index=[  0   1   2   3   4   5   7   8   9  10  11  12  13  14  15  16  17  18
  19  20  21  22  23  24  25  26  27  28  29  30  32  33  34  35  36  37
  38  39  40  41  42  44  45  46  47  48  49  50  51  53  54  55  56  57
  58  59  60  61  62  64  65  66  67  68  69  70  71  73  74  75  76  77
  78  79  80  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95
  96  97  98 100 101 102]
  Test:  index=[ 6 31 43 52 63 72 99]
12
Fold 2:
  Train: index=[  0   1   2   3   4   5   6   7   8   9  10  11  12  13  15  16  

In [None]:
cm_cif, cr_cif, predictions_cif = train_one_out(X, y, cif)

In [None]:
cm_rocket_A, cr_rocket_A, predictions_rocket_A = train_one_out(X_A, y, rocket)

In [None]:
cm_cif_A, cr_cif_A, predictions_cif_A = train_one_out(X_A, y, cif)

In [None]:
# create a file to store the results

with open('results.txt', 'a') as f:
    f.write('Rocket on merged_opensmile_out_cut\n')
    f.write('Confusion Matrix:\n')
    f.write(str(cm_rocket))
    f.write('\n')
    f.write('Classification Report:\n')
    f.write(cr_rocket)
    f.write('\n')
    f.write('Rocket on opensmile_out_A_cut\n')
    f.write('Confusion Matrix:\n')
    f.write(str(cm_rocket_A))
    f.write('\n')
    f.write('Classification Report:\n')
    f.write(cr_rocket_A)
    f.write('\n')
    f.write('CIF on merged_opensmile_out_cut\n')
    f.write('Confusion Matrix:\n')
    f.write(str(cm_cif))
    f.write('\n')
    f.write('Classification Report:\n')
    f.write(cr_cif)
    f.write('\n')
    f.write('CIF on opensmile_out_A_cut\n')
    f.write('Confusion Matrix:\n')
    f.write(str(cm_cif_A))
    f.write('\n')
    f.write('Classification Report:\n')
    f.write(cr_cif_A)
    f.write('\n')