In [81]:
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder

# Extraccion y carga de datos

In [82]:
def read_file(filename):
    x = pickle._Unpickler(open(filename,'rb'))
    x.encoding='latin1'
    p = x.load()
    return p

In [83]:
files = []
for n in range(5,9):
    s = ''
    if n < 10:
        s+='0'
    s+=str(n)
    files.append(s)
print(files)    

['05', '06', '07', '08']


In [84]:
labels = []
data = []
for i in files:
    filename = "Archivos/s" + i + ".dat"
    trial = read_file(filename)
    labels.append(trial['labels'])
    data.append(trial['data'])
    
labels = np.array(labels)
labels = labels.flatten()
labels = labels.reshape(160, 4) #video/trial x label (valence, arousal, dominance, liking)


data = np.array(data)
data = data.flatten()
data = data.reshape(160, 40, 8064)# video/trial x channel x data


In [85]:
print('Labels: ', labels.shape)
print('Data', data.shape)

Labels:  (160, 4)
Data (160, 40, 8064)


# One hot encoder para labels

In [86]:
# Funtion to check if trial has positive or negative valence
def valence(trial):
    return 1 if labels[trial,0] >= np.median(labels[:,0]) else 0

# Funtion to check if trial has high or low arousal

def araousal(trial):
    return 1 if labels[trial,1] >= np.median(labels[:,1]) else 0

In [87]:
labels_encoded = []
for i in range (len(labels)):
    labels_encoded.append([valence(i),araousal(i)])
    
labels_encoded = np.reshape(labels_encoded, (160,2))
df_labels = pd.DataFrame(data =labels_encoded,columns=['Valence','Arousal'])
print(df_labels.describe())

         Valence     Arousal
count  160.00000  160.000000
mean     0.50000    0.506250
std      0.50157    0.501531
min      0.00000    0.000000
25%      0.00000    0.000000
50%      0.50000    1.000000
75%      1.00000    1.000000
max      1.00000    1.000000


In [88]:
df_valence = df_labels['Valence']
df_valence

0      1
1      1
2      0
3      1
4      0
      ..
155    0
156    0
157    0
158    0
159    0
Name: Valence, Length: 160, dtype: int32

In [89]:
df_arousal = df_labels['Arousal']


# array para los 32 canales 

In [90]:
eeg_channels = []
for n in range(1, 33):
    eeg_channels.append(n)
print(eeg_channels)
    


[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]


In [91]:
egg_data = []
for i in range (len(data)):
    for j in range (len(eeg_channels)):
        egg_data.append(data[i,j])
egg_data = np.reshape(egg_data, (len(data),len(eeg_channels),len(data[0,0])))
print(egg_data.shape)


(160, 32, 8064)


# Realizando extraccion de caracteristicas

In [92]:
mean_data = np.mean(egg_data, axis=2)
var_data = np.var(egg_data, axis=2) 
median_data = np.median(egg_data, axis=2)
feature_data = np.concatenate((mean_data, var_data, median_data), axis=1)
feature_data = pd.DataFrame(feature_data)
feature_data


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,86,87,88,89,90,91,92,93,94,95
0,-0.124613,-0.213920,-0.108840,0.238012,-0.044924,-0.018392,-0.133522,0.491180,0.140811,-0.440095,...,-0.033168,-0.891401,-0.076324,0.213730,0.083256,-0.429163,0.106847,-0.125578,-0.047205,0.297791
1,-0.038647,-0.103394,-0.016777,0.098475,-0.048333,-0.016133,-0.067308,0.216928,0.094275,-0.213614,...,-0.059511,-0.939760,-0.042556,0.352929,0.226317,-0.324682,0.185749,0.028388,0.016532,0.408709
2,-0.101349,0.017155,-0.086764,0.190292,0.134925,0.204365,0.114452,0.283354,-0.037267,-0.045507,...,-0.123067,0.275576,0.067654,-0.075377,-0.091235,0.116436,-0.129738,-0.177661,-0.221460,-0.281146
3,0.204372,-0.127836,0.193915,-0.154159,-0.213724,-0.344462,-0.241752,-0.126305,0.261137,-0.225269,...,0.061754,-0.855222,-0.086867,0.431572,-0.149623,0.046950,0.389890,0.121562,0.175134,-0.033605
4,0.670014,0.697445,0.467880,-1.242901,-0.091543,-0.333689,0.327771,-2.299656,-0.379023,1.731752,...,0.134903,2.812627,0.469224,-0.598827,-1.023712,2.170608,-0.336869,0.145911,0.290126,-1.614567
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,-0.034560,-0.125993,-0.011701,0.089914,-0.020768,-0.104013,0.054367,-0.090257,-0.093217,0.044990,...,0.037330,0.004209,-0.142122,0.206289,-0.038145,0.013670,-0.015553,-0.296836,0.121054,0.233882
156,0.001392,-0.079255,-0.234174,-0.622337,0.112699,-0.147966,-0.360885,-0.620235,0.183689,-0.366755,...,-0.034174,0.130220,-0.009028,0.072097,0.260309,-0.152198,0.350030,0.547847,0.328656,0.188243
157,-0.009958,0.123989,0.114569,0.331677,0.029168,0.086859,0.146303,0.235718,-0.089044,0.132446,...,0.030623,-0.142049,-0.091385,0.090471,-0.134790,-0.033024,-0.105118,-0.345880,-0.083218,-0.083567
158,-0.153765,-0.439413,-0.360844,-0.596541,0.183947,-0.365720,-0.335553,-0.974069,0.083692,-0.283637,...,0.199438,0.538902,-0.319997,0.158450,-0.042942,-0.125502,0.480972,0.205598,0.680985,0.500240


# Unificando la data

In [93]:
unified_data_valence = pd.concat([feature_data, df_valence,df_arousal],axis=1)
unified_data_valence

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,88,89,90,91,92,93,94,95,Valence,Arousal
0,-0.124613,-0.213920,-0.108840,0.238012,-0.044924,-0.018392,-0.133522,0.491180,0.140811,-0.440095,...,-0.076324,0.213730,0.083256,-0.429163,0.106847,-0.125578,-0.047205,0.297791,1,0
1,-0.038647,-0.103394,-0.016777,0.098475,-0.048333,-0.016133,-0.067308,0.216928,0.094275,-0.213614,...,-0.042556,0.352929,0.226317,-0.324682,0.185749,0.028388,0.016532,0.408709,1,1
2,-0.101349,0.017155,-0.086764,0.190292,0.134925,0.204365,0.114452,0.283354,-0.037267,-0.045507,...,0.067654,-0.075377,-0.091235,0.116436,-0.129738,-0.177661,-0.221460,-0.281146,0,0
3,0.204372,-0.127836,0.193915,-0.154159,-0.213724,-0.344462,-0.241752,-0.126305,0.261137,-0.225269,...,-0.086867,0.431572,-0.149623,0.046950,0.389890,0.121562,0.175134,-0.033605,1,1
4,0.670014,0.697445,0.467880,-1.242901,-0.091543,-0.333689,0.327771,-2.299656,-0.379023,1.731752,...,0.469224,-0.598827,-1.023712,2.170608,-0.336869,0.145911,0.290126,-1.614567,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,-0.034560,-0.125993,-0.011701,0.089914,-0.020768,-0.104013,0.054367,-0.090257,-0.093217,0.044990,...,-0.142122,0.206289,-0.038145,0.013670,-0.015553,-0.296836,0.121054,0.233882,0,1
156,0.001392,-0.079255,-0.234174,-0.622337,0.112699,-0.147966,-0.360885,-0.620235,0.183689,-0.366755,...,-0.009028,0.072097,0.260309,-0.152198,0.350030,0.547847,0.328656,0.188243,0,0
157,-0.009958,0.123989,0.114569,0.331677,0.029168,0.086859,0.146303,0.235718,-0.089044,0.132446,...,-0.091385,0.090471,-0.134790,-0.033024,-0.105118,-0.345880,-0.083218,-0.083567,0,0
158,-0.153765,-0.439413,-0.360844,-0.596541,0.183947,-0.365720,-0.335553,-0.974069,0.083692,-0.283637,...,-0.319997,0.158450,-0.042942,-0.125502,0.480972,0.205598,0.680985,0.500240,0,0


# Revision de balance en los datos

In [94]:
print(unified_data_valence['Valence'].value_counts())
print(unified_data_valence['Arousal'].value_counts())


1    80
0    80
Name: Valence, dtype: int64
1    81
0    79
Name: Arousal, dtype: int64


# Corroborando si los datos estan correctamente unidos

In [95]:
print('feature_data shape:', feature_data.shape)
print('df_valence shape:', df_valence.shape)
print('df_arousal shape:', df_arousal.shape)
print('unified_data_valence shape:', unified_data_valence.shape)


feature_data shape: (160, 96)
df_valence shape: (160,)
df_arousal shape: (160,)
unified_data_valence shape: (160, 98)


# Arboles de decision sin normalizar

## Valence

In [114]:
from sklearn.model_selection import train_test_split

X = unified_data_valence.drop(['Valence', 'Arousal'], axis=1) # características
y = unified_data_valence[['Valence']]  # etiquetas
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [115]:
y

Unnamed: 0,Valence
0,1
1,1
2,0
3,1
4,0
...,...
155,0
156,0
157,0
158,0


In [116]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# crear modelo de árbol de decisión
dtc = DecisionTreeClassifier()

# ajustar modelo a los datos de entrenamiento
dtc.fit(X_train, y_train)

# hacer predicciones sobre los datos de prueba
y_pred = dtc.predict(X_test)

# calcular la precisión del modelo
accuracy = accuracy_score(y_test, y_pred)
print("Precisión del modelo de árbol de decisión:", accuracy)


Precisión del modelo de árbol de decisión: 0.4791666666666667


## Arousal

In [98]:

X2 = unified_data_valence.drop(['Valence', 'Arousal'], axis=1) # características
y2 = unified_data_valence[['Arousal']]  # etiquetas
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [99]:
# crear modelo de árbol de decisión
dtc = DecisionTreeClassifier(random_state=42)

# ajustar modelo a los datos de entrenamiento
dtc.fit(X_train, y_train)

# hacer predicciones sobre los datos de prueba
y_pred = dtc.predict(X_test)

# calcular la precisión del modelo
accuracy = accuracy_score(y_test, y_pred)
print("Precisión del modelo de árbol de decisión:", accuracy)


Precisión del modelo de árbol de decisión: 0.1875


## Normalizacion de datos

In [100]:
from sklearn.preprocessing import StandardScaler
# crea un objeto StandardScaler
scaler = StandardScaler()
# escala los datos
scaled_data = scaler.fit_transform(unified_data_valence.drop(['Valence', 'Arousal'], axis=1))

In [101]:
#Se crea un DataFrame con los datos escalados y las etiquetas Valence y Arousal
scaled_data_df = pd.DataFrame(scaled_data, columns=unified_data_valence.drop(['Valence', 'Arousal'], axis=1).columns)
scaled_data_df['Valence'] = unified_data_valence['Valence']
scaled_data_df['Arousal'] = unified_data_valence['Arousal']

## Valence

In [102]:
# dividir los datos en conjunto de entrenamiento y prueba (70% para entrenamiento, 30% para prueba)
X_train, X_test, y_train, y_test = train_test_split(unified_data_valence.iloc[:, :-2], unified_data_valence['Valence'], test_size=0.3)

# crear una instancia del clasificador de árbol de decisión
tree = DecisionTreeClassifier()

# ajustar el modelo a los datos de entrenamiento
tree.fit(X_train, y_train)

# predecir las etiquetas de los datos de prueba
y_pred = tree.predict(X_test)


# evaluar la precisión del modelo
accuracy = accuracy_score(y_test, y_pred)
print("Precisión del árbol de decisión: {:.2f}%".format(accuracy*100))


Precisión del árbol de decisión: 54.17%


## Arousal

In [103]:
# dividir los datos en conjunto de entrenamiento y prueba (70% para entrenamiento, 30% para prueba)
X_train, X_test, y_train, y_test = train_test_split(unified_data_valence.iloc[:, :-2], unified_data_valence['Arousal'], test_size=0.3)

# crear una instancia del clasificador de árbol de decisión
tree = DecisionTreeClassifier()

# ajustar el modelo a los datos de entrenamiento
tree.fit(X_train, y_train)

# predecir las etiquetas de los datos de prueba
y_pred = tree.predict(X_test)


# evaluar la precisión del modelo
accuracy = accuracy_score(y_test, y_pred)
print("Precisión del árbol de decisión: {:.2f}%".format(accuracy*100))


Precisión del árbol de decisión: 39.58%


# Ramdon ForestClassifier

## Sin normalizar

## Valence

In [104]:
from sklearn.model_selection import train_test_split

# Separar en características y etiquetas
X = unified_data_valence.drop(['Valence', 'Arousal'], axis=1) # características
y = unified_data_valence[['Valence']]  # etiquetas

# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [105]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

# Definir el objeto KFold
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Crear el modelo de Random Forest
rfc = RandomForestClassifier(n_estimators=100, random_state=42)

# Iniciar el bucle de validación cruzada
scores = []
for train_index, test_index in kf.split(X_train):
    # Seleccionar los conjuntos de entrenamiento y prueba para esta iteración
    X_train_kf, X_test_kf = X_train.iloc[train_index], X_train.iloc[test_index]
    y_train_kf, y_test_kf = y_train.iloc[train_index], y_train.iloc[test_index]
    
    # Convertir y_train_kf a una matriz unidimensional
    y_train_kf = y_train_kf.values.ravel()

    # Entrenar el modelo de Random Forest en el conjunto de entrenamiento actual
    rfc.fit(X_train_kf, y_train_kf)

    # Predecir las etiquetas en el conjunto de prueba actual
    y_pred_kf = rfc.predict(X_test_kf)

    # Calcular el puntaje de precisión y agregarlo a la lista de puntajes
    score = accuracy_score(y_test_kf, y_pred_kf)
    scores.append(score)

# Calcular el puntaje promedio de precisión y mostrarlo
mean_score = sum(scores) / len(scores)
print(f'El puntaje promedio de precisión es: {mean_score:.2f}')


El puntaje promedio de precisión es: 0.47


## Normalizando

In [106]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

# Separar en características y etiquetas
X = unified_data_valence.drop(
    ['Valence', 'Arousal'], axis=1)  # características
y = unified_data_valence[['Valence']]  # etiquetas

# Normalizar los datos
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Definir el objeto KFold
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Crear el modelo de Random Forest
rfc = RandomForestClassifier(n_estimators=100, random_state=42)

# Iniciar el bucle de validación cruzada
scores = []
for train_index, test_index in kf.split(X):
    # Seleccionar los conjuntos de entrenamiento y prueba para esta iteración
    X_train_kf, X_test_kf = X[train_index], X[test_index]
    y_train_kf, y_test_kf = y.iloc[train_index], y.iloc[test_index]
    
    # Entrenar el modelo de Random Forest en el conjunto de entrenamiento actual
    rfc.fit(X_train_kf, y_train_kf.values.ravel())

    # Predecir las etiquetas en el conjunto de prueba actual
    y_pred_kf = rfc.predict(X_test_kf)

    # Calcular el puntaje de precisión y agregarlo a la lista de puntajes
    score = accuracy_score(y_test_kf, y_pred_kf)
    scores.append(score)

# Calcular el puntaje promedio de precisión y mostrarlo
mean_score = sum(scores) / len(scores)
print(f'El puntaje promedio de precisión es: {mean_score:.2f}')


El puntaje promedio de precisión es: 0.51


## Arousal

In [107]:
from sklearn.model_selection import train_test_split

# Separar en características y etiquetas
X = unified_data_valence.drop(['Valence', 'Arousal'], axis=1) # características
y = unified_data_valence[['Arousal']]  # etiquetas

# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [108]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

# Definir el objeto KFold
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Crear el modelo de Random Forest
rfc = RandomForestClassifier(n_estimators=100, random_state=42)

# Iniciar el bucle de validación cruzada
scores = []
for train_index, test_index in kf.split(X_train):
    # Seleccionar los conjuntos de entrenamiento y prueba para esta iteración
    X_train_kf, X_test_kf = X_train.iloc[train_index], X_train.iloc[test_index]
    y_train_kf, y_test_kf = y_train.iloc[train_index], y_train.iloc[test_index]
    
    # Convertir y_train_kf a una matriz unidimensional
    y_train_kf = y_train_kf.values.ravel()

    # Entrenar el modelo de Random Forest en el conjunto de entrenamiento actual
    rfc.fit(X_train_kf, y_train_kf)

    # Predecir las etiquetas en el conjunto de prueba actual
    y_pred_kf = rfc.predict(X_test_kf)

    # Calcular el puntaje de precisión y agregarlo a la lista de puntajes
    score = accuracy_score(y_test_kf, y_pred_kf)
    scores.append(score)

# Calcular el puntaje promedio de precisión y mostrarlo
mean_score = sum(scores) / len(scores)
print(f'El puntaje promedio de precisión es: {mean_score:.2f}')


El puntaje promedio de precisión es: 0.49


## Normalizado

In [109]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

# Separar en características y etiquetas
X = unified_data_valence.drop(
    ['Valence', 'Arousal'], axis=1)  # características
y = unified_data_valence[['Arousal']]  # etiquetas

# Normalizar los datos
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Definir el objeto KFold
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Crear el modelo de Random Forest
rfc = RandomForestClassifier(n_estimators=100, random_state=42)

# Iniciar el bucle de validación cruzada
scores = []
for train_index, test_index in kf.split(X):
    # Seleccionar los conjuntos de entrenamiento y prueba para esta iteración
    X_train_kf, X_test_kf = X[train_index], X[test_index]
    y_train_kf, y_test_kf = y.iloc[train_index], y.iloc[test_index]

    # Entrenar el modelo de Random Forest en el conjunto de entrenamiento actual
    rfc.fit(X_train_kf, y_train_kf.values.ravel())

    # Predecir las etiquetas en el conjunto de prueba actual
    y_pred_kf = rfc.predict(X_test_kf)

    # Calcular el puntaje de precisión y agregarlo a la lista de puntajes
    score = accuracy_score(y_test_kf, y_pred_kf)
    scores.append(score)

# Calcular el puntaje promedio de precisión y mostrarlo
mean_score = sum(scores) / len(scores)
print(f'El puntaje promedio de precisión es: {mean_score:.2f}')


El puntaje promedio de precisión es: 0.42


# SVM

## Kernel lineal (Linear kernel) sin normalizar

### Kernel lineal (Linear kernel) normalizado