In [68]:
import pandas as pd
import numpy as np
import statistics

## BEBRASK DATASET COUNT

In [69]:
BEBRASK_tasks = pd.read_excel('../Datasets/BEBRASK_task.xlsx')
BEBRASK_tasks = BEBRASK_tasks[["DataFile.Basename","Rating0.RESP","Rating.RESP","EvokedEmotion", "Expression", "ScenarioPick","TrialCount","Fulfilled"]].copy()
BEBRASK_tasks.dropna(subset = ["TrialCount"],inplace=True)
subjects_id = BEBRASK_tasks["DataFile.Basename"].unique()

#Eleccion de las variables relevantes para el clustering, y eliminación de las filas que no corresponden a trials.

In [70]:
BEBRASK_tasks_long= BEBRASK_tasks.pivot(index="DataFile.Basename", columns='TrialCount')
BEBRASK_tasks_long.columns = ['_'.join(str(col) for col in cols) for cols in BEBRASK_tasks_long.columns.values]
BEBRASK_tasks_long.head(5)
#Pivotamos el dataset por los trials, de manera que ahora se tienen 55 filas (los 55 sujetos) y cada una contiene toda la información del experimento

Unnamed: 0_level_0,Rating0.RESP_1.0,Rating0.RESP_2.0,Rating0.RESP_3.0,Rating0.RESP_4.0,Rating0.RESP_5.0,Rating0.RESP_6.0,Rating0.RESP_7.0,Rating0.RESP_8.0,Rating0.RESP_9.0,Rating0.RESP_10.0,...,Fulfilled_36.0,Fulfilled_37.0,Fulfilled_38.0,Fulfilled_39.0,Fulfilled_40.0,Fulfilled_41.0,Fulfilled_42.0,Fulfilled_43.0,Fulfilled_44.0,Fulfilled_45.0
DataFile.Basename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
PREDWELL_RETOS-1001-1,3.0,3.0,1.0,4.0,2.0,3.0,4.0,1.0,3.0,4.0,...,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0
PREDWELL_RETOS-1002-1,3.0,3.0,1.0,1.0,2.0,1.0,3.0,4.0,3.0,2.0,...,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0
PREDWELL_RETOS-1003-1,2.0,1.0,1.0,1.0,3.0,3.0,3.0,3.0,2.0,1.0,...,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0
PREDWELL_RETOS-1004-1,2.0,3.0,4.0,4.0,1.0,3.0,1.0,1.0,3.0,2.0,...,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0
PREDWELL_RETOS-1005-1,2.0,1.0,2.0,1.0,1.0,4.0,3.0,4.0,1.0,3.0,...,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0


In [71]:
print("Mean difference pred/like:",np.mean(abs(BEBRASK_tasks["Rating0.RESP"] - BEBRASK_tasks["Rating.RESP"])))
print("Std difference pred/like:",np.std(abs(BEBRASK_tasks["Rating0.RESP"] - BEBRASK_tasks["Rating.RESP"])))
BEBRASK_tasks_long.isna().sum()

#Observamos que algunos valores de Rating0.RESP y Rating.RESP son NA y que la diferencia entre el valor Rating0 y Rating no suele ser mayor a 1, por lo tanto imputaremos los valores a partir del otro Rating en caso de que no falte el valor  

Mean difference pred/like: 0.8911592994161801
Std difference pred/like: 0.8296508347297797


Rating0.RESP_1.0    0
Rating0.RESP_2.0    2
Rating0.RESP_3.0    0
Rating0.RESP_4.0    0
Rating0.RESP_5.0    1
                   ..
Fulfilled_41.0      0
Fulfilled_42.0      0
Fulfilled_43.0      0
Fulfilled_44.0      0
Fulfilled_45.0      0
Length: 270, dtype: int64

In [72]:
for i in range(1, 46):
    rating1_col = f'Rating0.RESP_{i}.0'
    rating2_col = f'Rating.RESP_{i}.0'

    # Llenamos los NA cen Rating0 con los valores de Rating, y viceversa
    BEBRASK_tasks_long[rating1_col].fillna(BEBRASK_tasks_long[rating2_col], inplace=True)
    BEBRASK_tasks_long[rating2_col].fillna(BEBRASK_tasks_long[rating1_col], inplace=True)

BEBRASK_tasks_long.isna().sum()
#Todos los valores NaN han sido llenados, por lo tanto no hace falta utilizar ningún otro método de imputación.

Rating0.RESP_1.0    0
Rating0.RESP_2.0    0
Rating0.RESP_3.0    0
Rating0.RESP_4.0    0
Rating0.RESP_5.0    0
                   ..
Fulfilled_41.0      0
Fulfilled_42.0      0
Fulfilled_43.0      0
Fulfilled_44.0      0
Fulfilled_45.0      0
Length: 270, dtype: int64

In [73]:
def creation_dictionary_from_df(df,n_subjects):
    """
    Procesa un DataFrame para crear un diccionario que organiza los datos relacionados
    con emociones (happiness, sadness, fear) para un número especificado de sujetos.

    Parámetros:
    - df (pandas.DataFrame): El DataFrame que contiene los datos de la encuesta. Cada fila representa un sujeto.
    - n_subjects (int): El número de sujetos a procesar del DataFrame.

    Retorna:
    - dict_df (dict): Un diccionario donde cada clave es un identificador del sujeto y su valor es otro diccionario. Este 
      diccionario anidado categoriza los datos en emociones 'Happy', 'Sad' y 'Fear'. Cada clave de emoción se mapea a otro 
      diccionario con claves 'Rating0' (Rating0.RESP), 'Rating' (Rating.RESP), 'Expression' (Expression) y 'Fulfill' (Fulfillment).
    """
    dict_df  = {}
    for j in range(0,n_subjects):
        subject = df.iloc[j]
        id = df.index.values[j]
        happy_pred = []
        happy_like = []
        happy_facial = []
        happy_fulfill = []
        sad_pred = []
        sad_like = []
        sad_facial = []
        sad_fulfill = []
        fear_pred = []
        fear_like = []
        fear_facial = []
        fear_fulfill = []
        
        for i in range(1,46):
            pred = f'Rating0.RESP_{i}.0'
            like = f'Rating.RESP_{i}.0'
            emotion = f'EvokedEmotion_{i}.0'
            facial = f'Expression_{i}.0'
            fulfill = f'Fulfilled_{i}.0'
            
            if subject[emotion] == "happiness":
                happy_pred.append(int(subject[pred]))
                happy_like.append(int(subject[like]))
                happy_facial.append(subject[facial])
                happy_fulfill.append(subject[fulfill])
                
            elif subject[emotion] == "sadness":
                sad_pred.append(int(subject[pred]))
                sad_like.append(int(subject[like]))
                sad_facial.append(subject[facial])
                sad_fulfill.append(subject[fulfill])
    
            else:
                fear_pred.append(int(subject[pred]))
                fear_like.append(int(subject[like]))
                fear_facial.append(subject[facial])
                fear_fulfill.append(subject[fulfill])
    
        dict_df[id] = {'Happy':{'Rating0':np.array(happy_pred), 'Rating': np.array(happy_like), 'Expression':np.array(happy_facial),'Fulfill':np.array(happy_fulfill)},'Sad':{ 'Rating0':np.array(sad_pred), 'Rating':np.array(sad_like), 'Expression':np.array(sad_facial),'Fulfill': np.array(sad_fulfill)}, 'Fear':{'Rating0':np.array(fear_pred),'Rating': np.array(fear_like),'Expression': np.array(fear_facial), 'Fulfill': np.array(fear_fulfill)}}
    
    return dict_df   

subject_dict_BEBRASK = creation_dictionary_from_df(BEBRASK_tasks_long,len(BEBRASK_tasks_long))

In [74]:
def creation_count_df(subject_dict,rating):
    """
    Procesa un diccionario con la información de las respuestas dadas por cada sujeto organizado por sentimiento, y crea
    un dataframe que contiene cuantas veces un sujeto ha dado una score a un tipo de trial (sujeto a la emoción evocada
    y a si es match o no).

    Parámetros:
    - subject_dict (dict): Diccionario que contiene la información de las scores organizadas por sujetos y por emociones.
    - rating (string): Rating a partir del cual hacer las agregaciones de las scores (Rating0 o Rating).

    Retorna:
    - df_counts (pandas.DataFrame): Un dataframe donde cada fila contiene el identificador del sujeto, la emoción evocada,
    si es match o no, la puntuación correspondiente y cuantas veces sucede.
      
    """
    processed_data = {}
    for id in subject_dict.keys():
        for j,key in enumerate(subject_dict[id].keys()):
                    
            one_no_match = np.count_nonzero((subject_dict[id][key][rating]==1) & (subject_dict[id][key]['Fulfill'] == 0),keepdims=True)[0]
            two_no_match = np.count_nonzero((subject_dict[id][key][rating]==2) & (subject_dict[id][key]['Fulfill'] == 0),keepdims=True)[0]
            three_no_match = np.count_nonzero((subject_dict[id][key][rating]==3) & (subject_dict[id][key]['Fulfill'] == 0),keepdims=True)[0]
            four_no_match = np.count_nonzero((subject_dict[id][key][rating]==4) & (subject_dict[id][key]['Fulfill'] == 0),keepdims=True)[0]
    
            one_match = np.count_nonzero((subject_dict[id][key][rating]==1) & (subject_dict[id][key]['Fulfill'] == 1),keepdims=True)[0]
            two_match = np.count_nonzero((subject_dict[id][key][rating]==2) & (subject_dict[id][key]['Fulfill'] == 1),keepdims=True)[0]
            three_match = np.count_nonzero((subject_dict[id][key][rating]==3) & (subject_dict[id][key]['Fulfill'] == 1),keepdims=True)[0]
            four_match = np.count_nonzero((subject_dict[id][key][rating]==4) & (subject_dict[id][key]['Fulfill'] == 1),keepdims=True)[0]
            if j == 0 :
                processed_data[id] = {key:{'Match':[one_match,two_match,three_match,four_match],'No_Match':[one_no_match,two_no_match,three_no_match,four_no_match]}}
            else:
                processed_data[id].update({key:{'Match':[one_match,two_match,three_match,four_match],'No_Match':[one_no_match,two_no_match,three_no_match,four_no_match]}})

    df_list = []
    
    for subject, emotions in processed_data.items():
        for emotion, match_data in emotions.items():
            for match_type, scores in match_data.items():
                for score, count in enumerate(scores, start=1):
                    df_list.append([subject, emotion, match_type == 'Match', score, count])
    
    df_counts = pd.DataFrame(df_list, columns=['Subject', 'Emotion', 'Match', 'Score', 'Count'])
    df_counts['Match'] = df_counts['Match'].astype(int)
    
    return df_counts

Rating0_BEBRASK_counts =creation_count_df(subject_dict_BEBRASK,"Rating0")
Rating_BEBRASK_counts =creation_count_df(subject_dict_BEBRASK,"Rating")

In [75]:
Rating0_BEBRASK_counts.head(5)

Unnamed: 0,Subject,Emotion,Match,Score,Count
0,PREDWELL_RETOS-1001-1,Happy,1,1,0
1,PREDWELL_RETOS-1001-1,Happy,1,2,0
2,PREDWELL_RETOS-1001-1,Happy,1,3,1
3,PREDWELL_RETOS-1001-1,Happy,1,4,8
4,PREDWELL_RETOS-1001-1,Happy,0,1,5


In [76]:
Rating_BEBRASK_counts.head(5)


Unnamed: 0,Subject,Emotion,Match,Score,Count
0,PREDWELL_RETOS-1001-1,Happy,1,1,0
1,PREDWELL_RETOS-1001-1,Happy,1,2,0
2,PREDWELL_RETOS-1001-1,Happy,1,3,6
3,PREDWELL_RETOS-1001-1,Happy,1,4,3
4,PREDWELL_RETOS-1001-1,Happy,0,1,1


In [77]:
multiindex_Rating0 = Rating0_BEBRASK_counts.set_index(['Subject', 'Emotion', 'Match', 'Score'])
Rating0_BEBRASK_df = multiindex_Rating0.unstack(level=['Emotion', 'Match', 'Score'])
Rating0_BEBRASK_df.columns = ['_'.join(map(str, col)).strip() for col in Rating0_BEBRASK_df.columns.values]
Rating0_BEBRASK_df = Rating0_BEBRASK_df.reset_index()

#Modificamos el Dataset de manera que cada fila represente completamente un sujeto con toda la información referente a sus ratings.
Rating0_BEBRASK_df.to_excel('../Clustering_Predictive_Processing/BEBRASK_Rating0_count.xlsx', index=False) 
Rating0_BEBRASK_df.head(5)



Unnamed: 0,Subject,Count_Happy_1_1,Count_Happy_1_2,Count_Happy_1_3,Count_Happy_1_4,Count_Happy_0_1,Count_Happy_0_2,Count_Happy_0_3,Count_Happy_0_4,Count_Sad_1_1,...,Count_Sad_0_3,Count_Sad_0_4,Count_Fear_1_1,Count_Fear_1_2,Count_Fear_1_3,Count_Fear_1_4,Count_Fear_0_1,Count_Fear_0_2,Count_Fear_0_3,Count_Fear_0_4
0,PREDWELL_RETOS-1001-1,0,0,1,8,5,0,0,1,0,...,2,0,1,1,4,3,3,0,3,0
1,PREDWELL_RETOS-1002-1,0,0,2,7,5,0,1,0,0,...,1,1,1,1,4,3,2,0,4,0
2,PREDWELL_RETOS-1003-1,0,0,6,3,6,0,0,0,0,...,0,0,1,5,3,0,3,2,1,0
3,PREDWELL_RETOS-1004-1,3,1,4,1,3,1,2,0,4,...,1,0,2,2,4,1,2,2,1,1
4,PREDWELL_RETOS-1005-1,0,0,3,6,5,1,0,0,0,...,1,1,0,3,5,1,3,1,1,1


In [78]:
multiindex_Rating = Rating_BEBRASK_counts.set_index(['Subject', 'Emotion', 'Match', 'Score'])
Rating_BEBRASK_df = multiindex_Rating.unstack(level=['Emotion', 'Match', 'Score'])
Rating_BEBRASK_df.columns = ['_'.join(map(str, col)).strip() for col in Rating_BEBRASK_df.columns.values]
Rating_BEBRASK_df = Rating_BEBRASK_df.reset_index()

#Modificamos el Dataset de manera que cada fila represente completamente un sujeto con toda la información referente a sus ratings.
Rating_BEBRASK_df.to_excel('../Clustering_Predictive_Processing/BEBRASK_Rating_count.xlsx', index=False) 
Rating_BEBRASK_df.head(5)


Unnamed: 0,Subject,Count_Happy_1_1,Count_Happy_1_2,Count_Happy_1_3,Count_Happy_1_4,Count_Happy_0_1,Count_Happy_0_2,Count_Happy_0_3,Count_Happy_0_4,Count_Sad_1_1,...,Count_Sad_0_3,Count_Sad_0_4,Count_Fear_1_1,Count_Fear_1_2,Count_Fear_1_3,Count_Fear_1_4,Count_Fear_0_1,Count_Fear_0_2,Count_Fear_0_3,Count_Fear_0_4
0,PREDWELL_RETOS-1001-1,0,0,6,3,1,2,2,1,1,...,1,2,0,5,4,0,1,2,3,0
1,PREDWELL_RETOS-1002-1,0,1,2,6,0,3,3,0,0,...,2,0,0,3,5,1,1,1,3,1
2,PREDWELL_RETOS-1003-1,0,2,5,2,0,3,1,2,1,...,2,1,1,1,5,2,1,1,3,1
3,PREDWELL_RETOS-1004-1,2,2,4,1,1,4,1,0,3,...,1,1,0,5,3,1,2,1,3,0
4,PREDWELL_RETOS-1005-1,0,0,4,5,0,4,2,0,0,...,3,0,0,7,2,0,0,1,5,0


## RETOS DATASET COUNT


In [79]:
RETOS_tasks = pd.read_excel('../Datasets/RETOS_task.xlsx')
RETOS_tasks = RETOS_tasks[
    ["DataFile.Basename", "Rating0.RESP", "Rating.RESP", "EvokedEmotion", "Expression", "ScenarioPick", "TrialCount",
     "Fulfilled"]].copy()
RETOS_tasks.dropna(subset=["TrialCount"], inplace=True)
subjects_id = RETOS_tasks["DataFile.Basename"].unique()

#Eleccion de las variables relevantes para el clustering, y eliminación de las filas que no corresponden a trials.


In [80]:
RETOS_tasks_long= RETOS_tasks.pivot(index="DataFile.Basename", columns='TrialCount')
RETOS_tasks_long.columns = ['_'.join(str(col) for col in cols) for cols in RETOS_tasks_long.columns.values]

RETOS_tasks_long.head(5)

Unnamed: 0_level_0,Rating0.RESP_1.0,Rating0.RESP_2.0,Rating0.RESP_3.0,Rating0.RESP_4.0,Rating0.RESP_5.0,Rating0.RESP_6.0,Rating0.RESP_7.0,Rating0.RESP_8.0,Rating0.RESP_9.0,Rating0.RESP_10.0,...,Fulfilled_36.0,Fulfilled_37.0,Fulfilled_38.0,Fulfilled_39.0,Fulfilled_40.0,Fulfilled_41.0,Fulfilled_42.0,Fulfilled_43.0,Fulfilled_44.0,Fulfilled_45.0
DataFile.Basename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
PREDWELL_RETOS-1-1,1.0,3.0,4.0,4.0,4.0,2.0,1.0,4.0,4.0,4.0,...,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0
PREDWELL_RETOS-10-1,4.0,2.0,2.0,4.0,3.0,4.0,3.0,1.0,1.0,3.0,...,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0
PREDWELL_RETOS-101-1,2.0,3.0,3.0,4.0,4.0,1.0,1.0,4.0,3.0,1.0,...,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0
PREDWELL_RETOS-102-1,1.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,4.0,...,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0
PREDWELL_RETOS-103-1,3.0,1.0,2.0,2.0,1.0,1.0,3.0,1.0,2.0,1.0,...,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0


In [81]:
print("Mean difference pred/like:",np.mean(abs(RETOS_tasks["Rating0.RESP"] - RETOS_tasks["Rating.RESP"])))
print("Std difference pred/like:",np.std(abs(RETOS_tasks["Rating0.RESP"] - RETOS_tasks["Rating.RESP"])))
RETOS_tasks_long.isna().sum()

#Observamos muchos más valores NaN que anteriormente. Empezaremos haciendo imputación de los datos faltantes del mismo método que anteriormente.

Mean difference pred/like: 0.9506468615237182
Std difference pred/like: 0.8575928879401842


Rating0.RESP_1.0    5
Rating0.RESP_2.0    1
Rating0.RESP_3.0    1
Rating0.RESP_4.0    2
Rating0.RESP_5.0    1
                   ..
Fulfilled_41.0      1
Fulfilled_42.0      1
Fulfilled_43.0      1
Fulfilled_44.0      1
Fulfilled_45.0      1
Length: 270, dtype: int64

In [82]:
for i in range(1, 46):
    rating1_col = f'Rating0.RESP_{i}.0'
    rating2_col = f'Rating.RESP_{i}.0'

    # Llenamos los NA cen Rating0 con los valores de Rating, y viceversa
    RETOS_tasks_long[rating1_col].fillna(RETOS_tasks_long[rating2_col], inplace=True)
    RETOS_tasks_long[rating2_col].fillna(RETOS_tasks_long[rating1_col], inplace=True)

RETOS_tasks_long.isna().sum()
#Vemos que hay un valor que no se corrige, miramos si corresponde a un sujeto o a más.


Rating0.RESP_1.0    1
Rating0.RESP_2.0    1
Rating0.RESP_3.0    1
Rating0.RESP_4.0    1
Rating0.RESP_5.0    1
                   ..
Fulfilled_41.0      1
Fulfilled_42.0      1
Fulfilled_43.0      1
Fulfilled_44.0      1
Fulfilled_45.0      1
Length: 270, dtype: int64

In [83]:
RETOS_tasks_long.isna().sum(axis=1)


DataFile.Basename
PREDWELL_RETOS-1-1      0
PREDWELL_RETOS-10-1     0
PREDWELL_RETOS-101-1    0
PREDWELL_RETOS-102-1    0
PREDWELL_RETOS-103-1    0
                       ..
PREDWELL_RETOS-5-1      0
PREDWELL_RETOS-6-1      0
PREDWELL_RETOS-7-1      0
PREDWELL_RETOS-8-1      0
PREDWELL_RETOS-9-1      0
Length: 96, dtype: int64

In [84]:
RETOS_tasks_long[RETOS_tasks_long.isna().sum(axis=1) > 0]


Unnamed: 0_level_0,Rating0.RESP_1.0,Rating0.RESP_2.0,Rating0.RESP_3.0,Rating0.RESP_4.0,Rating0.RESP_5.0,Rating0.RESP_6.0,Rating0.RESP_7.0,Rating0.RESP_8.0,Rating0.RESP_9.0,Rating0.RESP_10.0,...,Fulfilled_36.0,Fulfilled_37.0,Fulfilled_38.0,Fulfilled_39.0,Fulfilled_40.0,Fulfilled_41.0,Fulfilled_42.0,Fulfilled_43.0,Fulfilled_44.0,Fulfilled_45.0
DataFile.Basename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
PREDWELL_RETOS-124-1,2.0,3.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,...,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0
PREDWELL_RETOS-307-1,,,,,,,,,,,...,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0
PREDWELL_RETOS-307-3,1.0,3.0,3.0,1.0,3.0,3.0,3.0,3.0,2.0,3.0,...,,,,,,,,,,


In [85]:
#Los datos de PREDWELL_RETOS-307-1 y PREDWELL_RETOS-307-3 son del mismo sujeto, por lo tanto se deben combinar y tratar como una sola fila.

In [86]:
part1 = RETOS_tasks_long.iloc[RETOS_tasks_long.index =='PREDWELL_RETOS-307-1'].T
part2 = RETOS_tasks_long.iloc[RETOS_tasks_long.index =='PREDWELL_RETOS-307-3'].T
combined_row = np.where(part1.isnull(), part2, part1).T
RETOS_tasks_long.iloc[RETOS_tasks_long.index == 'PREDWELL_RETOS-307-1'] = combined_row


In [87]:
RETOS_tasks_long[RETOS_tasks_long.isna().sum(axis=1) > 0]


Unnamed: 0_level_0,Rating0.RESP_1.0,Rating0.RESP_2.0,Rating0.RESP_3.0,Rating0.RESP_4.0,Rating0.RESP_5.0,Rating0.RESP_6.0,Rating0.RESP_7.0,Rating0.RESP_8.0,Rating0.RESP_9.0,Rating0.RESP_10.0,...,Fulfilled_36.0,Fulfilled_37.0,Fulfilled_38.0,Fulfilled_39.0,Fulfilled_40.0,Fulfilled_41.0,Fulfilled_42.0,Fulfilled_43.0,Fulfilled_44.0,Fulfilled_45.0
DataFile.Basename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
PREDWELL_RETOS-124-1,2.0,3.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,...,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0
PREDWELL_RETOS-307-1,1.0,3.0,3.0,1.0,3.0,3.0,3.0,3.0,2.0,3.0,...,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0
PREDWELL_RETOS-307-3,1.0,3.0,3.0,1.0,3.0,3.0,3.0,3.0,2.0,3.0,...,,,,,,,,,,


In [88]:
RETOS_tasks_long = RETOS_tasks_long.query("index != 'PREDWELL_RETOS-307-3'")

#Eliminamos la fila excedente de la combinación.

RETOS_tasks_long[RETOS_tasks_long.index == "PREDWELL_RETOS-124-1"] 
#Vemos que la evoked emotion es fear y la expression también, por lo tanto se corresponde a un match

RETOS_tasks_long[RETOS_tasks_long.index == "PREDWELL_RETOS-307-1"] 
#Vemos que la evoked emotion es sadness y la expression es Happy, por lo tanto se corresponde a un non match


Unnamed: 0_level_0,Rating0.RESP_1.0,Rating0.RESP_2.0,Rating0.RESP_3.0,Rating0.RESP_4.0,Rating0.RESP_5.0,Rating0.RESP_6.0,Rating0.RESP_7.0,Rating0.RESP_8.0,Rating0.RESP_9.0,Rating0.RESP_10.0,...,Fulfilled_36.0,Fulfilled_37.0,Fulfilled_38.0,Fulfilled_39.0,Fulfilled_40.0,Fulfilled_41.0,Fulfilled_42.0,Fulfilled_43.0,Fulfilled_44.0,Fulfilled_45.0
DataFile.Basename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
PREDWELL_RETOS-307-1,1.0,3.0,3.0,1.0,3.0,3.0,3.0,3.0,2.0,3.0,...,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0


In [89]:
RETOS_tasks_long

Unnamed: 0_level_0,Rating0.RESP_1.0,Rating0.RESP_2.0,Rating0.RESP_3.0,Rating0.RESP_4.0,Rating0.RESP_5.0,Rating0.RESP_6.0,Rating0.RESP_7.0,Rating0.RESP_8.0,Rating0.RESP_9.0,Rating0.RESP_10.0,...,Fulfilled_36.0,Fulfilled_37.0,Fulfilled_38.0,Fulfilled_39.0,Fulfilled_40.0,Fulfilled_41.0,Fulfilled_42.0,Fulfilled_43.0,Fulfilled_44.0,Fulfilled_45.0
DataFile.Basename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
PREDWELL_RETOS-1-1,1.0,3.0,4.0,4.0,4.0,2.0,1.0,4.0,4.0,4.0,...,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0
PREDWELL_RETOS-10-1,4.0,2.0,2.0,4.0,3.0,4.0,3.0,1.0,1.0,3.0,...,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0
PREDWELL_RETOS-101-1,2.0,3.0,3.0,4.0,4.0,1.0,1.0,4.0,3.0,1.0,...,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0
PREDWELL_RETOS-102-1,1.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,4.0,...,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0
PREDWELL_RETOS-103-1,3.0,1.0,2.0,2.0,1.0,1.0,3.0,1.0,2.0,1.0,...,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PREDWELL_RETOS-5-1,1.0,1.0,1.0,4.0,2.0,3.0,2.0,2.0,2.0,4.0,...,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
PREDWELL_RETOS-6-1,2.0,3.0,1.0,1.0,2.0,3.0,2.0,2.0,1.0,2.0,...,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
PREDWELL_RETOS-7-1,1.0,1.0,1.0,3.0,3.0,1.0,4.0,4.0,2.0,1.0,...,0.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0
PREDWELL_RETOS-8-1,2.0,3.0,3.0,1.0,4.0,3.0,1.0,3.0,4.0,4.0,...,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0


In [90]:
rating0_list_124 = []
rating_list_124 = []

rating0_list_307 = []
rating_list_307 = []

for i in range(1,46):
        rating0 = f'Rating0.RESP_{i}.0'
        rating = f'Rating.RESP_{i}.0'
        emotion = f'EvokedEmotion_{i}.0'
        fulfill = f'Fulfilled_{i}.0'
        
        if RETOS_tasks_long[RETOS_tasks_long.index == "PREDWELL_RETOS-124-1"][emotion].values[0] == "fear" and RETOS_tasks_long[RETOS_tasks_long.index == "PREDWELL_RETOS-124-1"][fulfill].values[0] == 1 and i!=20:
            rating0_list_124.append(RETOS_tasks_long[RETOS_tasks_long.index == "PREDWELL_RETOS-307-1"][rating0].values[0])

            rating_list_124.append(RETOS_tasks_long[RETOS_tasks_long.index == "PREDWELL_RETOS-307-1"][rating].values[0])
        
        if RETOS_tasks_long[RETOS_tasks_long.index == "PREDWELL_RETOS-307-1"][emotion].values[0] == "sadness" and RETOS_tasks_long[RETOS_tasks_long.index == "PREDWELL_RETOS-307-1"][fulfill].values[0] == 0 and i!=18:
            rating0_list_307.append(RETOS_tasks_long[RETOS_tasks_long.index == "PREDWELL_RETOS-307-1"][rating0].values[0])

            rating_list_307.append(RETOS_tasks_long[RETOS_tasks_long.index == "PREDWELL_RETOS-307-1"][rating].values[0])

        

RETOS_tasks_long.loc[RETOS_tasks_long.index == "PREDWELL_RETOS-124-1", "Rating.RESP_20.0"] = statistics.multimode(rating_list_124)[0]
RETOS_tasks_long.loc[RETOS_tasks_long.index == "PREDWELL_RETOS-124-1", "Rating0.RESP_20.0"] = statistics.multimode(rating0_list_124)[0]
RETOS_tasks_long.loc[RETOS_tasks_long.index == "PREDWELL_RETOS-307-1", "Rating.RESP_18.0"] = statistics.multimode(rating_list_307)[0]
RETOS_tasks_long.loc[RETOS_tasks_long.index == "PREDWELL_RETOS-307-1", "Rating0.RESP_18.0"] = statistics.multimode(rating0_list_307)[0]


In [91]:
RETOS_tasks_long = RETOS_tasks_long.query("index != 'PREDWELL_RETOS-307-1'")


In [92]:
subject_dict_RETOS = creation_dictionary_from_df(RETOS_tasks_long,len(RETOS_tasks_long))
Rating0_RETOS_counts = creation_count_df(subject_dict_RETOS, "Rating0")
Rating_RETOS_counts = creation_count_df(subject_dict_RETOS, "Rating")



In [93]:
Rating0_RETOS_counts.head(5)


Unnamed: 0,Subject,Emotion,Match,Score,Count
0,PREDWELL_RETOS-1-1,Happy,1,1,0
1,PREDWELL_RETOS-1-1,Happy,1,2,0
2,PREDWELL_RETOS-1-1,Happy,1,3,1
3,PREDWELL_RETOS-1-1,Happy,1,4,8
4,PREDWELL_RETOS-1-1,Happy,0,1,5


In [94]:
Rating_RETOS_counts.head(5)


Unnamed: 0,Subject,Emotion,Match,Score,Count
0,PREDWELL_RETOS-1-1,Happy,1,1,0
1,PREDWELL_RETOS-1-1,Happy,1,2,0
2,PREDWELL_RETOS-1-1,Happy,1,3,3
3,PREDWELL_RETOS-1-1,Happy,1,4,6
4,PREDWELL_RETOS-1-1,Happy,0,1,0


In [95]:
multiindex_Rating0 = Rating0_RETOS_counts.set_index(['Subject', 'Emotion', 'Match', 'Score'])
Rating0_RETOS_df = multiindex_Rating0.unstack(level=['Emotion', 'Match', 'Score'])
Rating0_RETOS_df.columns = ['_'.join(map(str, col)).strip() for col in Rating0_RETOS_df.columns.values]
Rating0_RETOS_df = Rating0_RETOS_df.reset_index()

#Modificamos el Dataset de manera que cada fila represente completamente un sujeto con toda la información referente a sus ratings.
Rating0_RETOS_df.to_excel('../Clustering_Predictive_Processing/RETOS_Rating0_count.xlsx', index=False) 
Rating0_RETOS_df.head(5)


Unnamed: 0,Subject,Count_Happy_1_1,Count_Happy_1_2,Count_Happy_1_3,Count_Happy_1_4,Count_Happy_0_1,Count_Happy_0_2,Count_Happy_0_3,Count_Happy_0_4,Count_Sad_1_1,...,Count_Sad_0_3,Count_Sad_0_4,Count_Fear_1_1,Count_Fear_1_2,Count_Fear_1_3,Count_Fear_1_4,Count_Fear_0_1,Count_Fear_0_2,Count_Fear_0_3,Count_Fear_0_4
0,PREDWELL_RETOS-1-1,0,0,1,8,5,0,0,1,0,...,2,1,0,3,1,5,3,1,1,1
1,PREDWELL_RETOS-10-1,0,1,4,4,4,2,0,0,0,...,1,0,1,1,3,4,1,3,1,1
2,PREDWELL_RETOS-101-1,0,1,2,6,5,1,0,0,1,...,1,0,2,1,3,3,5,1,0,0
3,PREDWELL_RETOS-102-1,1,0,4,4,6,0,0,0,1,...,0,3,0,2,3,4,4,2,0,0
4,PREDWELL_RETOS-103-1,0,1,1,7,5,1,0,0,0,...,1,0,3,2,3,1,3,2,1,0


In [96]:
multiindex_Rating = Rating0_RETOS_counts.set_index(['Subject', 'Emotion', 'Match', 'Score'])
Rating_RETOS_df = multiindex_Rating.unstack(level=['Emotion', 'Match', 'Score'])
Rating_RETOS_df.columns = ['_'.join(map(str, col)).strip() for col in Rating_RETOS_df.columns.values]
Rating_RETOS_df = Rating_RETOS_df.reset_index()

#Modificamos el Dataset de manera que cada fila represente completamente un sujeto con toda la información referente a sus ratings.
Rating_RETOS_df.to_excel('../Clustering_Predictive_Processing/RETOS_Rating_count.xlsx', index=False) 
Rating_RETOS_df.head(5)


Unnamed: 0,Subject,Count_Happy_1_1,Count_Happy_1_2,Count_Happy_1_3,Count_Happy_1_4,Count_Happy_0_1,Count_Happy_0_2,Count_Happy_0_3,Count_Happy_0_4,Count_Sad_1_1,...,Count_Sad_0_3,Count_Sad_0_4,Count_Fear_1_1,Count_Fear_1_2,Count_Fear_1_3,Count_Fear_1_4,Count_Fear_0_1,Count_Fear_0_2,Count_Fear_0_3,Count_Fear_0_4
0,PREDWELL_RETOS-1-1,0,0,1,8,5,0,0,1,0,...,2,1,0,3,1,5,3,1,1,1
1,PREDWELL_RETOS-10-1,0,1,4,4,4,2,0,0,0,...,1,0,1,1,3,4,1,3,1,1
2,PREDWELL_RETOS-101-1,0,1,2,6,5,1,0,0,1,...,1,0,2,1,3,3,5,1,0,0
3,PREDWELL_RETOS-102-1,1,0,4,4,6,0,0,0,1,...,0,3,0,2,3,4,4,2,0,0
4,PREDWELL_RETOS-103-1,0,1,1,7,5,1,0,0,0,...,1,0,3,2,3,1,3,2,1,0


## BEBRASK & RETOS Count Merge

In [97]:
RETOS_BEBRASK_Rating0_Count = pd.concat([Rating0_RETOS_df,Rating0_BEBRASK_df])
RETOS_BEBRASK_Rating0_Count.to_excel('../Clustering_Predictive_Processing/RETOS_BEBRASK_Rating0_count.xlsx', index=False) 
RETOS_BEBRASK_Rating0_Count.head(5)

Unnamed: 0,Subject,Count_Happy_1_1,Count_Happy_1_2,Count_Happy_1_3,Count_Happy_1_4,Count_Happy_0_1,Count_Happy_0_2,Count_Happy_0_3,Count_Happy_0_4,Count_Sad_1_1,...,Count_Sad_0_3,Count_Sad_0_4,Count_Fear_1_1,Count_Fear_1_2,Count_Fear_1_3,Count_Fear_1_4,Count_Fear_0_1,Count_Fear_0_2,Count_Fear_0_3,Count_Fear_0_4
0,PREDWELL_RETOS-1-1,0,0,1,8,5,0,0,1,0,...,2,1,0,3,1,5,3,1,1,1
1,PREDWELL_RETOS-10-1,0,1,4,4,4,2,0,0,0,...,1,0,1,1,3,4,1,3,1,1
2,PREDWELL_RETOS-101-1,0,1,2,6,5,1,0,0,1,...,1,0,2,1,3,3,5,1,0,0
3,PREDWELL_RETOS-102-1,1,0,4,4,6,0,0,0,1,...,0,3,0,2,3,4,4,2,0,0
4,PREDWELL_RETOS-103-1,0,1,1,7,5,1,0,0,0,...,1,0,3,2,3,1,3,2,1,0


In [98]:
RETOS_BEBRASK_Rating_Count = pd.concat([Rating_RETOS_df, Rating_BEBRASK_df])
RETOS_BEBRASK_Rating_Count.to_excel('../Clustering_Predictive_Processing/RETOS_BEBRASK_Rating_count.xlsx',index=False)
RETOS_BEBRASK_Rating_Count.head(5)


Unnamed: 0,Subject,Count_Happy_1_1,Count_Happy_1_2,Count_Happy_1_3,Count_Happy_1_4,Count_Happy_0_1,Count_Happy_0_2,Count_Happy_0_3,Count_Happy_0_4,Count_Sad_1_1,...,Count_Sad_0_3,Count_Sad_0_4,Count_Fear_1_1,Count_Fear_1_2,Count_Fear_1_3,Count_Fear_1_4,Count_Fear_0_1,Count_Fear_0_2,Count_Fear_0_3,Count_Fear_0_4
0,PREDWELL_RETOS-1-1,0,0,1,8,5,0,0,1,0,...,2,1,0,3,1,5,3,1,1,1
1,PREDWELL_RETOS-10-1,0,1,4,4,4,2,0,0,0,...,1,0,1,1,3,4,1,3,1,1
2,PREDWELL_RETOS-101-1,0,1,2,6,5,1,0,0,1,...,1,0,2,1,3,3,5,1,0,0
3,PREDWELL_RETOS-102-1,1,0,4,4,6,0,0,0,1,...,0,3,0,2,3,4,4,2,0,0
4,PREDWELL_RETOS-103-1,0,1,1,7,5,1,0,0,0,...,1,0,3,2,3,1,3,2,1,0


## BEBRASK & RETOS Percentage Difference between Match and no Match

In [99]:
RETOS_BEBRASK_Rating0_Percentage = RETOS_BEBRASK_Rating0_Count.copy()
RETOS_BEBRASK_Rating_Percentage = RETOS_BEBRASK_Rating_Count.copy()


RETOS_BEBRASK_Rating0_Percentage[["Count_Happy_1_1","Count_Happy_1_2","Count_Happy_1_3","Count_Happy_1_4","Count_Sad_1_1","Count_Sad_1_2","Count_Sad_1_3","Count_Sad_1_4","Count_Fear_1_1","Count_Fear_1_2","Count_Fear_1_3","Count_Fear_1_4"]] = RETOS_BEBRASK_Rating0_Count[["Count_Happy_1_1","Count_Happy_1_2","Count_Happy_1_3","Count_Happy_1_4","Count_Sad_1_1","Count_Sad_1_2","Count_Sad_1_3","Count_Sad_1_4","Count_Fear_1_1","Count_Fear_1_2","Count_Fear_1_3","Count_Fear_1_4"]]/9

RETOS_BEBRASK_Rating0_Percentage[["Count_Happy_0_1","Count_Happy_0_2","Count_Happy_0_3","Count_Happy_0_4","Count_Sad_0_1","Count_Sad_0_2","Count_Sad_0_3","Count_Sad_0_4","Count_Fear_0_1","Count_Fear_0_2","Count_Fear_0_3","Count_Fear_0_4"]] = RETOS_BEBRASK_Rating0_Count[["Count_Happy_0_1","Count_Happy_0_2","Count_Happy_0_3","Count_Happy_0_4","Count_Sad_0_1","Count_Sad_0_2","Count_Sad_0_3","Count_Sad_0_4","Count_Fear_0_1","Count_Fear_0_2","Count_Fear_0_3","Count_Fear_0_4"]]/6


RETOS_BEBRASK_Rating_Percentage[["Count_Happy_1_1","Count_Happy_1_2","Count_Happy_1_3","Count_Happy_1_4","Count_Sad_1_1","Count_Sad_1_2","Count_Sad_1_3","Count_Sad_1_4","Count_Fear_1_1","Count_Fear_1_2","Count_Fear_1_3","Count_Fear_1_4"]] = RETOS_BEBRASK_Rating_Count[["Count_Happy_1_1","Count_Happy_1_2","Count_Happy_1_3","Count_Happy_1_4","Count_Sad_1_1","Count_Sad_1_2","Count_Sad_1_3","Count_Sad_1_4","Count_Fear_1_1","Count_Fear_1_2","Count_Fear_1_3","Count_Fear_1_4"]]/9

RETOS_BEBRASK_Rating_Percentage[["Count_Happy_0_1","Count_Happy_0_2","Count_Happy_0_3","Count_Happy_0_4","Count_Sad_0_1","Count_Sad_0_2","Count_Sad_0_3","Count_Sad_0_4","Count_Fear_0_1","Count_Fear_0_2","Count_Fear_0_3","Count_Fear_0_4"]] = RETOS_BEBRASK_Rating_Count[["Count_Happy_0_1","Count_Happy_0_2","Count_Happy_0_3","Count_Happy_0_4","Count_Sad_0_1","Count_Sad_0_2","Count_Sad_0_3","Count_Sad_0_4","Count_Fear_0_1","Count_Fear_0_2","Count_Fear_0_3","Count_Fear_0_4"]]/6

In [100]:
delta0_columns = {"Delta_"+col[6:]: RETOS_BEBRASK_Rating0_Percentage[col] - RETOS_BEBRASK_Rating0_Percentage[col.replace("_1_", "_0_")] for col in RETOS_BEBRASK_Rating0_Percentage.columns if "_1_" in col}
RETOS_BEBRASK_Rating0_Delta = pd.DataFrame(delta0_columns)
RETOS_BEBRASK_Rating0_Delta.insert(0,"Subject", RETOS_BEBRASK_Rating0_Count["Subject"])
RETOS_BEBRASK_Rating0_Delta.to_excel('../Clustering_Predictive_Processing/RETOS_BEBRASK_Rating0_Delta.xlsx', index=False) 

RETOS_BEBRASK_Rating0_Delta.head(5)

Unnamed: 0,Subject,Delta_Happy_1_1,Delta_Happy_1_2,Delta_Happy_1_3,Delta_Happy_1_4,Delta_Sad_1_1,Delta_Sad_1_2,Delta_Sad_1_3,Delta_Sad_1_4,Delta_Fear_1_1,Delta_Fear_1_2,Delta_Fear_1_3,Delta_Fear_1_4
0,PREDWELL_RETOS-1-1,-0.833333,0.0,0.111111,0.722222,-0.5,0.111111,-0.111111,0.5,-0.5,0.166667,-0.055556,0.388889
1,PREDWELL_RETOS-10-1,-0.666667,-0.222222,0.444444,0.444444,-0.333333,-0.055556,0.388889,0.0,-0.055556,-0.388889,0.166667,0.277778
2,PREDWELL_RETOS-101-1,-0.833333,-0.055556,0.222222,0.666667,-0.555556,0.055556,0.388889,0.111111,-0.611111,-0.055556,0.333333,0.333333
3,PREDWELL_RETOS-102-1,-0.888889,0.0,0.444444,0.444444,-0.222222,0.055556,0.222222,-0.055556,-0.666667,-0.111111,0.333333,0.444444
4,PREDWELL_RETOS-103-1,-0.833333,-0.055556,0.111111,0.777778,-0.833333,0.444444,0.277778,0.111111,-0.166667,-0.111111,0.166667,0.111111


In [101]:
delta_columns = {"Delta_"+col[6:]: RETOS_BEBRASK_Rating_Percentage[col] - RETOS_BEBRASK_Rating_Percentage[col.replace("_1_", "_0_")] for col in RETOS_BEBRASK_Rating_Percentage.columns if "_1_" in col}

RETOS_BEBRASK_Rating_Delta = pd.DataFrame(delta_columns)
RETOS_BEBRASK_Rating_Delta.insert(0,"Subject", RETOS_BEBRASK_Rating_Count["Subject"])

RETOS_BEBRASK_Rating_Delta.to_excel('../Clustering_Predictive_Processing/RETOS_BEBRASK_Rating_Delta.xlsx', index=False) 

RETOS_BEBRASK_Rating_Delta.head(5)

Unnamed: 0,Subject,Delta_Happy_1_1,Delta_Happy_1_2,Delta_Happy_1_3,Delta_Happy_1_4,Delta_Sad_1_1,Delta_Sad_1_2,Delta_Sad_1_3,Delta_Sad_1_4,Delta_Fear_1_1,Delta_Fear_1_2,Delta_Fear_1_3,Delta_Fear_1_4
0,PREDWELL_RETOS-1-1,-0.833333,0.0,0.111111,0.722222,-0.5,0.111111,-0.111111,0.5,-0.5,0.166667,-0.055556,0.388889
1,PREDWELL_RETOS-10-1,-0.666667,-0.222222,0.444444,0.444444,-0.333333,-0.055556,0.388889,0.0,-0.055556,-0.388889,0.166667,0.277778
2,PREDWELL_RETOS-101-1,-0.833333,-0.055556,0.222222,0.666667,-0.555556,0.055556,0.388889,0.111111,-0.611111,-0.055556,0.333333,0.333333
3,PREDWELL_RETOS-102-1,-0.888889,0.0,0.444444,0.444444,-0.222222,0.055556,0.222222,-0.055556,-0.666667,-0.111111,0.333333,0.444444
4,PREDWELL_RETOS-103-1,-0.833333,-0.055556,0.111111,0.777778,-0.833333,0.444444,0.277778,0.111111,-0.166667,-0.111111,0.166667,0.111111


## BEBRASK & RETOS Time Series By Emotion and Match

In [102]:
merged_dict = {**subject_dict_BEBRASK, **subject_dict_RETOS}

#Unimos los dos diccionarios con la información organizada por emociones

In [103]:
def convert_time_series(subject_dict,rating):
    """
    Procesa un diccionario con la información de las respuestas dadas por cada sujeto organizado por sentimiento, y crea
    un dataframe donde los trials están organizados secuencialmente según la emoción y si es match o no (e.g. las primeras
    9 columnas de un sujeto son los 9 trials de match de Happy, las 6 siguientes son los no match de Happy, y así con las
    demás emociones).  

    Parámetros:
    - subject_dict (dict): Diccionario que contiene la información de las scores organizadas por sujetos y por emociones.
    - rating (string): Rating a partir del cual hacer las agregaciones de las scores (Rating0 o Rating).

    Retorna:
    - df_counts (pandas.DataFrame): Un dataframe donde cada fila contiene el identificador del sujeto y una sèrie temporal
    correspondiente a sus respuestas
    """
    results = {}
    for instance, categories in subject_dict.items():
        results[instance] = {}
        for category, info in categories.items():
            pred = info[rating]
            fulfill = info['Fulfill']
            no_match_pred = pred[np.where(fulfill == 0)[0]]
            match_pred = pred[np.where(fulfill == 1)[0]]
            

            results[instance][category] = {
                'Fulfill_0': no_match_pred,
                'Fulfill_1_': match_pred
            }
    records = []
    for subject_id, emotions in results.items():
        record = {'Subject ID': subject_id}
        for emotion, stats in emotions.items():
            for stat_key, stat_value in stats.items():
                for i,val in enumerate(stat_value):
                # Creating new column names in the format "Emotion_StatisticalMeasure"
                    column_name = f"{emotion}_{stat_key[8]}_{i}"
                    record[column_name] = val
        records.append(record)

    return pd.DataFrame(records)


In [104]:
RETOS_BEBRASK_Rating0_Time_Series = convert_time_series(merged_dict, "Rating0")
RETOS_BEBRASK_Rating_Time_Series = convert_time_series(merged_dict, "Rating")


In [105]:
RETOS_BEBRASK_Rating0_Time_Series.to_excel('../Clustering_Predictive_Processing/RETOS_BEBRASK_Rating0_Time_Series.xlsx', index=False) 
RETOS_BEBRASK_Rating0_Time_Series.head(5)


Unnamed: 0,Subject ID,Happy_0_0,Happy_0_1,Happy_0_2,Happy_0_3,Happy_0_4,Happy_0_5,Happy_1_0,Happy_1_1,Happy_1_2,...,Fear_0_5,Fear_1_0,Fear_1_1,Fear_1_2,Fear_1_3,Fear_1_4,Fear_1_5,Fear_1_6,Fear_1_7,Fear_1_8
0,PREDWELL_RETOS-1001-1,4,1,1,1,1,1,4,4,4,...,3,4,1,3,2,3,4,3,3,4
1,PREDWELL_RETOS-1002-1,1,1,3,1,1,1,3,4,4,...,1,3,1,3,4,3,4,2,3,4
2,PREDWELL_RETOS-1003-1,1,1,1,1,1,1,3,3,4,...,2,2,2,3,2,2,2,1,3,3
3,PREDWELL_RETOS-1004-1,1,1,3,3,2,1,1,4,2,...,2,2,4,3,1,3,3,2,1,3
4,PREDWELL_RETOS-1005-1,1,1,1,1,2,1,4,3,4,...,1,2,2,2,3,3,3,4,3,3


In [106]:
RETOS_BEBRASK_Rating0_Time_Series

Unnamed: 0,Subject ID,Happy_0_0,Happy_0_1,Happy_0_2,Happy_0_3,Happy_0_4,Happy_0_5,Happy_1_0,Happy_1_1,Happy_1_2,...,Fear_0_5,Fear_1_0,Fear_1_1,Fear_1_2,Fear_1_3,Fear_1_4,Fear_1_5,Fear_1_6,Fear_1_7,Fear_1_8
0,PREDWELL_RETOS-1001-1,4,1,1,1,1,1,4,4,4,...,3,4,1,3,2,3,4,3,3,4
1,PREDWELL_RETOS-1002-1,1,1,3,1,1,1,3,4,4,...,1,3,1,3,4,3,4,2,3,4
2,PREDWELL_RETOS-1003-1,1,1,1,1,1,1,3,3,4,...,2,2,2,3,2,2,2,1,3,3
3,PREDWELL_RETOS-1004-1,1,1,3,3,2,1,1,4,2,...,2,2,4,3,1,3,3,2,1,3
4,PREDWELL_RETOS-1005-1,1,1,1,1,2,1,4,3,4,...,1,2,2,2,3,3,3,4,3,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
144,PREDWELL_RETOS-5-1,2,2,1,1,1,1,3,4,4,...,1,1,1,4,2,4,3,3,2,2
145,PREDWELL_RETOS-6-1,1,2,1,1,1,1,3,3,4,...,1,2,2,2,4,2,3,2,4,4
146,PREDWELL_RETOS-7-1,1,1,1,1,1,1,3,4,2,...,4,4,1,2,4,4,1,4,4,1
147,PREDWELL_RETOS-8-1,1,1,1,1,1,1,4,4,1,...,3,3,3,4,3,2,4,2,1,3


In [107]:
RETOS_BEBRASK_Rating_Time_Series.to_excel('../Clustering_Predictive_Processing/RETOS_BEBRASK_Rating_Time_Series.xlsx', index=False) 
RETOS_BEBRASK_Rating_Time_Series.head(5)

Unnamed: 0,Subject ID,Happy_0_0,Happy_0_1,Happy_0_2,Happy_0_3,Happy_0_4,Happy_0_5,Happy_1_0,Happy_1_1,Happy_1_2,...,Fear_0_5,Fear_1_0,Fear_1_1,Fear_1_2,Fear_1_3,Fear_1_4,Fear_1_5,Fear_1_6,Fear_1_7,Fear_1_8
0,PREDWELL_RETOS-1001-1,4,2,1,2,3,3,4,4,3,...,2,2,2,3,2,2,3,2,3,3
1,PREDWELL_RETOS-1002-1,3,3,2,3,2,2,3,4,4,...,1,3,2,4,3,3,2,2,3,3
2,PREDWELL_RETOS-1003-1,2,2,4,4,2,3,2,3,2,...,3,2,3,3,3,1,3,4,3,4
3,PREDWELL_RETOS-1004-1,2,2,3,2,2,1,2,3,2,...,3,3,3,2,4,3,2,2,2,2
4,PREDWELL_RETOS-1005-1,2,3,3,2,2,2,3,3,4,...,3,2,3,2,3,2,2,2,2,2


## BEBRASK & RETOS Time Series Moving Average By Emotion and Match

In [108]:
def compute_moving_averages_no_edges(subject_dict,rating):
    """
    Procesa un diccionario con la información de las respuestas dadas por cada sujeto organizado por sentimiento, y crea
    un dataframe donde se guarda los Moving Average de los trials donde estos están organizados secuencialmente según la 
    emoción y si es match o no. Se calcula un 3 point Moving Average (se hace un average de los tres primeros puntos, 
    luego uno del segundo al cuarto punto y así sucesivamente).

    Parámetros:
    - subject_dict (dict): Diccionario que contiene la información de las scores organizadas por sujetos y por emociones.
    - rating (string): Rating a partir del cual hacer las agregaciones de las scores (Rating0 o Rating).

    Retorna:
    - df_counts (pandas.DataFrame): Un dataframe donde cada fila contiene el identificador del sujeto y una sèrie temporal
    correspondiente a sus respuestas
    """
    results = {}
    for instance, categories in subject_dict.items():
        results[instance] = {}
        for category, info in categories.items():
            pred = info[rating]
            fulfill = info['Fulfill']
            fulfill_0_positions = np.where(fulfill == 0)[0]
            fulfill_1_positions = np.where(fulfill == 1)[0]

            # Calculate moving averages for fulfill 0 excluding edges
            ma_fulfill_0 = []
            for i,pos in enumerate(fulfill_0_positions):
                if i == 0 or i == len(fulfill_0_positions) - 1:
                    continue  # Skip first and last positions
                relevant_preds = [pred[fulfill_0_positions[i-1]],pred[fulfill_0_positions[i]],pred[fulfill_0_positions[i+1]]]
                ma_fulfill_0.append(np.mean(relevant_preds))

            # Calculate moving averages for fulfill 1 excluding edges
            ma_fulfill_1 = []
            for i,pos in enumerate(fulfill_1_positions):
                if i == 0 or i == len(fulfill_1_positions) - 1:
                    continue  # Skip first and last positions
                relevant_preds = [pred[fulfill_1_positions[i-1]],pred[fulfill_1_positions[i]],pred[fulfill_1_positions[i+1]]]
                ma_fulfill_1.append(np.mean(relevant_preds))

            results[instance][category] = {
                'Fulfill_0_MA': ma_fulfill_0,
                'Fulfill_1_MA': ma_fulfill_1
            }
            
    records = []
    for subject_id, emotions in results.items():
        record = {'Subject ID': subject_id}
        for emotion, stats in emotions.items():
            for stat_key, stat_value in stats.items():
                for i,val in enumerate(stat_value):
                # Creating new column names in the format "Emotion_StatisticalMeasure"
                    column_name = f"{emotion}_{stat_key[8]}_MA_{i}"
                    record[column_name] = val
        records.append(record)

    return pd.DataFrame(records)

In [109]:
RETOS_BEBRASK_Rating0_Time_MA_Series = compute_moving_averages_no_edges(merged_dict, "Rating0")
RETOS_BEBRASK_Rating_Time_MA_Series = compute_moving_averages_no_edges(merged_dict, "Rating")


In [110]:
RETOS_BEBRASK_Rating0_Time_MA_Series.to_excel('../Clustering_Predictive_Processing/RETOS_BEBRASK_Rating0_Time_MA_Series.xlsx', index=False) 
RETOS_BEBRASK_Rating0_Time_MA_Series.head(5)


Unnamed: 0,Subject ID,Happy_0_MA_0,Happy_0_MA_1,Happy_0_MA_2,Happy_0_MA_3,Happy_1_MA_0,Happy_1_MA_1,Happy_1_MA_2,Happy_1_MA_3,Happy_1_MA_4,...,Fear_0_MA_1,Fear_0_MA_2,Fear_0_MA_3,Fear_1_MA_0,Fear_1_MA_1,Fear_1_MA_2,Fear_1_MA_3,Fear_1_MA_4,Fear_1_MA_5,Fear_1_MA_6
0,PREDWELL_RETOS-1001-1,2.0,1.0,1.0,1.0,4.0,4.0,3.666667,3.666667,3.666667,...,1.666667,1.0,1.666667,2.666667,2.0,2.666667,3.0,3.333333,3.333333,3.333333
1,PREDWELL_RETOS-1002-1,1.666667,1.666667,1.666667,1.0,3.666667,4.0,4.0,4.0,3.666667,...,3.0,2.333333,1.666667,2.333333,2.666667,3.333333,3.666667,3.0,3.0,3.0
2,PREDWELL_RETOS-1003-1,1.0,1.0,1.0,1.0,3.333333,3.666667,3.666667,3.333333,3.0,...,1.333333,1.666667,2.0,2.333333,2.333333,2.333333,2.0,1.666667,2.0,2.333333
3,PREDWELL_RETOS-1004-1,1.666667,2.333333,2.666667,2.0,2.333333,2.333333,1.333333,1.666667,2.333333,...,1.333333,2.333333,2.333333,3.0,2.666667,2.333333,2.333333,2.666667,2.0,2.0
4,PREDWELL_RETOS-1005-1,1.0,1.0,1.333333,1.333333,3.666667,3.333333,3.666667,3.333333,3.666667,...,2.333333,3.0,2.0,2.0,2.333333,2.666667,3.0,3.333333,3.333333,3.333333


In [111]:
RETOS_BEBRASK_Rating_Time_MA_Series.to_excel('../Clustering_Predictive_Processing/RETOS_BEBRASK_Rating_Time_MA_Series.xlsx', index=False) 
RETOS_BEBRASK_Rating_Time_MA_Series.head(5)


Unnamed: 0,Subject ID,Happy_0_MA_0,Happy_0_MA_1,Happy_0_MA_2,Happy_0_MA_3,Happy_1_MA_0,Happy_1_MA_1,Happy_1_MA_2,Happy_1_MA_3,Happy_1_MA_4,...,Fear_0_MA_1,Fear_0_MA_2,Fear_0_MA_3,Fear_1_MA_0,Fear_1_MA_1,Fear_1_MA_2,Fear_1_MA_3,Fear_1_MA_4,Fear_1_MA_5,Fear_1_MA_6
0,PREDWELL_RETOS-1001-1,2.333333,1.666667,2.0,2.666667,3.666667,3.333333,3.0,3.333333,3.333333,...,2.333333,2.0,1.666667,2.333333,2.333333,2.333333,2.333333,2.333333,2.666667,2.666667
1,PREDWELL_RETOS-1002-1,2.666667,2.666667,2.333333,2.333333,3.666667,4.0,4.0,4.0,4.0,...,3.333333,3.0,2.333333,3.0,3.0,3.333333,2.666667,2.333333,2.333333,2.666667
2,PREDWELL_RETOS-1003-1,2.666667,3.333333,3.333333,3.0,2.333333,3.0,3.333333,3.666667,3.333333,...,2.333333,2.666667,3.333333,2.666667,3.0,2.333333,2.333333,2.666667,3.333333,3.666667
3,PREDWELL_RETOS-1004-1,2.333333,2.333333,2.333333,1.666667,2.333333,2.666667,2.0,2.666667,2.666667,...,1.333333,2.0,2.333333,2.666667,3.0,3.0,3.0,2.333333,2.0,2.0
4,PREDWELL_RETOS-1005-1,2.666667,2.666667,2.333333,2.0,3.333333,3.333333,3.666667,3.333333,3.666667,...,2.666667,2.666667,3.0,2.333333,2.666667,2.333333,2.333333,2.0,2.0,2.0
