In [14]:
import pandas as pd
import numpy as np

In [15]:
#We read the predictive processing data
RETOS_BEBRASK_dataset = pd.read_excel("RETOS_BEBRASK_long.xlsx")


In [16]:
def slope_intercept_regression(time_series):
    """
    Perform linear regression on each row of a time series DataFrame to extract slopes and intercepts.

    This function takes a time series DataFrame, where each row represents a subject and each column 
    represents a time point. It fits a linear regression model to the non-NaN values of each row 
    and returns the slopes and intercepts of the fitted lines.

    Parameters:
    time_series (DataFrame): A pandas DataFrame where each row is a subject and each column is a time point.

    Returns:
    tuple: Two numpy arrays containing the slopes and intercepts for each subject.
    """
    slopes = np.zeros(time_series.shape[0])
    intercepts = np.zeros(time_series.shape[0])
    
    # Original time stamps as the independent variable
    X_original = np.arange(1, time_series.shape[1] + 1)
    
    # Loop over each subject to fit a linear model and extract the slope
    for i in range(time_series.shape[0]):
        # Time series values for the current subject, dropping NaNs
        Y = time_series.iloc[i, :].dropna().values
    
        # Ensure Y is of numeric type
        Y = pd.to_numeric(Y, errors='coerce').astype('float64')
    
        # Filter X based on the non-NaN entries of Y to maintain correspondence
        X_filtered = X_original[~time_series.iloc[i, :].isna()]
    
        # Ensure X_filtered is of numeric type
        X_filtered = pd.to_numeric(X_filtered, errors='coerce').astype('float64')
    
        # Check again after conversion to avoid fitting a model with insufficient data
        if len(Y) > 1 and not np.isnan(Y).all():
            slope, intercept = np.polyfit(X_filtered, Y, 1)  # Fit a linear model
            slopes[i] = slope  # Store the slope
            intercepts[i] = intercept
        else:
            slopes[i] = np.nan  # Assign NaN if not enough data points or if conversion resulted in NaNs
            intercepts[i] = np.nan
    return slopes,intercepts

## Time series Features

Getting info on predictability


In [17]:
import pandas as pd

# Extract the subject IDs and time series data from the dataset
subject_id = RETOS_BEBRASK_dataset.iloc[:, 0]
subject_timeseries = RETOS_BEBRASK_dataset.iloc[:, 1:46]

# Identify columns related to different types of data
rating_columns = [col for col in RETOS_BEBRASK_dataset.columns if 'Rating0' in col]
fulfilled_columns = [col for col in RETOS_BEBRASK_dataset.columns if 'Fulfilled' in col]
emotions_columns = [col for col in RETOS_BEBRASK_dataset.columns if 'EvokedEmotion' in col]

# Get the index (subject IDs) of the dataset
subjects = RETOS_BEBRASK_dataset.index

# Initialize DataFrames to store ratings and fulfillment status over time for each subject

# DataFrame for Rating 0 over 45 time points
df_rating_0 = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(45)])

# DataFrames for Fulfilled status over 30 and 20 time points respectively (for margin of error)
df_fulfilled_1 = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(30)])
df_fulfilled_0 = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(20)])

# DataFrames for ratings of specific emotions (happy, sad, fear) over 18 time points each (for margin of error)
df_rating_happy = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(18)])
df_rating_sad = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(18)])
df_rating_fear = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(18)])

# Initialize DataFrames for each emotion with 'Fulfilled' status over 11 time points (for margin of error)
df_fulfilled_happy = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(11)])
df_fulfilled_sad = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(11)])
df_fulfilled_fear = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(11)])

# Initialize DataFrames for each emotion with 'Not Fulfilled' status over 7 time points (for margin of error)
df_no_fulfilled_happy = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(7)])
df_no_fulfilled_sad = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(7)])
df_no_fulfilled_fear = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(7)])


In [18]:
# Iterate through each subject
for subject in subjects:
    ratings = []
    # Initialize empty lists for 'Fulfilled' = 1 and 'Fulfilled' = 0
    ratings_1,ratings_0 = [], []
        # Initialize empty lists for 'Fulfilled' = 1 and 'Fulfilled' = 0 for each emotion
    ratings_1_happy, ratings_1_sad, ratings_1_fear = [], [], []
    ratings_0_happy, ratings_0_sad, ratings_0_fear = [], [], []
    
    ratings_happy, ratings_sad, ratings_fear = [], [], []


    # Iterate through each 'Fulfilled' and 'Rating0' column pair
    for fulfilled_col, rating_col, emotion_col in zip(fulfilled_columns, rating_columns, emotions_columns):
        fulfilled_status = RETOS_BEBRASK_dataset.loc[subject, fulfilled_col]
        rating_value = RETOS_BEBRASK_dataset.loc[subject, rating_col]
        emotion_value = RETOS_BEBRASK_dataset.loc[subject, emotion_col]
        ratings.append(rating_value)
        if fulfilled_status == 1:
            ratings_1.append(rating_value)

            if emotion_value == "happiness":
                ratings_1_happy.append(rating_value)
                ratings_happy.append(rating_value)
            elif emotion_value == "sadness":
                ratings_1_sad.append(rating_value)
                ratings_sad.append(rating_value)
            
            elif emotion_value == "fear":
                ratings_1_fear.append(rating_value)
                ratings_fear.append(rating_value)

        elif fulfilled_status == 0:
            ratings_0.append(rating_value)

            if emotion_value == "happiness":
                ratings_0_happy.append(rating_value)
                ratings_sad.append(rating_value)

            elif emotion_value == "sadness":
                ratings_0_sad.append(rating_value)
                ratings_happy.append(rating_value)

            elif emotion_value == "fear":
                ratings_0_fear.append(rating_value)
                ratings_fear.append(rating_value)

    # Extend lists with NaN values if they are shorter than required lengths
    
    ratings_1.extend([np.nan] * (30 - len(ratings_1)))
    ratings_0.extend([np.nan] * (20 - len(ratings_0)))

    ratings_1_happy.extend([np.nan] * (11 - len(ratings_1_happy)))
    ratings_1_sad.extend([np.nan] * (11 - len(ratings_1_sad)))
    ratings_1_fear.extend([np.nan] * (11 - len(ratings_1_fear)))

    ratings_0_happy.extend([np.nan] * (7 - len(ratings_0_happy)))
    ratings_0_sad.extend([np.nan] * (7 - len(ratings_0_sad)))
    ratings_0_fear.extend([np.nan] * (7 - len(ratings_0_fear)))
    
    ratings_happy.extend([np.nan] * (18 - len(ratings_happy)))
    ratings_sad.extend([np.nan] * (18 - len(ratings_sad)))
    ratings_fear.extend([np.nan] * (18 - len(ratings_fear)))

    
    # Insert lists into their respective DataFrames for the current subject
    df_rating_0.loc[subject, :] = ratings
    
    df_fulfilled_1.loc[subject, :] = ratings_1
    df_fulfilled_0.loc[subject, :] = ratings_0
    
    df_rating_happy.loc[subject, :] = ratings_happy
    df_rating_sad.loc[subject, :] = ratings_sad
    df_rating_fear.loc[subject, :] = ratings_fear

    df_fulfilled_happy.loc[subject, :] = ratings_1_happy
    df_fulfilled_sad.loc[subject, :] = ratings_1_sad
    df_fulfilled_fear.loc[subject, :] = ratings_1_fear

    df_no_fulfilled_happy.loc[subject, :] = ratings_0_happy
    df_no_fulfilled_sad.loc[subject, :] = ratings_0_sad
    df_no_fulfilled_fear.loc[subject, :] = ratings_0_fear

In [19]:
# Extract the subjects' identifiers
subjects = RETOS_BEBRASK_dataset["DataFile.Basename"]

# Perform linear regression on fulfilled data (df_fulfilled_1 and df_fulfilled_0)
slopes_match, intercepts_match = slope_intercept_regression(df_fulfilled_1)
slopes_no_match, intercepts_no_match = slope_intercept_regression(df_fulfilled_0)

# Perform linear regression on fulfilled 'happy' emotion data
slopes_match_happy, intercepts_match_happy = slope_intercept_regression(df_fulfilled_happy)
slopes_match_no_happy, intercepts_match_no_happy = slope_intercept_regression(df_no_fulfilled_happy)

# Perform linear regression on fulfilled 'sad' emotion data
slopes_match_sad, intercepts_match_sad = slope_intercept_regression(df_fulfilled_sad)
slopes_match_no_sad, intercepts_match_no_sad = slope_intercept_regression(df_no_fulfilled_sad)

# Perform linear regression on fulfilled 'fear' emotion data
slopes_match_fear, intercepts_match_fear = slope_intercept_regression(df_fulfilled_fear)
slopes_match_no_fear, intercepts_match_no_fear = slope_intercept_regression(df_no_fulfilled_fear)


In [20]:
# Create a DataFrame from the computed slopes and intercepts for different conditions and emotions
time_df = pd.DataFrame([
    subjects,
    slopes_match,
    slopes_no_match,
    intercepts_match,
    intercepts_no_match,
    slopes_match_happy,
    slopes_match_no_happy,
    intercepts_match_happy,
    intercepts_match_no_happy,
    slopes_match_sad,
    slopes_match_no_sad,
    intercepts_match_sad,
    intercepts_match_no_sad,
    slopes_match_fear,
    slopes_match_no_fear,
    intercepts_match_fear,
    intercepts_match_no_fear,
    (slopes_match_sad + slopes_match_fear) / 2,  # Average slope for sad and fear (negative emotions)
    (slopes_match_no_sad + slopes_match_no_fear) / 2,  # Average slope for sad and fear (negative emotions) with no match
    (intercepts_match_sad + intercepts_match_fear) / 2,  # Average intercept for sad and fear (negative emotions)
    (intercepts_match_no_fear + intercepts_match_no_sad) / 2  # Average intercept for sad and fear (negative emotions) with no match
]).transpose()

# Assign column names to the DataFrame
time_df.columns = [
    "Subject",
    "Trend_Match",
    "Trend_No_Match",
    "Intercept_Match",
    "Intercept_No_Match",
    "Trend_Match_Happy",
    "Trend_No_Match_Happy",
    "Intercept_Match_Happy",
    "Intercept_No_Match_Happy",
    "Trend_Match_Sad",
    "Trend_No_Match_Sad",
    "Intercept_Match_Sad",
    "Intercept_No_Match_Sad",
    "Trend_Match_Fear",
    "Trend_No_Match_Fear",
    "Intercept_Match_Fear",
    "Intercept_No_Match_Fear",
    "Trend_Match_Negative",  # Average slope for sad and fear (negative emotions)
    "Trend_No_Match_Negative",  # Average slope for sad and fear (negative emotions) with no match
    "Intercept_Match_Negative",  # Average intercept for sad and fear (negative emotions)
    "Intercept_No_Match_Negative"  # Average intercept for sad and fear (negative emotions) with no match
]

# Calculate final values at specific time points for match and no match conditions
time_df["Final_Value_Match"] = time_df["Intercept_Match"] + time_df["Trend_Match"] * 27
time_df["Final_Value_No_Match"] = time_df["Intercept_No_Match"] + time_df["Trend_No_Match"] * 18

# Calculate final values for happy emotion at specific time points for match and no match conditions
time_df["Final_Value_Match_Happy"] = time_df["Intercept_Match_Happy"] + time_df["Trend_Match_Happy"] * 9
time_df["Final_Value_No_Match_Happy"] = time_df["Intercept_No_Match_Happy"] + time_df["Trend_No_Match_Happy"] * 6

# Calculate final values for sad emotion at specific time points for match and no match conditions
time_df["Final_Value_Match_Sad"] = time_df["Intercept_Match_Sad"] + time_df["Trend_Match_Sad"] * 9
time_df["Final_Value_No_Match_Sad"] = time_df["Intercept_No_Match_Sad"] + time_df["Trend_No_Match_Sad"] * 6

# Calculate final values for fear emotion at specific time points for match and no match conditions
time_df["Final_Value_Match_Fear"] = time_df["Intercept_Match_Fear"] + time_df["Trend_Match_Fear"] * 9
time_df["Final_Value_No_Match_Fear"] = time_df["Intercept_No_Match_Fear"] + time_df["Trend_No_Match_Fear"] * 6

# Calculate final values for negative emotions (average of sad and fear) at specific time points for match and no match conditions
time_df["Final_Value_Match_Negative"] = time_df["Intercept_Match_Negative"] + time_df["Trend_Match_Negative"] * 9
time_df["Final_Value_No_Match_Negative"] = time_df["Intercept_No_Match_Negative"] + time_df["Trend_No_Match_Negative"] * 6


In [21]:
time_df.to_excel('../Clustering_Predictive_Processing/trend_dataset.xlsx', index=False)

## Theory Driven Features
Getting info on likeability


In [22]:
import pandas as pd

# Identify columns related to different types of data
fulfilled_columns = [col for col in RETOS_BEBRASK_dataset.columns if 'Fulfilled' in col]
emotions_columns = [col for col in RETOS_BEBRASK_dataset.columns if 'EvokedEmotion' in col]
likeability_columns = [col for col in RETOS_BEBRASK_dataset.columns if 'Rating.' in col]

# Get the index (subject IDs) of the dataset
subjects = RETOS_BEBRASK_dataset.index

# Initialize DataFrames to store likeability ratings over time for each subject

# DataFrame for Likeability ratings over 45 time points
df_likeability = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(45)])

# DataFrames for Likeability ratings with fulfilled and not fulfilled statuses over 30 and 20 time points respectively
df_likeability_fulfilled = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(30)])
df_likeability_no_fulfilled = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(20)])

# DataFrames for Likeability ratings of specific emotions (happy, sad, fear) over 18 time points each
df_likeability_happy = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(18)])
df_likeability_sad = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(18)])
df_likeability_fear = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(18)])

# Initialize DataFrames for Likeability ratings of each emotion with 'Fulfilled' status over 11 time points
df_likeability_fulfilled_happy = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(11)])
df_likeability_fulfilled_sad = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(11)])
df_likeability_fulfilled_fear = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(11)])

# Initialize DataFrames for Likeability ratings of each emotion with 'Not Fulfilled' status over 7 time points
df_likeability_no_fulfilled_happy = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(7)])
df_likeability_no_fulfilled_sad = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(7)])
df_likeability_no_fulfilled_fear = pd.DataFrame(index=subjects, columns=[f"Time_{i+1}" for i in range(7)])


In [23]:
# Iterate through each subject
for subject in subjects:
    likeability = []
    # Initialize empty lists for 'Fulfilled' = 1 and 'Fulfilled' = 0
    likeability_1, likeability_0 = [], []
    # Initialize empty lists for 'Fulfilled' = 1 and 'Fulfilled' = 0 for each emotion
    likeability_1_happy, likeability_1_sad, likeability_1_fear = [], [], []
    likeability_0_happy, likeability_0_sad, likeability_0_fear = [], [], []
    likeability_happy, likeability_sad, likeability_fear = [], [], []

    # Iterate through each 'Fulfilled' and 'Rating0' column pair
    for fulfilled_col, likeability_col, emotion_col in zip(fulfilled_columns, likeability_columns, emotions_columns):
        fulfilled_status = RETOS_BEBRASK_dataset.loc[subject, fulfilled_col]
        rating_value = RETOS_BEBRASK_dataset.loc[subject, likeability_col]
        emotion_value = RETOS_BEBRASK_dataset.loc[subject, emotion_col]
        likeability.append(rating_value)
        if fulfilled_status == 1:
            likeability_1.append(rating_value)

            if emotion_value == "happiness":
                likeability_1_happy.append(rating_value)
                likeability_happy.append(rating_value)
            elif emotion_value == "sadness":
                likeability_1_sad.append(rating_value)
                likeability_sad.append(rating_value)
            elif emotion_value == "fear":
                likeability_1_fear.append(rating_value)
                likeability_fear.append(rating_value)
        elif fulfilled_status == 0:
            likeability_0.append(rating_value)

            if emotion_value == "happiness":
                likeability_0_happy.append(rating_value)
                likeability_happy.append(rating_value)
            elif emotion_value == "sadness":
                likeability_0_sad.append(rating_value)
                likeability_sad.append(rating_value)
            elif emotion_value == "fear":
                likeability_0_fear.append(rating_value)
                likeability_fear.append(rating_value)

    # Extend lists with NaN values if they are shorter than required lengths

    likeability_1.extend([np.nan] * (30 - len(likeability_1)))
    likeability_0.extend([np.nan] * (20 - len(likeability_0)))

    likeability_1_happy.extend([np.nan] * (11 - len(likeability_1_happy)))
    likeability_1_sad.extend([np.nan] * (11 - len(likeability_1_sad)))
    likeability_1_fear.extend([np.nan] * (11 - len(likeability_1_fear)))

    likeability_happy.extend([np.nan] * (18 - len(likeability_happy)))
    likeability_sad.extend([np.nan] * (18 - len(likeability_sad)))
    likeability_fear.extend([np.nan] * (18 - len(likeability_fear)))

    likeability_0_happy.extend([np.nan] * (7 - len(likeability_0_happy)))
    likeability_0_sad.extend([np.nan] * (7 - len(likeability_0_sad)))
    likeability_0_fear.extend([np.nan] * (7 - len(likeability_0_fear)))

    # Insert lists into their respective DataFrames for the current subject
    df_likeability.loc[subject, :] = likeability

    df_likeability_happy.loc[subject, :] = likeability_happy
    df_likeability_sad.loc[subject, :] = likeability_sad
    df_likeability_fear.loc[subject, :] = likeability_fear

    df_likeability_fulfilled.loc[subject, :] = likeability_1
    df_likeability_no_fulfilled.loc[subject, :] = likeability_0

    df_likeability_fulfilled_happy.loc[subject, :] = likeability_1_happy
    df_likeability_fulfilled_sad.loc[subject, :] = likeability_1_sad
    df_likeability_fulfilled_fear.loc[subject, :] = likeability_1_fear

    df_likeability_no_fulfilled_happy.loc[subject, :] = likeability_0_happy
    df_likeability_no_fulfilled_sad.loc[subject, :] = likeability_0_sad
    df_likeability_no_fulfilled_fear.loc[subject, :] = likeability_0_fear


### Correlations

In [24]:
def row_correlation(row1, row2):
    """
    Calculate the correlation between two rows, handling constant data.

    If the standard deviation of either row is zero, it returns 1 if the rows are identical,
    otherwise returns 0. If both rows have non-zero standard deviations, it returns the
    Pearson correlation coefficient.

    Parameters:
    row1 (Series): The first row.
    row2 (Series): The second row.

    Returns:
    float: The correlation coefficient.
    """
    # Check if the standard deviation is zero
    if row1.std() == 0 or row2.std() == 0:
        # Handle the constant data case here
        return 1 if row1.equals(row2) else 0
    return row1.corr(row2)

# Calculate correlations for various conditions and datasets
correlations_all = [row_correlation(df_likeability.loc[index], df_rating_0.loc[index]) for index in df_likeability.index]
correlations_match = [row_correlation(df_likeability_fulfilled.loc[index], df_fulfilled_1.loc[index]) for index in df_likeability_fulfilled.index]
correlations_no_match = [row_correlation(df_likeability_no_fulfilled.loc[index], df_fulfilled_0.loc[index]) for index in df_likeability_no_fulfilled.index]
correlations_happy_match = [row_correlation(df_likeability_fulfilled_happy.loc[index], df_fulfilled_happy.loc[index]) for index in df_likeability_fulfilled_happy.index]
correlations_happy_no_match = [row_correlation(df_likeability_no_fulfilled_happy.loc[index], df_no_fulfilled_happy.loc[index]) for index in df_likeability_no_fulfilled_happy.index]
correlations_sad_match = [row_correlation(df_likeability_fulfilled_sad.loc[index], df_fulfilled_sad.loc[index]) for index in df_likeability_fulfilled_sad.index]
correlations_sad_no_match = [row_correlation(df_likeability_no_fulfilled_sad.loc[index], df_no_fulfilled_sad.loc[index]) for index in df_likeability_no_fulfilled_sad.index]
correlations_fear_match = [row_correlation(df_likeability_fulfilled_fear.loc[index], df_fulfilled_fear.loc[index]) for index in df_likeability_fulfilled_fear.index]
correlations_fear_no_match = [row_correlation(df_likeability_no_fulfilled_fear.loc[index], df_no_fulfilled_fear.loc[index]) for index in df_likeability_no_fulfilled_fear.index]
correlations_happy = [row_correlation(df_likeability_happy.loc[index], df_rating_happy.loc[index]) for index in df_likeability_happy.index]
correlations_sad = [row_correlation(df_likeability_sad.loc[index], df_rating_sad.loc[index]) for index in df_likeability_sad.index]
correlations_fear = [row_correlation(df_likeability_fear.loc[index], df_rating_fear.loc[index]) for index in df_likeability_fear.index]

# Create a DataFrame to store all correlation results
correlation_df = pd.DataFrame([
    correlations_all,
    correlations_match,
    correlations_no_match,
    correlations_happy_match,
    correlations_happy_no_match,
    correlations_sad_match,
    correlations_sad_no_match,
    correlations_fear_match,
    correlations_fear_no_match,
    (np.array(correlations_sad_match) + np.array(correlations_fear_match)) / 2,  # Average correlation for sad and fear match
    (np.array(correlations_sad_no_match) + np.array(correlations_fear_no_match)) / 2,  # Average correlation for sad and fear no match
    correlations_happy,
    correlations_sad,
    correlations_fear,
    (np.array(correlations_sad) + np.array(correlations_fear)) / 2  # Average correlation for sad and fear
]).transpose()

# Assign column names to the correlation DataFrame
correlation_df.columns = [
    "Cor_Pred_Like",
    "Cor_Pred_Like_Match",
    "Cor_Pred_Like_No_Match",
    "Cor_Pred_Like_Match_Happy",
    "Cor_Pred_Like_No_Match_Happy",
    "Cor_Pred_Like_Match_Sad",
    "Cor_Pred_Like_No_Match_Sad",
    "Cor_Pred_Like_Match_Fear",
    "Cor_Pred_Like_No_Match_Fear",
    "Cor_Pred_Like_Match_Negative",
    "Cor_Pred_Like_No_Match_Negative",
    "Cor_Pred_Like_Happy",
    "Cor_Pred_Like_Sad",
    "Cor_Pred_Like_Fear",
    "Cor_Pred_Like_Negative"
]

# Display the correlation DataFrame
correlation_df


  c /= stddev[:, None]
  c /= stddev[None, :]


Unnamed: 0,Cor_Pred_Like,Cor_Pred_Like_Match,Cor_Pred_Like_No_Match,Cor_Pred_Like_Match_Happy,Cor_Pred_Like_No_Match_Happy,Cor_Pred_Like_Match_Sad,Cor_Pred_Like_No_Match_Sad,Cor_Pred_Like_Match_Fear,Cor_Pred_Like_No_Match_Fear,Cor_Pred_Like_Match_Negative,Cor_Pred_Like_No_Match_Negative,Cor_Pred_Like_Happy,Cor_Pred_Like_Sad,Cor_Pred_Like_Fear,Cor_Pred_Like_Negative
0,0.113495,0.209359,0.103807,-2.500000e-01,0.542326,0.342381,-0.061430,0.196722,-0.188982,0.269552,-0.125206,0.517809,-0.111746,-0.010479,-0.061112
1,0.116469,0.291056,-0.051510,4.902903e-01,-0.632456,-0.292770,0.000000,0.258199,0.000000,-0.017286,0.000000,-0.129387,0.024807,0.053548,0.039177
2,0.250345,0.432551,0.139859,-2.294157e-01,0.800000,0.230940,0.000000,0.307692,-0.307148,0.269316,-0.153574,0.489720,-0.235056,-0.251638,-0.243347
3,-0.230171,-0.006600,-0.585369,1.409710e-02,0.000000,0.000000,-0.542326,-0.280000,-0.131306,-0.140000,-0.336816,0.143346,-0.041910,-0.308632,-0.175271
4,0.034141,0.294244,-0.409852,5.345225e-01,0.375000,-0.774597,-0.875000,-0.762493,-0.867722,-0.768545,-0.871361,0.534183,0.215294,-0.781323,-0.283014
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,0.255697,0.281127,0.260715,9.433333e-02,0.727607,-0.036564,0.342997,0.463586,-0.316228,0.213511,0.013385,-0.159448,-0.512930,0.311421,-0.100755
146,0.607126,0.418214,0.346488,2.467162e-17,-0.108465,-0.142857,0.542326,0.304997,0.242536,0.081070,0.392431,0.348972,0.170523,0.320256,0.245390
147,0.438726,0.514426,0.000000,7.765164e-01,0.000000,0.346423,-0.121566,0.548795,-0.242536,0.447609,-0.182051,0.667781,-0.015347,0.132019,0.058336
148,0.366738,0.486755,-0.235897,1.000000e+00,0.612372,0.000000,-0.801784,0.202031,-0.534522,0.101015,-0.668153,0.173448,-0.544581,0.108465,-0.218058


### Average Rating

In [25]:

# Calculate the average predictions for different conditions and emotions
average_prediction = df_rating_0.mean(axis=1)

average_prediction_match = df_fulfilled_1.mean(axis=1)
average_prediction_no_match = df_fulfilled_0.mean(axis=1)

average_prediction_happy_match = df_fulfilled_happy.mean(axis=1)
average_prediction_happy_no_match = df_no_fulfilled_happy.mean(axis=1)

average_prediction_sad_match = df_fulfilled_sad.mean(axis=1)
average_prediction_sad_no_match = df_no_fulfilled_sad.mean(axis=1)

average_prediction_fear_match = df_fulfilled_fear.mean(axis=1)
average_prediction_fear_no_match = df_no_fulfilled_fear.mean(axis=1)

average_prediction_happy = df_rating_happy.mean(axis=1)
average_prediction_sad = df_rating_sad.mean(axis=1)
average_prediction_fear = df_rating_fear.mean(axis=1)

# Create a DataFrame to store the average predictions for different conditions and emotions
average_df = pd.DataFrame([
    average_prediction,
    average_prediction_match,
    average_prediction_no_match,
    average_prediction_happy_match,
    average_prediction_happy_no_match,
    average_prediction_sad_match,
    average_prediction_sad_no_match,
    average_prediction_fear_match,
    average_prediction_fear_no_match,
    (average_prediction_sad_match + average_prediction_fear_match) / 2,  # Average prediction for sad and fear match
    (average_prediction_fear_no_match + average_prediction_sad_no_match) / 2,  # Average prediction for sad and fear no match
    average_prediction_happy,
    average_prediction_sad,
    average_prediction_fear,
    (average_prediction_sad + average_prediction_fear) / 2  # Average prediction for sad and fear
]).transpose()

# Assign column names to the average prediction DataFrame
average_df.columns = [
    "Mean_Rating0",
    "Mean_Rating0_Match",
    "Mean_Rating0_No_Match",
    "Mean_Rating0_Match_Happy",
    "Mean_Rating0_No_Match_Happy",
    "Mean_Rating0_Match_Sad",
    "Mean_Rating0_No_Match_Sad",
    "Mean_Rating0_Match_Fear",
    "Mean_Rating0_No_Match_Fear",
    "Mean_Rating0_Match_Negative",  # Average prediction for sad and fear match
    "Mean_Rating0_No_Match_Negative",  # Average prediction for sad and fear no match
    "Mean_Rating0_Happy",
    "Mean_Rating0_Sad",
    "Mean_Rating0_Fear",
    "Mean_Rating0_Negative"  # Average prediction for sad and fear
]

# Display the average prediction DataFrame
average_df


Unnamed: 0,Mean_Rating0,Mean_Rating0_Match,Mean_Rating0_No_Match,Mean_Rating0_Match_Happy,Mean_Rating0_No_Match_Happy,Mean_Rating0_Match_Sad,Mean_Rating0_No_Match_Sad,Mean_Rating0_Match_Fear,Mean_Rating0_No_Match_Fear,Mean_Rating0_Match_Negative,Mean_Rating0_No_Match_Negative,Mean_Rating0_Happy,Mean_Rating0_Sad,Mean_Rating0_Fear,Mean_Rating0_Negative
0,2.888889,3.555556,1.888889,3.888889,1.500000,3.555556,2.166667,3.222222,2.000000,3.388889,2.083333,3.200000,2.733333,2.733333,2.733333
1,2.533333,3.000000,1.833333,3.333333,1.333333,2.555556,1.833333,3.111111,2.333333,2.833333,2.083333,2.733333,2.066667,2.800000,2.433333
2,2.311111,3.000000,1.277778,3.555556,1.166667,2.666667,1.500000,2.777778,1.166667,2.722222,1.333333,2.733333,2.066667,2.133333,2.100000
3,2.555556,3.148148,1.666667,3.222222,1.000000,3.000000,2.666667,3.222222,1.333333,3.111111,2.000000,3.000000,2.200000,2.466667,2.333333
4,2.266667,2.851852,1.388889,3.666667,1.166667,2.666667,1.333333,2.222222,1.666667,2.444444,1.500000,2.733333,2.066667,2.000000,2.033333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,2.088889,2.555556,1.388889,3.111111,1.500000,2.111111,1.333333,2.444444,1.333333,2.277778,1.333333,2.400000,1.866667,2.000000,1.933333
146,2.409091,3.076923,1.444444,3.666667,1.166667,2.750000,1.333333,2.777778,1.833333,2.763889,1.583333,2.733333,2.071429,2.400000,2.235714
147,2.613636,3.230769,1.722222,3.625000,1.000000,2.888889,1.833333,3.222222,2.333333,3.055556,2.083333,2.857143,2.133333,2.866667,2.500000
148,2.590909,3.222222,1.588235,3.777778,1.333333,3.000000,1.666667,2.888889,1.800000,2.944444,1.733333,2.933333,2.333333,2.500000,2.416667


### Difference Match No Match


In [26]:
# Calculate the differences between matched and non-matched average predictions
dif_match = average_prediction_match - average_prediction_no_match

# Calculate the differences between matched and non-matched average predictions for each emotion
dif_happy = average_prediction_happy_match - average_prediction_happy_no_match
dif_sad = average_prediction_sad_match - average_prediction_sad_no_match
dif_fear = average_prediction_fear_match - average_prediction_fear_no_match

# Create a DataFrame to store the differences
dif_df = pd.DataFrame([
    dif_match,
    dif_happy,
    dif_sad,
    dif_fear,
    (dif_sad + dif_fear) / 2  # Average difference for negative emotions (sad and fear)
]).transpose()

# Assign column names to the differences DataFrame
dif_df.columns = [
    "Dif_Match",
    "Dif_Happy",
    "Dif_Sad",
    "Dif_Fear",
    "Dif_Negative"  # Average difference for negative emotions (sad and fear)
]

# Display the differences DataFrame
dif_df


Unnamed: 0,Dif_Match,Dif_Happy,Dif_Sad,Dif_Fear,Dif_Negative
0,1.666667,2.388889,1.388889,1.222222,1.305556
1,1.166667,2.000000,0.722222,0.777778,0.750000
2,1.722222,2.388889,1.166667,1.611111,1.388889
3,1.481481,2.222222,0.333333,1.888889,1.111111
4,1.462963,2.500000,1.333333,0.555556,0.944444
...,...,...,...,...,...
145,1.166667,1.611111,0.777778,1.111111,0.944444
146,1.632479,2.500000,1.416667,0.944444,1.180556
147,1.508547,2.625000,1.055556,0.888889,0.972222
148,1.633987,2.444444,1.333333,1.088889,1.211111


In [27]:
theory_df = pd.concat([average_df,dif_df,correlation_df],axis=1)


## Joining Both Datasets

In [28]:
full_dataset = pd.concat([theory_df,time_df.drop("Subject",axis=1)],axis=1)
full_dataset.insert(0,"Subject",RETOS_BEBRASK_dataset["DataFile.Basename"])
full_dataset.to_excel('../Clustering_Predictive_Processing/All_Features_dataset.xlsx', index=False)