In [1]:
import pandas as pd
import numpy as np


In [2]:
# Load the RETOS/BEBRASK table from an Excel workbook (relative path, so it is
# resolved against the current working directory). Later cells select columns
# from this DataFrame by name.
RETOS_BEBRASK_dataset = pd.read_excel("RETOS_BEBRASK_long.xlsx")

In [3]:
def slope_intercept_regression(time_series):
    """Fit a straight line to every row of ``time_series``.

    Each row is treated as one series observed at time points
    1..n_columns. Entries that are missing, or that cannot be converted to
    a number, are left out together with their time point, so the
    remaining (time, value) pairs always stay aligned.

    Parameters
    ----------
    time_series : pandas.DataFrame
        One row per series, one column per time point.

    Returns
    -------
    slopes, intercepts : tuple of numpy.ndarray
        Two float arrays of length ``time_series.shape[0]``. A row with
        fewer than two usable points gets NaN in both arrays.
    """
    n_rows, n_points = time_series.shape

    # Start from NaN so rows that cannot be fitted need no special branch.
    slopes = np.full(n_rows, np.nan)
    intercepts = np.full(n_rows, np.nan)

    # Time stamps 1..n_points are the independent variable.
    time_points = np.arange(1, n_points + 1, dtype='float64')

    for i in range(n_rows):
        # Coerce BEFORE masking: a non-numeric entry becomes NaN here and is
        # then removed from both X and Y by the same mask. Dropping NaNs
        # first would leave such an entry in Y and hand NaN to np.polyfit.
        values = pd.to_numeric(time_series.iloc[i, :], errors='coerce').to_numpy(
            dtype='float64', na_value=np.nan
        )
        usable = ~np.isnan(values)

        # A line needs at least two points.
        if usable.sum() > 1:
            slopes[i], intercepts[i] = np.polyfit(time_points[usable], values[usable], 1)

    return slopes, intercepts

## Time series Features

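Extract each subject's predictability ratings (the `Rating0` columns), split by whether the trial was fulfilled and by the evoked emotion, then fit a linear trend to each resulting series.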

In [4]:
# First column and the 45 columns that follow it.
subject_id = RETOS_BEBRASK_dataset.iloc[:, 0]
subject_timeseries = RETOS_BEBRASK_dataset.iloc[:, 1:46]

# Column groups are recognised by a substring of the column name.
dataset_columns = RETOS_BEBRASK_dataset.columns
rating_columns = [name for name in dataset_columns if 'Rating0' in name]
fulfilled_columns = [name for name in dataset_columns if 'Fulfilled' in name]
emotions_columns = [name for name in dataset_columns if 'EvokedEmotion' in name]

subjects = RETOS_BEBRASK_dataset.index


def _blank_rating_frame(n_points):
    """Return an empty frame: one row per subject, columns Time_1..Time_<n_points>."""
    return pd.DataFrame(index=subjects, columns=[f"Time_{step}" for step in range(1, n_points + 1)])


# Every rating, in column order.
df_rating_0 = _blank_rating_frame(45)

# Ratings pooled by 'Fulfilled' status.
df_fulfilled_1 = _blank_rating_frame(30)
df_fulfilled_0 = _blank_rating_frame(20)

# Ratings by 'Fulfilled' status and evoked emotion.
df_fulfilled_happy = _blank_rating_frame(11)
df_fulfilled_sad = _blank_rating_frame(11)
df_fulfilled_fear = _blank_rating_frame(11)

df_no_fulfilled_happy = _blank_rating_frame(7)
df_no_fulfilled_sad = _blank_rating_frame(7)
df_no_fulfilled_fear = _blank_rating_frame(7)

# Emotion labels, tested in this order.
_EMOTION_LABELS = ("happiness", "sadness", "fear")

# Destination frame and row width for each accumulator. Insertion order is the
# order in which the frames are written for a subject.
_status_targets = {
    1: (df_fulfilled_1, 30),
    0: (df_fulfilled_0, 20),
}
_emotion_targets = {
    (1, "happiness"): (df_fulfilled_happy, 11),
    (1, "sadness"): (df_fulfilled_sad, 11),
    (1, "fear"): (df_fulfilled_fear, 11),
    (0, "happiness"): (df_no_fulfilled_happy, 7),
    (0, "sadness"): (df_no_fulfilled_sad, 7),
    (0, "fear"): (df_no_fulfilled_fear, 7),
}

for subject in subjects:
    all_ratings = []
    per_status = {status_key: [] for status_key in _status_targets}
    per_emotion = {target_key: [] for target_key in _emotion_targets}

    # Walk the 'Fulfilled' / 'Rating0' / 'EvokedEmotion' columns in parallel.
    for fulfilled_col, rating_col, emotion_col in zip(fulfilled_columns, rating_columns, emotions_columns):
        fulfilled_status = RETOS_BEBRASK_dataset.loc[subject, fulfilled_col]
        rating_value = RETOS_BEBRASK_dataset.loc[subject, rating_col]
        emotion_value = RETOS_BEBRASK_dataset.loc[subject, emotion_col]
        all_ratings.append(rating_value)

        if fulfilled_status == 1:
            status_key = 1
        elif fulfilled_status == 0:
            status_key = 0
        else:
            # Neither 1 nor 0: the rating only goes into the overall series.
            continue
        per_status[status_key].append(rating_value)

        for label in _EMOTION_LABELS:
            if emotion_value == label:
                per_emotion[(status_key, label)].append(rating_value)
                break

    # The overall series is written as collected; the others are right-padded
    # with NaN up to the width of their frame.
    df_rating_0.loc[subject, :] = all_ratings

    for status_key, (frame, width) in _status_targets.items():
        collected = per_status[status_key]
        frame.loc[subject, :] = collected + [np.nan] * (width - len(collected))

    for target_key, (frame, width) in _emotion_targets.items():
        collected = per_emotion[target_key]
        frame.loc[subject, :] = collected + [np.nan] * (width - len(collected))

In [5]:
# Fit a line to each subject's row and keep the per-subject slopes and intercepts.
# Pooled fulfilled ("match") and non-fulfilled ("no match") ratings:
slopes_match, intercepts_match= slope_intercept_regression(df_fulfilled_1)
slopes_no_match, intercepts_no_match= slope_intercept_regression(df_fulfilled_0)

# Per-emotion fits. Note the naming: the `*_match_no_<emotion>` variables hold
# the fits for the non-fulfilled frames (df_no_fulfilled_<emotion>).
slopes_match_happy, intercepts_match_happy = slope_intercept_regression(df_fulfilled_happy)
slopes_match_no_happy, intercepts_match_no_happy = slope_intercept_regression(df_no_fulfilled_happy)

slopes_match_sad, intercepts_match_sad = slope_intercept_regression(df_fulfilled_sad)
slopes_match_no_sad, intercepts_match_no_sad = slope_intercept_regression(df_no_fulfilled_sad)

slopes_match_fear, intercepts_match_fear = slope_intercept_regression(df_fulfilled_fear)
slopes_match_no_fear, intercepts_match_no_fear = slope_intercept_regression(df_no_fulfilled_fear)


In [6]:
# Per-subject slope ("Trend") and intercept of every condition, keyed by the
# output column name. The "Negative" entries are the element-wise mean of the
# sad and fear fits.
trend_features = {
    "Trend_Match": slopes_match,
    "Trend_No_Match": slopes_no_match,
    "Intercept_Match": intercepts_match,
    "Intercept_No_Match": intercepts_no_match,
    "Trend_Match_Happy": slopes_match_happy,
    "Trend_No_Match_Happy": slopes_match_no_happy,
    "Intercept_Match_Happy": intercepts_match_happy,
    "Intercept_No_Match_Happy": intercepts_match_no_happy,
    "Trend_Match_Sad": slopes_match_sad,
    "Trend_No_Match_Sad": slopes_match_no_sad,
    "Intercept_Match_Sad": intercepts_match_sad,
    "Intercept_No_Match_Sad": intercepts_match_no_sad,
    "Trend_Match_Fear": slopes_match_fear,
    "Trend_No_Match_Fear": slopes_match_no_fear,
    "Intercept_Match_Fear": intercepts_match_fear,
    "Intercept_No_Match_Fear": intercepts_match_no_fear,
    "Trend_Match_Negative": (slopes_match_sad + slopes_match_fear) / 2,
    "Trend_No_Match_Negative": (slopes_match_no_sad + slopes_match_no_fear) / 2,
    "Intercept_Match_Negative": (intercepts_match_sad + intercepts_match_fear) / 2,
    "Intercept_No_Match_Negative": (intercepts_match_no_fear + intercepts_match_no_sad) / 2,
}

# Stack the arrays as rows, then transpose so each feature becomes a column.
time_df = pd.DataFrame(list(trend_features.values())).transpose()
time_df.columns = list(trend_features)

# Time index at which each fitted line is evaluated to get its "final value".
# NOTE(review): presumably the number of trials in each condition - confirm.
final_step_by_condition = {
    "Match": 27,
    "No_Match": 18,
    "Match_Happy": 9,
    "No_Match_Happy": 6,
    "Match_Sad": 9,
    "No_Match_Sad": 6,
    "Match_Fear": 9,
    "No_Match_Fear": 6,
    "Match_Negative": 9,
    "No_Match_Negative": 6,
}

for condition, final_step in final_step_by_condition.items():
    time_df[f"Final_Value_{condition}"] = (
        time_df[f"Intercept_{condition}"] + time_df[f"Trend_{condition}"] * final_step
    )


## Theory Driven Features

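Extract each subject's likeability ratings (the `Rating.` columns), split the same way as the predictability ratings: by whether the trial was fulfilled and by the evoked emotion.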

In [7]:
# Column groups, recognised by a substring of the column name.
fulfilled_columns = [name for name in RETOS_BEBRASK_dataset.columns if 'Fulfilled' in name]
emotions_columns = [name for name in RETOS_BEBRASK_dataset.columns if 'EvokedEmotion' in name]
likeability_columns = [name for name in RETOS_BEBRASK_dataset.columns if 'Rating.' in name]

subjects = RETOS_BEBRASK_dataset.index

# Column labels Time_1..Time_45; the narrower frames use a prefix of this list.
time_labels = [f"Time_{step}" for step in range(1, 46)]

# Every likeability rating, in column order.
df_likeability = pd.DataFrame(index=subjects, columns=time_labels)

# Likeability ratings pooled by 'Fulfilled' status.
df_likeability_fulfilled = pd.DataFrame(index=subjects, columns=time_labels[:30])
df_likeability_no_fulfilled = pd.DataFrame(index=subjects, columns=time_labels[:20])

# Likeability ratings by 'Fulfilled' status and evoked emotion.
df_likeability_fulfilled_happy = pd.DataFrame(index=subjects, columns=time_labels[:11])
df_likeability_fulfilled_sad = pd.DataFrame(index=subjects, columns=time_labels[:11])
df_likeability_fulfilled_fear = pd.DataFrame(index=subjects, columns=time_labels[:11])

df_likeability_no_fulfilled_happy = pd.DataFrame(index=subjects, columns=time_labels[:7])
df_likeability_no_fulfilled_sad = pd.DataFrame(index=subjects, columns=time_labels[:7])
df_likeability_no_fulfilled_fear = pd.DataFrame(index=subjects, columns=time_labels[:7])

# Emotion labels, tested in this order.
emotion_labels = ("happiness", "sadness", "fear")

for subject in subjects:
    likeability = []
    by_status = {1: [], 0: []}
    by_status_and_emotion = {
        1: {label: [] for label in emotion_labels},
        0: {label: [] for label in emotion_labels},
    }

    # Walk the 'Fulfilled' / 'Rating.' / 'EvokedEmotion' columns in parallel.
    for fulfilled_col, likeability_col, emotion_col in zip(fulfilled_columns, likeability_columns, emotions_columns):
        fulfilled_status = RETOS_BEBRASK_dataset.loc[subject, fulfilled_col]
        rating_value = RETOS_BEBRASK_dataset.loc[subject, likeability_col]
        emotion_value = RETOS_BEBRASK_dataset.loc[subject, emotion_col]
        likeability.append(rating_value)

        status = 1 if fulfilled_status == 1 else (0 if fulfilled_status == 0 else None)
        if status is None:
            # Neither 1 nor 0: the rating only goes into the overall series.
            continue
        by_status[status].append(rating_value)

        matched_label = next((label for label in emotion_labels if emotion_value == label), None)
        if matched_label is not None:
            by_status_and_emotion[status][matched_label].append(rating_value)

    # The overall series is written as collected.
    df_likeability.loc[subject, :] = likeability

    # The other series are right-padded with NaN up to the width of their frame.
    padded_targets = [
        (df_likeability_fulfilled, by_status[1], 30),
        (df_likeability_no_fulfilled, by_status[0], 20),
        (df_likeability_fulfilled_happy, by_status_and_emotion[1]["happiness"], 11),
        (df_likeability_fulfilled_sad, by_status_and_emotion[1]["sadness"], 11),
        (df_likeability_fulfilled_fear, by_status_and_emotion[1]["fear"], 11),
        (df_likeability_no_fulfilled_happy, by_status_and_emotion[0]["happiness"], 7),
        (df_likeability_no_fulfilled_sad, by_status_and_emotion[0]["sadness"], 7),
        (df_likeability_no_fulfilled_fear, by_status_and_emotion[0]["fear"], 7),
    ]
    for frame, collected, width in padded_targets:
        frame.loc[subject, :] = collected + [np.nan] * (width - len(collected))

#### Correlations

In [12]:
# Element-wise mean of the sad-match and fear-match correlation lists.
# NOTE(review): in the visible notebook, correlations_sad_match and
# correlations_fear_match are first assigned in the cell that follows this
# one, so on a fresh kernel this cell raises NameError unless that cell has
# already been run.
(np.array(correlations_sad_match)+np.array(correlations_fear_match))/2

array([ 0.26955178,  0.22752307,  0.26931621, -0.14      , -0.30013572,
        0.60355339, -0.1434192 ,  0.16866188,  0.27753775,  0.2577625 ,
        0.13289671, -0.01798694, -0.35575624,  0.47947607,  0.09690108,
        0.36242341,  0.46105375,  0.43449976,  0.03722805, -0.29733116,
        0.46498133,  0.36950483, -0.00891093,  0.20791729,  0.93223732,
        0.09449112,  0.0144593 ,  0.78433361,  0.24876374, -0.46984964,
       -0.15857473, -0.375     ,  0.47437724, -0.01495552,  0.22613351,
        0.42892205, -0.10660036, -0.27386128,  0.0907458 ,  0.46535679,
       -0.275     ,  0.80975126,  0.24268134,  0.33525477,  0.31035288,
        0.70162774,  0.53395864,  0.55624169, -0.43680751, -0.3079493 ,
        0.36628552,  0.4527904 ,  0.07134067,  0.69643997,  0.275     ,
       -0.51434297, -0.4267767 , -0.49846571, -0.23150324, -0.15222396,
        0.33771919,  0.19780909,  0.        , -0.52119589, -0.05252488,
       -0.17290814, -0.25802342,  0.37067691, -0.27132163,  0.43

In [13]:
def row_correlation(row1, row2):
    """Correlate two rows, with a defined result for constant data.

    The rows are aligned on their shared labels, converted to numbers
    (entries that cannot be converted count as missing), and reduced to the
    positions where both have a value. Doing this before the zero-variance
    check matters: the correlation only uses those shared positions, so a
    row that varies overall but is constant on the overlap would otherwise
    skip the check and produce NaN.

    Parameters
    ----------
    row1, row2 : pandas.Series
        The two rows to compare.

    Returns
    -------
    int or float
        1 if either row is constant on the overlap and the two overlaps are
        equal, 0 if either is constant and they differ, otherwise the value
        of ``Series.corr`` on the overlapping observations.
    """
    # Same inner alignment Series.corr applies internally.
    left, right = row1.align(row2, join="inner")
    left = pd.to_numeric(left, errors="coerce")
    right = pd.to_numeric(right, errors="coerce")

    # Keep only the pairwise-complete observations.
    complete = left.notna() & right.notna()
    left, right = left[complete], right[complete]

    # Correlation is undefined when either side has zero variance.
    if left.std() == 0 or right.std() == 0:
        return 1 if left.equals(right) else 0
    return left.corr(right)
def _paired_row_correlations(likeability_frame, rating_frame):
    """Correlate same-labelled rows of two frames, walking the first frame's index."""
    return [
        row_correlation(likeability_frame.loc[label], rating_frame.loc[label])
        for label in likeability_frame.index
    ]


# One correlation per subject between the likeability series and the Rating0
# series of the same condition.
correlations_all = _paired_row_correlations(df_likeability, df_rating_0)
correlations_match = _paired_row_correlations(df_likeability_fulfilled, df_fulfilled_1)
correlations_no_match = _paired_row_correlations(df_likeability_no_fulfilled, df_fulfilled_0)
correlations_happy_match = _paired_row_correlations(df_likeability_fulfilled_happy, df_fulfilled_happy)
correlations_happy_no_match = _paired_row_correlations(df_likeability_no_fulfilled_happy, df_no_fulfilled_happy)
correlations_sad_match = _paired_row_correlations(df_likeability_fulfilled_sad, df_fulfilled_sad)
correlations_sad_no_match = _paired_row_correlations(df_likeability_no_fulfilled_sad, df_no_fulfilled_sad)
correlations_fear_match = _paired_row_correlations(df_likeability_fulfilled_fear, df_fulfilled_fear)
correlations_fear_no_match = _paired_row_correlations(df_likeability_no_fulfilled_fear, df_no_fulfilled_fear)

# Output column name -> per-subject values. The "Negative" columns are the
# element-wise mean of the sad and fear correlations.
correlation_features = {
    "Cor_Pred_Like": correlations_all,
    "Cor_Pred_Like_Match": correlations_match,
    "Cor_Pred_Like_No_Match": correlations_no_match,
    "Cor_Pred_Like_Match_Happy": correlations_happy_match,
    "Cor_Pred_Like_No_Match_Happy": correlations_happy_no_match,
    "Cor_Pred_Like_Match_Sad": correlations_sad_match,
    "Cor_Pred_Like_No_Match_Sad": correlations_sad_no_match,
    "Cor_Pred_Like_Match_Fear": correlations_fear_match,
    "Cor_Pred_Like_No_Match_Fear": correlations_fear_no_match,
    "Cor_Pred_Like_Match_Negative": (np.array(correlations_sad_match) + np.array(correlations_fear_match)) / 2,
    "Cor_Pred_Like_No_Match_Negative": (np.array(correlations_sad_no_match) + np.array(correlations_fear_no_match)) / 2,
}

# Stack the sequences as rows, then transpose so each feature becomes a column.
correlation_df = pd.DataFrame(list(correlation_features.values())).transpose()
correlation_df.columns = list(correlation_features)
correlation_df

Unnamed: 0,Cor_Pred_Like,Cor_Pred_Like_Match,Cor_Pred_Like_No_Match,Cor_Pred_Like_Match_Happy,Cor_Pred_Like_No_Match_Happy,Cor_Pred_Like_Match_Sad,Cor_Pred_Like_No_Match_Sad,Cor_Pred_Like_Match_Fear,Cor_Pred_Like_No_Match_Fear,Cor_Pred_Like_Match_Negative,Cor_Pred_Like_No_Match_Negative
0,0.113495,0.209359,0.103807,-2.500000e-01,0.542326,0.342381,-0.061430,0.196722,-0.188982,0.269552,-0.125206
1,0.214781,0.400320,-0.070711,4.902903e-01,-0.632456,0.059761,0.000000,0.395285,0.158114,0.227523,0.079057
2,0.250345,0.432551,0.139859,-2.294157e-01,0.800000,0.230940,0.000000,0.307692,-0.307148,0.269316,-0.153574
3,-0.230171,-0.006600,-0.585369,1.409710e-02,0.000000,0.000000,-0.542326,-0.280000,-0.131306,-0.140000,-0.336816
4,0.176201,0.334275,-0.194348,5.345225e-01,0.415227,0.162221,-0.387298,-0.762493,-0.867722,-0.300136,-0.627510
...,...,...,...,...,...,...,...,...,...,...,...
145,0.255697,0.281127,0.260715,9.433333e-02,0.727607,-0.036564,0.342997,0.463586,-0.316228,0.213511,0.013385
146,0.608424,0.418441,0.346488,2.467162e-17,-0.108465,-0.125000,0.542326,0.304997,0.242536,0.089999,0.392431
147,0.440557,0.508058,0.000000,7.581298e-01,0.000000,0.346423,-0.121566,0.548795,-0.242536,0.447609,-0.182051
148,0.376099,0.486755,-0.258969,1.000000e+00,0.612372,0.000000,-0.800000,0.202031,-0.542326,0.101015,-0.671163


#### Average Rating

In [14]:
# Row-wise mean of the Rating0 series, overall and per condition.
average_prediction = df_rating_0.mean(axis="columns")

average_prediction_match = df_fulfilled_1.mean(axis="columns")
average_prediction_no_match = df_fulfilled_0.mean(axis="columns")

average_prediction_happy_match = df_fulfilled_happy.mean(axis="columns")
average_prediction_happy_no_match = df_no_fulfilled_happy.mean(axis="columns")

average_prediction_sad_match = df_fulfilled_sad.mean(axis="columns")
average_prediction_sad_no_match = df_no_fulfilled_sad.mean(axis="columns")

average_prediction_fear_match = df_fulfilled_fear.mean(axis="columns")
average_prediction_fear_no_match = df_no_fulfilled_fear.mean(axis="columns")

# Output column name -> per-subject mean. The "Negative" columns are the
# mean of the sad and fear averages.
average_features = {
    "Mean_Rating0": average_prediction,
    "Mean_Rating0_Match": average_prediction_match,
    "Mean_Rating0_No_Match": average_prediction_no_match,
    "Mean_Rating0_Match_Happy": average_prediction_happy_match,
    "Mean_Rating0_No_Match_Happy": average_prediction_happy_no_match,
    "Mean_Rating0_Match_Sad": average_prediction_sad_match,
    "Mean_Rating0_No_Match_Sad": average_prediction_sad_no_match,
    "Mean_Rating0_Match_Fear": average_prediction_fear_match,
    "Mean_Rating0_No_Match_Fear": average_prediction_fear_no_match,
    "Mean_Rating0_Match_Negative": (average_prediction_sad_match + average_prediction_fear_match) / 2,
    "Mean_Rating0_No_Match_Negative": (average_prediction_fear_no_match + average_prediction_sad_no_match) / 2,
}

# Stack the Series as rows, then transpose so each feature becomes a column.
average_df = pd.DataFrame(list(average_features.values())).transpose()
average_df.columns = list(average_features)
average_df

Unnamed: 0,Mean_Rating0,Mean_Rating0_Match,Mean_Rating0_No_Match,Mean_Rating0_Match_Happy,Mean_Rating0_No_Match_Happy,Mean_Rating0_Match_Sad,Mean_Rating0_No_Match_Sad,Mean_Rating0_Match_Fear,Mean_Rating0_No_Match_Fear,Mean_Rating0_Match_Negative,Mean_Rating0_No_Match_Negative
0,2.888889,3.555556,1.888889,3.888889,1.500000,3.555556,2.166667,3.222222,2.000000,3.388889,2.083333
1,2.533333,3.000000,1.833333,3.333333,1.333333,2.555556,1.833333,3.111111,2.333333,2.833333,2.083333
2,2.311111,3.000000,1.277778,3.555556,1.166667,2.666667,1.500000,2.777778,1.166667,2.722222,1.333333
3,2.555556,3.148148,1.666667,3.222222,1.000000,3.000000,2.666667,3.222222,1.333333,3.111111,2.000000
4,2.266667,2.851852,1.388889,3.666667,1.166667,2.666667,1.333333,2.222222,1.666667,2.444444,1.500000
...,...,...,...,...,...,...,...,...,...,...,...
145,2.088889,2.555556,1.388889,3.111111,1.500000,2.111111,1.333333,2.444444,1.333333,2.277778,1.333333
146,2.422222,3.074074,1.444444,3.666667,1.166667,2.777778,1.333333,2.777778,1.833333,2.777778,1.583333
147,2.622222,3.222222,1.722222,3.555556,1.000000,2.888889,1.833333,3.222222,2.333333,3.055556,2.083333
148,2.577778,3.222222,1.611111,3.777778,1.333333,3.000000,1.666667,2.888889,1.833333,2.944444,1.750000


#### Difference Between Match and No Match

In [15]:
# Per-subject difference between the fulfilled and non-fulfilled mean ratings (pooled).
dif_match = average_prediction_match - average_prediction_no_match

# The same difference within each evoked emotion.
dif_happy = average_prediction_happy_match - average_prediction_happy_no_match
dif_sad = average_prediction_sad_match - average_prediction_sad_no_match
dif_fear = average_prediction_fear_match - average_prediction_fear_no_match



In [16]:
# Output column name -> per-subject difference. "Dif_Negative" is the mean of
# the sad and fear differences.
difference_features = {
    "Dif_Match": dif_match,
    "Dif_Happy": dif_happy,
    "Dif_Sad": dif_sad,
    "Dif_Fear": dif_fear,
    "Dif_Negative": (dif_sad + dif_fear) / 2,
}

# Stack the Series as rows, then transpose so each feature becomes a column.
dif_df = pd.DataFrame(list(difference_features.values())).transpose()
dif_df.columns = list(difference_features)
dif_df


Unnamed: 0,Dif_Match,Dif_Happy,Dif_Sad,Dif_Fear,Dif_Negative
0,1.666667,2.388889,1.388889,1.222222,1.305556
1,1.166667,2.000000,0.722222,0.777778,0.750000
2,1.722222,2.388889,1.166667,1.611111,1.388889
3,1.481481,2.222222,0.333333,1.888889,1.111111
4,1.462963,2.500000,1.333333,0.555556,0.944444
...,...,...,...,...,...
145,1.166667,1.611111,0.777778,1.111111,0.944444
146,1.629630,2.500000,1.444444,0.944444,1.194444
147,1.500000,2.555556,1.055556,0.888889,0.972222
148,1.611111,2.444444,1.333333,1.055556,1.194444


In [17]:
# Combine the mean-rating, difference and correlation features column-wise into one frame.
theory_df = pd.concat([average_df,dif_df,correlation_df],axis=1)

## Joining Both Datasets

In [18]:
# Append the regression-based features (time_df) to the right of theory_df.
# NOTE(review): concat with axis=1 matches rows by index label; time_df is built
# from plain arrays, so this presumably relies on the dataset having the
# default integer index - confirm.
full_dataset = pd.concat([theory_df,time_df],axis=1)


In [19]:
# Display the column that is used as the subject identifier in the next cell.
RETOS_BEBRASK_dataset["DataFile.Basename"]

0         PREDWELL_RETOS-1-1
1        PREDWELL_RETOS-10-1
2       PREDWELL_RETOS-101-1
3       PREDWELL_RETOS-102-1
4       PREDWELL_RETOS-103-1
               ...          
145    PREDWELL_RETOS-1124-1
146    PREDWELL_RETOS-1125-1
147    PREDWELL_RETOS-1126-1
148    PREDWELL_RETOS-1127-1
149    PREDWELL_RETOS-1128-1
Name: DataFile.Basename, Length: 150, dtype: object

In [20]:
# Put the subject identifier in the first column. The insert is guarded so the
# cell can be re-run: DataFrame.insert raises ValueError when the column
# already exists.
if "Subject" not in full_dataset.columns:
    full_dataset.insert(0, "Subject", RETOS_BEBRASK_dataset["DataFile.Basename"])

In [21]:
# Write the combined feature table to an Excel file without the index column.
# The relative path goes one directory up from the current working directory.
full_dataset.to_excel('../Clustering_Predictive_Processing/All_Features_dataset.xlsx', index=False)