# Analysis Code

In [46]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import statsmodels.api as sm

<h1> Logistic Regression</h1>

<p>Now based off the computed differences of the different criteria. We now will use a logistic regression to analyze
where if these differences are able to predict the probability of choosing the left monster (could also be right, this choice
is just arbitrary). In simpler terms, by using this statistical model, can we see if these criteria actually play a role in how they choose a monster.</p> 

In [14]:
dataset_folder = os.listdir('ProcessedData_ForAnalysis')

for participant_data in dataset_folder:
    data = pd.read_csv(f'ProcessedData_ForAnalysis/{participant_data}')
    data.drop(data.columns[0], axis=1) #there was a duplicate of the trial_index column called unnamed, dropping it here

    ppID = ''.join(character for character in participant_data if character.isdigit())

    #run the logistic regression model for each participant

    #defining variables
    independent_vars = data[['Color_d', 'Cuteness_d', 'Emotions(Valence)_d', 'Emotions(Intensity)_d', 'Personal_Preference_d', 'Shape_d','Size_d']]
    dependent_var = data['Chosen_Monster']

    #adding intercept
    intercept = sm.add_constant(independent_vars)

    logistic_model = sm.Logit(dependent_var, independent_vars)
    result = logistic_model.fit()

    model_result = pd.DataFrame(result.summary2().tables[1]) #create a dataframe to save the data
    model_result = model_result.reset_index() #turns the index column with our predictor names into a normal column

    #rename the last two columns
    col_renaming = {
        'index' : 'Predictors',
        '[0.025': 'Lower Bound',
        '0.975]': 'Upper Bound',
        'P>|z|' : 'pvalues'
    }

    model_result.rename(columns=col_renaming, inplace=True)

    probabilities = result.predict(independent_vars)


    #save the model results for each participant
    model_result.to_csv(f'LogisticModel_Results/LogisticM_Result_{ppID}.csv')


Optimization terminated successfully.
         Current function value: 0.455588
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.411430
         Iterations 6


In [13]:
model_result

Unnamed: 0,Predictors,Coef.,Std.Err.,z,pvalues,Lower Bound,Upper Bound
0,Color_d,0.004326,0.001498,2.887685,0.003881,0.00139,0.007263
1,Cuteness_d,0.003615,0.001405,2.572704,0.010091,0.000861,0.006369
2,Emotions(Valence)_d,0.000458,0.001285,0.356465,0.721492,-0.002061,0.002977
3,Emotions(Intensity)_d,-0.001392,0.001315,-1.05805,0.290033,-0.00397,0.001186
4,Personal_Preference_d,0.000134,0.001043,0.128236,0.897962,-0.001911,0.002179
5,Shape_d,0.00015,0.001428,0.104781,0.916549,-0.002649,0.002948
6,Size_d,0.001337,0.001195,1.118574,0.263322,-0.001006,0.00368


In [None]:
pd.DataFrame(result.summary2().tables[1])

Unnamed: 0,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Color_d,0.002943,0.002192,1.342426,0.179458,-0.001354,0.007239
Cuteness_d,-0.008977,0.003879,-2.314099,0.020662,-0.016581,-0.001374
Emotions_d,0.005814,0.002553,2.277662,0.022747,0.000811,0.010818
Personal_Preference_d,-0.016909,0.005552,-3.045306,0.002324,-0.027792,-0.006026
Shape_d,0.002047,0.002037,1.005122,0.314838,-0.001945,0.006039
Size_d,0.00197,0.001961,1.004606,0.315087,-0.001873,0.005813


<h1>Vector Correlation</h1>

<p>For each participant you have a vector of the number of time each monster was chosen. You just correlate those vectors across participants (e.g. [4 6 2 4 6 1 4 7 3... 3] for participant one and [5 2 4 6 1 4 2 6 3 ... 2] for participant two). The length for these vectors should  correspond to the number of monsters you have. The correlation should tell you how similar the likability profiles are across the two participants. 
 
If you have a set of participants you will end up with a matrix where each matrix entry ij will tell you how similar the ratings of participants i and j are to each other
</p>

In [47]:
#lets combine the participant dfs
folder_name = 'JatosExperiment_Results'
folder = os.listdir(folder_name)
n_participants = len(folder)

combined_df = pd.DataFrame([])

for index, file_name in enumerate(folder):
    data = pd.read_csv(f'{folder_name}/{file_name}')

    colstodrop = ['stimulus', 'url','success','plugin_version','view_history','sort_criteria', 'init_locations',
                  'final_locations', 'question_order','moves']

    firstBlock = data.loc[(data['trial_index'] >= 4) & (data['trial_index'] <=94)]
    firstBlock = firstBlock.drop(colstodrop, axis=1)

    #add participant ID for grouping purposes

    #create a new column which will store the monster chosen by participant
    insert_position = firstBlock.columns.get_loc('rightstim') + 1 #i want the chosen monster column to be inserted after the rightstim column
    firstBlock.insert(insert_position, 'Monster_Picked', 0) #named it to Monster_Picked to avoid confusion with the Chosen_Monster column with 1s and 0s
    firstBlock.insert(index, 'Participant_ID', index + 1)

    #assign the value of the chosen monster based off the key press
    firstBlock.loc[firstBlock['response'] == "ArrowRight", 'Monster_Picked'] = firstBlock['rightstim']
    firstBlock.loc[firstBlock['response'] == "ArrowLeft", 'Monster_Picked'] = firstBlock['leftstim']

    #the combined df will hold all the data for all the participants
    combined_df = pd.concat([combined_df,firstBlock], ignore_index=True)


In [79]:
combined_df

Unnamed: 0,Participant_ID,rt,response,trial_type,trial_index,time_elapsed,design_file,leftstim,rightstim,Monster_Picked
0,1,7357,ArrowLeft,html-keyboard-response,4,350412,PP003.csv,obj12.png,obj11.png,obj12.png
1,1,2967,ArrowRight,html-keyboard-response,5,354786,PP003.csv,obj14.png,obj06.png,obj06.png
2,1,2576,ArrowLeft,html-keyboard-response,6,358768,PP003.csv,obj04.png,obj12.png,obj04.png
3,1,15224,ArrowRight,html-keyboard-response,7,375398,PP003.csv,obj03.png,obj00.png,obj00.png
4,1,4147,ArrowLeft,html-keyboard-response,8,380950,PP003.csv,obj01.png,obj09.png,obj01.png
...,...,...,...,...,...,...,...,...,...,...
814,9,797,ArrowLeft,html-keyboard-response,90,283468,PP001.csv,obj03.png,obj01.png,obj03.png
815,9,704,ArrowRight,html-keyboard-response,91,285589,PP001.csv,obj03.png,obj04.png,obj04.png
816,9,583,ArrowRight,html-keyboard-response,92,287599,PP001.csv,obj09.png,obj11.png,obj11.png
817,9,538,ArrowLeft,html-keyboard-response,93,289554,PP001.csv,obj05.png,obj02.png,obj05.png


In [81]:
aggregated_df = combined_df.groupby(['Participant_ID', 'Monster_Picked']).size().reset_index(name='Count')
aggregated_df

Unnamed: 0,Participant_ID,Monster_Picked,Count
0,1,obj00.png,5
1,1,obj01.png,12
2,1,obj02.png,6
3,1,obj03.png,4
4,1,obj04.png,10
...,...,...,...
117,9,obj09.png,5
118,9,obj11.png,11
119,9,obj12.png,11
120,9,obj14.png,3


In [98]:
#correlation matrix for monsters
#pivot the aggregated dataframe 
df_pivot = aggregated_df.pivot(index='Participant_ID', columns='Monster_Picked', values='Count').fillna(0)
#compute the correlation matrix
correlation_matrix = df_pivot.corr(method="pearson")

correlation_matrix.to_csv('Monsters_Correlation_Matrix.csv')

In [99]:
##corelation matrix for participants
#pivot the aggregated dataframe 
df_pivot = aggregated_df.pivot(index='Participant_ID', columns='Monster_Picked', values='Count').fillna(0)
#compute the correlation matrix
correlation_matrix = df_pivot.T.corr(method="pearson")

correlation_matrix.to_csv('ParticipantChoice_Correlation_Matrix.csv')

In [89]:
correlation_matrix

Participant_ID,1,2,3,4,5,6,7,8,9
Participant_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,1.0,0.319757,-0.396939,0.589499,-0.085208,0.469317,0.538877,0.282247,0.045927
2,0.319757,1.0,0.091095,0.655616,0.014493,-0.068539,0.415073,0.368666,0.24217
3,-0.396939,0.091095,1.0,-0.117192,-0.087332,-0.256806,-0.451012,-0.547351,-0.303827
4,0.589499,0.655616,-0.117192,1.0,0.155379,0.292967,0.44475,0.519651,0.343102
5,-0.085208,0.014493,-0.087332,0.155379,1.0,0.185178,-0.093897,0.393323,0.1078
6,0.469317,-0.068539,-0.256806,0.292967,0.185178,1.0,-0.193508,0.072305,-0.151326
7,0.538877,0.415073,-0.451012,0.44475,-0.093897,-0.193508,1.0,0.564383,0.285017
8,0.282247,0.368666,-0.547351,0.519651,0.393323,0.072305,0.564383,1.0,0.480734
9,0.045927,0.24217,-0.303827,0.343102,0.1078,-0.151326,0.285017,0.480734,1.0


<h1>Linear Regression </h1>

<p>Predict personal preference from the other predictors</p>