# Analysis Code

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import statsmodels.api as sm

<h1> Logistic Regression</h1>

<p>Now based off the computed differences of the different criteria. We now will use a logistic regression to analyze
where if these differences are able to predict the probability of choosing the left monster (could also be right, this choice
is just arbitrary). In simpler terms, by using this statistical model, can we see if these criteria actually play a role in how they choose a monster.</p> 

In [105]:
dataset_folder = os.listdir('ProcessedData_ForAnalysis')

for participant_data in dataset_folder:
    data = pd.read_csv(f'ProcessedData_ForAnalysis/{participant_data}')
    data.drop(data.columns[0], axis=1) #there was a duplicate of the trial_index column called unnamed, dropping it here

    ppID = ''.join(character for character in participant_data if character.isdigit())

    #run the logistic regression model for each participant

    #defining variables
    independent_vars = data[['Color_d', 'Cuteness_d', 'Emotions_d', 'Personal_Preference_d', 'Shape_d','Size_d']]
    dependent_var = data['Chosen_Monster']

    #adding intercept
    intercept = sm.add_constant(independent_vars)

    logistic_model = sm.Logit(dependent_var, independent_vars)
    result = logistic_model.fit()

    model_result = pd.DataFrame(result.summary2().tables[1]) #create a dataframe to save the data
    model_result = model_result.reset_index() #turns the index column with our predictor names into a normal column

    #rename the last two columns
    col_renaming = {
        'index' : 'Predictors',
        '[0.025': 'Lower Bound',
        '0.975]': 'Upper Bound',
        'P>|z|' : 'pvalues'
    }

    model_result.rename(columns=col_renaming, inplace=True)

    probabilities = result.predict(independent_vars)


    #save the model results for each participant
    model_result.to_csv(f'LogisticModel_Results/LogisticM_Result_{ppID}.csv')


Optimization terminated successfully.
         Current function value: 0.596684
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.163041
         Iterations 9


In [106]:
model_result

Unnamed: 0,Predictors,Coef.,Std.Err.,z,pvalues,Lower Bound,Upper Bound
0,Color_d,0.002943,0.002192,1.342426,0.179458,-0.001354,0.007239
1,Cuteness_d,-0.008977,0.003879,-2.314099,0.020662,-0.016581,-0.001374
2,Emotions_d,0.005814,0.002553,2.277662,0.022747,0.000811,0.010818
3,Personal_Preference_d,-0.016909,0.005552,-3.045306,0.002324,-0.027792,-0.006026
4,Shape_d,0.002047,0.002037,1.005122,0.314838,-0.001945,0.006039
5,Size_d,0.00197,0.001961,1.004606,0.315087,-0.001873,0.005813


In [None]:
pd.DataFrame(result.summary2().tables[1])

Unnamed: 0,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Color_d,0.002943,0.002192,1.342426,0.179458,-0.001354,0.007239
Cuteness_d,-0.008977,0.003879,-2.314099,0.020662,-0.016581,-0.001374
Emotions_d,0.005814,0.002553,2.277662,0.022747,0.000811,0.010818
Personal_Preference_d,-0.016909,0.005552,-3.045306,0.002324,-0.027792,-0.006026
Shape_d,0.002047,0.002037,1.005122,0.314838,-0.001945,0.006039
Size_d,0.00197,0.001961,1.004606,0.315087,-0.001873,0.005813
