In [17]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import warnings
from pylab import rcParams
from scipy import stats

import pingouin as pg

from factor_analyzer import FactorAnalyzer
from factor_analyzer.factor_analyzer import calculate_kmo

from scipy.stats import f_oneway
from scipy.stats import ttest_ind
import statsmodels.api as sm
from statsmodels.formula.api import ols
# Raise the pandas display limits so wide/long frames are shown in full
for _display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, 1000)

In [18]:
%matplotlib inline
warnings.filterwarnings("ignore")
rcParams['figure.figsize'] = 20,10
rcParams['font.size'] = 30
sns.set()
np.random.seed(8)

In [19]:
# Selected recipes: one CSV per Pro_N directory
s1, s2, s3 = (
    pd.read_csv(path)
    for path in (
        "./Pro_1/selectedrecipes.csv",
        "./Pro_2/selectedrecipes.csv",
        "./Pro_3/selectedrecipes.csv",
    )
)

# Demographics: one CSV per Pro_N directory, then stacked into one frame
demo1, demo2, demo3 = (
    pd.read_csv(path)
    for path in (
        "./Pro_1/demographics.csv",
        "./Pro_2/demographics.csv",
        "./Pro_3/demographics.csv",
    )
)
md = pd.concat([demo1, demo2, demo3])

In [20]:
# Row-wise mean over the columns from choice_sat_q1 through choice_sat_q3,
# stored on each frame as 'choice_sat_mean'.
choice_sat, choice_dif2, choice_dif3 = (
    frame.loc[:, "choice_sat_q1":"choice_sat_q3"] for frame in (s1, s2, s3)
)

s1['choice_sat_mean'] = choice_sat.mean(axis="columns")
s2['choice_sat_mean'] = choice_dif2.mean(axis="columns")
s3['choice_sat_mean'] = choice_dif3.mean(axis="columns")

In [21]:
# Stack the three selected-recipes frames into one DataFrame
ms = pd.concat([s1, s2, s3])

# Balance data: keep only the rows of users that appear more than twice
# (i.e. drop users with 2 or fewer selected recipes)
selections_per_user = ms['user_id'].value_counts()
has_enough_selections = ms['user_id'].map(selections_per_user).gt(2)
ms = ms.loc[has_enough_selections]

In [22]:
# Create a new column with names for each label
def label_name(row):
    """Translate a row's numeric ``condition`` code into a label name.

    Codes 1, 2 and 3 give 'Nutriscore', 'MTL' and 'No-label' respectively;
    any other value yields ``None``.
    """
    condition = row['condition']
    for code, name in ((1, 'Nutriscore'), (2, 'MTL'), (3, 'No-label')):
        if condition == code:
            return name
    return None

def rank_name(row):
    """Translate a row's numeric ``variant`` code into a ranking name.

    Codes 1, 2 and 3 give 'Popular', 'Random' and 'Health' respectively;
    any other value yields ``None``.
    """
    variant = row['variant']
    names_by_code = ((1, 'Popular'), (2, 'Random'), (3, 'Health'))
    return next((name for code, name in names_by_code if variant == code), None)

def numeric_nutriscore(row):
    """Convert a row's ``nutri_score`` letter into a number.

    'A' through 'E' become 1 through 5; any other value (including
    lowercase letters) yields ``None``.
    """
    grade = row['nutri_score']
    for score, letter in enumerate(('A', 'B', 'C', 'D', 'E'), start=1):
        if grade == letter:
            return score
    return None


# Row-wise derived columns: readable label name, readable rank name,
# and the Nutri-Score letter as a number.
ms['Label'] = ms.apply(label_name, axis=1)
ms['Rank'] = ms.apply(rank_name, axis=1)
ms['num_nutriscore'] = ms.apply(numeric_nutriscore, axis=1)

In [24]:
# Persist the combined frame (with its derived columns) to disk
ms.to_csv(path_or_buf="ms_choice_sat.csv")

# Factor analysis
* Determine if we can use the mean of the 3 choice satisfaction questions

http://www.let.rug.nl/nerbonne/teach/rema-stats-meth-seminar/student-papers/MHof-QuestionnaireEvaluation-2012-Cronbach-FactAnalysis.pdf

In [8]:
# KMO sampling adequacy and a single-factor solution for the three
# choice-satisfaction items.
choice_sat = ms[["choice_sat_q1","choice_sat_q2", "choice_sat_q3"]]

kmo_all, kmo_model = calculate_kmo(choice_sat)

fa = FactorAnalyzer(1)
fa.fit(choice_sat)

# One row per item, one column for the single extracted factor.
# (The original re-wrapped this frame in pd.DataFrame and had a bare
# `loadings` expression mid-cell; both were no-ops and are removed.)
loadings = pd.DataFrame(
    fa.loadings_,
    index=choice_sat.columns,
    columns=['Factor 1'],
)

# The two values returned by calculate_kmo, then the loadings table
print(kmo_all, kmo_model, 'Factor Loadings \n%s' % loadings, sep='\n')

# LaTeX version of the loadings; index=False omits the item names
print(loadings.to_latex(index=False))

[0.71804461 0.66000349 0.73525222]
0.7003136740267455
Factor Loadings 
               Factor 1
choice_sat_q1 -0.730872
choice_sat_q2 -0.859774
choice_sat_q3 -0.709326
\begin{tabular}{r}
\toprule
 Factor 1 \\
\midrule
-0.730872 \\
-0.859774 \\
-0.709326 \\
\bottomrule
\end{tabular}



## Cronbach’s Alpha
http://www.let.rug.nl/nerbonne/teach/rema-stats-meth-seminar/student-papers/MHof-QuestionnaireEvaluation-2012-Cronbach-FactAnalysis.pdf

In [45]:
# Cronbach's alpha for the three choice-satisfaction items
choice_sat = ms.loc[:, ["choice_sat_q1", "choice_sat_q2", "choice_sat_q3"]]
pg.cronbach_alpha(data=choice_sat)

(0.8059620045840863, array([0.791, 0.82 ]))

* The alpha coefficient for the three items is .806, suggesting that the items have relatively high internal consistency. (Note that a reliability coefficient of .70 or higher is considered “acceptable” in most social science research situations.)
    * https://stats.idre.ucla.edu/spss/faq/what-does-cronbachs-alpha-mean/

* We used 

In [None]:
# Compare responses from Prolific and Mturk