In [None]:
import numpy as np
import pandas as pd
import pingouin as pg
from scipy import stats
from scipy.stats import chi2_contingency
import seaborn as sns
import matplotlib.pyplot as plt
from analysis_helper import analysis_tables, table_maker

In [None]:
# Load the raw tables produced by analysis_helper.analysis_tables().
# NOTE(review): the content of each returned table is defined in analysis_helper;
# only `prior` is used further down in this notebook section.
prior, data, numbers, words, participants_with_na_values = analysis_tables()

# Build one table per task with analysis_helper.table_maker().
# NOTE(review): the two positional integers and `check_string` are interpreted by
# table_maker -- presumably the task's position in each block and the construct
# looked for in the answers; confirm against analysis_helper.
lambda_table = table_maker(3, 2, check_string='lambda')
isinstance_table = table_maker(0, 1, check_string='isinstance')
enumerate_table = table_maker(1, 0, check_string='enumerate')
sum_table = table_maker(2, 3, check_string='sum(')

In [None]:
#pre_analysis_tables
def no_preset_table(table_name):
    """Return the rows of ``table_name`` whose ``change`` value contains 'No_preset'.

    Only the id, group, answer, delta-time and questionnaire columns are kept.
    Entries of ``change`` that cannot be matched (missing values) count as
    non-matching because of ``na=False``.
    """
    kept_columns = ['id_user', 'group', 'answer_block0', 'answer_block1', 'delta_time',
                    'how_helpful', 'how_comfortable']
    is_no_preset = table_name.change.str.contains('No_preset', na=False)
    return table_name.loc[is_no_preset, kept_columns]


def no_set_table(table_name):
    """Return the rows of ``table_name`` whose ``change`` value contains 'No_set'.

    The result is restricted to the id, group, answer, delta-time and
    questionnaire columns. Missing ``change`` values are treated as
    non-matching (``na=False``).
    """
    row_mask = table_name.change.str.contains('No_set', na=False)
    column_subset = ['id_user', 'group', 'answer_block0', 'answer_block1', 'delta_time',
                     'how_helpful', 'how_comfortable']
    selected = table_name.loc[row_mask, column_subset]
    return selected


def solution_change_table(table_name, set_type):
    """Return the rows of ``table_name`` whose ``change`` value is True or False.

    The result keeps the id, group, answer, per-block task time, delta-time,
    change and questionnaire columns, and gains a constant ``set_type`` column
    holding ``str(set_type)``.
    """
    kept_columns = ['id_user', 'group', 'answer_block0', 'answer_block1',
                    "task_time_block0", "task_time_block1",
                    'delta_time', 'change', 'how_helpful', 'how_comfortable']
    has_change_flag = table_name.change.isin([True, False])
    # assign() returns a new frame with the label column appended last.
    return table_name.loc[has_change_flag, kept_columns].assign(set_type=str(set_type))

In [None]:
# Uncomment any line below to display the corresponding table.

#lambda_table
#no_preset_table(lambda_table)
#solution_change_table(lambda_table, "lambda")
#no_set_table(lambda_table)

#isinstance_table
#no_preset_table(isinstance_table)
#no_set_table(isinstance_table)
#solution_change_table(isinstance_table, "isinstance")

#enumerate_table
#no_preset_table(enumerate_table)
#no_set_table(enumerate_table)
#solution_change_table(enumerate_table, "enumerate")

#sum_table
#no_preset_table(sum_table)
#no_set_table(sum_table)
#solution_change_table(sum_table, "sum")

In [None]:
#analysis_tables

# Correlation of delta_time or change with the «how» questions (how_helpful, how_comfortable)
def corr_result(table_name, x, y):
    """Partial correlation between columns ``x`` and ``y`` with ``group`` as covariate.

    Only rows with ``group > 0`` are used, and the ``how_helpful`` and
    ``how_comfortable`` columns are cast to ``int`` before the computation.
    The input frame is not modified. Returns the ``pg.partial_corr`` result
    rounded to three decimals.
    """
    int_columns = {'how_helpful': int, 'how_comfortable': int}
    # astype() yields a new frame, so the caller's table stays untouched.
    non_control = table_name.loc[table_name.group > 0].astype(int_columns)
    return pg.partial_corr(data=non_control, x=x, y=y, covar='group').round(3)


#stat sig of the group effect on change presence
def chi2_result(table_name, set_type):
    """Chi-square test of independence between ``group`` and ``change``.

    The task table is first reduced with ``solution_change_table`` (rows whose
    ``change`` is True or False), then cross-tabulated by group and change.
    Returns the result of ``scipy.stats.chi2_contingency`` on that table.
    """
    changes = solution_change_table(table_name, set_type)
    group_by_change = pd.crosstab(changes['group'], changes['change'])
    return chi2_contingency(group_by_change)


#stat sig of the group effect on delta_time
def anova_result(table_name):
    """One-way ANOVA of ``delta_time`` between ``group`` levels, outliers removed.

    Rows whose ``delta_time`` z-score has an absolute value of 3 or more are
    dropped before the ANOVA. Rows with a missing ``delta_time`` are ignored
    when the z-scores are computed and are dropped as well.

    Returns the detailed ``pg.anova`` table.
    """
    # With the default nan_policy ('propagate') a single NaN turns every
    # z-score into NaN, and the filter below would then discard all rows.
    z_scores = stats.zscore(table_name["delta_time"], nan_policy='omit')
    # NaN compares as False, so rows without a delta_time are excluded here.
    within_three_sd = np.abs(z_scores) < 3
    trimmed = table_name[within_three_sd]
    return pg.anova(dv='delta_time', between='group', data=trimmed, detailed=True)

In [None]:
# Box plot: Python experience (months) for each number of exposed sets.
sns.set_theme(palette="pastel", style="whitegrid")
ax = sns.catplot(data=prior, kind="box", x="set_count", y="months")
ax.set(xlabel="Number of exposed sets", ylabel="Python experience in months")

In [None]:
# Box plot: Python experience (months) for each exposed set type.

# Rows of `prior` with exp == 1.
participants_prior = prior.loc[(prior.exp == 1)]

# Turn the comma-separated 'set' column into long format: one row per
# (username, set). After stack() the index is (username, position in the split
# list); the position level is dropped explicitly and username is moved back
# into a column. (The previous reset_index([0, 'username']) named the username
# level twice and only produced these two columns by accident.)
participants = (
    pd.DataFrame(participants_prior['set'].str.split(',').tolist(),
                 index=participants_prior.username)
    .stack()
    .reset_index(level=1, drop=True)
    .rename('set')
    .reset_index()
)

# Both frames carry a 'set' column: the original string becomes 'set_prior',
# the single split value becomes 'set_type'.
participants = pd.merge(participants_prior, participants, on=['username'], suffixes=('_prior', '_type'))
participants = participants.loc[:, ['username', 'months', 'set_count', 'set_type']]

sns.set_theme(style="whitegrid", palette="pastel")
ax = sns.catplot(x="set_type", y="months", data=participants, kind="box")
ax.set(ylabel="Python experience in months", xlabel="Exposed sets")

In [None]:
# Histogram of exposed set types, normalised with stat="density".
sns.set_theme(style="whitegrid")
ax = sns.displot(data=participants, x="set_type", hue="set_type",
                 stat="density", legend=False, alpha=0.55)
ax.set(xlabel="Exposed sets", ylabel="Proportion of exposed sets")

In [None]:
# Count plot: number of changed / unchanged solutions, one panel per group.

# Solution-change rows of every task, each labelled with its set type.
table1 = solution_change_table(isinstance_table, "isinstance")
table2 = solution_change_table(enumerate_table, "enumerate")
table3 = solution_change_table(lambda_table, "lambda")
table4 = solution_change_table(sum_table, "sum")

# Stack the four tables and replace the numeric group codes with readable names.
all_change_tables = pd.concat([table1, table2, table3, table4]).assign(
    group=lambda frame: frame['group'].replace({0: 'Control', 1: 'Change', 2: "Change and Forget"})
)

sns.set_theme(style="whitegrid", palette="pastel")
g = sns.FacetGrid(all_change_tables, col="group", height=4, aspect=.5)
g.map_dataframe(sns.countplot, "change", order=[True, False])
g.set_axis_labels("Solution change", "Number of cases")
# Keep a single x label, under the middle panel only.
g.axes[0, 0].set_xlabel('')
g.axes[0, 1].set_xlabel('Solution change', labelpad=10)
g.axes[0, 2].set_xlabel('')
g.set_titles(col_template="{col_name}\n group")

In [None]:
# Point plot: mean task time in each block, one line per group; saved to time.png.

# Long format: one row per (participant, group, block) with the block's task time.
time_table = all_change_tables.melt(
    id_vars=['id_user', "group"],
    value_vars=['task_time_block0', 'task_time_block1'],
    var_name='step',
    value_name='time',
)
# Keep only times whose absolute z-score is below 3.
time_is_typical = np.abs(stats.zscore(time_table["time"])) < 3
time_table = time_table[time_is_typical]

sns.set_theme(style="whitegrid", palette="pastel")
# NOTE(review): `ci` and `errwidth` are deprecated in recent seaborn releases
# (replaced by `errorbar` / `err_kws`); kept as-is until the installed version is confirmed.
ax = sns.pointplot(data=time_table, x="step", y="time", hue="group",
                   linestyles=["-", "--", ":"], dodge=True,
                   ci="sd", errwidth=1, capsize=.05)
ax.set(xlabel="Block of tasks", ylabel="Time in seconds")
ax.set_xticklabels(["First", "Second"])
plt.legend(title='Group')
plt.savefig("time.png")

In [None]:
# Box plot: delta_time per group, restricted to rows whose absolute z-score is below 3.
delta_is_typical = np.abs(stats.zscore(all_change_tables["delta_time"])) < 3
delta_time_table = all_change_tables[delta_is_typical]

ax = sns.catplot(data=delta_time_table, kind="box", x="group", y="delta_time")
ax.set(xlabel="Group", ylabel="Time delta in seconds")