# Required Imports and Constants

In [1]:
%matplotlib inline

import ast
import json
import os
import sys
from datetime import datetime

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import lines
from matplotlib import patches

from scipy.stats import f_oneway
from scipy.stats import ttest_1samp

import warnings
warnings.filterwarnings("ignore")

# Experimental conditions, in the order they are iterated and reported.
# CTRL is the control group; WTHN, BTWN and BOTH are the intervention groups.
CONDITIONS = ["CTRL", "WTHN", "BTWN", "BOTH"]

# Values of the `appMode` column that identify each task in the logs.
TASKS = ["hiring", "movies"]


# Participant IDs keyed by condition. Each ID is also the name of that
# participant's data directory, located at <condition>/<pid>/.
ALL_PIDS = {
    "CTRL": [
        "0jjk2LtlRwXu",
        "BDIPCGcOdllu",
        "fimUvwh9JPRU",
        "jHPt9jOOmoXM",
        "nyEXxPkdgP81",
        "WvfHfFBIfx6m",
        "YqgrTs5hzcsj",
    ],
    "WTHN": [
        "6BSJnNllOaUQ",
        "8wC3YK6TgqRm",
        "db2aF23Z9hnH",
        "F0xDdtLRrhtQ",
        "nRJBgq4Tg2LG",
        "YvLb2lkRiYyh",
    ],
    "BTWN": [
        "38vsTr4jwSxV",
        "fHevN3Wo38TA",
        "iswbEgz7w3KE",
        "kdSlRblQt77j",
        "mYSzFPXnkOdd",
        "QEpQRLrqn7CX",
        "Tcrfm9xpHN59",
    ],
    "BOTH": [
        "1w4I0l6f60JG",
        "7UR5LIyKmQiz",
        "C9pIAv6kBqr6",
        "KK2JFLnabEl9",
        "MjcBTpGzYCD9",
        "TlJaxdq1DM23",
    ],
}

# Per-condition aliases: each name is bound to the very same list object
# stored in ALL_PIDS, so the dictionary and the aliases can never diverge.
CTRL_PIDS, WTHN_PIDS, BTWN_PIDS, BOTH_PIDS = (
    ALL_PIDS[condition] for condition in CONDITIONS
)

# Number of recommendations requested before submission between conditions - ANOVA

In [6]:
def _count_recommendation_requests(interactions, task):
    """Count the ``get_recommendation`` rows logged for one task.

    Parameters
    ----------
    interactions : pandas.DataFrame
        One participant's interaction log. Must contain the columns
        ``interactionType`` and ``appMode``.
    task : str
        The ``appMode`` value whose rows are counted.

    Returns
    -------
    int
        Number of rows where ``interactionType == "get_recommendation"``
        and ``appMode == task``.
    """
    # NOTE(review): every get_recommendation row for the task is counted;
    # this assumes none are logged after submission -- confirm against the
    # logging code if "before submission" must be enforced here.
    is_request = interactions["interactionType"] == "get_recommendation"
    is_task = interactions["appMode"] == task
    return int((is_request & is_task).sum())


def _report_one_way_anova(data, group_col, value_col='# Recommendations'):
    """Print summary statistics and a one-way ANOVA of `value_col` by `group_col`.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing both `group_col` and `value_col`.
    group_col : str
        Column whose distinct values define the groups being compared. It is
        also printed as the "Groups:" label so the label always matches the
        grouping that was actually tested.
    value_col : str, optional
        Column holding the measurements compared across groups.

    Returns
    -------
    tuple
        ``(f_stat, p_val)`` as returned by ``scipy.stats.f_oneway``.
    """
    grouped = data.groupby(group_col)[value_col]

    # Summary statistics per group (count, mean, std, min, quartiles, max)
    print(grouped.describe())
    print()

    # One sample (list of measurements) per group, in sorted group order
    samples = grouped.apply(list).tolist()
    f_stat, p_val = f_oneway(*samples)
    print(f"One-Way ANOVA\t|\tGroups: {group_col}\t|\tF Statistic: {f_stat:.04f}\t|\tp-value: {p_val:.04f}")
    print()
    return f_stat, p_val


print("Number of recommendations requested before submission between conditions")
print("========================================================================")
print()

# Read every participant's log exactly once; the same file serves all tasks,
# so re-reading it inside the task loop would only repeat the disk I/O.
interactions_by_participant = {
    (condition, pid): pd.read_csv(os.path.join(condition, pid, 'interactions.csv'))
    for condition in CONDITIONS
    for pid in ALL_PIDS[condition]
}

# One row per (task, participant), ordered by task, then condition, then PID.
rows = [
    [
        pid,
        task,
        condition,
        _count_recommendation_requests(interactions_by_participant[(condition, pid)], task),
    ]
    for task in TASKS
    for condition in CONDITIONS
    for pid in ALL_PIDS[condition]
]

# Combine rows
df_get_rec = pd.DataFrame(
    rows,
    columns=['PID', 'Task', 'Condition', '# Recommendations']
)



### BETWEEN ALL CONDITIONS ###

print('Between conditions | across tasks')
print('---------------------------------')

f_stat, p_val = _report_one_way_anova(df_get_rec, 'Condition')



### BETWEEN CONTROL AND INTERVENTION ###

print('Between CTRL (CONTROL) and WTHN + BTWN + BOTH (INTERVENTION) | across tasks')
print('---------------------------------------------------------------------------')

# Pool the three intervention conditions into a single 'INTV' group. The
# replacement is restricted to the Condition column so PID/Task values can
# never be rewritten by accident, and df_get_rec itself is left unmodified.
df_ctrl_vs_intv = df_get_rec.assign(
    Condition=df_get_rec['Condition'].replace(['WTHN', 'BTWN', 'BOTH'], 'INTV')
)

f_stat, p_val = _report_one_way_anova(df_ctrl_vs_intv, 'Condition')



### BETWEEN TASKS ###

print('Between tasks | across conditions')
print('---------------------------------')

# Grouping by Task here, so the printed label reads "Groups: Task".
f_stat, p_val = _report_one_way_anova(df_get_rec, 'Task')


Number of recommendations requested before submission between conditions

Between conditions | across tasks
---------------------------------
           count      mean       std  min  25%  50%   75%  max
Condition                                                     
BOTH        12.0  2.500000  2.713602  0.0  0.0  1.0  6.00  6.0
BTWN        14.0  1.357143  1.499084  0.0  0.0  1.0  2.75  4.0
CTRL        14.0  1.357143  1.645841  0.0  0.0  1.0  2.00  6.0
WTHN        12.0  2.333333  2.269695  0.0  0.0  2.0  4.00  6.0

One-Way ANOVA	|	Groups: Condition	|	F Statistic: 1.1617	|	p-value: 0.3341

Between CTRL (CONTROL) and WTHN + BTWN + BOTH (INTERVENTION) | across tasks
---------------------------------------------------------------------------
           count      mean       std  min  25%  50%  75%  max
Condition                                                    
CTRL        14.0  1.357143  1.645841  0.0  0.0  1.0  2.0  6.0
INTV        38.0  2.026316  2.187024  0.0  0.0  1.5  3.0  6.0

One

# TODO