In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.utils import resample
%matplotlib inline

### Load data (pre-processed in clean.ipynb) and define useful constants

In [3]:
# Load the pre-processed results (outliers removed).
# The variant that keeps outliers lives in "results/results_cleaned.csv".
res = pd.read_csv("results/results_cleaned_no_outliers.csv")

# Required fields.
ids = ['timestamp', 'postId', 'workerId', 'mode']

# Timing: the required fields followed by the time_diff_* columns.
time_diff = ids + [
    'time_diff_build',
    'time_diff_experiment',
    'time_diff_explore',
    'time_diff_strategy',
    'time_diff_training',
]

# Values of the 'mode' column.
modes = ["List", "Category", "Pairwise"]

### Number of Workers for each Mode

In [4]:
# Keep a single row per worker, then count the workers in each mode.
df = res.drop_duplicates(subset=['workerId'])
df.groupby(['mode'])[['workerId']].count()

Unnamed: 0_level_0,workerId
mode,Unnamed: 1_level_1
Category,45
List,49
Pairwise,50


### Questions

In [5]:
# For every question, keep each worker's answer from the row with the largest
# timestamp (ignoring rows where the answer is missing) and export it as
# (condition, measure) pairs to results/<question>.csv.
questions = ['question1', 'question2', 'question3', 'question4', 'question5']
latest_first = res.sort_values("timestamp", ascending=False)

qs = {}
for q in questions:
    d = latest_first.dropna(subset=[q])[["mode", "workerId", q]]
    # Rows are ordered newest first, so keep='first' retains the latest answer.
    d = d.drop_duplicates(subset=["workerId"], keep='first')
    qs[q] = pd.DataFrame({'condition': d['mode'], 'measure': d[q]})
    qs[q].to_csv("results/" + q + ".csv", index=False)

### Time

In [13]:
def timeToCSV(df, t, filename):
    """Write the total of column ``t`` per worker to ``filename`` as a CSV.

    Rows repeating the same (workerId, ``t``) pair are counted once, the
    remaining values are summed per (workerId, mode), and the sum is divided
    by 1000 and then by 60 (milliseconds to minutes, presumably -- confirm the
    unit of the time_diff_* columns).

    Rows with a missing workerId, mode or ``t`` are removed *before* summing.
    Dropping them after the sum has no effect, because a group sum skips NaN
    and yields 0, so a worker without any recorded value would otherwise be
    exported as 0 instead of being left out.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'workerId', 'mode' and ``t``.
    t : str
        Name of the column to aggregate.
    filename : str
        Destination path; the file has the columns 'measure' and 'condition'.
    """
    times = (
        df.dropna(subset=['workerId', 'mode', t])
          .drop_duplicates(subset=['workerId', t])
          .groupby(['workerId', 'mode'], as_index=False)[t]
          .sum()
    )

    # write to csv for R script
    df0 = pd.DataFrame({
        'measure': times[t] / 1000 / 60,
        'condition': times['mode'],
    })
    df0.to_csv(filename, index=False)


In [14]:
# Export the per-worker totals of the build and explore timing columns.
for column, target in (
    ('time_diff_build', 'results/build.csv'),
    ('time_diff_explore', 'results/explore.csv'),
):
    timeToCSV(res[['mode', 'workerId', column]], column, target)

### Interactions

In [15]:
# Columns kept for each mode's interaction log; the URL-change columns differ per mode.
inter_l = ['timestamp','workerId','mode','interaction','UrlChanges']
inter_c = ['timestamp','workerId','mode','interaction','highUrlChanges', 'lowUrlChanges', 'medUrlChanges']
inter_p = ['timestamp','workerId','mode','interaction','highUrlChanges', 'lowUrlChanges']

# Per-mode interaction rows, de-duplicated on everything except timestamp and mode.
ints = {}
ints['l'] = res.loc[res['mode'] == 'List', inter_l].drop_duplicates(
    subset=['workerId', 'interaction', 'UrlChanges'])
ints['c'] = res.loc[res['mode'] == 'Category', inter_c].drop_duplicates(
    subset=['workerId', 'interaction', 'highUrlChanges', 'lowUrlChanges', 'medUrlChanges'])
ints['p'] = res.loc[res['mode'] == 'Pairwise', inter_p].drop_duplicates(
    subset=['workerId', 'interaction', 'highUrlChanges', 'lowUrlChanges'])

ranks = {}
for i in ints:
    # Order each worker's rows by timestamp. An explicit two-key sort replaces
    # groupby(...).apply(sort_values), which relies on the grouping column being
    # handed to apply (deprecated in recent pandas). Rows without a workerId are
    # dropped, matching what the groupby did implicitly.
    # The dropna result is now assigned; previously it was computed and thrown
    # away, so rows with a missing interaction were never removed.
    ints[i] = (
        ints[i]
        .dropna(subset=['workerId', 'interaction'])
        .sort_values(['workerId', 'timestamp'])
        .reset_index(drop=True)
    )


def _count_per_worker(frame, labels):
    """Return, per workerId, the number of rows whose interaction is in ``labels``."""
    matching = frame[frame['interaction'].isin(labels)]
    return matching.groupby('workerId')['interaction'].count()


# NOTE(review): a worker with no matching interaction is absent from these
# Series rather than present with a count of 0 -- confirm that is what the
# downstream analysis expects.
adds = {}
adds['List'] = _count_per_worker(ints['l'], ['ADD'])
adds['Category'] = _count_per_worker(ints['c'], ['LOW ADD', 'HIGH ADD', 'MED ADD'])
adds['Pairwise'] = _count_per_worker(ints['p'], ['LEFT ADD', 'RIGHT ADD'])

removes = {}
removes['List'] = _count_per_worker(ints['l'], ['REMOVE'])
removes['Category'] = _count_per_worker(ints['c'], ['LOW REMOVE', 'HIGH REMOVE', 'MED REMOVE'])
removes['Pairwise'] = _count_per_worker(ints['p'], ['LEFT REMOVE', 'RIGHT REMOVE'])

In [8]:
# Write adds to file: one row per counted worker, columns (measure, condition),
# with the modes stacked in the order List, Category, Pairwise.
df = pd.concat([
    pd.DataFrame({'measure': adds[mode].values, 'condition': mode})
    for mode in ('List', 'Category', 'Pairwise')
])
df.to_csv("results/adds.csv", index=False)

In [16]:
# Write removes to file: one row per counted worker, columns (measure, condition),
# with the modes stacked in the order List, Category, Pairwise.
df = pd.concat([
    pd.DataFrame({'measure': removes[mode].values, 'condition': mode})
    for mode in ('List', 'Category', 'Pairwise')
])
df.to_csv("results/removes.csv", index=False)