In [1]:
import pandas as pd
import numpy as np
import time
import importlib.machinery
# Load the project's `extrasense` helper module directly from its source file and bind it to
# `es`; later cells read `es.user_ids` from it.
# NOTE(review): the path is absolute and machine-specific, so this cell only runs on the original
# author's machine — consider deriving it from a configurable project root.
# NOTE(review): the importlib documentation marks `load_module()` as deprecated in favour of
# `exec_module()`; revisit when upgrading Python.
es = importlib.machinery.SourceFileLoader('extrasense','/home/sac086/extrasensory/extrasense/extrasense.py').load_module()

In [2]:
from plotly.offline import init_notebook_mode, plot, iplot
import plotly.graph_objs as go
# Set up plotly's offline notebook mode before any figure is shown with `iplot`.
# NOTE(review): `connected=True` presumably loads plotly.js from the CDN rather than embedding it
# in the notebook — confirm against the plotly documentation.
init_notebook_mode(connected=True)

In [5]:
# Load the saved experiment-1 score tables: one produced without and one produced with class
# stratification. The cells below filter both frames on their 'user_id', 'method' and
# 'training_size' columns and average the 'accuracy' column.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling — only load result
# files that come from a trusted source.
not_stratified_scores_df = pd.read_pickle("../results/2017-12-12_19_exp1_no_stratification.pickle")
stratified_scores_df = pd.read_pickle("../results/2017-12-12_19_exp1_with_stratification.pickle")


## Without Class Stratification

In [6]:
# Per-user mean accuracy of the 'impersonal' method on the non-stratified results.
# Only rows with training_size == 5 are used.
# NOTE(review): presumably the impersonal model does not depend on the amount of personal
# training data, so any single training size would do — confirm.

# The method / training-size filter does not depend on the user, so apply it once up front.
impersonal_rows = not_stratified_scores_df[
    (not_stratified_scores_df['method'] == 'impersonal')
    & (not_stratified_scores_df['training_size'] == 5)
]

impersonal_mean_scores = []

for user_id in es.user_ids:
    user_impersonal_mean = impersonal_rows.loc[impersonal_rows['user_id'] == user_id, 'accuracy'].mean()
    # A user without matching rows yields NaN. Skip it, as the stratified analysis does, so a
    # single missing user cannot turn the overall mean/SD into NaN.
    if not np.isnan(user_impersonal_mean):
        impersonal_mean_scores.append(user_impersonal_mean)

In [7]:
# Results without stratification
# Prints the mean and SD (over users) of the per-user mean accuracy for each method, and collects
# the per-user means into flat lists that the box-plot cell consumes.
# NOTE(review): np.std is called with its defaults, i.e. the population SD (ddof=0).
print("Impersonal: M=%.3f, SD=%.3f\n" % (np.mean(impersonal_mean_scores), np.std(impersonal_mean_scores)))

# Not read in this cell (the loops iterate es.user_ids); kept because other cells may rely on it.
user_ids = not_stratified_scores_df['user_id'].unique()
training_sizes = [5,10,20,30,40]

# Parallel lists: *_scores[i] is one user's mean accuracy and *_sizes[i] the training size it
# belongs to.
all_personal_scores = []
all_personal_sizes = []

all_hybrid_scores = []
all_hybrid_sizes = []
for ts in training_sizes:
    # Filter on training size and method once per size instead of once per user.
    size_rows = not_stratified_scores_df[not_stratified_scores_df['training_size'] == ts]
    personal_rows = size_rows[size_rows['method'] == 'personal']
    hybrid_rows = size_rows[size_rows['method'] == 'hybrid']

    personal_mean_scores = []
    hybrid_mean_scores = []

    for user_id in es.user_ids:
        user_personal_mean = personal_rows.loc[personal_rows['user_id'] == user_id, 'accuracy'].mean()
        user_hybrid_mean = hybrid_rows.loc[hybrid_rows['user_id'] == user_id, 'accuracy'].mean()

        # Users without rows for this method/size give NaN. Skip them, as the stratified
        # analysis does, so the summary statistics stay finite.
        if not np.isnan(user_personal_mean):
            personal_mean_scores.append(user_personal_mean)
        if not np.isnan(user_hybrid_mean):
            hybrid_mean_scores.append(user_hybrid_mean)

    print("Training Size : %s" % ts)
    print("\tPersonal: M=%.3f, SD=%.3f" % (np.mean(personal_mean_scores), np.std(personal_mean_scores)))
    print("\tHybrid: M=%.3f, SD=%.3f" % (np.mean(hybrid_mean_scores), np.std(hybrid_mean_scores)))

    # The size lists are built from the actual number of kept scores so that both lists stay
    # aligned even when some users were skipped.
    all_personal_scores += personal_mean_scores
    all_personal_sizes += [ts] * len(personal_mean_scores)

    all_hybrid_scores += hybrid_mean_scores
    all_hybrid_sizes += [ts] * len(hybrid_mean_scores)


Impersonal: M=0.600, SD=0.105

Training Size : 5
	Personal: M=0.564, SD=0.099
	Hybrid: M=0.620, SD=0.090
Training Size : 10
	Personal: M=0.618, SD=0.096
	Hybrid: M=0.630, SD=0.088
Training Size : 20
	Personal: M=0.657, SD=0.086
	Hybrid: M=0.647, SD=0.082
Training Size : 30
	Personal: M=0.679, SD=0.089
	Hybrid: M=0.664, SD=0.071
Training Size : 40
	Personal: M=0.695, SD=0.081
	Hybrid: M=0.670, SD=0.074


In [8]:
# Grouped box plot of the per-user mean accuracies (non-stratified results): one box per method
# and training size, with every individual user score drawn next to its box.

# Point styling shared by all three traces.
point_style = dict(boxpoints='all', jitter=0.8, pointpos=-1)

# plotly documents a Box trace's `x` as array-valued (one position per sample), so the impersonal
# scores get an explicit 0 each instead of a bare scalar, which current plotly.py releases reject.
impersonal_trace = go.Box(y=impersonal_mean_scores,
                          x=[0] * len(impersonal_mean_scores),
                          name="Impersonal",
                          **point_style)

personal_trace = go.Box(y=all_personal_scores,
                        x=all_personal_sizes,
                        name="personal",
                        **point_style)

hybrid_trace = go.Box(y=all_hybrid_scores,
                      x=all_hybrid_sizes,
                      name="hybrid",
                      **point_style)

data = [impersonal_trace, personal_trace, hybrid_trace]
layout = go.Layout(yaxis=dict(title='Accuracy', range=[0,1]),
                   xaxis=dict(title='Amount of personal training data'),
                   boxmode='group',
                   title="Accuracy Scores By Method Without Class Stratification"
                  )
# `fig` is reused by the following cell, which writes the figure to an HTML file.
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [9]:
# Write the current figure to disk as an HTML <div> with plotly.js included.
# The encoding is given explicitly because open() otherwise falls back to the platform's locale
# encoding, which would make the write depend on the machine it runs on.
with open("accuracy_scores_by_method_without_class_stratification.html", "w", encoding="utf-8") as html_file:
    html_file.write(plot(fig, output_type='div', include_plotlyjs=True))

## With Class Stratification

In [10]:
# Per-user mean accuracy of the 'impersonal' method on the stratified results, restricted to the
# rows with training_size == 5. Users with no matching rows (NaN mean) are left out.
is_impersonal = stratified_scores_df['method'] == 'impersonal'
is_size_five = stratified_scores_df['training_size'] == 5
impersonal_rows = stratified_scores_df[is_impersonal & is_size_five]

impersonal_mean_scores = []

for user_id in es.user_ids:
    user_impersonal_mean = impersonal_rows.loc[impersonal_rows['user_id'] == user_id, 'accuracy'].mean()
    if np.isnan(user_impersonal_mean):
        # Nothing recorded for this user under these conditions.
        continue
    impersonal_mean_scores.append(user_impersonal_mean)

In [11]:
# Results with stratification
# Prints the mean and SD (over users) of the per-user mean accuracy for each method, and gathers
# the per-user means into flat lists for the box-plot cell.
print("Impersonal: M=%.3f, SD=%.3f\n" % (np.mean(impersonal_mean_scores), np.std(impersonal_mean_scores)))

user_ids = stratified_scores_df['user_id'].unique()
training_sizes = [5,10,20,30,40]

# Parallel lists: *_scores[i] is one user's mean accuracy, *_sizes[i] its training size.
all_personal_scores, all_personal_sizes = [], []
all_hybrid_scores, all_hybrid_sizes = [], []

for ts in training_sizes:
    rows_for_size = stratified_scores_df[stratified_scores_df['training_size'] == ts]
    personal_rows = rows_for_size[rows_for_size['method'] == 'personal']
    hybrid_rows = rows_for_size[rows_for_size['method'] == 'hybrid']

    personal_mean_scores = []
    hybrid_mean_scores = []

    for user_id in es.user_ids:
        user_personal_mean = personal_rows.loc[personal_rows['user_id'] == user_id, 'accuracy'].mean()
        user_hybrid_mean = hybrid_rows.loc[hybrid_rows['user_id'] == user_id, 'accuracy'].mean()

        # A NaN mean means the user has no rows for that method at this size; leave it out.
        if not np.isnan(user_personal_mean):
            personal_mean_scores.append(user_personal_mean)
        if not np.isnan(user_hybrid_mean):
            hybrid_mean_scores.append(user_hybrid_mean)

    print("Training Size : %s" % ts)
    print("\tPersonal: M=%.3f, SD=%.3f" % (np.mean(personal_mean_scores), np.std(personal_mean_scores)))
    print("\tHybrid: M=%.3f, SD=%.3f" % (np.mean(hybrid_mean_scores), np.std(hybrid_mean_scores)))

    # Size labels are repeated once per kept score so the two lists stay aligned.
    all_personal_scores.extend(personal_mean_scores)
    all_personal_sizes.extend([ts] * len(personal_mean_scores))

    all_hybrid_scores.extend(hybrid_mean_scores)
    all_hybrid_sizes.extend([ts] * len(hybrid_mean_scores))


Impersonal: M=0.596, SD=0.104

Training Size : 5
	Personal: M=0.586, SD=0.101
	Hybrid: M=0.616, SD=0.091
Training Size : 10
	Personal: M=0.634, SD=0.086
	Hybrid: M=0.630, SD=0.086
Training Size : 20
	Personal: M=0.673, SD=0.090
	Hybrid: M=0.652, SD=0.082
Training Size : 30
	Personal: M=0.694, SD=0.088
	Hybrid: M=0.668, SD=0.079
Training Size : 40
	Personal: M=0.703, SD=0.083
	Hybrid: M=0.673, SD=0.072


In [13]:
# Grouped box plot of the per-user mean accuracies (stratified results): one box per method and
# training size, with every individual user score drawn next to its box.

# Point styling shared by all three traces.
point_style = dict(boxpoints='all', jitter=0.8, pointpos=-1)

# plotly documents a Box trace's `x` as array-valued (one position per sample), so the impersonal
# scores get an explicit 0 each instead of a bare scalar, which current plotly.py releases reject.
impersonal_trace = go.Box(y=impersonal_mean_scores,
                          x=[0] * len(impersonal_mean_scores),
                          name="Impersonal",
                          **point_style)

personal_trace = go.Box(y=all_personal_scores,
                        x=all_personal_sizes,
                        name="personal",
                        **point_style)

hybrid_trace = go.Box(y=all_hybrid_scores,
                      x=all_hybrid_sizes,
                      name="hybrid",
                      **point_style)

data = [impersonal_trace, personal_trace, hybrid_trace]
layout = go.Layout(yaxis=dict(title='Accuracy', range=[0,1]),
                   xaxis=dict(title='Amount of personal training data'),
                   boxmode='group',
                   title="Accuracy Scores By Method With Class Stratification"
                  )
# `fig` is reused by the following cell, which writes the figure to an HTML file.
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [None]:
# Write the current figure to disk as an HTML <div> with plotly.js included.
# The encoding is given explicitly because open() otherwise falls back to the platform's locale
# encoding, which would make the write depend on the machine it runs on.
with open("accuracy_scores_by_method_with_class_stratification.html", "w", encoding="utf-8") as html_file:
    html_file.write(plot(fig, output_type='div', include_plotlyjs=True))

# Testing whether stratification helped for most users

In [34]:
# Pick a single user for the spot check below.
# NOTE(review): index 3 is an arbitrary-looking choice with no stated rationale; one user cannot
# show whether stratification helped "most users" — consider looping over es.user_ids.
user_id = es.user_ids[3]

In [35]:
# Non-stratified rows for the selected user, 'personal' method, training_size == 5.
is_selected_user = not_stratified_scores_df['user_id'] == user_id
is_personal = not_stratified_scores_df['method'] == 'personal'
is_size_five = not_stratified_scores_df['training_size'] == 5
user_score_df = not_stratified_scores_df.loc[is_selected_user & is_personal & is_size_five]

In [36]:
# Summary statistics of the selected user's non-stratified 'personal' rows at training_size == 5.
user_score_df.describe()

Unnamed: 0,accuracy,run_num,training_size
count,5.0,5.0,5.0
mean,0.412,3.0,5.0
std,0.048683,1.581139,0.0
min,0.37,1.0,5.0
25%,0.38,2.0,5.0
50%,0.38,3.0,5.0
75%,0.46,4.0,5.0
max,0.47,5.0,5.0


In [37]:
# Stratified rows for the selected user, 'personal' method, training_size == 5.
is_selected_user = stratified_scores_df['user_id'] == user_id
is_personal = stratified_scores_df['method'] == 'personal'
is_size_five = stratified_scores_df['training_size'] == 5
user_stratified_score_df = stratified_scores_df.loc[is_selected_user & is_personal & is_size_five]

In [38]:
# Summary statistics of the selected user's stratified 'personal' rows at training_size == 5,
# for comparison with the non-stratified summary of the same user.
user_stratified_score_df.describe()

Unnamed: 0,accuracy,run_num,training_size
count,5.0,5.0,5.0
mean,0.502,3.0,5.0
std,0.104499,1.581139,0.0
min,0.34,1.0,5.0
25%,0.49,2.0,5.0
50%,0.5,3.0,5.0
75%,0.56,4.0,5.0
max,0.62,5.0,5.0
