In [3]:
ls -l results

total 76
drwxrwxr-x. 2 sac086 sac086 4096 Aug 21 16:57 [0m[01;34mexperiment_08-17[0m/
drwxrwxr-x. 2 sac086 sac086 4096 Aug 24 10:01 [01;34mexperiment_08-21_v2_dataset[0m/
drwxrwxr-x. 2 sac086 sac086 4096 Aug 23 13:55 [01;34mexperiment_08-23_train_v1_test_v2[0m/
drwxrwxr-x. 2 sac086 sac086 4096 Aug 24 16:10 [01;34mexperiment_08-24_train_v1_test_v2[0m/
drwxrwxr-x. 2 sac086 sac086   10 Aug 29 18:48 [01;34mexperiment_08-24_train_v2[0m/
drwxrwxr-x. 2 sac086 sac086 4096 Aug 28 17:22 [01;34mexperiment_08-28_train_v1_test_v2_active1[0m/
drwxrwxr-x. 2 sac086 sac086 4096 Aug 28 15:37 [01;34mexperiment_08-28_train_v1_test_v2_unstratified[0m/
drwxrwxr-x. 2 sac086 sac086 4096 Aug 30 09:21 [01;34mexperiment_08-29_train_v1_test_v2_active1[0m/
drwxrwxr-x. 2 sac086 sac086 4096 Aug 30 09:43 [01;34mexperiment_08-29_train_v1_test_v2_active2[0m/
drwxrwxr-x. 2 sac086 sac086 4096 Aug 31 18:53 [01;34mexperiment_08-31_train_v1_test_v2_random[0m/
drwxrwxr-x. 2 sac086 sac086 4096 

In [28]:
from wisdm import wisdm
import random
import numpy as np
import pandas as pd
from collections import Counter
import time
from scipy import stats
import os
import json

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.figure_factory as ff

In [10]:
# Directory holding the pickled per-run result frames for experiment 1.
experiment1_dir = "./results/experiment1_10-4-17/"

# os.listdir returns entries in arbitrary order and also lists sub-directories
# (e.g. .ipynb_checkpoints), which pd.read_pickle cannot open. Keep regular
# files only and sort them so the row order of results_df is reproducible.
experiment1_files = sorted(
    path
    for path in (os.path.join(experiment1_dir, entry)
                 for entry in os.listdir(experiment1_dir))
    if os.path.isfile(path)
)

results = []

# NOTE: unpickling executes code embedded in the file; only load result files
# produced by trusted runs of this project.
for f in experiment1_files:
    results.append(pd.read_pickle(f))

# One frame containing the rows of every result file found above.
results_df = pd.concat(results)

In [14]:
# Switch the wisdm module to dataset version "2" with make_compatible enabled.
# NOTE(review): presumably make_compatible harmonises the label set across
# dataset versions - confirm in the wisdm module.
wisdm.set_data(version="2", make_compatible=True)

# Distribution of number of classes labeled

In [24]:
# Map each participant id to the number of distinct 'class' values found in
# that participant's rows of the currently loaded data.
user_num_labels = {}
for uid in wisdm.user_ids:
    rows_for_user = wisdm.data_df[wisdm.data_df['user'] == uid]
    distinct_labels = rows_for_user['class'].unique()
    user_num_labels[uid] = len(distinct_labels)

In [25]:
# Histogram input: per-user label counts, skipping users that have no labels.
hist_data = []
for label_count in user_num_labels.values():
    if label_count > 0:
        hist_data.append(label_count)
group_labels = ['number of users']
# Alternative rendering that was tried and left disabled:
#fig = ff.create_distplot(hist_data, group_labels, show_rug=False, show_curve=False, histnorm='probability density')
layout = go.Layout(
    xaxis={"title": "Number Of Classes Labeled"},
    yaxis={"title": "Number Of Users"},
)
data = [go.Histogram(x=hist_data)]
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [15]:
# Collect the users whose NaN-free data is too small to use: fewer than 20
# rows, or fewer than 2 distinct class labels.
users_to_ignore = []
for user_id in wisdm.user_ids:
    user_df = wisdm.remove_all_nan(wisdm.get_user_set(user_id))
    num_features = len(user_df)
    num_unique_labels = len(user_df['class'].unique())

    too_few_rows = num_features < 20
    too_few_labels = num_unique_labels < 2
    if too_few_rows or too_few_labels:
        users_to_ignore.append(user_id)


# Distribution of Classes in Lab Data

In [47]:
def _decoded_class_counts(frame):
    """Count the rows per 'class' value, with the byte labels decoded to str."""
    counts = {}
    for raw_label, n_rows in Counter(frame['class']).items():
        counts[raw_label.decode("utf-8")] = n_rows
    return counts


# Version 1: every NaN-free row. Its label order fixes the order of class_labels.
wisdm.set_data(version="1", make_compatible=True)
clean_df1 = wisdm.remove_all_nan(wisdm.data_df)
class_labels = list(map(lambda raw: raw.decode("utf-8"), clean_df1['class'].unique()))

class_counter1 = _decoded_class_counts(clean_df1)

# Version 2: drop the ignored users first, then the rows containing NaN.
# Note that wisdm is left pointing at version 2 after this cell.
wisdm.set_data(version="2", make_compatible=True)
kept_user_rows = ~wisdm.data_df['user'].isin(users_to_ignore)
clean_df2 = wisdm.remove_all_nan(wisdm.data_df[kept_user_rows])
class_counter2 = _decoded_class_counts(clean_df2)

In [52]:
# Grouped bar chart comparing how many instances of each activity the two
# dataset versions contain. class_labels comes from the version-1 data, so a
# label may be absent from the (user-filtered) version-2 counts; .get(cl, 0)
# draws such a label as an empty bar instead of raising KeyError.
lab_trace = go.Bar(
    x=class_labels,
    y=[class_counter1.get(cl, 0) for cl in class_labels],
    name="WISDM Lab data v1.1"
)

# Legend text corrected: this trace shows the version-2 counts and is held in
# `field_trace`, but it was labelled "Lab data" like the trace above.
# NOTE(review): assumes v2.0 is the field-collected data set - confirm.
field_trace = go.Bar(
    x=class_labels,
    y=[class_counter2.get(cl, 0) for cl in class_labels],
    name="WISDM Field data v2.0"
)

traces = [lab_trace, field_trace]
layout = go.Layout(yaxis=dict(title="number of instances"),
                   xaxis=dict(title="activity"))

fig = go.Figure(data=traces,layout=layout)
iplot(fig)

In [54]:
# Reload dataset version "1" with make_compatible disabled, so the next cell
# can inspect the class labels that version reports in this mode.
wisdm.set_data(version="1", make_compatible=False)

In [55]:
# Show the distinct values of the 'class' column for the data loaded above.
wisdm.data_df['class'].unique()

array([b'Jogging', b'Walking', b'Upstairs', b'Downstairs', b'Sitting',
       b'Standing'], dtype=object)

In [56]:
# Reload dataset version "2" with make_compatible disabled, so the next cell
# can inspect the class labels that version reports in this mode.
wisdm.set_data(version="2", make_compatible=False)

In [57]:
# Show the distinct values of the 'class' column for the data loaded above.
wisdm.data_df['class'].unique()

array([b'Standing', b'Sitting', b'Stairs', b'LyingDown', b'Walking',
       b'Jogging'], dtype=object)

In [59]:
# Number of users that were flagged for exclusion.
len(users_to_ignore)

32

In [60]:
# Training size whose rows supply the single impersonal accuracy per user;
# those points are plotted at x = 0.
IMPERSONAL_REFERENCE_SIZE = 8


def _unzip_points(points):
    """Split a list of (x, y) pairs into an x tuple and a y tuple.

    Returns two empty tuples for an empty list, where a bare ``zip(*points)``
    unpacking would raise ValueError.
    """
    if not points:
        return (), ()
    xs, ys = zip(*points)
    return xs, ys


def _accuracy_box(xs, ys, name, color):
    """Build one box trace that also draws every individual point beside the box."""
    return go.Box(y=ys,
                  x=xs,
                  name=name,
                  marker=dict(color=color),
                  boxpoints='all',
                  jitter=0.3,
                  pointpos=-1)


# Each list holds (x position, mean accuracy of one user) pairs.
impersonal_means = []
personal_means = []
hybrid_means = []

training_sizes = results_df['training size'].unique()

# Set lookup instead of scanning the list once per (training size, user) pair.
ignored_users = set(users_to_ignore)

for training_size in training_sizes:
    for user_id in wisdm.user_ids:
        if user_id in ignored_users:
            continue
        user_df = results_df[(results_df['user_id'] == user_id) &
                             (results_df['training size'] == training_size)]
        # The impersonal score is recorded only once per user, from the rows of
        # the reference training size.
        if training_size == IMPERSONAL_REFERENCE_SIZE:
            impersonal_means.append((0, user_df['impersonal'].mean()))
        personal_means.append((training_size, user_df['personal'].mean()))
        hybrid_means.append((training_size, user_df['hybrid'].mean()))

impersonal_x, impersonal_y = _unzip_points(impersonal_means)
impersonal_trace = _accuracy_box(impersonal_x, impersonal_y, "Impersonal", "red")

personal_x, personal_y = _unzip_points(personal_means)
personal_trace = _accuracy_box(personal_x, personal_y, "Personal", "blue")

hybrid_x, hybrid_y = _unzip_points(hybrid_means)
hybrid_trace = _accuracy_box(hybrid_x, hybrid_y, "Hybrid", "green")

# boxmode="group" places the boxes of the three models side by side at each x.
layout=go.Layout(boxmode="group", xaxis=dict(title="number of personal training samples"), yaxis=dict(title="accuracy"))

data = [impersonal_trace, personal_trace, hybrid_trace]

fig = go.Figure(data=data, layout=layout)

iplot(fig)