# Initializing the environment and clusters

In [1]:
import ipyparallel as ipp
# Connect to the ipyparallel cluster using the default connection settings.
c = ipp.Client()
# View over every engine of the client; used below to scatter the user ids
# and to execute the pipeline command on the engines.
dview = c[:]

In [None]:
%%px --local
import sys
sys.path.append("/home/sac086/wisdm_model_personalization/")
import warnings
import os
from wisdm import wisdm
import numpy as np
import pandas as pd

from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, StratifiedKFold, StratifiedShuffleSplit, ShuffleSplit
from sklearn.cluster import KMeans
from scipy.stats import mode
from collections import Counter
import time

In [None]:
%%px --local
# `imp` is deprecated and was removed in Python 3.12; importlib provides the
# same reload() function.
from importlib import reload
# Re-import wisdm so that edits to the module are picked up without
# restarting the kernel (and, via %%px, the engines).
reload(wisdm)

# Experiment #1

In [None]:
# README text for this run, spelled out one output line per literal so that
# the leading newline and the trailing space after "All Impersonal Data,"
# are visible. It is written to README.md in the output directory below.
description = (
    "\n"
    "Training set = WISDM v1.1\n"
    "Test Set = WISDM v1.1\n"
    "validation = leave-one user out, class label stratified 10-fold cross-validation within user where the training folds\n"
    "are used as the pool from which we can actively sample\n"
    "Sampling Methods = Least-Certain Sampling, Random Sampling\n"
    "Modeling Methods = Impersonal, Sampled Personal, Sampled Personal data + All Impersonal Data, \n"
    "                    Sampled Personal Data + nearest cluster of Impersonal data,\n"
    "                    Sampled Personal Data + Impersonal Data selected and weighted with Garcia-Ceja approach\n"
)

In [None]:
%%px --local
# Directory name of this run (trailing slash included) and the full results
# path built from it.
experiment_name = "experiment_09-01_train_v1_with_random_and_least_certain/"
experiment_output_path = "".join(
    ("/home/sac086/wisdm_model_personalization/results/", experiment_name)
)

In [None]:
# Create the output directory for this run and record the experiment
# description next to the results. exist_ok=True replaces the separate
# existence check, which could race with another process creating the
# directory; os.path.join keeps the README path correct even if the
# directory string has no trailing slash.
os.makedirs(experiment_output_path, exist_ok=True)
with open(os.path.join(experiment_output_path, "README.md"), "w") as fOut:
    fOut.write(description)

In [None]:
# Select data version "1" in the local wisdm module. This cell has no %%px
# magic, so it runs in the notebook kernel only; `wisdm.user_ids`, scattered
# to the engines below, is presumably populated by this call (confirm).
wisdm.set_data(version="1")

In [None]:
# Code string to run on every engine. The output path is substituted here, in
# the local kernel; `user_ids` stays a bare name that each engine resolves
# from the ids scattered to it.
command = """wisdm.pipeline1('1', '%s', user_ids)""" % experiment_output_path

In [None]:
# divide the user ids up among the different cores
# (each engine receives its share under the name `user_ids`, which is the
# name the command string refers to)
scatter_result = dview.scatter("user_ids", wisdm.user_ids)

start = time.time()
# Blocking mode: execute() returns only once the engines have finished, so
# the elapsed time below covers the whole run.
dview.block = True
results = dview.execute(command)
finish = time.time()
# Wall-clock duration, converted from seconds to minutes.
print("Finished all models in %s minutes" % ((finish - start) / 60.))

In [None]:
# Keep only the first user id, for a single-user run in the local kernel.
user_ids = [wisdm.user_ids[0]]

In [None]:
# Run pipeline1 directly in the local kernel (not on the engines) for
# whatever `user_ids` is currently bound to locally.
wisdm.pipeline1('1', experiment_output_path, user_ids)

# Experiment #1

In [None]:
%%px --local
# README text for this run, one output line per literal (note the trailing
# space after "dataset." and the absence of a final newline).
description = (
    "\n"
    "Training and calibrating probability estimations on the WISDM v1.1 dataset. \n"
    "Probability estimation was done with stream sampling where those samples whose most probable class label\n"
    "was did not exceed some threshold (where the threshold is some amount greater than the uniform probability for the class) were\n"
    "actively labeled.  Testing on WISDM v1.1 with one participant held out"
)

# Directory name of this run (trailing slash included) and the full results
# path built from it.
experiment_name = "experiment_09-01_train_v1_random/"
experiment_output_path = "".join(
    ("/home/sac086/wisdm_model_personalization/results/", experiment_name)
)

In [None]:
# Create the output directory for this run and record the experiment
# description next to the results. exist_ok=True replaces the separate
# existence check, which could race with another process creating the
# directory; os.path.join keeps the README path correct even if the
# directory string has no trailing slash.
os.makedirs(experiment_output_path, exist_ok=True)
with open(os.path.join(experiment_output_path, "README.md"), "w") as fOut:
    fOut.write(description)

In [None]:
# Code string to run on every engine. The output path is substituted here, in
# the local kernel; `user_ids` stays a bare name that each engine resolves
# from the ids scattered to it.
command = """wisdm.pipeline1('1', '%s', user_ids)""" % experiment_output_path

In [None]:
# Select data version "1" in the local wisdm module (no %%px magic, so this
# runs in the notebook kernel only).
wisdm.set_data(version="1")

In [None]:
# divide the user ids up among the different cores
# (each engine receives its share under the name `user_ids`, which is the
# name the command string refers to)
scatter_result = dview.scatter("user_ids", wisdm.user_ids)

start = time.time()
# Blocking mode: execute() returns only once the engines have finished, so
# the elapsed time below covers the whole run.
dview.block = True
results = dview.execute(command)
finish = time.time()
# Wall-clock duration, converted from seconds to minutes.
print("Finished all models in %s minutes" % ((finish - start) / 60.))

### Experiment #1 Results


In [None]:
# Import the plotting helpers here as well: the only other imports of `viz`
# and `iplot` are in the Experiment #3 visualisation cells further down, so a
# top-to-bottom run would otherwise fail with NameError in this section.
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
from wisdm import viz

# Load the results stored under the current experiment output directory.
results_df = viz.get_results_dataframe(experiment_output_path)

In [None]:
# Call viz.set_data for version "1" before plotting, then show the figure
# inline. `viz` and `iplot` must already be imported when this cell runs.
viz.set_data(version="1")
fig = viz.accuracy_means_by_training_size()
iplot(fig)

In [None]:
# Build the per-user "best approach" figure, give it a title, and show it
# inline. `viz` and `iplot` must already be imported when this cell runs.
fig = viz.plotUserBests()
fig.layout['title'] = "# of users whose best performance came from a particular approach"
iplot(fig)

# Experiment #2

In [None]:
# README text for the first Experiment #2 run (train and test on WISDM v2.0).
description = '''
Training set = WISDM v2.0
Test Set = WISDM v2.0
validation = leave-one user out, class label stratified 10-fold cross-validation within user where the training folds
are used as the pool from which we can actively sample
Sampling Methods = Least-Certain Sampling, Random Sampling
Modeling Methods = Impersonal, Sampled Personal, Sampled Personal data + All Impersonal Data, 
                    Sampled Personal Data + nearest cluster of Impersonal data,
                    Sampled Personal Data + Impersonal Data selected and weighted with Garcia-Ceja approach
'''

# Give this run its own output directory. Without this, experiment_output_path
# still holds the last Experiment #1 directory, so the README and the pipeline
# results written below would overwrite that v1 run.
# NOTE(review): the directory name mirrors the matching Experiment #1 run;
# confirm it is the intended one.
experiment_name = "experiment_09-01_train_v2_with_random_and_least_certain/"
experiment_output_path = "/home/sac086/wisdm_model_personalization/results/" + experiment_name

In [None]:
# Create the output directory for this run and record the experiment
# description next to the results. exist_ok=True replaces the separate
# existence check, which could race with another process creating the
# directory; os.path.join keeps the README path correct even if the
# directory string has no trailing slash.
# NOTE(review): experiment_output_path must already point at an Experiment #2
# directory when this runs, otherwise an Experiment #1 README is overwritten.
os.makedirs(experiment_output_path, exist_ok=True)
with open(os.path.join(experiment_output_path, "README.md"), "w") as fOut:
    fOut.write(description)

In [None]:
# Code string to run on every engine, this time with '2' as the first
# argument to pipeline1. The output path is whatever experiment_output_path
# holds locally when this cell runs; `user_ids` is resolved on each engine.
command = """wisdm.pipeline1('2', '%s', user_ids)""" % experiment_output_path

In [None]:
# Select data version "2" in the local wisdm module (no %%px magic, so this
# runs in the notebook kernel only).
wisdm.set_data(version="2")

In [None]:
# divide the user ids up among the different cores
# (each engine receives its share under the name `user_ids`, which is the
# name the command string refers to)
scatter_result = dview.scatter("user_ids", wisdm.user_ids)

start = time.time()
# Blocking mode: execute() returns only once the engines have finished, so
# the elapsed time below covers the whole run.
dview.block = True
results = dview.execute(command)
finish = time.time()
# Wall-clock duration, converted from seconds to minutes.
print("Finished all models in %s minutes" % ((finish - start) / 60.))

In [None]:
%%px --local
# README text for this run, one output line per literal (note the trailing
# space after "dataset." and the absence of a final newline).
description = (
    "\n"
    "Training and calibrating probability estimations on the WISDM v2.0 dataset. \n"
    "Probability estimation was done with stream sampling where those samples whose most probable class label\n"
    "was did not exceed some threshold (where the threshold is some amount greater than the uniform probability for the class) were\n"
    "actively labeled.  Testing on WISDM v2.0 with one participant held out"
)

# Directory name of this run (trailing slash included) and the full results
# path built from it.
experiment_name = "experiment_09-01_train_v2_random/"
experiment_output_path = "".join(
    ("/home/sac086/wisdm_model_personalization/results/", experiment_name)
)

In [None]:
# Create the output directory for this run and record the experiment
# description next to the results. exist_ok=True replaces the separate
# existence check, which could race with another process creating the
# directory; os.path.join keeps the README path correct even if the
# directory string has no trailing slash.
os.makedirs(experiment_output_path, exist_ok=True)
with open(os.path.join(experiment_output_path, "README.md"), "w") as fOut:
    fOut.write(description)

In [None]:
# Code string to run on every engine, with '2' as the first argument to
# pipeline1. The output path is substituted here, in the local kernel;
# `user_ids` stays a bare name that each engine resolves for itself.
command = """wisdm.pipeline1('2', '%s', user_ids)""" % experiment_output_path

In [None]:
# Select data version "2" in the local wisdm module (no %%px magic, so this
# runs in the notebook kernel only).
wisdm.set_data(version="2")

In [None]:
# divide the user ids up among the different cores
# (each engine receives its share under the name `user_ids`, which is the
# name the command string refers to)
scatter_result = dview.scatter("user_ids", wisdm.user_ids)

start = time.time()
# Blocking mode: execute() returns only once the engines have finished, so
# the elapsed time below covers the whole run.
dview.block = True
results = dview.execute(command)
finish = time.time()
# Wall-clock duration, converted from seconds to minutes.
print("Finished all models in %s minutes" % ((finish - start) / 60.))

### Experiment #2 Results

In [None]:
# Load the results stored under the current experiment output directory.
# `viz` must already be imported when this cell runs.
results_df = viz.get_results_dataframe(experiment_output_path)

In [None]:
# Call viz.set_data for version "2" before plotting, then show the figure
# inline. `viz` and `iplot` must already be imported when this cell runs.
viz.set_data(version="2")
fig = viz.accuracy_means_by_training_size()
iplot(fig)

In [None]:
# Build the per-user "best approach" figure, give it a title, and show it
# inline. `viz` and `iplot` must already be imported when this cell runs.
fig = viz.plotUserBests()
fig.layout['title'] = "# of users whose best performance came from a particular approach"
iplot(fig)

# Experiment #3
In this experiment we compare differing amounts of randomly selected personal data, combined either with the
full impersonal training set or with a cluster of the impersonal training set.

In [None]:
# README text for the cross-version setup (train on v1.1, test on v2.0),
# one output line per literal.
# NOTE(review): the next cell assigns `description` again before anything
# writes it out, so this text appears to be unused.
description = (
    "\n"
    "Training set = WISDM v1.1\n"
    "Test Set = WISDM v2.0\n"
    "validation = leave-one user out, class label stratified 10-fold cross-validation within user where the training folds\n"
    "are used as the pool from which we can actively sample\n"
    "Sampling Methods = Least-Certain Sampling, Random Sampling\n"
    "Modeling Methods = Impersonal, Sampled Personal, Sampled Personal data + All Impersonal Data, \n"
    "                    Sampled Personal Data + nearest cluster of Impersonal data,\n"
    "                    Sampled Personal Data + Impersonal Data selected and weighted with Garcia-Ceja approach\n"
)

In [None]:
%%px --local
# README text for this run, one output line per literal (note the trailing
# space after "dataset." and the absence of a final newline).
description = (
    "\n"
    "Training and calibrating probability estimations on the WISDM v1.1 dataset. \n"
    "Probability estimation was done with stream sampling where those samples whose most probable class label\n"
    "was did not exceed some threshold (where the threshold is some amount greater than the uniform probability for the class) were\n"
    "actively labeled.  Testing on WISDM 2.0 with one participant held out"
)

# Directory name of this run (trailing slash included) and the full results
# path built from it.
experiment_name = "experiment_08-31_train_v1_test_v2_random/"
experiment_output_path = "".join(
    ("/home/sac086/wisdm_model_personalization/results/", experiment_name)
)

In [None]:
# Create the output directory for this run and record the experiment
# description next to the results. exist_ok=True replaces the separate
# existence check, which could race with another process creating the
# directory; os.path.join keeps the README path correct even if the
# directory string has no trailing slash.
os.makedirs(experiment_output_path, exist_ok=True)
with open(os.path.join(experiment_output_path, "README.md"), "w") as fOut:
    fOut.write(description)

In [None]:
# Code string to run on every engine; this experiment calls pipeline2, which
# takes only the output path and the user ids. The path is substituted here,
# in the local kernel; `user_ids` is resolved on each engine.
command = """wisdm.pipeline2('%s', user_ids)""" % experiment_output_path

In [None]:
# Select data version "2" in the local wisdm module with make_compatible=True
# (presumably aligning v2 with the v1 training data - confirm in wisdm).
wisdm.set_data(version="2", make_compatible=True)

In [None]:
# divide the user ids up among the different cores
# (each engine receives its share under the name `user_ids`, which is the
# name the command string refers to)
scatter_result = dview.scatter("user_ids", wisdm.user_ids)

start = time.time()
# Blocking mode: execute() returns only once the engines have finished, so
# the elapsed time below covers the whole run.
dview.block = True
results = dview.execute(command)
finish = time.time()
# Wall-clock duration, converted from seconds to minutes.
print("Finished all models in %s minutes" % ((finish - start) / 60.))

### Visualize Results for Experiment #3

In [None]:
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
from plotly.graph_objs import *

In [None]:
# `imp` is deprecated and was removed in Python 3.12; importlib provides the
# same reload() function.
from importlib import reload

In [None]:
from wisdm import viz

In [None]:
# Re-import viz so that edits to the module are picked up without restarting
# the kernel.
reload(viz)

In [None]:
# Load the results stored under the current experiment output directory.
results_df = viz.get_results_dataframe(experiment_output_path)

In [None]:
# Plot mean accuracy by training size and show it inline.
# NOTE(review): unlike the Experiment #1/#2 result cells, no viz.set_data(...)
# call precedes this plot - confirm viz already holds the intended data.
fig = viz.accuracy_means_by_training_size()
iplot(fig)

In [None]:
# Build the per-user "best approach" figure and show it inline (no title is
# set here, unlike the Experiment #1/#2 result cells).
fig = viz.plotUserBests()
iplot(fig)

# Trained and tested on lab collected data

In [None]:
# Results root for the lab-collected-data runs. The original line was missing
# its closing quote, which is a SyntaxError.
# NOTE(review): no experiment sub-directory is named here yet - append it
# before loading results.
experiment_output_path = '/home/sac086/wisdm_model_personalization/results/'

In [None]:
# Load the results stored under the current experiment output directory.
# (A stray, half-typed `experim` token that raised NameError was removed.)
results_df = viz.get_results_dataframe(experiment_output_path)