# Experiment 1

In [1]:
from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit, ShuffleSplit
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import brier_score_loss, accuracy_score, precision_recall_curve, average_precision_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler, label_binarize

In [2]:
from wisdm import wisdm
import random
import numpy as np
import pandas as pd
from collections import Counter
import time
from scipy import stats

In [3]:
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go

In [4]:
n_trees = 10000
n_cores = 30
oob_score = False

In [5]:
def shuffle_rows(features, labels):
    permutation = np.random.permutation(features.shape[0])
    return features[permutation], labels[permutation]

In [6]:
def impersonal_pred(model, test_features):
    scaled_test_features = impersonal_scaler.transform(test_features)
    
    impersonal_predictions = impersonal_clf.predict(scaled_test_features)
    impersonal_probabilities = impersonal_clf.predict_proba(scaled_test_features)
    return impersonal_predictions, impersonal_probabilities

In [7]:
def personal_pred(personal_features, personal_labels, test_features):
    # build personal model and predict
    personal_scaler = StandardScaler().fit(personal_features)
    scaled_personal_features = personal_scaler.transform(personal_features)
    scaled_test_features = personal_scaler.transform(test_features)

    personal_clf = wisdm.weka_RF()
    personal_clf.set_params(n_estimators=n_trees, n_jobs=n_cores)
    personal_clf.fit(scaled_personal_features, random_personal_labels)
    
    personal_predictions = personal_clf.predict(scaled_test_features)
    personal_probabilities = personal_clf.predict_proba(scaled_test_features)
    return personal_predictions, personal_probabilities

In [20]:
def hybrid_pred(impersonal_features, impersonal_labels, \
                personal_features, personal_labels, \
                test_features, number_of_samples = None, \
                probabilities=None, \
                sampling_function=None):
    if sampling_function == None:
        hybrid_labels = np.hstack((personal_labels, impersonal_labels))
        hybrid_features = np.vstack((personal_features, impersonal_features))
        hybrid_features, hybrid_labels = shuffle_rows(hybrid_features, hybrid_labels)
    else:
        hybrid_features, hybrid_labels = sampling_function(personal_features, personal_labels, probabilities, number=number_of_samples)
    
    hybrid_scaler = StandardScaler().fit(hybrid_features)
    scaled_hybrid_features = hybrid_scaler.transform(hybrid_features)
    scaled_test_features = hybrid_scaler.transform(test_features)

    hybrid_clf = wisdm.weka_RF()
    hybrid_clf.set_params(n_estimators=n_trees, n_jobs=n_cores)
    hybrid_clf.fit(scaled_hybrid_features, hybrid_labels)
    
    hybrid_predictions = hybrid_clf.predict(scaled_test_features)
    hybrid_probabilities = hybrid_clf.predict
    
    return hybrid_predictions, hybrid_probabilities

In [9]:
def confidence_sample(features, labels, probabilities, number=None, top=False):
    confidence_ranking = np.argsort(np.max(probabilities, axis=1))
    
    if not number:
        return features[confidence_ranking], labels[confidence_ranking]
    
    if top:
        return features[confidence_ranking[-number:]], labels[confidence_ranking[-number:]]
    return features[confidence_ranking[:number]], labels[confidence_ranking[:number]]

In [23]:
#experiment setup
number_of_personal_samples = 10
test_size = 30

wisdm.set_data(version="1", make_compatible=True)
impersonal_df = wisdm.remove_all_nan(wisdm.data_df)
impersonal_labels = np.array([t.decode("utf-8") for t in impersonal_df['class'].as_matrix()])
impersonal_features = impersonal_df.as_matrix(columns=[impersonal_df.columns[1:-1]])
impersonal_scaler = StandardScaler().fit(impersonal_features)
scaled_train_X = impersonal_scaler.transform(impersonal_features)
impersonal_clf = wisdm.weka_RF()
impersonal_clf.set_params(n_estimators=n_trees, n_jobs=n_cores)

start=time.time()
print("Training...")
impersonal_clf.fit(scaled_train_X, impersonal_labels)
finished_training = time.time()
print("Finished Training in %s seconds" % (finished_training - start))
wisdm.set_data(version="2", make_compatible=True)
result_rows = []

number_of_personal_samples = 10
ignored_users = []
print("predicting...")
for user_id in wisdm.user_ids:
    print("User : %s" % user_id)
    user_df = wisdm.data_df[wisdm.data_df['user'] == user_id]
    
    if len(user_df) < 40:
        print("Not Enough Data, skipping...")
        ignored_users.append(user_id)
        continue
    
    personal_labels = np.array([t.decode("utf-8") for t in user_df['class'].as_matrix()])
    personal_features = user_df.as_matrix(columns=[user_df.columns[1:-1]])
    
    sss = StratifiedShuffleSplit(n_splits=4, test_size=test_size, train_size = number_of_personal_samples)
    
    personal_scores = []
    impersonal_scores = []
    hybrid_scores = []
    
    shuffle_count = 0
    try:
        for train_index, test_index in sss.split(personal_features, personal_labels):
            # data for personal model
            random_personal_features = personal_features[train_index]
            random_personal_labels = personal_labels[train_index]

            # create an active pool of everything not in the test set for active learning / hybrid model
            active_pool_mask = np.ones(personal_labels.shape, dtype=bool)
            active_pool_mask[test_index] = False
            active_pool_features = personal_features[active_pool_mask]
            active_pool_labels = personal_labels[active_pool_mask]

            # test set
            test_features = personal_features[test_index]
            test_labels = personal_labels[test_index]

            # build personal model and predict
            personal_predictions, _ = personal_pred(random_personal_features, random_personal_labels, test_features)
            personal_score = accuracy_score(test_labels, personal_predictions)
            personal_scores.append(personal_score)

            # build impersonal model and predict
            impersonal_predictions, _ = impersonal_pred(impersonal_clf, test_features)
            impersonal_score = accuracy_score(test_labels, impersonal_predictions)
            impersonal_scores.append(impersonal_score)

            # build hybrid model and predict
            impersonal_probabilities = impersonal_clf.predict_proba(active_pool_features)
            hybrid_predictions, _ = hybrid_pred(impersonal_features, impersonal_labels, \
                                                random_personal_features, random_personal_labels, \
                                                test_features)

            hybrid_score = accuracy_score(test_labels, hybrid_predictions)
            hybrid_scores.append(hybrid_score)
            print("\t impersonal acc : %.3f" % impersonal_score)
            print("\t personal acc : %.3f" % personal_score)
            print("\t hybrid acc : %.3f" % hybrid_score)
            print("")

            result_row = {"user_id" : user_id,
                          "shuffle" : shuffle_count,
                          "impersonal" : impersonal_score,
                          "personal" : personal_score,
                          "hybrid" : hybrid_score}
            result_rows.append(result_row)
            shuffle_count += 1
    except ValueError as ve:
        if "The least populated class" in ve.args[0]:
            print("\tNot enough labeled data for %s" % user_id)
            ignored_users.append(user_id)
            continue
        else:
            raise ve

finished_predicting = time.time()
print("Finished predicting in %s seconds" % (finished_predicting - finished_training))
print("Users without enough data : %s" % ignored_users)

results_df_exp1_10 = pd.DataFrame(result_rows)

Training...
Finished Training in 23.606879711151123 seconds
predicting...
User : 194
	 impersonal acc : 0.633
	 personal acc : 0.733
	 hybrid acc : 0.700

	 impersonal acc : 0.600
	 personal acc : 0.933
	 hybrid acc : 0.867

	 impersonal acc : 0.633
	 personal acc : 0.933
	 hybrid acc : 0.933

	 impersonal acc : 0.733
	 personal acc : 0.967
	 hybrid acc : 0.967

User : 998
	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 0.933

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 0.967

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 1.000

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 1.000

User : 1097
Not Enough Data, skipping...
User : 1104
	 impersonal acc : 0.600
	 personal acc : 0.867
	 hybrid acc : 0.900

	 impersonal acc : 0.533
	 personal acc : 0.833
	 hybrid acc : 0.833

	 impersonal acc : 0.567
	 personal acc : 0.633
	 hybrid acc : 0.933

	 impersonal acc : 0.567
	 personal acc : 0.800
	 hybrid acc : 0.900

User : 1117

	 impersonal acc : 0.067
	 personal acc : 0.933
	 hybrid acc : 0.900

	 impersonal acc : 0.033
	 personal acc : 1.000
	 hybrid acc : 0.933

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 1.000

	 impersonal acc : 0.100
	 personal acc : 0.867
	 hybrid acc : 0.867

User : 1813
Not Enough Data, skipping...
User : 1814
Not Enough Data, skipping...
Finished predicting in 5194.14306139946 seconds
Users without enough data : ['1097', '1205', '1247', '1269', '1276', '1277', '1280', '1480', '1491', '1511', '1518', '1531', '1554', '1679', '1683', '1696', '1723', '1724', '1726', '1745', '1750', '1757', '1758', '1761', '1763', '1797', '1802', '1813', '1814']


In [25]:
results_df_exp1_10.head()

Unnamed: 0,hybrid,impersonal,personal,shuffle,user_id
0,0.7,0.633333,0.733333,0,194
1,0.866667,0.6,0.933333,1,194
2,0.933333,0.633333,0.933333,2,194
3,0.966667,0.733333,0.966667,3,194
4,0.933333,0.0,1.0,0,998


In [27]:
print("Impersonal Accuracies M=%.3f, SD=%.3f" % (results_df_exp1_10['impersonal'].mean(), results_df_exp1_10['impersonal'].std()))
print("Personal Accuracies M=%.3f, SD=%.3f" % (results_df_exp1_10['personal'].mean(), results_df_exp1_10['personal'].std()))
print("Hybrid Accuracies M=%.3f, SD=%.3f" % (results_df_exp1_10['hybrid'].mean(), results_df_exp1_10['hybrid'].std()))

Impersonal Accuracies M=0.366, SD=0.297
Personal Accuracies M=0.903, SD=0.115
Hybrid Accuracies M=0.917, SD=0.105


In [29]:
mean_impersonal = []
mean_personal = []
mean_hybrid = []

for user_id in results_df_exp1_10['user_id'].unique():
    user_df = results_df_exp1_10[results_df_exp1_10['user_id']==user_id]
    mean_impersonal.append(user_df['impersonal'].mean())
    mean_personal.append(user_df['personal'].mean())
    mean_hybrid.append(user_df['hybrid'].mean())

trace0 = go.Box(
    y=mean_impersonal,
    name='Impersonal',
    marker=dict(
        color='red',
    ),
    boxpoints='all',
    jitter=0.3,
    pointpos=-0.5
)

trace1 = go.Box(
    y=mean_personal,
    name='Personal (10 Samples)',
    marker=dict(
        color='blue',
    ),
    boxpoints='all',
    jitter=0.1,
    pointpos=-0.5
)

trace2 = go.Box(
    y=mean_hybrid,
    name='Hybrid (10 Samples)',
    marker=dict(
        color='green',
    ),
    boxpoints='all',
    jitter=0.1,
    pointpos=-0.5
)
data = [trace0, trace1, trace2]
layout = go.Layout(yaxis=dict(title="Accuracy"), showlegend=False)
fig=go.Figure(data=data,layout=layout)
iplot(fig)

# Experiment 1 with 30 samples

In [None]:
#experiment setup
number_of_personal_samples = 10
test_size = 30

wisdm.set_data(version="1", make_compatible=True)
impersonal_df = wisdm.remove_all_nan(wisdm.data_df)
impersonal_labels = np.array([t.decode("utf-8") for t in impersonal_df['class'].as_matrix()])
impersonal_features = impersonal_df.as_matrix(columns=[impersonal_df.columns[1:-1]])
impersonal_scaler = StandardScaler().fit(impersonal_features)
scaled_train_X = impersonal_scaler.transform(impersonal_features)
impersonal_clf = wisdm.weka_RF()
impersonal_clf.set_params(n_estimators=n_trees, n_jobs=n_cores)

start=time.time()
print("Training...")
impersonal_clf.fit(scaled_train_X, impersonal_labels)
finished_training = time.time()
print("Finished Training in %s seconds" % (finished_training - start))
wisdm.set_data(version="2", make_compatible=True)
result_rows = []

number_of_personal_samples = 10
ignored_users = []
print("predicting...")
for user_id in wisdm.user_ids:
    print("User : %s" % user_id)
    user_df = wisdm.data_df[wisdm.data_df['user'] == user_id]
    
    if len(user_df) < 40:
        print("Not Enough Data, skipping...")
        ignored_users.append(user_id)
        continue
    
    personal_labels = np.array([t.decode("utf-8") for t in user_df['class'].as_matrix()])
    personal_features = user_df.as_matrix(columns=[user_df.columns[1:-1]])
    
    sss = StratifiedShuffleSplit(n_splits=4, test_size=test_size, train_size = number_of_personal_samples)
    
    personal_scores = []
    impersonal_scores = []
    hybrid_scores = []
    
    shuffle_count = 0
    try:
        for train_index, test_index in sss.split(personal_features, personal_labels):
            # data for personal model
            random_personal_features = personal_features[train_index]
            random_personal_labels = personal_labels[train_index]

            # create an active pool of everything not in the test set for active learning / hybrid model
            active_pool_mask = np.ones(personal_labels.shape, dtype=bool)
            active_pool_mask[test_index] = False
            active_pool_features = personal_features[active_pool_mask]
            active_pool_labels = personal_labels[active_pool_mask]

            # test set
            test_features = personal_features[test_index]
            test_labels = personal_labels[test_index]

            # build personal model and predict
            personal_predictions, _ = personal_pred(random_personal_features, random_personal_labels, test_features)
            personal_score = accuracy_score(test_labels, personal_predictions)
            personal_scores.append(personal_score)

            # build impersonal model and predict
            impersonal_predictions, _ = impersonal_pred(impersonal_clf, test_features)
            impersonal_score = accuracy_score(test_labels, impersonal_predictions)
            impersonal_scores.append(impersonal_score)

            # build hybrid model and predict
            impersonal_probabilities = impersonal_clf.predict_proba(active_pool_features)
            hybrid_predictions, _ = hybrid_pred(impersonal_features, impersonal_labels, \
                                                random_personal_features, random_personal_labels, \
                                                test_features)

            hybrid_score = accuracy_score(test_labels, hybrid_predictions)
            hybrid_scores.append(hybrid_score)
            print("\t impersonal acc : %.3f" % impersonal_score)
            print("\t personal acc : %.3f" % personal_score)
            print("\t hybrid acc : %.3f" % hybrid_score)
            print("")

            result_row = {"user_id" : user_id,
                          "shuffle" : shuffle_count,
                          "impersonal" : impersonal_score,
                          "personal" : personal_score,
                          "hybrid" : hybrid_score}
            result_rows.append(result_row)
            shuffle_count += 1
    except ValueError as ve:
        if "The least populated class" in ve.args[0]:
            print("\tNot enough labeled data for %s" % user_id)
            ignored_users.append(user_id)
            continue
        else:
            raise ve

finished_predicting = time.time()
print("Finished predicting in %s seconds" % (finished_predicting - finished_training))
print("Users without enough data : %s" % ignored_users)

results_df_exp1_30 = pd.DataFrame(result_rows)

Training...
Finished Training in 23.799126148223877 seconds
predicting...
User : 194
	 impersonal acc : 0.767
	 personal acc : 0.967
	 hybrid acc : 0.933

	 impersonal acc : 0.733
	 personal acc : 0.900
	 hybrid acc : 0.900

	 impersonal acc : 0.667
	 personal acc : 0.800
	 hybrid acc : 0.767

	 impersonal acc : 0.767
	 personal acc : 0.933
	 hybrid acc : 0.967

User : 998
	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 1.000

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 0.967

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 1.000

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 1.000

User : 1097
Not Enough Data, skipping...
User : 1104
	 impersonal acc : 0.600
	 personal acc : 0.733
	 hybrid acc : 0.833

	 impersonal acc : 0.667
	 personal acc : 0.733
	 hybrid acc : 0.733

	 impersonal acc : 0.500
	 personal acc : 0.800
	 hybrid acc : 0.800

	 impersonal acc : 0.800
	 personal acc : 0.867
	 hybrid acc : 0.933

User : 1117

In [None]:
mean_impersonal = []
mean_personal = []
mean_hybrid = []

for user_id in results_df_exp1_30['user_id'].unique():
    user_df = results_df_exp1_30[results_df_exp1_30['user_id']==user_id]
    mean_impersonal.append(user_df['impersonal'].mean())
    mean_personal.append(user_df['personal'].mean())
    mean_hybrid.append(user_df['hybrid'].mean())

trace0 = go.Box(
    y=mean_impersonal,
    name='Impersonal',
    marker=dict(
        color='red',
    ),
    boxpoints='all',
    jitter=0.3,
    pointpos=-0.5
)

trace1 = go.Box(
    y=mean_personal,
    name='Personal (10 Samples)',
    marker=dict(
        color='blue',
    ),
    boxpoints='all',
    jitter=0.1,
    pointpos=-0.5
)

trace2 = go.Box(
    y=mean_hybrid,
    name='Hybrid (10 Samples)',
    marker=dict(
        color='green',
    ),
    boxpoints='all',
    jitter=0.1,
    pointpos=-0.5
)
data = [trace0, trace1, trace2]
layout = go.Layout(yaxis=dict(title="Accuracy"), showlegend=False)
fig=go.Figure(data=data,layout=layout)
iplot(fig)

# Experiment 2

In [None]:
#experiment setup
wisdm.set_data(version="1", make_compatible=True)
impersonal_df = wisdm.remove_all_nan(wisdm.data_df)
impersonal_labels = np.array([t.decode("utf-8") for t in impersonal_df['class'].as_matrix()])
impersonal_features = impersonal_df.as_matrix(columns=[impersonal_df.columns[1:-1]])
impersonal_scaler = StandardScaler().fit(impersonal_features)
scaled_train_X = impersonal_scaler.transform(impersonal_features)
impersonal_clf = wisdm.weka_RF()
impersonal_clf.set_params(n_estimators=n_trees, n_jobs=n_cores)

start=time.time()
print("Training...")
impersonal_clf.fit(scaled_train_X, impersonal_labels)
finished_training = time.time()
print("Finished Training in %s seconds" % (finished_training - start))
wisdm.set_data(version="2", make_compatible=True)
result_rows = []

number_of_personal_samples = 10
ignored_users = []
print("predicting...")

for user_id in wisdm.user_ids:
    print("User : %s" % user_id)
    user_df = wisdm.data_df[wisdm.data_df['user'] == user_id]
    
    if len(user_df) < 40:
        print("Not Enough Data, skipping...")
        ignored_users.append(user_id)
        continue
    
    personal_labels = np.array([t.decode("utf-8") for t in user_df['class'].as_matrix()])
    personal_features = user_df.as_matrix(columns=[user_df.columns[1:-1]])
    
    scaled_personal_features = impersonal_scaler.transform(personal_features)
    
    impersonal_probabilities = impersonal_clf.predict_prob(personal_features)
    impersonal_predictions = impersonal_clf.predict(personal_features)
    
    # get ranking
    confidence_ranking = np.argsort(np.max(impersonal_probabilities, axis=1))
    
    ranked_predictions = impersonal_predictions[confidence_ranking]
    ranked_truth = personal_labels[confidence_ranking]
    
    result_row = {"user_id" : user_id,
                  "top30" : accuracy_score(ranked_truth[-30:], ranked_predictions[-30:]),
                  "bottom30" : accuracy_score(ranked_truth[:30], ranked_predictions[:30]),
                  "overall" : accuracy_score(personal_labels, impersonal_predictions)}
    result_rows.append(result_row)
    
finished_predicting = time.time()
print("Finished predicting in %s seconds" % (finished_predicting - finished_training))
print("Users without enough data : %s" % ignored_users)

results_df_exp2 = pd.DataFrame(result_rows)

# Experiment 3