In [55]:
from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit, ShuffleSplit
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import brier_score_loss, accuracy_score, precision_recall_curve, average_precision_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler, label_binarize

In [179]:
from wisdm import wisdm
import random
import numpy as np
import pandas as pd
from collections import Counter
import time
from scipy import stats

In [3]:
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go

# Uncertainty Sampling

# How good is an impersonal model at assessing the probability of it's prediction being correct

When we think about a model's ability to assess the probability of it's prediction being correct, we should perhaps think of this in terms of precision. Generally, when we talk about precision, we mean the number predictions we made
for a particular label that were correct divided by the number of predictions we made for that same label that were false.  If our classifier's confidence is well calibrated, then the high confidence predictions should be nearly all correct and it's low confidence predictions should be nearly all incorrect and the precision among the high confidence predictions should be higher than it's precision with all predictions.

To understand how a model that dynamically becomes more personalized as soon as the user starts using it, we'll have to first assess the impersonal model's confidence function.

If we think of uncertainty sampling as an information retrieval problem, then we want a method that his high precision (selected instances are truly impactful to the classifier) and lower recall (so that the user is not labeling too many instances).

In [7]:
# these are the users that were ignored in the personal analysis in the first section for having too few data
users_to_ignore = ['1097','1205','1247','1269','1276','1277','1280','1480','1491','1511','1518','1531','1554',
                   '1679','1683','1696', '1723','1724','1726','1745','1750','1757','1761','1763','1797','1802',
                   '1813','1814']

In [216]:
wisdm.set_data(version="1", make_compatible=True)
impersonal_df = wisdm.remove_all_nan(wisdm.data_df)
impersonal_labels = np.array([t.decode("utf-8") for t in impersonal_df['class'].as_matrix()])
impersonal_features = impersonal_df.as_matrix(columns=[impersonal_df.columns[1:-1]])
impersonal_scaler = StandardScaler().fit(impersonal_features)
scaled_train_X = impersonal_scaler.transform(impersonal_features)
impersonal_clf = wisdm.weka_RF()
impersonal_clf.set_params(n_estimators=10000, n_jobs=26)

start=time.time()
print("Training...")
impersonal_clf.fit(scaled_train_X, impersonal_labels)
finished_training = time.time()
print("Finished Training in %s seconds" % (finished_training - start))
wisdm.set_data(version="2", make_compatible=True)
results = {}

print("predicting...")
for user_id in wisdm.user_ids:
    if user_id in users_to_ignore:
        continue
    #print("User : %s" % user_id)
    user_df = wisdm.data_df[wisdm.data_df['user'] == user_id]
    
    personal_labels = np.array([t.decode("utf-8") for t in user_df['class'].as_matrix()])
    personal_features = user_df.as_matrix(columns=[user_df.columns[1:-1]])
    personal_label_counts = Counter(personal_labels)

    scaled_test_X = impersonal_scaler.transform(personal_features)
    
    predictions = impersonal_clf.predict(scaled_test_X)
    prediction_probas = impersonal_clf.predict_proba(scaled_test_X)
    
    results[user_id] = {"probabilities" : prediction_probas,
                        "predictions" : predictions,
                        "true_labels" : personal_labels}
finished_predicting = time.time()
print("Finished predicting in %s seconds" % (finished_predicting - finished_training))

Training...
Finished Training in 23.314424991607666 seconds
predicting...
Finished predicting in 111.67179775238037 seconds


# In aggregate : How accurate are our most confident predictions?

In [219]:
top_30_accs = {}
top_30_confidence = []

bottom_30_accs = {}
bottom_30_confidence = []

scores = []

for user_id in wisdm.user_ids:
    if user_id in users_to_ignore:
        continue
    
    probas = results[user_id]['probabilities']
    preds = results[user_id]['predictions']
    true = results[user_id]['true_labels']
    
    score = accuracy_score(true, preds)
    scores.append(score)
    
    confidence_ranking = np.argsort(np.max(probas, axis=1))
    
    most_confident = np.mean(np.max(probas[confidence_ranking[-30:]],axis=1))
    top_30_confidence.append(most_confident)
    most_confident_preds = preds[confidence_ranking[-30:]]
    most_confident_truth = true[confidence_ranking[-30:]]
    top_30_accs[user_id] = accuracy_score(most_confident_truth, most_confident_preds)
    
    least_confident =  np.mean(np.max(probas[confidence_ranking[:30]],axis=1))
    bottom_30_confidence.append(least_confident)
    least_confident_preds = preds[confidence_ranking[:30]]
    least_confident_truth = true[confidence_ranking[:30]]
    bottom_30_accs[user_id] = accuracy_score(least_confident_truth, least_confident_preds)

m = np.mean(scores)
sd = stats.sem(scores)
print("All Accuracy : M=%.3f, SEM=%.3f" % (m,stats.sem(scores)))
top_30_acc_vals = [v for v in top_30_accs.values()]
print("Top 30 Confident Accuracy : M=%.3f, SD=%.3f" % (np.mean(top_30_acc_vals), stats.sem(top_30_acc_vals)))
bottom_30_acc_vals = [v for v in bottom_30_accs.values()]
print("Bottom 30 Confident Accuracy : M=%.3f, SD=%.3f" % (np.mean(bottom_30_acc_vals), stats.sem(bottom_30_acc_vals)))

All Accuracy : M=0.379, SEM=0.060
Top 30 Confident Accuracy : M=0.583, SD=0.077
Bottom 30 Confident Accuracy : M=0.199, SD=0.054


In [220]:
np.mean(top_30_confidence)

0.52335897435897438

In [221]:
np.mean(bottom_30_confidence)

0.37373705128205131

In [222]:
trace = go.Bar(y=[np.mean(scores), np.mean(top_30_acc_vals), np.mean(bottom_30_acc_vals)],
               x=["All Values", "30 Most Confident", "30 Least Confident"],
               error_y=dict(type="data", 
                            array=[stats.sem(scores), stats.sem(top_30_acc_vals), stats.sem(bottom_30_acc_vals)],
                            visible=True)
              )
data=[trace]

layout=go.Layout(yaxis=dict(range=[0, 1.0], title="accuracy"))
fig = go.Figure(data=data, layout=layout)
iplot(fig)

# [Calibrating The Model](http://scikit-learn.org/stable/modules/calibration.html)

In [159]:
wisdm.set_data(version="1", make_compatible=True)
impersonal_df = wisdm.remove_all_nan(wisdm.data_df)
impersonal_labels = np.array([t.decode("utf-8") for t in impersonal_df['class'].as_matrix()])
impersonal_features = impersonal_df.as_matrix(columns=[impersonal_df.columns[1:-1]])
impersonal_scaler = StandardScaler().fit(impersonal_features)
scaled_train_X = impersonal_scaler.transform(impersonal_features)
impersonal_clf = wisdm.weka_RF()

impersonal_clf.set_params(n_estimators=10000, n_jobs=30)

start=time.time()
print("Training...")
impersonal_clf = CalibratedClassifierCV(impersonal_clf, method="sigmoid")
impersonal_clf.fit(scaled_train_X, impersonal_labels)
finished_training = time.time()
print("Finished Training in %s seconds" % (finished_training - start))
wisdm.set_data(version="2", make_compatible=True)

results = {}

print("predicting...")
for user_id in wisdm.user_ids:
    if user_id in users_to_ignore:
        continue
    #print("User : %s" % user_id)
    user_df = wisdm.data_df[wisdm.data_df['user'] == user_id]
    
    personal_labels = np.array([t.decode("utf-8") for t in user_df['class'].as_matrix()])
    personal_features = user_df.as_matrix(columns=[user_df.columns[1:-1]])
    personal_label_counts = Counter(personal_labels)

    scaled_test_X = impersonal_scaler.transform(personal_features)
    
    predictions = impersonal_clf.predict(scaled_test_X)
    prediction_probas = impersonal_clf.predict_proba(scaled_test_X)
    
    results[user_id] = {"probabilities" : prediction_probas,
                        "predictions" : predictions,
                        "true_labels" : personal_labels}
finished_predicting = time.time()
print("Finished predicting in %s seconds" % (finished_predicting - finished_training))

Training...
Finished Training in 74.36170840263367 seconds
predicting...
User : 194
User : 998
User : 1104
User : 1117
User : 1238
User : 1246
User : 1253
User : 1274
User : 1319
User : 1320
User : 1477
User : 1512
User : 1559
User : 1603
User : 1676
User : 1703
User : 1707
User : 1742
User : 1758
User : 1759
User : 1774
User : 1775
User : 1778
User : 1793
User : 1799
User : 1809
Finished predicting in 329.64492988586426 seconds


In [187]:
calibrated_top_30_accs = {}
calibrated_top_30_confidence = []

calibrated_bottom_30_accs = {}
calibrated_bottom_30_confidence = []

calibrated_scores = []

for user_id in wisdm.user_ids:
    if user_id in users_to_ignore:
        continue
    
    probas = results[user_id]['probabilities']
    preds = results[user_id]['predictions']
    true = results[user_id]['true_labels']
    
    score = accuracy_score(true, preds)
    calibrated_scores.append(score)
    
    confidence_ranking = np.argsort(np.max(probas, axis=1))
    
    most_confident = np.max(probas[confidence_ranking[-30:]],axis=1)
    calibrated_top_30_confidence.append(most_confident)
    most_confident_preds = preds[confidence_ranking[-30:]]
    most_confident_truth = true[confidence_ranking[-30:]]
    calibrated_top_30_accs[user_id] = accuracy_score(most_confident_truth, most_confident_preds)
    
    least_confident = np.max(probas[confidence_ranking[:30]],axis=1)
    calibrated_bottom_30_confidence.append(least_confident)
    least_confident_preds = preds[confidence_ranking[:30]]
    least_confident_truth = true[confidence_ranking[:30]]
    calibrated_bottom_30_accs[user_id] = accuracy_score(least_confident_truth, least_confident_preds)

m = np.mean(calibrated_scores)
sd = stats.sem(calibrated_scores)
print("All Accuracy : M=%.3f, SD=%.3f" % (m,sd))
calibrated_top_30_acc_vals = [v for v in calibrated_top_30_accs.values()]
print("Top 30 Confident Accuracy : M=%.3f, SD=%.3f" % (np.mean(calibrated_top_30_acc_vals), stats.sem(calibrated_top_30_acc_vals)))
calibrated_bottom_30_acc_vals = [v for v in calibrated_bottom_30_accs.values()]
print("Bottom 30 Confident Accuracy : M=%.3f, SD=%.3f" % (np.mean(calibrated_bottom_30_acc_vals), stats.sem(calibrated_bottom_30_acc_vals)))

All Accuracy : M=0.410, SD=0.058
Top 30 Confident Accuracy : M=0.572, SD=0.070
Bottom 30 Confident Accuracy : M=0.227, SD=0.056


In [192]:
np.mean([np.mean(c) for c in calibrated_top_30_confidence])

0.64298118474689592

In [193]:
np.mean([np.mean(c) for c in calibrated_bottom_30_confidence])

0.44295347213634523

In [184]:
trace = go.Bar(y=[np.mean(calibrated_scores), np.mean(calibrated_top_30_acc_vals), np.mean(calibrated_bottom_30_acc_vals)],
               x=["All Values", "30 Most Confident", "30 Least Confident"],
               error_y=dict(type="data", 
                            array=[stats.sem(calibrated_scores), stats.sem(calibrated_top_30_acc_vals), stats.sem(calibrated_bottom_30_acc_vals)],
                            visible=True)
              )
data=[trace]

layout=go.Layout(yaxis=dict(range=[0, 1.0], title="accuracy"))
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [185]:
calibration_trace = go.Bar(y=[np.mean(calibrated_scores), np.mean(calibrated_top_30_acc_vals), np.mean(calibrated_bottom_30_acc_vals)],
               x=["All Values", "30 Most Confident", "30 Least Confident"],
               error_y=dict(type="data", 
                            array=[stats.sem(calibrated_scores), stats.sem(calibrated_top_30_acc_vals), stats.sem(calibrated_bottom_30_acc_vals)],
                            visible=True),
               name="No Calibration"
              )

no_calibration_trace = go.Bar(y=[np.mean(scores), np.mean(top_30_acc_vals), np.mean(bottom_30_acc_vals)],
               x=["All Values", "30 Most Confident", "30 Least Confident"],
               error_y=dict(type="data", 
                            array=[stats.sem(scores), stats.sem(top_30_acc_vals), stats.sem(bottom_30_acc_vals)],
                            visible=True),
               name="With Calibration"
              )
data=[no_calibration_trace, calibration_trace]

layout=go.Layout(yaxis=dict(range=[0, 1.0], title="accuracy"),
                 barmode='group')
fig = go.Figure(data=data, layout=layout)
iplot(fig)

We can use this probability calibration method, but it does not appear to make much of a difference

# Determine mean accuracy, precision, recall for participants

In [168]:
uniform_prob = 1. / len(labels)
uniform_prob # equal probability of a label being true

0.2

In [170]:
def uniform_margin_ranking(probs):
    diffs_from_uniform = []
    
    for ind,prob in enumerate(probs):
        max_prob = np.max(prob)
        diffs_from_uniform.append(max_prob - uniform_prob)
    rankings = np.argsort(diffs_from_uniform)
    return rankings

In [198]:
def top_margin_ranking(probs):
    diffs_from_second = []
    
    for ind, prob in enumerate(probs):
        sorted_probs = np.argsort(prob)
        max_prob = prob[sorted_probs[-1]]
        second_max_prob = prob[sorted_probs[-2]]
        diffs_from_second.append(max_prob - second_max_prob)
    rankings = np.argsort(diffs_from_second)
    return rankings

In [205]:
def entropy_ranking(probs):
    entropies = []
    
    for ind, prob in enumerate(probs):
        en = [p*np.log(p) for p in prob]
        entropies.append(np.sum(en))
    rankings = np.argsort(entropies)
    return rankings

In [207]:
uniform_margin_top_30_accs = {}
uniform_margin_top_30_confidence = []

uniform_margin_bottom_30_accs = {}
uniform_margin_bottom_30_confidence = []

scores = []

for user_id in wisdm.user_ids:
    if user_id in users_to_ignore:
        continue
    
    probas = np.array(results[user_id]['probabilities'])
    preds = np.array(results[user_id]['predictions'])
    true = np.array(results[user_id]['true_labels'])
    
    score = accuracy_score(true, preds)
    scores.append(score)
    
    confidence_ranking = uniform_margin_ranking(probas)
    
    most_confident = np.max(probas[confidence_ranking[-30:]], axis=1)
    uniform_margin_top_30_confidence.append(most_confident)
    most_confident_preds = preds[confidence_ranking[-30:]]
    most_confident_truth = true[confidence_ranking[-30:]]
    uniform_margin_top_30_accs[user_id] = accuracy_score(most_confident_truth, most_confident_preds)
    
    uniform_margin_least_confident = np.max(probas[confidence_ranking[:30]], axis=1)
    uniform_margin_bottom_30_confidence.append(least_confident)
    least_confident_preds = preds[confidence_ranking[:30]]
    least_confident_truth = true[confidence_ranking[:30]]
    uniform_margin_bottom_30_accs[user_id] = accuracy_score(least_confident_truth, least_confident_preds)

m = np.mean(scores)
sd = np.std(scores)
print("All Accuracy : M=%.3f, SEM=%.3f" % (m,stats.sem(scores)))
uniform_margin_top_30_acc_vals = [v for v in uniform_margin_top_30_accs.values()]
print("Top 30 Confident Accuracy : M=%.3f, SD=%.3f" % (np.mean(uniform_margin_top_30_acc_vals), np.std(uniform_margin_top_30_acc_vals)))
uniform_margin_bottom_30_acc_vals = [v for v in uniform_margin_bottom_30_accs.values()]
print("Bottom 30 Confident Accuracy : M=%.3f, SD=%.3f" % (np.mean(uniform_margin_bottom_30_acc_vals), np.std(uniform_margin_bottom_30_acc_vals)))

All Accuracy : M=0.410, SEM=0.058
Top 30 Confident Accuracy : M=0.572, SD=0.351
Bottom 30 Confident Accuracy : M=0.227, SD=0.278


In [208]:
top_margin_top_30_accs = {}
top_margin_top_30_confidence = []

top_margin_bottom_30_accs = {}
top_margin_bottom_30_confidence = []

scores = []

for user_id in wisdm.user_ids:
    if user_id in users_to_ignore:
        continue
    
    probas = np.array(results[user_id]['probabilities'])
    preds = np.array(results[user_id]['predictions'])
    true = np.array(results[user_id]['true_labels'])
    
    score = accuracy_score(true, preds)
    scores.append(score)
    
    confidence_ranking = top_margin_ranking(probas)
    
    most_confident = np.max(probas[confidence_ranking[-30:]], axis=1)
    top_margin_top_30_confidence.append(most_confident)
    most_confident_preds = preds[confidence_ranking[-30:]]
    most_confident_truth = true[confidence_ranking[-30:]]
    top_margin_top_30_accs[user_id] = accuracy_score(most_confident_truth, most_confident_preds)
    
    top_margin_least_confident = np.max(probas[confidence_ranking[:30]], axis=1)
    top_margin_bottom_30_confidence.append(least_confident)
    least_confident_preds = preds[confidence_ranking[:30]]
    least_confident_truth = true[confidence_ranking[:30]]
    top_margin_bottom_30_accs[user_id] = accuracy_score(least_confident_truth, least_confident_preds)

m = np.mean(scores)
sd = np.std(scores)
print("All Accuracy : M=%.3f, SEM=%.3f" % (m,stats.sem(scores)))
top_margin_top_30_acc_vals = [v for v in top_margin_top_30_accs.values()]
print("Top 30 Confident Accuracy : M=%.3f, SD=%.3f" % (np.mean(top_margin_top_30_acc_vals), np.std(top_margin_top_30_acc_vals)))
top_margin_bottom_30_acc_vals = [v for v in top_margin_bottom_30_accs.values()]
print("Bottom 30 Confident Accuracy : M=%.3f, SD=%.3f" % (np.mean(top_margin_bottom_30_acc_vals), np.std(top_margin_bottom_30_acc_vals)))

All Accuracy : M=0.410, SEM=0.058
Top 30 Confident Accuracy : M=0.473, SD=0.378
Bottom 30 Confident Accuracy : M=0.401, SD=0.300


In [210]:
entropy_top_30_accs = {}
entropy_top_30_confidence = []

entropy_bottom_30_accs = {}
entropy_bottom_30_confidence = []

scores = []

for user_id in wisdm.user_ids:
    if user_id in users_to_ignore:
        continue
    
    probas = np.array(results[user_id]['probabilities'])
    preds = np.array(results[user_id]['predictions'])
    true = np.array(results[user_id]['true_labels'])
    
    score = accuracy_score(true, preds)
    scores.append(score)
    
    confidence_ranking = entropy_ranking(probas)
    
    most_confident = np.max(probas[confidence_ranking[-30:]], axis=1)
    entropy_top_30_confidence.append(most_confident)
    most_confident_preds = preds[confidence_ranking[-30:]]
    most_confident_truth = true[confidence_ranking[-30:]]
    entropy_top_30_accs[user_id] = accuracy_score(most_confident_truth, most_confident_preds)
    
    entropy_least_confident = np.max(probas[confidence_ranking[:30]], axis=1)
    entropy_bottom_30_confidence.append(least_confident)
    least_confident_preds = preds[confidence_ranking[:30]]
    least_confident_truth = true[confidence_ranking[:30]]
    entropy_bottom_30_accs[user_id] = accuracy_score(least_confident_truth, least_confident_preds)

m = np.mean(scores)
sd = np.std(scores)
print("All Accuracy : M=%.3f, SEM=%.3f" % (m,stats.sem(scores)))
entropy_top_30_acc_vals = [v for v in entropy_top_30_accs.values()]
print("Top 30 Confident Accuracy : M=%.3f, sem=%.3f" % (np.mean(entropy_top_30_acc_vals), stats.sem(entropy_top_30_acc_vals)))
entropy_bottom_30_acc_vals = [v for v in entropy_bottom_30_accs.values()]
print("Bottom 30 Confident Accuracy : M=%.3f, sem=%.3f" % (np.mean(entropy_bottom_30_acc_vals), stats.sem(entropy_bottom_30_acc_vals)))

All Accuracy : M=0.410, SEM=0.058
Top 30 Confident Accuracy : M=0.618, sem=0.068
Bottom 30 Confident Accuracy : M=0.201, sem=0.053


In [238]:
x = np.array([[1,2,3],
              [4,5,6],
              [7,8,9]])
mask = np.ones(x.shape[0], dtype=bool)
mask[[1]] = False
x[mask]

array([[1, 2, 3],
       [7, 8, 9]])

# Uncertainty Sampling

In [228]:
def shuffle_rows(features, labels):
    permutation = np.random.permutation(features.shape[0])
    return features[permutation], labels[permutation]

## The number of personal samples = 30
   ### For either the personal or hybrid model

In [260]:
wisdm.set_data(version="1", make_compatible=True)
impersonal_df = wisdm.remove_all_nan(wisdm.data_df)
impersonal_labels = np.array([t.decode("utf-8") for t in impersonal_df['class'].as_matrix()])
impersonal_features = impersonal_df.as_matrix(columns=[impersonal_df.columns[1:-1]])
impersonal_scaler = StandardScaler().fit(impersonal_features)
scaled_train_X = impersonal_scaler.transform(impersonal_features)
impersonal_clf = wisdm.weka_RF()
impersonal_clf.set_params(n_estimators=2000, n_jobs=4)

start=time.time()
print("Training...")
impersonal_clf.fit(scaled_train_X, impersonal_labels)
finished_training = time.time()
print("Finished Training in %s seconds" % (finished_training - start))
wisdm.set_data(version="2", make_compatible=True)
result_rows = []

number_of_personal_samples = 30
ignored_users = []
print("predicting...")
for user_id in wisdm.user_ids:
    if user_id in users_to_ignore:
        continue
    print("User : %s" % user_id)
    user_df = wisdm.data_df[wisdm.data_df['user'] == user_id]
    
    personal_labels = np.array([t.decode("utf-8") for t in user_df['class'].as_matrix()])
    personal_features = user_df.as_matrix(columns=[user_df.columns[1:-1]])
    
    sss = StratifiedShuffleSplit(n_splits=4, test_size=30, train_size = number_of_personal_samples)
    
    personal_scores = []
    impersonal_scores = []
    hybrid_scores = []
    
    shuffle_count = 0
    try:
        for train_index, test_index in sss.split(personal_features, personal_labels):
            # data for personal model
            random_personal_features = personal_features[train_index]
            random_personal_labels = personal_labels[train_index]

            # create an active pool of everything not in the test set for active learning / hybrid model
            active_pool_mask = np.ones(personal_labels.shape, dtype=bool)
            active_pool_mask[test_index] = False
            active_pool_features = personal_features[active_pool_mask]
            active_pool_labels = personal_labels[active_pool_mask]

            # test set
            test_features = personal_features[test_index]
            test_labels = personal_labels[test_index]

            # build personal model and predict
            personal_scaler = StandardScaler().fit(random_personal_features)
            scaled_personal_features = personal_scaler.transform(random_personal_features)
            scaled_test_features = personal_scaler.transform(test_features)

            personal_clf = wisdm.weka_RF()
            personal_clf.set_params(n_estimators=2000, n_jobs=4)
            personal_clf.fit(scaled_personal_features, random_personal_labels)
            personal_score = accuracy_score(test_labels, personal_clf.predict(scaled_test_features))
            personal_scores.append(personal_score)

            # build impersonal model and predict
            scaled_test_features = impersonal_scaler.transform(test_features)
            impersonal_score = accuracy_score(test_labels, impersonal_clf.predict(scaled_test_features))
            impersonal_scores.append(impersonal_score)

            # determine active samples
            scaled_active_pool_features = impersonal_scaler.transform(active_pool_features)
            impersonal_probabilities = impersonal_clf.predict_proba(scaled_active_pool_features)

            confidence_ranking = np.argsort(np.max(impersonal_probabilities, axis=1))

            least_certain_features = active_pool_features[confidence_ranking[:number_of_personal_samples]]
            least_certain_labels = active_pool_labels[confidence_ranking[:number_of_personal_samples]]

            hybrid_features = np.vstack((impersonal_features, least_certain_features))
            hybrid_labels = np.hstack((impersonal_labels, least_certain_labels))
            hybrid_features, hybrid_labels = shuffle_rows(hybrid_features, hybrid_labels)

            hybrid_scaler = StandardScaler().fit(hybrid_features)
            scaled_hybrid_features = hybrid_scaler.transform(hybrid_features)
            scaled_test_features = hybrid_scaler.transform(test_features)

            hybrid_clf = wisdm.weka_RF()
            hybrid_clf.set_params(n_estimators=2000, n_jobs=4)
            hybrid_clf.fit(scaled_hybrid_features, hybrid_labels)

            hybrid_score = accuracy_score(test_labels, hybrid_clf.predict(scaled_test_features))
            hybrid_scores.append(hybrid_score)
            print("\t impersonal acc : %.3f" % impersonal_score)
            print("\t personal acc : %.3f" % personal_score)
            print("\t hybrid acc : %.3f" % hybrid_score)
            print("")

            result_row = {"user_id" : user_id,
                          "shuffle" : shuffle_count,
                          "impersonal" : impersonal_score,
                          "personal" : personal_score,
                          "hybrid" : hybrid_score}
            result_rows.append(result_row)
            shuffle_count += 1
    except ValueError as ve:
        if "The least populated class" in ve.args[0]:
            print("\tNot enough labeled data for %s" % user_id)
            ignored_users.append(user_id)
            continue
        else:
            raise ve

finished_predicting = time.time()
print("Finished predicting in %s seconds" % (finished_predicting - finished_training))
print("Users without enough data : %s" % ignored_users)

Training...
Finished Training in 6.937714099884033 seconds
predicting...
User : 194
	 impersonal acc : 0.867
	 personal acc : 0.933
	 hybrid acc : 0.800

	 impersonal acc : 0.800
	 personal acc : 1.000
	 hybrid acc : 0.667

	 impersonal acc : 0.600
	 personal acc : 1.000
	 hybrid acc : 0.667

	 impersonal acc : 0.733
	 personal acc : 0.933
	 hybrid acc : 0.800

User : 998
	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 1.000

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 1.000

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 1.000

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 1.000

User : 1104
	 impersonal acc : 0.600
	 personal acc : 0.867
	 hybrid acc : 0.467

	 impersonal acc : 0.733
	 personal acc : 0.867
	 hybrid acc : 0.533

	 impersonal acc : 0.467
	 personal acc : 1.000
	 hybrid acc : 0.400

	 impersonal acc : 0.400
	 personal acc : 1.000
	 hybrid acc : 0.400

User : 1117
	 impersonal acc : 0.533
	 personal acc :

In [261]:
results_df_30 = pd.DataFrame(result_rows)

In [262]:
results_df_30.describe()

Unnamed: 0,hybrid,impersonal,personal,shuffle
count,100.0,100.0,100.0,100.0
mean,0.800667,0.362,0.960667,1.5
std,0.203559,0.300431,0.078764,1.123666
min,0.4,0.0,0.6,0.0
25%,0.666667,0.066667,0.933333,0.75
50%,0.866667,0.333333,1.0,1.5
75%,1.0,0.533333,1.0,2.25
max,1.0,1.0,1.0,3.0


In [263]:
mean_hybrid_scores_by_user = []
mean_impersonal_scores_by_user = []
mean_personal_scores_by_user = []

for user_id in results_df_30['user_id'].unique():
    user_df = results_df_30[results_df_30['user_id'] == user_id]
    mean_hybrid_scores_by_user.append(user_df['hybrid'].mean())
    mean_impersonal_scores_by_user.append(user_df['impersonal'].mean())
    mean_personal_scores_by_user.append(user_df['personal'].mean())
    
impersonal_trace = go.Box(y=mean_impersonal_scores_by_user,
                          name="Impersonal",
                          marker=dict(color='red'),
                          boxpoints='all',
                          jitter=0.3,
                          pointpos=-0.5)

personal_trace = go.Box(y=mean_personal_scores_by_user,
                        name="Personal",
                        marker=dict(color="blue"),
                        boxpoints="all",
                        jitter=0.3,
                        pointpos=-0.5)

hybrid_trace = go.Box(y=mean_hybrid_scores_by_user,
                        name="Hybrid with 30 least certain samples",
                        marker=dict(color="purple"),
                        boxpoints="all",
                        jitter=0.3,
                        pointpos=-0.5)

data = [impersonal_trace, personal_trace, hybrid_trace]
layout = go.Layout(yaxis=dict(title="Accuracy"), showlegend=False)
fig=go.Figure(data=data,layout=layout)
iplot(fig)

## The number of personal samples = 10
   ### For either the personal or hybrid model

In [290]:
wisdm.set_data(version="1", make_compatible=True)
impersonal_df = wisdm.remove_all_nan(wisdm.data_df)
impersonal_labels = np.array([t.decode("utf-8") for t in impersonal_df['class'].as_matrix()])
impersonal_features = impersonal_df.as_matrix(columns=[impersonal_df.columns[1:-1]])
impersonal_scaler = StandardScaler().fit(impersonal_features)
scaled_train_X = impersonal_scaler.transform(impersonal_features)
impersonal_clf = wisdm.weka_RF()
impersonal_clf.set_params(n_estimators=2000, n_jobs=4)

start=time.time()
print("Training...")
impersonal_clf.fit(scaled_train_X, impersonal_labels)
finished_training = time.time()
print("Finished Training in %s seconds" % (finished_training - start))
wisdm.set_data(version="2", make_compatible=True)
result_rows = []

number_of_personal_samples = 10
ignored_users = []
print("predicting...")
for user_id in wisdm.user_ids:
    if user_id in users_to_ignore:
        continue
    print("User : %s" % user_id)
    user_df = wisdm.data_df[wisdm.data_df['user'] == user_id]
    
    personal_labels = np.array([t.decode("utf-8") for t in user_df['class'].as_matrix()])
    personal_features = user_df.as_matrix(columns=[user_df.columns[1:-1]])
    
    sss = StratifiedShuffleSplit(n_splits=4, test_size=30, train_size = number_of_personal_samples)
    
    personal_scores = []
    impersonal_scores = []
    hybrid_scores = []
    
    shuffle_count = 0
    try:
        for train_index, test_index in sss.split(personal_features, personal_labels):
            # data for personal model
            random_personal_features = personal_features[train_index]
            random_personal_labels = personal_labels[train_index]

            # create an active pool of everything not in the test set for active learning / hybrid model
            active_pool_mask = np.ones(personal_labels.shape, dtype=bool)
            active_pool_mask[test_index] = False
            active_pool_features = personal_features[active_pool_mask]
            active_pool_labels = personal_labels[active_pool_mask]

            # test set
            test_features = personal_features[test_index]
            test_labels = personal_labels[test_index]

            # build personal model and predict
            personal_scaler = StandardScaler().fit(random_personal_features)
            scaled_personal_features = personal_scaler.transform(random_personal_features)
            scaled_test_features = personal_scaler.transform(test_features)

            personal_clf = wisdm.weka_RF()
            personal_clf.set_params(n_estimators=2000, n_jobs=4)
            personal_clf.fit(scaled_personal_features, random_personal_labels)
            personal_score = accuracy_score(test_labels, personal_clf.predict(scaled_test_features))
            personal_scores.append(personal_score)

            # build impersonal model and predict
            scaled_test_features = impersonal_scaler.transform(test_features)
            impersonal_score = accuracy_score(test_labels, impersonal_clf.predict(scaled_test_features))
            impersonal_scores.append(impersonal_score)

            # determine active samples
            scaled_active_pool_features = impersonal_scaler.transform(active_pool_features)
            impersonal_probabilities = impersonal_clf.predict_proba(scaled_active_pool_features)

            confidence_ranking = np.argsort(np.max(impersonal_probabilities, axis=1))

            least_certain_features = active_pool_features[confidence_ranking[:number_of_personal_samples]]
            least_certain_labels = active_pool_labels[confidence_ranking[:number_of_personal_samples]]

            hybrid_features = np.vstack((impersonal_features, least_certain_features))
            hybrid_labels = np.hstack((impersonal_labels, least_certain_labels))
            hybrid_features, hybrid_labels = shuffle_rows(hybrid_features, hybrid_labels)

            hybrid_scaler = StandardScaler().fit(hybrid_features)
            scaled_hybrid_features = hybrid_scaler.transform(hybrid_features)
            scaled_test_features = hybrid_scaler.transform(test_features)

            hybrid_clf = wisdm.weka_RF()
            hybrid_clf.set_params(n_estimators=2000, n_jobs=4)
            hybrid_clf.fit(scaled_hybrid_features, hybrid_labels)

            hybrid_score = accuracy_score(test_labels, hybrid_clf.predict(scaled_test_features))
            hybrid_scores.append(hybrid_score)
            print("\t impersonal acc : %.3f" % impersonal_score)
            print("\t personal acc : %.3f" % personal_score)
            print("\t hybrid acc : %.3f" % hybrid_score)
            print("")

            result_row = {"user_id" : user_id,
                          "shuffle" : shuffle_count,
                          "impersonal" : impersonal_score,
                          "personal" : personal_score,
                          "hybrid" : hybrid_score}
            result_rows.append(result_row)
            shuffle_count += 1
    except ValueError as ve:
        if "The least populated class" in ve.args[0]:
            print("\tNot enough labeled data for %s" % user_id)
            ignored_users.append(user_id)
            continue
        else:
            raise ve

finished_predicting = time.time()
print("Finished predicting in %s seconds" % (finished_predicting - finished_training))
print("Users without enough data : %s" % ignored_users)

Training...
Finished Training in 8.901245594024658 seconds
predicting...
User : 194
	 impersonal acc : 0.633
	 personal acc : 0.967
	 hybrid acc : 0.667

	 impersonal acc : 0.767
	 personal acc : 0.900
	 hybrid acc : 0.733

	 impersonal acc : 0.733
	 personal acc : 0.933
	 hybrid acc : 0.733

	 impersonal acc : 0.767
	 personal acc : 1.000
	 hybrid acc : 0.767

User : 998
	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 0.000

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 0.000

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 0.133

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 0.067

User : 1104
	 impersonal acc : 0.533
	 personal acc : 0.800
	 hybrid acc : 0.433

	 impersonal acc : 0.633
	 personal acc : 0.933
	 hybrid acc : 0.500

	 impersonal acc : 0.633
	 personal acc : 0.900
	 hybrid acc : 0.400

	 impersonal acc : 0.567
	 personal acc : 0.833
	 hybrid acc : 0.533

User : 1117
	 impersonal acc : 0.633
	 personal acc :

In [291]:
results_df_10 = pd.DataFrame(result_rows)

In [292]:
results_df_10.describe()

Unnamed: 0,hybrid,impersonal,personal,shuffle
count,100.0,100.0,100.0,100.0
mean,0.689333,0.370333,0.921667,1.5
std,0.276416,0.309552,0.101213,1.123666
min,0.0,0.0,0.5,0.0
25%,0.466667,0.033333,0.9,0.75
50%,0.733333,0.416667,0.966667,1.5
75%,0.933333,0.633333,1.0,2.25
max,1.0,1.0,1.0,3.0


In [293]:
mean_hybrid_scores_by_user = []
mean_impersonal_scores_by_user = []
mean_personal_scores_by_user = []

for user_id in results_df_10['user_id'].unique():
    user_df = results_df_10[results_df_10['user_id'] == user_id]
    mean_hybrid_scores_by_user.append(user_df['hybrid'].mean())
    mean_impersonal_scores_by_user.append(user_df['impersonal'].mean())
    mean_personal_scores_by_user.append(user_df['personal'].mean())
    
impersonal_trace = go.Box(y=mean_impersonal_scores_by_user,
                          name="Impersonal",
                          marker=dict(color='red'),
                          boxpoints='all',
                          jitter=0.3,
                          pointpos=-0.5)

personal_trace = go.Box(y=mean_personal_scores_by_user,
                        name="Personal",
                        marker=dict(color="blue"),
                        boxpoints="all",
                        jitter=0.3,
                        pointpos=-0.5)

hybrid_trace = go.Box(y=mean_hybrid_scores_by_user,
                        name="Hybrid with 10 least certain samples",
                        marker=dict(color="purple"),
                        boxpoints="all",
                        jitter=0.3,
                        pointpos=-0.5)

data = [impersonal_trace, personal_trace, hybrid_trace]
layout = go.Layout(yaxis=dict(title="Accuracy"), showlegend=False)
fig=go.Figure(data=data,layout=layout)
iplot(fig)

In [295]:
personal_is_best_10 = []
impersonal_is_best_10 = []
hybrid_is_best_10 = []

user_ids = results_df_10['user_id'].unique()

for user_id in user_ids:
    user_df = results_df_10[results_df_10['user_id'] == user_id]
    accs = [user_df['personal'].mean(), user_df['impersonal'].mean(), user_df['hybrid'].mean()]
    #print(accs)
    best = np.argmax(accs)
    #print(best)
    if best == 0:
        personal_is_best_10.append(user_id)
    if best == 1:
        impersonal_is_best_10.append(user_id)
    if best == 2:
        hybrid_is_best_10.append(user_id)
print("%s users get best from personal" % len(personal_is_best_10))
print("%s users get best from impersonal" % len(impersonal_is_best_10))
print("%s users get best from hybrid" % len(hybrid_is_best_10))

personal_is_best_30 = []
impersonal_is_best_30 = []
hybrid_is_best_30 = []

user_ids = results_df_30['user_id'].unique()

for user_id in user_ids:
    user_df = results_df_30[results_df_30['user_id'] == user_id]
    accs = [user_df['personal'].mean(), user_df['impersonal'].mean(), user_df['hybrid'].mean()]
    #print(accs)
    best = np.argmax(accs)
    #print(best)
    if best == 0:
        personal_is_best_30.append(user_id)
    if best == 1:
        impersonal_is_best_30.append(user_id)
    if best == 2:
        hybrid_is_best_30.append(user_id)
print("%s users get best from personal" % len(personal_is_best_30))
print("%s users get best from impersonal" % len(impersonal_is_best_30))
print("%s users get best from hybrid" % len(hybrid_is_best_30))

trace1 = go.Bar(
    x=['10 samples', '30 samples'],
    y=[len(personal_is_best_10), len(personal_is_best_30)],
    marker=dict(color="blue"),
    name='Personal Data'
)

trace2 = go.Bar(
    x=['10 samples', '30 samples'],
    y=[len(hybrid_is_best_10), len(hybrid_is_best_30)],
    marker=dict(color="green"),
    name='Hybrid Data'
)

data = [trace1, trace2]
layout = go.Layout(
    barmode='stack',
    xaxis=dict(title="# of personal samples"),
    yaxis=dict(title="# of users")
)

fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='stacked-bar')


23 users get best from personal
0 users get best from impersonal
2 users get best from hybrid


NameError: name 'results_df_30' is not defined

# Most Confident Sampling?

## The number of personal samples = 10
   ### For either the personal or hybrid model

In [272]:
wisdm.set_data(version="1", make_compatible=True)
impersonal_df = wisdm.remove_all_nan(wisdm.data_df)
impersonal_labels = np.array([t.decode("utf-8") for t in impersonal_df['class'].as_matrix()])
impersonal_features = impersonal_df.as_matrix(columns=[impersonal_df.columns[1:-1]])
impersonal_scaler = StandardScaler().fit(impersonal_features)
scaled_train_X = impersonal_scaler.transform(impersonal_features)
impersonal_clf = wisdm.weka_RF()
impersonal_clf.set_params(n_estimators=2000, n_jobs=4)

start=time.time()
print("Training...")
impersonal_clf.fit(scaled_train_X, impersonal_labels)
finished_training = time.time()
print("Finished Training in %s seconds" % (finished_training - start))
wisdm.set_data(version="2", make_compatible=True)
result_rows = []

number_of_personal_samples = 10
ignored_users = []
print("predicting...")
for user_id in wisdm.user_ids:
    if user_id in users_to_ignore:
        continue
    print("User : %s" % user_id)
    user_df = wisdm.data_df[wisdm.data_df['user'] == user_id]
    
    personal_labels = np.array([t.decode("utf-8") for t in user_df['class'].as_matrix()])
    personal_features = user_df.as_matrix(columns=[user_df.columns[1:-1]])
    
    sss = StratifiedShuffleSplit(n_splits=4, test_size=30, train_size = number_of_personal_samples)
    
    personal_scores = []
    impersonal_scores = []
    hybrid_scores = []
    
    shuffle_count = 0
    try:
        for train_index, test_index in sss.split(personal_features, personal_labels):
            # data for personal model
            random_personal_features = personal_features[train_index]
            random_personal_labels = personal_labels[train_index]

            # create an active pool of everything not in the test set for active learning / hybrid model
            active_pool_mask = np.ones(personal_labels.shape, dtype=bool)
            active_pool_mask[test_index] = False
            active_pool_features = personal_features[active_pool_mask]
            active_pool_labels = personal_labels[active_pool_mask]

            # test set
            test_features = personal_features[test_index]
            test_labels = personal_labels[test_index]

            # build personal model and predict
            personal_scaler = StandardScaler().fit(random_personal_features)
            scaled_personal_features = personal_scaler.transform(random_personal_features)
            scaled_test_features = personal_scaler.transform(test_features)

            personal_clf = wisdm.weka_RF()
            personal_clf.set_params(n_estimators=2000, n_jobs=4)
            personal_clf.fit(scaled_personal_features, random_personal_labels)
            personal_score = accuracy_score(test_labels, personal_clf.predict(scaled_test_features))
            personal_scores.append(personal_score)

            # build impersonal model and predict
            scaled_test_features = impersonal_scaler.transform(test_features)
            impersonal_score = accuracy_score(test_labels, impersonal_clf.predict(scaled_test_features))
            impersonal_scores.append(impersonal_score)

            # determine active samples
            scaled_active_pool_features = impersonal_scaler.transform(active_pool_features)
            impersonal_probabilities = impersonal_clf.predict_proba(scaled_active_pool_features)

            confidence_ranking = np.argsort(np.max(impersonal_probabilities, axis=1))

            least_certain_features = active_pool_features[confidence_ranking[-number_of_personal_samples:]]
            least_certain_labels = active_pool_labels[confidence_ranking[-number_of_personal_samples:]]

            hybrid_features = np.vstack((impersonal_features, least_certain_features))
            hybrid_labels = np.hstack((impersonal_labels, least_certain_labels))
            hybrid_features, hybrid_labels = shuffle_rows(hybrid_features, hybrid_labels)

            hybrid_scaler = StandardScaler().fit(hybrid_features)
            scaled_hybrid_features = hybrid_scaler.transform(hybrid_features)
            scaled_test_features = hybrid_scaler.transform(test_features)

            hybrid_clf = wisdm.weka_RF()
            hybrid_clf.set_params(n_estimators=2000, n_jobs=4)
            hybrid_clf.fit(scaled_hybrid_features, hybrid_labels)

            hybrid_score = accuracy_score(test_labels, hybrid_clf.predict(scaled_test_features))
            hybrid_scores.append(hybrid_score)
            print("\t impersonal acc : %.3f" % impersonal_score)
            print("\t personal acc : %.3f" % personal_score)
            print("\t hybrid acc : %.3f" % hybrid_score)
            print("")

            result_row = {"user_id" : user_id,
                          "shuffle" : shuffle_count,
                          "impersonal" : impersonal_score,
                          "personal" : personal_score,
                          "hybrid" : hybrid_score}
            result_rows.append(result_row)
            shuffle_count += 1
    except ValueError as ve:
        if "The least populated class" in ve.args[0]:
            print("\tNot enough labeled data for %s" % user_id)
            ignored_users.append(user_id)
            continue
        else:
            raise ve

finished_predicting = time.time()
print("Finished predicting in %s seconds" % (finished_predicting - finished_training))
print("Users without enough data : %s" % ignored_users)

Training...
Finished Training in 6.9923577308654785 seconds
predicting...
User : 194
	 impersonal acc : 0.733
	 personal acc : 0.867
	 hybrid acc : 0.700

	 impersonal acc : 0.767
	 personal acc : 0.967
	 hybrid acc : 0.733

	 impersonal acc : 0.667
	 personal acc : 0.933
	 hybrid acc : 0.700

	 impersonal acc : 0.667
	 personal acc : 0.900
	 hybrid acc : 0.700

User : 998
	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 1.000

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 0.933

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 1.000

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 0.967

User : 1104
	 impersonal acc : 0.600
	 personal acc : 0.767
	 hybrid acc : 0.633

	 impersonal acc : 0.567
	 personal acc : 0.900
	 hybrid acc : 0.500

	 impersonal acc : 0.600
	 personal acc : 0.933
	 hybrid acc : 0.567

	 impersonal acc : 0.600
	 personal acc : 0.800
	 hybrid acc : 0.600

User : 1117
	 impersonal acc : 0.633
	 personal acc 

In [276]:
results_df = pd.DataFrame(result_rows)

In [277]:
results_df.describe()

Unnamed: 0,hybrid,impersonal,personal,shuffle
count,100.0,100.0,100.0,100.0
mean,0.592333,0.371667,0.916333,1.5
std,0.265479,0.305409,0.099211,1.123666
min,0.033333,0.0,0.533333,0.0
25%,0.466667,0.058333,0.866667,0.75
50%,0.6,0.4,0.95,1.5
75%,0.733333,0.608333,1.0,2.25
max,1.0,1.0,1.0,3.0


In [279]:
mean_hybrid_scores_by_user = []
mean_impersonal_scores_by_user = []
mean_personal_scores_by_user = []

for user_id in results_df['user_id'].unique():
    user_df = results_df[results_df['user_id'] == user_id]
    mean_hybrid_scores_by_user.append(user_df['hybrid'].mean())
    mean_impersonal_scores_by_user.append(user_df['impersonal'].mean())
    mean_personal_scores_by_user.append(user_df['personal'].mean())
    
impersonal_trace = go.Box(y=mean_impersonal_scores_by_user,
                          name="Impersonal",
                          marker=dict(color='red'),
                          boxpoints='all',
                          jitter=0.3,
                          pointpos=-0.5)

personal_trace = go.Box(y=mean_personal_scores_by_user,
                        name="Personal",
                        marker=dict(color="blue"),
                        boxpoints="all",
                        jitter=0.3,
                        pointpos=-0.5)

hybrid_trace = go.Box(y=mean_hybrid_scores_by_user,
                        name="Hybrid with 10 most certain samples",
                        marker=dict(color="purple"),
                        boxpoints="all",
                        jitter=0.3,
                        pointpos=-0.5)

data = [impersonal_trace, personal_trace, hybrid_trace]
layout = go.Layout(yaxis=dict(title="Accuracy"))
fig=go.Figure(data=data,layout=layout)
iplot(fig)

In [281]:
number_of_personal_samples = 30
n_trees = 3000
n_cores = 8

wisdm.set_data(version="1", make_compatible=True)
impersonal_df = wisdm.remove_all_nan(wisdm.data_df)
impersonal_labels = np.array([t.decode("utf-8") for t in impersonal_df['class'].as_matrix()])
impersonal_features = impersonal_df.as_matrix(columns=[impersonal_df.columns[1:-1]])
impersonal_scaler = StandardScaler().fit(impersonal_features)
scaled_train_X = impersonal_scaler.transform(impersonal_features)
impersonal_clf = wisdm.weka_RF()
impersonal_clf.set_params(n_estimators=n_trees, n_jobs=n_cores)

start=time.time()
print("Training...")
impersonal_clf.fit(scaled_train_X, impersonal_labels)
finished_training = time.time()
print("Finished Training in %s seconds" % (finished_training - start))
wisdm.set_data(version="2", make_compatible=True)
result_rows = []

ignored_users = []
print("predicting...")
for user_id in wisdm.user_ids:
    if user_id in users_to_ignore:
        continue
    print("User : %s" % user_id)
    user_df = wisdm.data_df[wisdm.data_df['user'] == user_id]
    
    personal_labels = np.array([t.decode("utf-8") for t in user_df['class'].as_matrix()])
    personal_features = user_df.as_matrix(columns=[user_df.columns[1:-1]])
    
    sss = StratifiedShuffleSplit(n_splits=4, test_size=30, train_size = number_of_personal_samples)
    
    personal_scores = []
    impersonal_scores = []
    hybrid_scores = []
    
    shuffle_count = 0
    try:
        for train_index, test_index in sss.split(personal_features, personal_labels):
            # data for personal model
            random_personal_features = personal_features[train_index]
            random_personal_labels = personal_labels[train_index]

            # create an active pool of everything not in the test set for active learning / hybrid model
            active_pool_mask = np.ones(personal_labels.shape, dtype=bool)
            active_pool_mask[test_index] = False
            active_pool_features = personal_features[active_pool_mask]
            active_pool_labels = personal_labels[active_pool_mask]

            # test set
            test_features = personal_features[test_index]
            test_labels = personal_labels[test_index]

            # build personal model and predict
            personal_scaler = StandardScaler().fit(random_personal_features)
            scaled_personal_features = personal_scaler.transform(random_personal_features)
            scaled_test_features = personal_scaler.transform(test_features)

            personal_clf = wisdm.weka_RF()
            personal_clf.set_params(n_estimators=n_trees, n_jobs=n_cores)
            personal_clf.fit(scaled_personal_features, random_personal_labels)
            personal_score = accuracy_score(test_labels, personal_clf.predict(scaled_test_features))
            personal_scores.append(personal_score)

            # build impersonal model and predict
            scaled_test_features = impersonal_scaler.transform(test_features)
            impersonal_score = accuracy_score(test_labels, impersonal_clf.predict(scaled_test_features))
            impersonal_scores.append(impersonal_score)

            # determine active samples
            scaled_active_pool_features = impersonal_scaler.transform(active_pool_features)
            impersonal_probabilities = impersonal_clf.predict_proba(scaled_active_pool_features)

            confidence_ranking = np.argsort(np.max(impersonal_probabilities, axis=1))

            least_certain_features = active_pool_features[confidence_ranking[-number_of_personal_samples:]]
            least_certain_labels = active_pool_labels[confidence_ranking[-number_of_personal_samples:]]

            hybrid_features = np.vstack((impersonal_features, least_certain_features))
            hybrid_labels = np.hstack((impersonal_labels, least_certain_labels))
            hybrid_features, hybrid_labels = shuffle_rows(hybrid_features, hybrid_labels)

            hybrid_scaler = StandardScaler().fit(hybrid_features)
            scaled_hybrid_features = hybrid_scaler.transform(hybrid_features)
            scaled_test_features = hybrid_scaler.transform(test_features)

            hybrid_clf = wisdm.weka_RF()
            hybrid_clf.set_params(n_estimators=n_trees, n_jobs=n_cores)
            hybrid_clf.fit(scaled_hybrid_features, hybrid_labels)

            hybrid_score = accuracy_score(test_labels, hybrid_clf.predict(scaled_test_features))
            hybrid_scores.append(hybrid_score)
            print("\t impersonal acc : %.3f" % impersonal_score)
            print("\t personal acc : %.3f" % personal_score)
            print("\t hybrid acc : %.3f" % hybrid_score)
            print("")

            result_row = {"user_id" : user_id,
                          "shuffle" : shuffle_count,
                          "impersonal" : impersonal_score,
                          "personal" : personal_score,
                          "hybrid" : hybrid_score}
            result_rows.append(result_row)
            shuffle_count += 1
    except ValueError as ve:
        if "The least populated class" in ve.args[0]:
            print("\tNot enough labeled data for %s" % user_id)
            ignored_users.append(user_id)
            continue
        else:
            raise ve

finished_predicting = time.time()
print("Finished predicting in %s seconds" % (finished_predicting - finished_training))
print("Users without enough data : %s" % ignored_users)

Training...
Finished Training in 8.983384132385254 seconds
predicting...
User : 194
	 impersonal acc : 0.667
	 personal acc : 0.967
	 hybrid acc : 0.733

	 impersonal acc : 0.667
	 personal acc : 0.967
	 hybrid acc : 0.767

	 impersonal acc : 0.767
	 personal acc : 0.933
	 hybrid acc : 0.800

	 impersonal acc : 0.733
	 personal acc : 1.000
	 hybrid acc : 0.733

User : 998
	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 0.967

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 1.000

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 1.000

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 0.967

User : 1104
	 impersonal acc : 0.600
	 personal acc : 0.967
	 hybrid acc : 0.600

	 impersonal acc : 0.633
	 personal acc : 0.967
	 hybrid acc : 0.633

	 impersonal acc : 0.667
	 personal acc : 1.000
	 hybrid acc : 0.633

	 impersonal acc : 0.533
	 personal acc : 1.000
	 hybrid acc : 0.600

User : 1117
	 impersonal acc : 0.633
	 personal acc :

ValueError: The sum of train_size and test_size = 60, should be smaller than the number of samples 51. Reduce test_size and/or train_size.

In [282]:
results_df = pd.DataFrame(result_rows)

In [284]:
mean_hybrid_scores_by_user = []
mean_impersonal_scores_by_user = []
mean_personal_scores_by_user = []

for user_id in results_df['user_id'].unique():
    user_df = results_df[results_df['user_id'] == user_id]
    mean_hybrid_scores_by_user.append(user_df['hybrid'].mean())
    mean_impersonal_scores_by_user.append(user_df['impersonal'].mean())
    mean_personal_scores_by_user.append(user_df['personal'].mean())
    
impersonal_trace = go.Box(y=mean_impersonal_scores_by_user,
                          name="Impersonal",
                          marker=dict(color='red'),
                          boxpoints='all',
                          jitter=0.3,
                          pointpos=-0.5)

personal_trace = go.Box(y=mean_personal_scores_by_user,
                        name="Personal",
                        marker=dict(color="blue"),
                        boxpoints="all",
                        jitter=0.3,
                        pointpos=-0.5)

hybrid_trace = go.Box(y=mean_hybrid_scores_by_user,
                        name="Hybrid with 30 most certain samples",
                        marker=dict(color="purple"),
                        boxpoints="all",
                        jitter=0.3,
                        pointpos=-0.5)

data = [impersonal_trace, personal_trace, hybrid_trace]
layout = go.Layout(yaxis=dict(title="Accuracy"))
fig=go.Figure(data=data,layout=layout)
iplot(fig)

# Try with out stratification

In [268]:
wisdm.set_data(version="1", make_compatible=True)
impersonal_df = wisdm.remove_all_nan(wisdm.data_df)
impersonal_labels = np.array([t.decode("utf-8") for t in impersonal_df['class'].as_matrix()])
impersonal_features = impersonal_df.as_matrix(columns=[impersonal_df.columns[1:-1]])
impersonal_scaler = StandardScaler().fit(impersonal_features)
scaled_train_X = impersonal_scaler.transform(impersonal_features)
impersonal_clf = wisdm.weka_RF()
impersonal_clf.set_params(n_estimators=2000, n_jobs=4)

start=time.time()
print("Training...")
impersonal_clf.fit(scaled_train_X, impersonal_labels)
finished_training = time.time()
print("Finished Training in %s seconds" % (finished_training - start))
wisdm.set_data(version="2", make_compatible=True)
result_rows = []

number_of_personal_samples = 10
ignored_users = []
print("predicting...")
for user_id in wisdm.user_ids:
    if user_id in users_to_ignore:
        continue
    print("User : %s" % user_id)
    user_df = wisdm.data_df[wisdm.data_df['user'] == user_id]
    
    personal_labels = np.array([t.decode("utf-8") for t in user_df['class'].as_matrix()])
    personal_features = user_df.as_matrix(columns=[user_df.columns[1:-1]])
    
    sss = ShuffleSplit(n_splits=4, test_size=30, train_size = number_of_personal_samples)
    
    personal_scores = []
    impersonal_scores = []
    hybrid_scores = []
    
    shuffle_count = 0
    try:
        for train_index, test_index in sss.split(personal_features):
            # data for personal model
            random_personal_features = personal_features[train_index]
            random_personal_labels = personal_labels[train_index]

            # create an active pool of everything not in the test set for active learning / hybrid model
            active_pool_mask = np.ones(personal_labels.shape, dtype=bool)
            active_pool_mask[test_index] = False
            active_pool_features = personal_features[active_pool_mask]
            active_pool_labels = personal_labels[active_pool_mask]

            # test set
            test_features = personal_features[test_index]
            test_labels = personal_labels[test_index]

            # build personal model and predict
            personal_scaler = StandardScaler().fit(random_personal_features)
            scaled_personal_features = personal_scaler.transform(random_personal_features)
            scaled_test_features = personal_scaler.transform(test_features)

            personal_clf = wisdm.weka_RF()
            personal_clf.set_params(n_estimators=2000, n_jobs=4)
            personal_clf.fit(scaled_personal_features, random_personal_labels)
            personal_score = accuracy_score(test_labels, personal_clf.predict(scaled_test_features))
            personal_scores.append(personal_score)

            # build impersonal model and predict
            scaled_test_features = impersonal_scaler.transform(test_features)
            impersonal_score = accuracy_score(test_labels, impersonal_clf.predict(scaled_test_features))
            impersonal_scores.append(impersonal_score)

            # determine active samples
            scaled_active_pool_features = impersonal_scaler.transform(active_pool_features)
            impersonal_probabilities = impersonal_clf.predict_proba(scaled_active_pool_features)

            confidence_ranking = np.argsort(np.max(impersonal_probabilities, axis=1))

            least_certain_features = active_pool_features[confidence_ranking[:number_of_personal_samples]]
            least_certain_labels = active_pool_labels[confidence_ranking[:number_of_personal_samples]]

            hybrid_features = np.vstack((impersonal_features, least_certain_features))
            hybrid_labels = np.hstack((impersonal_labels, least_certain_labels))
            hybrid_features, hybrid_labels = shuffle_rows(hybrid_features, hybrid_labels)

            hybrid_scaler = StandardScaler().fit(hybrid_features)
            scaled_hybrid_features = hybrid_scaler.transform(hybrid_features)
            scaled_test_features = hybrid_scaler.transform(test_features)

            hybrid_clf = wisdm.weka_RF()
            hybrid_clf.set_params(n_estimators=2000, n_jobs=4)
            hybrid_clf.fit(scaled_hybrid_features, hybrid_labels)

            hybrid_score = accuracy_score(test_labels, hybrid_clf.predict(scaled_test_features))
            hybrid_scores.append(hybrid_score)
            print("\t impersonal acc : %.3f" % impersonal_score)
            print("\t personal acc : %.3f" % personal_score)
            print("\t hybrid acc : %.3f" % hybrid_score)
            print("")

            result_row = {"user_id" : user_id,
                          "shuffle" : shuffle_count,
                          "impersonal" : impersonal_score,
                          "personal" : personal_score,
                          "hybrid" : hybrid_score}
            result_rows.append(result_row)
            shuffle_count += 1
    except ValueError as ve:
        if "The least populated class" in ve.args[0]:
            print("\tNot enough labeled data for %s" % user_id)
            ignored_users.append(user_id)
            continue
        else:
            raise ve

finished_predicting = time.time()
print("Finished predicting in %s seconds" % (finished_predicting - finished_training))
print("Users without enough data : %s" % ignored_users)

Training...
Finished Training in 7.001519203186035 seconds
predicting...
User : 194
	 impersonal acc : 0.600
	 personal acc : 0.633
	 hybrid acc : 0.767

	 impersonal acc : 0.700
	 personal acc : 0.667
	 hybrid acc : 0.600

	 impersonal acc : 0.667
	 personal acc : 0.867
	 hybrid acc : 0.733

	 impersonal acc : 0.667
	 personal acc : 0.900
	 hybrid acc : 0.767

User : 998
	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 0.033

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 0.000

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 0.067

	 impersonal acc : 0.000
	 personal acc : 1.000
	 hybrid acc : 0.067

User : 1104
	 impersonal acc : 0.467
	 personal acc : 0.900
	 hybrid acc : 0.533

	 impersonal acc : 0.500
	 personal acc : 0.800
	 hybrid acc : 0.367

	 impersonal acc : 0.500
	 personal acc : 0.800
	 hybrid acc : 0.467

	 impersonal acc : 0.433
	 personal acc : 0.767
	 hybrid acc : 0.400

User : 1117
	 impersonal acc : 0.400
	 personal acc :

In [269]:
results_df_10_no_strat = pd.DataFrame(result_rows)

In [270]:
results_df_10_no_strat.describe()

Unnamed: 0,hybrid,impersonal,personal,shuffle
count,104.0,104.0,104.0,104.0
mean,0.683013,0.361859,0.871474,1.5
std,0.300269,0.291979,0.132962,1.123448
min,0.0,0.0,0.3,0.0
25%,0.458333,0.091667,0.791667,0.75
50%,0.766667,0.35,0.9,1.5
75%,1.0,0.575,1.0,2.25
max,1.0,1.0,1.0,3.0


# Case in which user corrects the model upon noticing error