In [3]:
from wisdm import wisdm
wisdm.set_data(version="1", make_compatible=True)

In [7]:
import numpy as np
import pandas as pd

In [17]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, StratifiedKFold, StratifiedShuffleSplit, ShuffleSplit

In [21]:
user_id = wisdm.user_ids[0]

personal_set = wisdm.get_user_set(user_id)
personal_set = wisdm.remove_all_nan(personal_set)

personal_labels = np.array([t.decode("utf-8") for t in personal_set['class'].as_matrix()])
personal_features = personal_set.as_matrix(columns=[personal_set.columns[1:-1]])

impersonal_set = wisdm.data_df[wisdm.data_df['user'] != user_id]
impersonal_set = wisdm.remove_all_nan(impersonal_set)

impersonal_labels = np.array([t.decode("utf-8") for t in impersonal_set['class'].as_matrix()])
impersonal_features = impersonal_set.as_matrix(columns=[impersonal_set.columns[1:-1]])

impersonal_scaler = StandardScaler().fit(impersonal_features)
scaled_impersonal_features = impersonal_scaler.transform(impersonal_features)

impersonal_clf = wisdm.weka_RF()
impersonal_clf.fit(scaled_impersonal_features, impersonal_labels)

skf = StratifiedKFold(n_splits=10)
skf_generator = skf.split(personal_features, personal_labels)

In [22]:
personal_scores = []
impersonal_scores = []
hybrid_all_scores = []

for active_index, test_index in skf_generator:
    active_features = personal_features[active_index]
    active_labels = personal_labels[active_index]
    
    test_features = personal_features[test_index]
    test_labels = personal_labels[test_index]
    
    # impersonal model
    impersonal_scaled_test_features = impersonal_scaler.transform(test_features)
    impersonal_score = impersonal_clf.score(impersonal_scaled_test_features, test_labels)
    impersonal_scores.append(impersonal_score)
    
    #personal model
    personal_scaler = StandardScaler().fit(active_features)
    personal_scaled_training_features = personal_scaler.transform(active_features)
    personal_scaled_test_features = personal_scaler.transform(test_features)
    personal_clf = wisdm.weka_RF().fit(personal_scaled_training_features, active_labels)
    personal_score = personal_clf.score(personal_scaled_test_features, test_labels)
    personal_scores.append(personal_score)
    
    #personalized impersonal model
    training_features = np.vstack((impersonal_features,active_features))
    training_labels = np.hstack((impersonal_labels,active_labels))
    
    scaler = StandardScaler().fit(training_features)
    scaled_training_features = personal_scaler.transform(training_features)
    scaled_test_features = personal_scaler.transform(test_features)
    clf = wisdm.weka_RF().fit(personal_scaled_training_features, active_labels)
    score = personal_clf.score(personal_scaled_test_features, test_labels)
    hybrid_all_scores.append(score)



In [27]:
np.mean(personal_scores)

0.97953431372549016

In [28]:
np.mean(impersonal_scores)

0.87642507002801118

In [29]:
np.mean(hybrid_all_scores)

0.97953431372549016

# PCA

In [31]:
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
from plotly.graph_objs import *

In [73]:
from sklearn.decomposition import PCA
import colorlover as cl
from IPython.display import HTML

In [37]:
pca = PCA(n_components=2)

universal_set = wisdm.data_df
universal_set = wisdm.remove_all_nan(universal_set)
universal_labels = np.array([t.decode("utf-8") for t in universal_set['class'].as_matrix()])
universal_features = universal_set.as_matrix(columns=[universal_set.columns[1:-1]])

activity_classes = list(set(universal_labels))

qualitative_colors = cl.scales['5']['qual']['Set1']

activity_colors = {activity_classes[i] : color for i,color in enumerate(qualitative_colors)}

features_pca_space = pca.fit(universal_features).transform(universal_features)

# Percentage of variance explained for each components
print('explained variance ratio (first two components): %s'
      % str(pca.explained_variance_ratio_))

trace = Scatter(x=[d[0] for d in features_pca_space],
                y=[d[1] for d in features_pca_space],
                mode='markers',
                marker=dict(color=[activity_colors[activity] for activity in universal_labels],
                            opacity=0.5))
data = [trace]
iplot(data,filename="pca")

explained variance ratio (first two components): [ 0.45997851  0.31854678]


## Color By User

In [88]:
def pca_by_user(user_id):
    pca = PCA(n_components=2)
    
    personal_set = wisdm.get_user_set(user_id)
    personal_set = wisdm.remove_all_nan(personal_set)

    personal_labels = np.array([t.decode("utf-8") for t in personal_set['class'].as_matrix()])
    personal_features = personal_set.as_matrix(columns=[personal_set.columns[1:-1]])

    impersonal_set = wisdm.data_df[wisdm.data_df['user'] != user_id]
    impersonal_set = wisdm.remove_all_nan(impersonal_set)

    impersonal_labels = np.array([t.decode("utf-8") for t in impersonal_set['class'].as_matrix()])
    impersonal_features = impersonal_set.as_matrix(columns=[impersonal_set.columns[1:-1]])
    
    all_features = np.vstack((impersonal_features, personal_features))

    activity_classes = list(set(impersonal_labels))

    qualitative_colors = cl.scales['5']['qual']['Set1']
    
    activity_colors = {activity_classes[i] : color for i,color in enumerate(qualitative_colors)}

    features_pca_space = pca.fit(all_features).transform(all_features)

    # Percentage of variance explained for each components
    print('explained variance ratio (first two components): %s'
          % str(pca.explained_variance_ratio_))

    impersonal_trace = Scatter(x=[d[0] for d in features_pca_space[:len(impersonal_features)]],
                            y=[d[1] for d in features_pca_space[:len(impersonal_features)]],
                            mode='markers',
                            marker=dict(color=[activity_colors[activity] for activity in impersonal_labels],
                                opacity=0.2))
    
    personal_trace = Scatter(x=[d[0] for d in features_pca_space[len(impersonal_features):]],
                            y=[d[1] for d in features_pca_space[len(impersonal_features):]],
                            mode='markers',
                            marker=dict(color=[activity_colors[activity] for activity in personal_labels],
                                opacity=1.0,
                                symbol='cross',
                                size=12
                                       ))
    data = [impersonal_trace, personal_trace]
    iplot(data,filename="pca")

In [89]:
pca_by_user(user_id)

explained variance ratio (first two components): [ 0.45997851  0.31854678]


In [52]:
def pca_by_user_and_activity(user_id, activity_label):
    pca = PCA(n_components=2)
    
    personal_set = wisdm.get_user_set(user_id)
    personal_set = wisdm.remove_all_nan(personal_set)
    
    personal_labels = np.array([t.decode("utf-8") for t in personal_set['class'].as_matrix()])
    personal_features = personal_set.as_matrix(columns=[personal_set.columns[1:-1]])
    
    personal_activity_features = [row for ind, row in enumerate(personal_features) if personal_labels[ind] == activity_label]
    
    impersonal_set = wisdm.data_df[wisdm.data_df['user'] != user_id]
    impersonal_set = wisdm.remove_all_nan(impersonal_set)

    impersonal_labels = np.array([t.decode("utf-8") for t in impersonal_set['class'].as_matrix()])
    impersonal_features = impersonal_set.as_matrix(columns=[impersonal_set.columns[1:-1]])
    
    impersonal_activity_features = [row for ind, row in enumerate(impersonal_features) if impersonal_labels[ind] == activity_label]
    
    all_features = np.vstack((impersonal_activity_features, personal_activity_features))

    impersonal_color = cl.scales['5']['qual']['Set1'][1]
    personal_color =cl.scales['5']['qual']['Set1'][0]

    features_pca_space = pca.fit(all_features).transform(all_features)

    # Percentage of variance explained for each components
    print('explained variance ratio (first two components): %s'
          % str(pca.explained_variance_ratio_))

    impersonal_trace = Scatter(x=[d[0] for d in features_pca_space[:len(impersonal_activity_features)]],
                            y=[d[1] for d in features_pca_space[:len(impersonal_activity_features)]],
                            mode='markers',
                            marker=dict(color=[impersonal_color for activity in impersonal_activity_features],
                                opacity=0.5))
    
    personal_trace = Scatter(x=[d[0] for d in features_pca_space[len(impersonal_activity_features):]],
                            y=[d[1] for d in features_pca_space[len(impersonal_activity_features):]],
                            mode='markers',
                            marker=dict(color=[personal_color for activity in personal_activity_features],
                                opacity=1.0))
    data = [impersonal_trace, personal_trace]
    iplot(data)

In [53]:
pca_by_user_and_activity(user_id, 'Stairs')

explained variance ratio (first two components): [ 0.49375633  0.29950086]
