In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from pycaret.regression import setup, compare_models, pull, finalize_model, save_model, load_model, predict_model
from sklearn.cluster import KMeans

In [None]:

# To test on a new user, the specific per-user model to use must be identified.
# To do this, the existing users are clustered so that the closest existing user
# to the new user can be identified; that user's model is then used to predict
# mood for the new user.
res_imp_feat = pd.read_csv('final_dataset.csv', engine='python')

# Hold out 'AS14.33' as the "new" user; every other id is an existing user.
new_user = res_imp_feat[res_imp_feat['id'] == 'AS14.33']
res_imp_feat = res_imp_feat[res_imp_feat['id'] != 'AS14.33']

# One row per existing user: the mean of every remaining column after 'Date'
# and the 'mood' target have been dropped.
res_clus = res_imp_feat.drop(columns=['Date', 'mood'])
res_clus = res_clus.groupby("id").mean().reset_index()

# Feature matrix handed to KMeans (rows follow the row order of res_clus).
# `columns=` replaces the positional axis argument of `drop(['id'], 1)`, which
# pandas deprecated and pandas 2.0 no longer accepts.
X = np.array(res_clus.drop(columns=['id']).astype(float))


In [None]:
# Elbow method: fit KMeans for k = 1..10 and record the within-cluster sum of
# squares (inertia_) of each fit; the bend in the curve suggests a cluster count.
candidate_ks = range(1, 11)
kmeans_kwargs = dict(init='k-means++', max_iter=300, n_init=10, random_state=0)

wcss = []
for n_clusters in candidate_ks:
    # fit() returns the estimator itself, so `kmeans` is the fitted model.
    kmeans = KMeans(n_clusters=n_clusters, **kmeans_kwargs).fit(X)
    wcss.append(kmeans.inertia_)

plt.plot(candidate_ks, wcss)
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()

In [None]:
# Build the final cluster model and plot it on the first two columns of X.
# NOTE(review): n_clusters=4 is presumably read off the elbow plot - confirm.
kmeans = KMeans(n_clusters=4, init='k-means++', max_iter=300, n_init=10, random_state=0)
pred_y = kmeans.fit_predict(X)  # one cluster label per row of X

# Colour every user by its assigned cluster. Previously all points were drawn in
# a single colour, so cluster membership was invisible even though pred_y had
# already been computed.
plt.scatter(X[:, 0], X[:, 1], c=pred_y)
# Cluster centres, drawn large and red on top of the users.
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=300, c='red')
plt.title('KMeans clusters (first two feature columns)')
plt.xlabel('Feature column 0')
plt.ylabel('Feature column 1')
plt.show()

In [None]:
# Append the cluster ids to the per-user frame of existing users. X was built
# from res_clus row by row, so kmeans.labels_ lines up with its rows.
res_clus = res_clus.assign(clusterID=kmeans.labels_)

In [None]:
# Show one existing user that was assigned cluster id 0.
res_clus.query('clusterID == 0').head(1)

In [None]:
# Test the model of an existing user on the new user's data.
#
# The original read the model id back out of a `val` frame that is never
# defined in this notebook (NameError on Restart & Run All); the id is now
# stated directly.
# NOTE(review): 'AS14.06' is hard-coded - nothing in this notebook derives it
# from the new user's cluster. Confirm it is the intended closest user.
closest_user_ids = ['AS14.06']

all_score_df = []
for i in tqdm(closest_user_ids):
    # NOTE(review): there is no path separator between 'trained_models' and the
    # id - confirm this matches how the per-user models were saved.
    model = load_model('trained_models' + str(i), verbose=False)
    all_score_df.append(predict_model(model, data=new_user))

concat_val = pd.concat(all_score_df, axis=0)
# Every scored row belongs to the held-out new user.
concat_val['id'] = 'AS14.33'

# Round to the nearest integer before casting: a bare astype(int) truncates
# toward zero (6.9 -> 6), which biases both series downwards.
concat_val['mood'] = concat_val['mood'].round().astype(int)
concat_val['Label'] = concat_val['Label'].round().astype(int)
concat_val = concat_val.rename(columns={"Label": "Predicted Mood", 'mood': 'Actual Mood'})

In [None]:
# Print MAE, MSE and RMSE of the predicted mood against the actual mood.
y_true = concat_val['Actual Mood']
y_pred = concat_val['Predicted Mood']
mse = mean_squared_error(y_true, y_pred)

print('MAE :', mean_absolute_error(y_true, y_pred))
print('MSE :', mse)
# RMSE is computed as sqrt(MSE) instead of mean_squared_error(..., squared=False):
# the `squared` argument is deprecated in scikit-learn 1.4 and removed in 1.6.
print('RMSE:', np.sqrt(mse))

In [None]:
# Display the first row of the scored new-user frame.
concat_val.iloc[:1]

In [None]:
import plotly.express as px

# One line chart per id in concat_val: actual and predicted mood over 'Date'.
mood_columns = ['Actual Mood', 'Predicted Mood']
for user_id in concat_val['id'].unique():
    user_rows = concat_val.loc[concat_val['id'].eq(user_id)]
    px.line(
        user_rows,
        x="Date",
        y=mood_columns,
        title='Actual Average Mood Versus Predicted Average Mood-Pycaret Model',
        template='presentation',
    ).show()

In [None]:
# Plot predictions versus actual moods, one dark-themed figure per id (the id
# is used as the figure title).
#
# The original iterated over `final_val_df` with columns 'mood' / 'Label', but no
# such frame is defined in this notebook, so the cell raised NameError on a fresh
# kernel. The only prediction frame built here is `concat_val`, whose columns
# were renamed to 'Actual Mood' / 'Predicted Mood', so that frame is plotted.
# NOTE(review): if `final_val_df` was meant to come from another notebook, load
# it explicitly in this notebook instead.
import plotly.express as px
for i in concat_val['id'].unique():
    sub_df = concat_val[concat_val['id'] == i]
    fig = px.line(sub_df, x="Date", y=['Actual Mood', 'Predicted Mood'], title=i, template='plotly_dark')
    fig.show()