# Running app recommendations

In [43]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras.utils import plot_model
from keras.models import Sequential
from keras.layers import Dense, Input, Embedding, Flatten, Concatenate
from keras.optimizers import Adam
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold, cross_validate, train_test_split
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.metrics import accuracy_score, auc, roc_curve

pd.options.display.max_columns = 1000
import warnings
warnings.filterwarnings("ignore")

In [17]:
df = pd.read_csv('MDF_final.csv')
df = df.drop_duplicates()
df.reset_index(drop=True, inplace=True)

x = df.iloc[:, 3:]
y = df['rating']
input_dim = len(x.columns)
n_users = df.user.nunique()
n_items = df.item.nunique()

for col in 'music_active speaker_on headset_connected connected is_weekend is_holiday'.split():
    x[col] = x[col].astype(int)
    
print(f"rating with value 1: {df[df.rating == 1]['rating'].count() * 100 / len(df)} %")
print(f"number of users: {n_users} \t number of items: {n_items} \t number of rating: {len(x)}")

rating with value 1: 67.09481132724845 %
number of users: 30 	 number of items: 338 	 number of rating: 72639


## Matrix factorization

In [49]:
def MF(latent_dim=50):
    # Input variables
    user_input = Input(shape=(1,), dtype='int32', name = 'user_input')
    item_input = Input(shape=(1,), dtype='int32', name = 'item_input')

    MF_Embedding_User = Embedding(input_dim = n_users+1, output_dim = latent_dim, name = 'user_embedding', input_length=1)
    MF_Embedding_Item = Embedding(input_dim = n_items+1, output_dim = latent_dim, name = 'item_embedding', input_length=1)   
    
    # flatten an embedding vector
    user_latent = Flatten()(MF_Embedding_User(user_input))
    item_latent = Flatten()(MF_Embedding_Item(item_input))
    
    # Element-wise product of user and item embeddings 
    predict_vector = tf.keras.layers.Dot(axes=1)([user_latent, item_latent])
    
    # Final prediction layer
    prediction = Dense(1, activation='sigmoid', name = 'prediction')(predict_vector)
    
    model = keras.Model([user_input, item_input], prediction)
    model.compile(loss='binary_crossentropy')

    return model

In [51]:
mf = MF()
x_train, x_test, y_train, y_test = train_test_split(df[['user', 'item']], df['rating'], test_size=0.20, random_state=42)
mf.fit([x_train.user, x_train.item], y_train, epochs=20, batch_size=64, verbose=False)
y_pred = mf.predict([x_test.user, x_test.item]).flatten()
print(accuracy_score(y_test, y_pred.round()))
fpr, tpr, thresholds = roc_curve(y_test, y_pred.round())
print(auc(fpr, tpr))

0.8705258810572687
0.8954871669001743


## Classifier

In [26]:
def mobile_model(learn_rate):
    model = Sequential()
    model.add(Dense(200, input_dim=input_dim, activation='relu'))
    model.add(Dense(100, input_dim=input_dim, activation='relu'))
    model.add(Dense(50, input_dim=input_dim, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=Adam(lr=learn_rate))
    return model

In [28]:
ff_net = KerasClassifier(build_fn=mobile_model, verbose=False)

# define hyperparameters to tune
learn_rate = [0.001]
batch_size = [128]
epochs = [10]

# Make a dictionary of the grid search parameters
param_grid = dict(learn_rate=learn_rate, batch_size=batch_size, epochs=epochs)

# create and fit gridsearch
grid = GridSearchCV(estimator=ff_net, scoring=['accuracy', 'roc_auc'], refit=False, param_grid=param_grid, 
                    cv=KFold(shuffle=True, n_splits=2, random_state=42), verbose=True)
grid_results = grid.fit(x, y)

mean_accuracy = grid_results.cv_results_['mean_test_accuracy']
mean_auc = grid_results.cv_results_['mean_test_roc_auc']
params = grid_results.cv_results_['params']

for accuracy, auc, params in zip(mean_accuracy, mean_auc, params):
    print(f'ACCURACY = {accuracy*100} % \t AUC = {auc} \t PARAMS = {params}') 

Fitting 2 folds for each of 1 candidates, totalling 2 fits
ACCURACY = 90.03427533288301 % 	 AUC = 0.9660312888515299 	 PARAMS = {'batch_size': 128, 'epochs': 10, 'learn_rate': 0.001}
