# Running app recommendations

In [13]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras.utils import plot_model
from keras.models import Sequential
from keras.layers import Dense, Input, Embedding, Flatten, Concatenate, Lambda
from keras.optimizers import Adam
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold, cross_validate, train_test_split
from sklearn.model_selection import GridSearchCV, KFold
import rs_models
from kerashypetune import KerasGridSearch
from operator import add

import warnings

# Let pandas render up to 1000 columns before truncating wide frames.
pd.set_option("display.max_columns", 1000)
# Suppress every warning for the remainder of the session.
warnings.filterwarnings("ignore")

## Open Dataset

In [None]:
# Load the ratings table, drop exact duplicate rows and renumber the index 0..n-1.
df = pd.read_csv('MDF_final.csv').drop_duplicates().reset_index(drop=True)

# Columns whose name starts with "category" are the labels that describe an item.
item_labels = [col for col in df.columns if col.startswith("category")]
# Every column from the fourth onwards that is not an item label is a context feature
# (assumes the first three columns are user, item and rating -- TODO confirm).
# Built in column order rather than through a set: set iteration order for strings
# changes between interpreter sessions, which would shuffle the context feature
# order fed to the models from one run to the next.
context_labels = [col for col in df.columns[3:] if col not in item_labels]

n_users = df.user.nunique()
n_items = df.item.nunique()
n_contexts = len(context_labels)

# Percentage of rows whose rating equals 1.
positive_pct = (df.rating == 1).sum() * 100 / len(df)
print(f"rating with value 1: {positive_pct} %")
print(f"users: {n_users} \t items: {n_items} \t rating: {len(df)} \t items_features: {len(item_labels)} \t contexts_features: {n_contexts} \t ")

## Grid search

In [None]:
# Search space handed to KerasGridSearch in the NeuMF / ECAM NeuMF cells:
# the dataset sizes are fixed scalars, the list-valued entries are the options to try.
param_grid = dict(
    n_users=n_users,
    n_items=n_items,
    n_contexts=n_contexts,
    epochs=[5, 10, 15, 20],
    batch_size=[64, 128, 256],
    learn_rate=[0.0001, 0.001, 0.01],
)

# Create the results file, or empty it if it already exists, so that the
# search cells below can append to a clean file.
with open('grid_search_result.txt', 'w'):
    pass

### NeuMF

In [None]:
print("grid search on NeuMF...")
# Hold out 20% of the (user, item) pairs; the held-out part is used as the
# validation set that drives the hyper-parameter selection below.
x_train, x_test, y_train, y_test = train_test_split(df[['user', 'item']], df['rating'], test_size=0.20, random_state=42)
neumf = rs_models.NeuMF
# Pick the configuration with the highest monitored 'val_auc'.
kgs = KerasGridSearch(neumf, param_grid, monitor='val_auc', greater_is_better=True, tuner_verbose=0)
kgs.search([x_train.user, x_train.item], y_train, validation_data=([x_test.user, x_test.item], y_test))
# Append the outcome; the context manager closes (and flushes) the file,
# which the previous inline open() never did.
with open("grid_search_result.txt", "a") as results_file:
    print(f'NeuMF best AUC: {kgs.best_score} using {kgs.best_params}', file=results_file)
print("Done!")

### ECAM NeuMF

In [None]:
print("grid search on ECAM NeuMF...")
# Same 80/20 split as the NeuMF search, but the context columns are kept as
# features as well.
x_train, x_test, y_train, y_test = train_test_split(df[['user', 'item'] + context_labels], df['rating'], test_size=0.20, random_state=42)
ecam_neumf = rs_models.ECAM_NeuMF
# Pick the configuration with the highest monitored 'val_auc'.
kgs = KerasGridSearch(ecam_neumf, param_grid, monitor='val_auc', greater_is_better=True, tuner_verbose=0)
# Three inputs: user ids, item ids and the frame of context features.
kgs.search([x_train.user, x_train.item, x_train[context_labels]], y_train, validation_data=([x_test.user, x_test.item, x_test[context_labels]], y_test))
# Append the outcome; the context manager closes (and flushes) the file,
# which the previous inline open() never did.
with open("grid_search_result.txt", "a") as results_file:
    print(f'ECAM NeuMF best AUC: {kgs.best_score} using {kgs.best_params}', file=results_file)
print("Done!")

### Classifier

In [None]:
print("grid search on feed-forward network...")

# The feed-forward classifier only sees item and context features (no user/item ids).
x = df[item_labels + context_labels]
y = df['rating']

ff_net = KerasClassifier(build_fn=rs_models.mobile_model, verbose=False)

# Kept under its own name so it does not overwrite the `param_grid` used by the
# NeuMF / ECAM NeuMF cells; otherwise re-running those cells after this one would
# silently search this grid instead.
ff_param_grid = {
    'learn_rate': [0.0005, 0.001, 0.005],
    'batch_size': [64, 128, 256],
    'epochs': [5, 10, 15, 20, 30], 
    'layers': [4],
    'neurons': [200]
}

# create and fit gridsearch: score on accuracy and ROC AUC, refit the best model by ROC AUC
grid = GridSearchCV(estimator=ff_net, scoring=['accuracy', 'roc_auc'], refit='roc_auc', param_grid=ff_param_grid, 
                    cv=KFold(shuffle=True, n_splits=2, random_state=42), verbose=True)
grid_results = grid.fit(x, y)

# Per-configuration means across folds, kept for inspection.
mean_accuracy = grid_results.cv_results_['mean_test_accuracy']
mean_auc = grid_results.cv_results_['mean_test_roc_auc']
params = grid_results.cv_results_['params']

# Append the outcome; the context manager closes (and flushes) the file,
# which the previous inline open() never did.
with open("grid_search_result.txt", "a") as results_file:
    print(f'FFnet best AUC: {grid_results.best_score_} using {grid_results.best_params_}', file=results_file)
print("Done!")

In [None]:
!jupyter nbconvert --to script apps_recommender.ipynb # convert to python script

## Train final models

In [29]:
def kfold_split(df, x, y, n_splits=6):
    """Yield shuffled K-fold train/test partitions of a DataFrame.

    Args:
        df: source DataFrame.
        x: list of feature column names.
        y: name of the target column.
        n_splits: number of folds (default 6).

    Yields:
        Tuples ``(x_train, y_train, x_test, y_test)``, one per fold, where the
        ``x_*`` items are DataFrames restricted to ``x`` and the ``y_*`` items
        are the matching slices of column ``y``.
    """
    kf = KFold(n_splits=n_splits, random_state=42, shuffle=True)
    features, target = df[x], df[y]

    for train_index, test_index in kf.split(features, target):
        # KFold.split yields positional row numbers, so select with .iloc;
        # label-based .loc is only correct when the index is exactly 0..n-1.
        x_train, x_test = features.iloc[train_index, :], features.iloc[test_index, :]
        y_train, y_test = target.iloc[train_index], target.iloc[test_index]
        yield x_train, y_train, x_test, y_test


def kfold_train(model, param, context_labels=None, n_splits=2):
    """Train and evaluate a model with K-fold cross-validation on the global ``df``.

    Args:
        model: callable that builds a fresh network from ``param``; the result
            must expose Keras-style ``fit`` and ``evaluate`` methods.
        param: dict of settings passed to ``model``; ``param['epochs']`` and
            ``param['batch_size']`` are also used for fitting.
        context_labels: optional list of context column names. When given, the
            context columns are passed to the network as a third input.
        n_splits: number of folds (default 2).

    Returns:
        The element-wise average, over all folds, of what ``evaluate`` returns.
    """
    # Avoid a shared mutable default; None and [] both mean "no context".
    context_labels = list(context_labels) if context_labels else []
    x_labels = ['user', 'item']
    y_labels = 'rating'

    def build_inputs(frame):
        # One input per id column, plus the context frame when available.
        # The list is flat, like the inputs used in the grid-search cells;
        # the previous code wrapped it in an extra list by accident.
        inputs = [frame[label] for label in x_labels]
        if context_labels:
            inputs.append(frame[context_labels])
        return inputs

    fold_results = []
    folds = kfold_split(df, x_labels + context_labels, y_labels, n_splits)
    for x_train, y_train, x_test, y_test in folds:
        net = model(param)  # fresh, untrained network for every fold
        net.fit(build_inputs(x_train), y_train, epochs=param['epochs'],
                batch_size=param['batch_size'], verbose=False)
        fold_results.append(net.evaluate(build_inputs(x_test), y_test, batch_size=128))

    # Average each reported value across the folds.
    return np.sum(fold_results, axis=0) / len(fold_results)


# Fixed settings used for the cross-validated run below.
param = {
    'n_users': n_users,
    'n_items': n_items,
    'n_contexts': n_contexts,
    'learn_rate': 0.001,
    'batch_size': 1024,
    'epochs': 1
}

# kfold_train(rs_models.ECAM_NeuMF, param, context_labels=context_labels, n_splits=4)
# A Keras-style `evaluate` reports the loss first, followed by the compiled metrics,
# so the first averaged value is a loss, not a standard deviation.
# NOTE(review): unpacking into three names assumes the model is compiled with
# exactly two metrics, accuracy then AUC -- confirm in rs_models.
loss, accuracy, auc = kfold_train(rs_models.NeuMF, param)


