In [None]:
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from keras.utils import plot_model
from keras.models import Sequential
from keras.layers import Dense, Input, Embedding, Flatten, Concatenate, Lambda
from keras.optimizers import Adam

from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold, cross_validate, train_test_split
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.model_selection import ParameterGrid

import rs_models
from kerashypetune import KerasGridSearch

from implicit.evaluation import train_test_split as implicit_train_test_split
from implicit.als import AlternatingLeastSquares
from scipy.sparse import coo_matrix, csr_matrix

# Allow pandas to show up to 1000 columns when rendering wide frames.
pd.options.display.max_columns = 1000
import warnings
# Suppress every warning for the rest of the session. This also hides
# deprecation and convergence warnings raised by the libraries imported above.
warnings.filterwarnings("ignore")

In [None]:
# Main ratings table: read it, discard the 'time' column, remove exact
# duplicate rows and renumber the index from 0.
# Alternative input kept for reference:
#   'Datasets/MDF_social/social_datasets/MDF_user2.csv'
df = pd.read_csv('Datasets/MDF_social/MDF_with_social_features.csv')

# Companion table read from the matrix-factorization CSV.
df_mf = pd.read_csv('Datasets/MDF_matrix_factorization.csv')

df = (
    df.drop(columns='time')
      .drop_duplicates()
      .reset_index(drop=True)
)

# Disabled experiments (originally placed between drop_duplicates and reset_index):
# df = df[df.item != 2]
# df = df.drop(['place_type_food_and_drink', 'place_type_health', 'place_type_home', 'place_type_lodging','place_type_outdoors', 'place_type_point_of_interest_establishment','place_type_public_transport_station', 'place_type_school','place_type_service', 'place_type_store', 'place_type_workplace'], axis = 1)

# Feature groups are selected purely by column position, so they silently
# shift if the CSV layout changes.
# NOTE(review): columns 0-2 are presumably user / item / rating - confirm.
all_columns = df.columns
context_labels = all_columns[3:66].tolist()
item_labels = all_columns[66:92].tolist()
user_labels = all_columns[92:106].tolist()
social_labels = all_columns[106:].tolist()

In [None]:
# Cardinalities of the dataset.
n_users = df['user'].nunique()
n_items = df['item'].nunique()
n_contexts = len(context_labels)

# Pre-compute the figures reported below so each print stays readable.
n_ratings = len(df)
positive_share = df.loc[df['rating'] == 1, 'rating'].count() * 100 / n_ratings
n_user_features = len(user_labels)
n_item_features = len(item_labels)
n_social_features = len(social_labels)

# Share of rows whose rating equals 1, followed by dataset and feature-group sizes.
print(f"rating with value 1: {positive_share} %")
print(f"users: {n_users} \t items: {n_items} \t rating: {n_ratings}")
print(f"user_features: {n_user_features} \t items_features: {n_item_features} \t social_features: {n_social_features} \t contexts_features: {n_contexts} \t ")

In [None]:
"""
param_grid = {
    'learn_rate': [0.0001, 0.001, 0.005, 0.01],
    'batch_size': [64, 128, 256],
    'epochs': [5, 10, 15, 20, 30], 
    'layers': [3],
    'neurons': [100]
}
"""

# Grid search over four feature-set combinations.
#
# For each combination a KerasClassifier wrapping rs_models.mobile_model is
# tuned with GridSearchCV, and the best mean cross-validated ROC AUC together
# with the winning parameters is appended to a text file (one line per
# feature set, in the order of `train_labels`).

results_path = 'grid_search_result.txt'

# Truncate the results file so every run starts from an empty report.
with open(results_path, 'w'):
    pass

# Search space (currently a single configuration).
# NOTE(review): 'learn_rate', 'layers' and 'neurons' are presumably arguments
# of rs_models.mobile_model - confirm against that module.
param_grid = {
    'learn_rate': [0.01],
    'batch_size': [128],
    'epochs': [20], 
    'layers': [3],
    'neurons': [100]
}


# Feature sets to compare: item+user features alone, then with social
# features, with context features, and with both.
train_labels = [item_labels+user_labels, 
                item_labels+user_labels+social_labels, 
                item_labels+user_labels+context_labels,
                item_labels+user_labels+context_labels+social_labels]

# The target is the same for every feature set, so select it once.
y = df['rating']

for idx, x_labels in enumerate(train_labels):
    print('-'*10 + f" Starting grid search {idx} " + '-'*10)
    x = df[x_labels]     # restrict the inputs to the current feature set
    ff_net = KerasClassifier(build_fn=rs_models.mobile_model, verbose=False)
    # Score every candidate on accuracy and ROC AUC with a shuffled 2-fold
    # split (fixed seed); refit on ROC AUC so best_score_ / best_params_
    # refer to that metric.
    grid = GridSearchCV(estimator=ff_net, scoring=['accuracy', 'roc_auc'], refit='roc_auc', param_grid=param_grid, 
                        cv=KFold(shuffle=True, n_splits=2, random_state=42), verbose=True)
    grid_results = grid.fit(x, y)

    # Per-candidate results, kept as notebook-level names for later inspection.
    mean_accuracy = grid_results.cv_results_['mean_test_accuracy']
    mean_auc = grid_results.cv_results_['mean_test_roc_auc']
    params = grid_results.cv_results_['params']

    # Append through a context manager so the handle is flushed and closed
    # on every iteration instead of being leaked.
    with open(results_path, "a") as results_file:
        print(f'best AUC: {grid_results.best_score_} using {grid_results.best_params_}', file=results_file)

print("Done!")