In [37]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Input, Embedding, Flatten, Concatenate
from keras.optimizers import Adam
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold, cross_validate, train_test_split
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.metrics import accuracy_score

# Display configuration: render up to 1000 columns when showing wide frames.
pd.set_option("display.max_columns", 1000)

# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation notices coming from
# the libraries imported above.
import warnings
warnings.simplefilter("ignore")

In [33]:
# Load the ratings table from disk.
df = pd.read_csv('MDF_final.csv')

# Columns that are cast to int before being fed to the network.
flag_columns = 'music_active speaker_on headset_connected connected is_weekend is_holiday'.split()

# Features are every column from position 3 onward.
# NOTE(review): assumes the first three columns are user, item and rating, so
# the target is not among the features — TODO confirm against the CSV header.
# `astype` returns a new frame, so the casts are not written into a slice of
# `df` (per-column assignment on the slice raises SettingWithCopyWarning).
x = df.iloc[:, 3:].astype({col: int for col in flag_columns})
y = df['rating']
input_dim = len(x.columns)

# Bracket access avoids clashes between column names and DataFrame attributes.
n_users = df['user'].nunique()
n_items = df['item'].nunique()

# Share of rows whose rating equals 1, as a percentage of all rows.
positive_share = (df['rating'] == 1).sum() * 100 / len(df)

print(f"rating with value 1: {positive_share} %")
print(f"number of users: {n_users} \t number of items: {n_items} \t number of rating: {len(x)}")

rating with value 1: 68.37503224142378 %
number of users: 30 	 number of items: 338 	 number of rating: 77540


In [35]:
def mobile_model(learn_rate=0.001):
    """Build and compile the feed-forward binary classifier.

    The network stacks three fully connected ReLU layers (200, 100 and 50
    units) followed by a single sigmoid unit, and is compiled with the
    binary cross-entropy loss. The input width is read from the
    module-level ``input_dim``.

    Args:
        learn_rate: Learning rate handed to the Adam optimizer. Defaults to
            0.001 so the builder can also be called without arguments.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    # Only the first layer declares the input width; the following layers
    # take their input shape from the layer before them.
    model.add(Dense(200, input_dim=input_dim, activation='relu'))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(50, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # NOTE(review): `lr` is kept as in the original call; newer Keras releases
    # name this argument `learning_rate` — switch when the environment is upgraded.
    model.compile(loss='binary_crossentropy', optimizer=Adam(lr=learn_rate))
    return model

In [52]:
# Wrap the Keras builder in a scikit-learn compatible estimator.
ff_net = KerasClassifier(build_fn=mobile_model, verbose=False)

# define hyperparameters to tune
learn_rate = [0.001]
batch_size = [128]
epochs = [10]

# Make a dictionary of the grid search parameters
param_grid = dict(learn_rate=learn_rate, batch_size=batch_size, epochs=epochs)

# Shuffled 2-fold split with a fixed seed so the folds are the same on re-run.
# NOTE(review): the class balance printed earlier is roughly 68/32 and
# StratifiedKFold is imported but unused — consider whether stratified folds
# were intended.
cv_splitter = KFold(shuffle=True, n_splits=2, random_state=42)

# create and fit gridsearch
# refit=False: with two scorers no single "best" model is retrained; only the
# per-candidate scores in `cv_results_` are read below.
grid = GridSearchCV(
    estimator=ff_net,
    scoring=['accuracy', 'roc_auc'],
    refit=False,
    param_grid=param_grid,
    cv=cv_splitter,
    verbose=True,
)
grid_results = grid.fit(x, y)

mean_accuracy = grid_results.cv_results_['mean_test_accuracy']
mean_auc = grid_results.cv_results_['mean_test_roc_auc']
params = grid_results.cv_results_['params']

# The loop variable gets its own name so the `params` list is not overwritten
# by the last candidate's dict once the loop finishes.
for accuracy, auc, candidate in zip(mean_accuracy, mean_auc, params):
    print(f'ACCURACY = {accuracy} \t AUC = {auc} \t PARAMS = {candidate}')

Fitting 2 folds for each of 1 candidates, totalling 2 fits
ACCURACY = 0.902656693319577 	 AUC = 0.9665578599607316 	 PARAMS = {'batch_size': 128, 'epochs': 10, 'learn_rate': 0.001}
