# CARS recommender system
Implementation of the deep NN model described in the paper "Context-Aware Recommendations Based on Deep
Learning Frameworks".
https://dl.acm.org/doi/10.1145/3386243

Datasets:
- Frappe
- Yelp


## Import

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split # to split dataset in two parts
from sklearn.model_selection import KFold # to split dataset using  k-fold cross validation
from sklearn.metrics import * # evaluation metrics
import tensorflow as tf
from tensorflow import keras
from keras.layers import Dense, BatchNormalization, Dropout, Input, Embedding, Flatten, Concatenate
from keras.optimizers import Adam
import matplotlib
import matplotlib.pyplot as plt # for creating chart
import requests # for downloading the dataset
from collections import deque # queue data structure
from scipy.cluster.hierarchy import * # for hierarchical clustering
from sklearn.cluster import AgglomerativeClustering
import itertools
from sklearn.decomposition import PCA
from surprise import Reader, Dataset, SVD
from surprise.model_selection.validation import cross_validate
import os.path



## Some functions



In [None]:
def plot_loss(history, ymin=0, ymax=1):
  """Plot the per-epoch training loss recorded in a ``model.fit`` history.

  Parameters:
      history: object returned by ``model.fit``; its ``history['loss']``
          sequence is drawn.
      ymin: lower limit of the y axis.
      ymax: upper limit of the y axis.
  """
  losses = history.history['loss']
  plt.plot(losses, label='loss')

  # Decorate the axes the curve was just drawn on.
  axes = plt.gca()
  axes.set_ylim([ymin, ymax])
  axes.set_xlabel('Epoch')
  axes.set_ylabel('Error')
  axes.legend()
  axes.grid(True)

def plot_predictions(test_y, pred_y, lims=(0, 5)):
  """Scatter predicted values against true values on square axes.

  A diagonal line from (lims[0], lims[0]) to (lims[1], lims[1]) is drawn as
  the reference for a perfect prediction.

  Parameters:
      test_y: true target values (x axis).
      pred_y: predicted values (y axis).
      lims: (low, high) range used for both axes and for the diagonal;
          defaults to the previously hard-coded (0, 5).
  """
  # Equal aspect ratio so the diagonal sits at 45 degrees.
  plt.axes(aspect='equal')
  plt.scatter(test_y, pred_y)
  plt.xlabel('True Values')
  plt.ylabel('Predictions')
  plt.xlim(lims)
  plt.ylim(lims)
  plt.plot(lims, lims)

def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    The result is computed from exp(-|x|), which never overflows, so inputs
    with a large magnitude do not raise overflow warnings.

    Parameters:
        x: scalar, sequence or numpy array of real numbers.

    Returns:
        A numpy scalar for scalar input, otherwise an array with the shape
        of ``x``.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype(float)

    # z lies in (0, 1], so both quotients below are well defined.
    z = np.exp(-np.abs(x))
    result = np.where(x >= 0, 1 / (1 + z), z / (1 + z))

    # Indexing with () turns a 0-d array back into a scalar and leaves
    # arrays of higher rank untouched.
    return result[()]

# Shared splitter for k-fold cross validation: 2 shuffled folds with a fixed seed
kf = KFold(
    n_splits=2,
    shuffle=True,
    random_state=42,
)

def kfold_train(model, epochs, batch_size, verbose, df, x_labels, y_label,
                kf, using_context=False, context_labels=None):
    '''
    Train and evaluate a model using K-fold CV

    Every fold starts from the weights the model had when this function was
    called, so a fold is never evaluated on rows the model was already
    trained on in an earlier fold. Only the weights are restored; the
    optimizer state is left untouched. After the call the model holds the
    weights learned on the last fold.

    Parameters:
        model: the model to be trained (must provide fit, predict,
            get_weights and set_weights)
        epochs: training epochs for each fold
        batch_size: batch size for each fold
        verbose: show training batch and loss
        df: the dataframe on which the model will be trained
        x_labels: features labels; must include the 'user' and 'item' columns
        y_label: desired output labels
        kf: sklearn kfold object
        using_context: if the model incorporates context
        context_labels: contextual features labels (required when
            using_context is True)

    Returns:
        (mean RMSE, mean MAE) computed over the folds

    Raises:
        ValueError: if using_context is True but context_labels is None
    '''
    if using_context and context_labels is None:
        raise ValueError('context_labels must be provided when using_context=True')

    def build_inputs(rows):
        # Assemble the model inputs in the order [user, item(, context)].
        features = rows[x_labels]
        inputs = [features['user'], features['item']]
        if using_context:
            inputs.append(rows[context_labels])
        return inputs

    rmse = np.empty(kf.n_splits)
    mae = np.empty(kf.n_splits)
    initial_weights = model.get_weights()

    for idx, (train_index, test_index) in enumerate(kf.split(df)):
        # Restore the starting weights so that folds stay independent.
        model.set_weights(initial_weights)

        # kf.split yields positions, not index labels, hence iloc.
        train_rows = df.iloc[train_index]
        test_rows = df.iloc[test_index]

        print(f'Training on fold {idx}...')
        model.fit(build_inputs(train_rows), train_rows[y_label],
                  epochs=epochs, batch_size=batch_size, verbose=verbose)

        print(f'Testing on fold {idx}...')
        test_y = test_rows[y_label]
        pred_y = model.predict(build_inputs(test_rows)).flatten()

        # Take the root explicitly instead of relying on the `squared`
        # keyword of mean_squared_error.
        rmse[idx] = np.sqrt(mean_squared_error(test_y, pred_y))
        mae[idx] = mean_absolute_error(test_y, pred_y)
        print(f'RMSE = {rmse[idx]}    MAE = {mae[idx]}')

    return np.mean(rmse), np.mean(mae)

## Load frappe dataset

In [None]:
# Read the frappe CSV (comma is the default field separator of read_csv)
df = pd.read_csv('final datasets/frappe_final.csv')

In [None]:
# Column layout: the first two columns are the features, the third is the
# target and every remaining column is a context feature.
all_columns = df.columns
x_labels = all_columns[:2].tolist()
y_label = all_columns[2]
context_labels = all_columns[3:].tolist()

In [None]:
# Sizes of the user, item and context-feature sets
n_users = len(df['user'].unique())
n_items = len(df['item'].unique())
n_context = len(context_labels)

f'Number of users: {n_users}      Number of apps: {n_items}     Number of context features: {n_context}'

## Load Yelp dataset

In [None]:
df = pd.read_csv('final datasets/yelp_final.csv', sep=",")
# Work on a random subsample of at most 100000 rows. The sample size is
# capped at the dataset size so a smaller file does not raise, and the seed
# is fixed (same value as the KFold splitter) so runs are reproducible.
df = df.sample(n=min(100000, len(df)), random_state=42)
# Columns 12..33 are converted to uint8 (one-hot encoded columns, per the
# original author's note).
df[df.columns[12:34]] = df[df.columns[12:34]].astype('uint8')
# Restore a 0..n-1 index after sampling.
df = df.reset_index(drop=True)

In [None]:
# Column layout: the first two columns are the features, the third is the
# target and every remaining column is a context feature.
all_columns = df.columns
x_labels = all_columns[:2].tolist()
y_label = all_columns[2]
context_labels = all_columns[3:].tolist()

In [None]:
# Re-index user and item ids to contiguous integers starting at 0.
# (Original author's note, translated: "rescale user and item indices,
# otherwise it does not work".) The embedding layers built later in this file
# have n_users + 1 and n_items + 1 rows, so every id has to be smaller than
# those sizes.
df['user'] = pd.factorize(df['user'])[0]
df['item'] = pd.factorize(df['item'])[0]

# count number of unique users and items
n_users, n_items = len(df.user.unique()), len(df.item.unique())
n_context = len(context_labels)

f'Number of users: {n_users}      Number of business: {n_items}     Number of context features: {n_context}'

### Latent context extraction


#### With autoencoder

In [None]:
# Hold out 20% of the context rows to validate the autoencoder
train_context_AE, test_context_AE = train_test_split(
    df[context_labels],
    test_size=0.2,
)

In [None]:
# Size of the encoded (latent) context representation
n_latent_context = 11

# Input layer: one value per context feature
# NOTE(review): the name `input` shadows the Python builtin of the same name
input = Input(shape=(n_context,))
# Encoder: dense sigmoid layer mapping the context to n_latent_context units
encoded = Dense(n_latent_context, activation='sigmoid')(input)
# Decoder: dense sigmoid layer mapping the code back to n_context units
decoded = Dense(n_context, activation='sigmoid')(encoded)

# Full autoencoder model (input -> reconstruction)
autoencoder = keras.Model(input, decoded)

# Encoder-only model (input -> encoded representation)
encoder = keras.Model(input, encoded)

# Decoder-only model (encoded representation -> reconstruction)
encoded_input = keras.Input(shape=(n_latent_context,))   # takes the encoded context as input
decoder_layer = autoencoder.layers[-1]   # last layer of the autoencoder model
decoder = keras.Model(encoded_input, decoder_layer(encoded_input))

# Configure training: Adam optimizer with binary cross-entropy loss
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

In [None]:
# Fit the autoencoder to reproduce its own input (the context features);
# the held-out context rows are used as validation data.
history = autoencoder.fit(
    x=train_context_AE,
    y=train_context_AE,
    validation_data=(test_context_AE, test_context_AE),
    batch_size=128,
    epochs=50,
    verbose=False,
)

In [None]:
# Autoencoder training loss, y axis limited to [0, 0.5]
plot_loss(history, 0, 0.5)

In [None]:
# Weights connecting the input layer to the hidden (encoding) layer.
# get_weights() returns [weights, biases]; element 0 keeps only the weights.
encoder_layer_params = autoencoder.layers[1].get_weights()
weight_matrix = np.asarray(encoder_layer_params[0])

def get_latent_context_AE(include_bias=False):
    """Project every context row of ``df`` onto the autoencoder's hidden layer.

    The context columns are multiplied by the input-to-hidden weight matrix
    in a single matrix product and passed through the sigmoid activation.

    Parameters:
        include_bias: when True, the hidden layer's bias vector is added
            before the activation. The default (False) keeps the historical
            behaviour of using the weights only.
            NOTE(review): the encoder layer is a plain ``Dense`` layer, which
            also has a bias term; without it the values returned here differ
            from the encoder's own output - confirm which one is intended.

    Returns:
        (df_latent_context, latent_context_labels): a DataFrame with one
        ``latent_<i>`` column per latent unit and one row per row of ``df``,
        plus the list of those column names.
    """
    latent_context_labels = [f"latent_{x}" for x in range(n_latent_context)]

    # One matrix product for all rows instead of a Python-level loop.
    context = df.loc[:, context_labels].values
    projection = np.asarray(context @ weight_matrix, dtype=np.float64)
    if include_bias:
        projection = projection + autoencoder.layers[1].get_weights()[1]

    # apply activation function
    latent_context = sigmoid(projection)
    df_latent_context = pd.DataFrame(latent_context, columns=latent_context_labels)

    return df_latent_context, latent_context_labels

#### With PCA

In [None]:
def get_latent_context_PCA(n_latent_context):
    """Reduce the context columns of ``df`` to ``n_latent_context`` PCA components.

    Parameters:
        n_latent_context: number of principal components to keep.

    Returns:
        (df_latent_context, latent_context_labels): a DataFrame with one
        ``latent_<i>`` column per component, plus the list of column names.
    """
    context = df.loc[:, context_labels]

    # Fit on the context columns, then project the same rows.
    reducer = PCA(n_components=n_latent_context).fit(context)
    projected = reducer.transform(context)

    column_names = [f"latent_{x}" for x in range(n_latent_context)]
    return pd.DataFrame(projected, columns=column_names), column_names

#### Run selected method

In [None]:
# Choose how the latent context is extracted: 'AE' (autoencoder) or 'PCA'
method = 'AE'

if method == 'AE':
    df_latent_context, latent_context_labels = get_latent_context_AE()
elif method == 'PCA':
    df_latent_context, latent_context_labels = get_latent_context_PCA(n_latent_context)

# Append the latent columns to the dataframe for the two supported methods
if method in ('PCA', 'AE'):
    df = pd.concat([df, df_latent_context], axis=1)
df

### Hierarchical context extraction

In [None]:
def is_leaf(node):
    """Return True when ``node`` has neither a left nor a right child."""
    return node.left is None and node.right is None


def root_leaf_paths(node, path, hierarchy):
    """Collect the id path from ``node`` down to every leaf of a binary tree.

    The tree is walked with an explicit stack instead of recursion, so deep
    trees do not hit Python's recursion limit. Leaves are visited left to
    right.

    Parameters:
        node: root of the (sub)tree; nodes expose ``id``, ``left`` and
            ``right``. Nothing happens when it is None.
        path: mutable sequence (deque or list) used as the working path. Its
            current content is kept as a prefix of every collected path and
            it is restored to its original content before returning.
        hierarchy: list that receives one ``list`` of node ids per leaf.
    """
    if node is None:
        return

    base_len = len(path)
    # Each entry is (node, depth below the starting node).
    pending = [(node, 0)]

    while pending:
        current, depth = pending.pop()

        # Drop ids of the previously visited branch that are not ancestors
        # of the current node.
        while len(path) > base_len + depth:
            path.pop()
        path.append(current.id)

        if is_leaf(current):
            hierarchy.append(list(path))  # store a copy of the complete path

        # Push the right child first so the left subtree is processed first.
        for child in (current.right, current.left):
            if child is not None:
                pending.append((child, depth + 1))

    # Leave the caller's path exactly as it was received.
    while len(path) > base_len:
        path.pop()

def hierarchical_clustering(df):
    """Return, for every data point, its root-to-leaf path in a Ward dendrogram.

    Parameters:
        df: observations passed to ``linkage`` (one row per data point).

    Returns:
        A list of id paths, one per leaf. Shorter paths are padded by
        repeating their last id so that all paths have the same length, and
        the list is ordered by that last id (the data point id).
    """
    # Build the linkage matrix and convert it to a tree of cluster nodes.
    root = to_tree(linkage(df, 'ward'))

    paths = []
    root_leaf_paths(root, deque(), paths)

    # Pad every path to the depth of the deepest leaf.
    depth = max(len(p) for p in paths)
    padded = [p + [p[-1]] * (depth - len(p)) for p in paths]

    return sorted(padded, key=lambda p: p[-1])

In [None]:
# hier_context = hierarchical_clustering(df.loc[:30, latent_context_labels])
# hier_context

## Models

In [None]:
# Dictionary that contains the evaluation metrics [RMSE, MAE] of each model
models_eval_metrics = {}

# Embedding vector lengths: one latent factor per 1000 users/items, but never
# fewer than 1 - the integer division alone yields 0 for fewer than 1000
# users or items, and a zero-width embedding carries no information.
n_latent_factors_user = max(1, n_users // 1000)
n_latent_factors_item = max(1, n_items // 1000)

# latent factors for matrix factorization
n_latent_factors_mf = max(1, n_items // 1000)

f'latent factor user: {n_latent_factors_user}  latent factor item: {n_latent_factors_item}  latent factor MF: {n_latent_factors_mf}'

### Matrix factorization
The famous SVD algorithm, as popularized by Simon Funk during the Netflix Prize

In [None]:
# Load the (user, item, rating) columns into surprise
reader = Reader()
data = Dataset.load_from_df(df[[*x_labels, y_label]], reader)

# Matrix-factorization model evaluated with 2-fold cross validation
svd = SVD()
result = cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=2, verbose=True)

# Average the per-fold scores and record them
rmse, mae = (np.mean(result[key]) for key in ('test_rmse', 'test_mae'))
models_eval_metrics['MF'] = [rmse, mae]

### NCF
Multi-layer perceptron without context features

In [None]:
def base_ncf(n_users, n_items, n_latent_factors_user, n_latent_factors_item):
    """Build the NCF model without context features.

    User and item ids are embedded, flattened, concatenated and passed
    through dense layers of 8, 4 and 2 units, followed by a single ReLU
    output unit.

    NOTE(review): no activation is passed to the three hidden Dense layers,
    so they use the Keras default - confirm this is intended.

    Parameters:
        n_users: number of users; the user embedding has n_users + 1 rows.
        n_items: number of items; the item embedding has n_items + 1 rows.
        n_latent_factors_user: width of the user embedding.
        n_latent_factors_item: width of the item embedding.

    Returns:
        An uncompiled ``keras.Model`` taking ``[user, item]`` as inputs.
    """
    # One id per sample for each input
    item_input = Input(shape=[1], name='item')
    user_input = Input(shape=[1], name='user')

    # Item id -> flat embedding vector
    item_vec = Flatten(name='flatten_item')(
        Embedding(n_items + 1, n_latent_factors_item, name='item_embedding')(item_input)
    )

    # User id -> flat embedding vector
    user_vec = Flatten(name='flatten_user')(
        Embedding(n_users + 1, n_latent_factors_user, name='user_embedding')(user_input)
    )

    # Join item and user vectors and run them through the dense stack
    hidden = Concatenate(name='user_item')([item_vec, user_vec])
    hidden = Dense(8, name='fully_connected_1')(hidden)
    hidden = Dense(4, name='fully_connected_2')(hidden)
    hidden = Dense(2, name='fully_connected_3')(hidden)

    # Single-unit prediction
    prediction = Dense(1, activation='relu', name='Activation')(hidden)

    return keras.Model([user_input, item_input], prediction)

In [None]:
# Build and compile the context-free NCF model.
# NOTE(review): the name `ncf` is rebound by the `def ncf(...)` further down
# in this file, so this cell has to run before that definition.
ncf = base_ncf(n_users, n_items, n_latent_factors_user, n_latent_factors_item)
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
opt = keras.optimizers.Adam(learning_rate=0.0005)
ncf.compile(optimizer=opt, loss='mean_absolute_error', metrics=['mae', 'mse'])

# tf.keras.utils.plot_model(ncf)

In [None]:
# Cross-validate NCF (15 epochs, batch size 128, no context) and record its scores
rmse, mae = kfold_train(
    model=ncf, epochs=15, batch_size=128, verbose=False,
    df=df, x_labels=x_labels, y_label=y_label, kf=kf,
    using_context=False,
)
models_eval_metrics['NCF'] = [rmse, mae]
f'k-fold RMSE = {rmse}     k-fold MAE = {mae}'

### NeuMF
Multi-layer perceptron + dot product without context features

In [None]:
def base_neumf(n_users, n_items, n_latent_factors_user, n_latent_factors_item, n_latent_factors_mf):
    """Build the NeuMF model without context features.

    Two branches share the user and item inputs: a matrix-factorization
    branch (dot product of user and item embeddings) and an MLP branch
    (concatenated embeddings through dense layers of 8, 4 and 2 units). Both
    outputs are concatenated and fed to a single ReLU output unit.

    NOTE(review): no activation is passed to the three hidden Dense layers,
    so they use the Keras default - confirm this is intended.

    Parameters:
        n_users: number of users; user embeddings have n_users + 1 rows.
        n_items: number of items; item embeddings have n_items + 1 rows.
        n_latent_factors_user: width of the MLP user embedding.
        n_latent_factors_item: width of the MLP item embedding.
        n_latent_factors_mf: width of both MF embeddings.

    Returns:
        An uncompiled ``keras.Model`` taking ``[user, item]`` as inputs.
    """
    # One id per sample for each input
    item_input = Input(shape=[1], name='item')
    user_input = Input(shape=[1], name='user')

    # --- MF branch: embeddings of equal width combined by a dot product ---
    mf_item_vec = Flatten(name='flatten_item_MF')(
        Embedding(n_items + 1, n_latent_factors_mf, name='item_embedding_MF')(item_input)
    )
    mf_user_vec = Flatten(name='flatten_user_MF')(
        Embedding(n_users + 1, n_latent_factors_mf, name='user_embedding_MF')(user_input)
    )
    mf_out = tf.keras.layers.Dot(axes=1)([mf_user_vec, mf_item_vec])

    # --- MLP branch: separate embeddings, concatenated and densely mixed ---
    mlp_item_vec = Flatten(name='flatten_item_MLP')(
        Embedding(n_items + 1, n_latent_factors_item, name='item_embedding_MLP')(item_input)
    )
    mlp_user_vec = Flatten(name='flatten_user_MLP')(
        Embedding(n_users + 1, n_latent_factors_user, name='user_embedding_MLP')(user_input)
    )
    mlp_out = Concatenate(name='user_item_context_MLP')([mlp_item_vec, mlp_user_vec])
    mlp_out = Dense(8, name='fully_connected_1')(mlp_out)
    mlp_out = Dense(4, name='fully_connected_2')(mlp_out)
    mlp_out = Dense(2, name='fully_connected_3')(mlp_out)

    # Merge both branches and predict with a single unit
    merged = Concatenate(name='MF_MLP')([mlp_out, mf_out])
    prediction = Dense(1, activation='relu', name='Activation')(merged)

    return keras.Model([user_input, item_input], prediction)

In [None]:
# Build and compile the context-free NeuMF model.
# NOTE(review): the name `neumf` is rebound by the `def neumf(...)` further
# down in this file, so this cell has to run before that definition.
neumf = base_neumf(n_users, n_items, n_latent_factors_user, n_latent_factors_item, n_latent_factors_mf)
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
opt = keras.optimizers.Adam(learning_rate=0.0005)
neumf.compile(optimizer=opt, loss='mean_absolute_error', metrics=['mae', 'mse'])

In [None]:
# Cross-validate NeuMF (15 epochs, batch size 128, no context) and record its scores
rmse, mae = kfold_train(
    model=neumf, epochs=15, batch_size=128, verbose=False,
    df=df, x_labels=x_labels, y_label=y_label, kf=kf,
    using_context=False,
)
models_eval_metrics['NEUMF'] = [rmse, mae]
f'k-fold RMSE = {rmse}     k-fold MAE = {mae}'

### ECAM NCF
Multi-layer perceptron with explicit context

In [None]:
def ncf(n_users, n_items, n_context, n_latent_factors_user, n_latent_factors_item):
    """Build the context-aware NCF model.

    User and item ids are embedded and flattened, concatenated with the
    context vector and passed through dense layers of 8, 4 and 2 units,
    followed by a single ReLU output unit.

    NOTE(review): no activation is passed to the three hidden Dense layers,
    so they use the Keras default - confirm this is intended.

    Parameters:
        n_users: number of users; the user embedding has n_users + 1 rows.
        n_items: number of items; the item embedding has n_items + 1 rows.
        n_context: length of the context vector fed to the model.
        n_latent_factors_user: width of the user embedding.
        n_latent_factors_item: width of the item embedding.

    Returns:
        An uncompiled ``keras.Model`` taking ``[user, item, context]`` as inputs.
    """
    # One id per sample for user/item, plus the context vector
    item_input = Input(shape=[1], name='item')
    user_input = Input(shape=[1], name='user')
    context_input = Input(shape=(n_context, ), name='context')

    # Item id -> flat embedding vector
    item_vec = Flatten(name='flatten_item')(
        Embedding(n_items + 1, n_latent_factors_item, name='item_embedding')(item_input)
    )

    # User id -> flat embedding vector
    user_vec = Flatten(name='flatten_user')(
        Embedding(n_users + 1, n_latent_factors_user, name='user_embedding')(user_input)
    )

    # Join item, user and context vectors and run them through the dense stack
    hidden = Concatenate(name='user_item')([item_vec, user_vec, context_input])
    hidden = Dense(8, name='fully_connected_1')(hidden)
    hidden = Dense(4, name='fully_connected_2')(hidden)
    hidden = Dense(2, name='fully_connected_3')(hidden)

    # Single-unit prediction
    prediction = Dense(1, activation='relu', name='Activation')(hidden)

    return keras.Model([user_input, item_input, context_input], prediction)

In [None]:
# Build and compile the NCF model fed with the explicit context features.
ecam_ncf = ncf(n_users, n_items, n_context, n_latent_factors_user, n_latent_factors_item)
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
opt = keras.optimizers.Adam(learning_rate=0.0005)
ecam_ncf.compile(optimizer=opt, loss='mean_absolute_error', metrics=['mae', 'mse'])

#ecam_ncf.summary()
#tf.keras.utils.plot_model(ecam_ncf)

In [None]:
# Cross-validate ECAM NCF on the explicit context columns and record its scores
rmse, mae = kfold_train(
    model=ecam_ncf, epochs=15, batch_size=128, verbose=False,
    df=df, x_labels=x_labels, y_label=y_label, kf=kf,
    using_context=True, context_labels=context_labels,
)
models_eval_metrics['ECAM NCF'] = [rmse, mae]
f'k-fold RMSE = {rmse}     k-fold MAE = {mae}'

### ECAM NeuMF
Multi-layer perceptron + dot product with explicit context

In [None]:
def neumf(n_users, n_items, n_context, n_latent_factors_user, n_latent_factors_item, n_latent_factors_mf):
    """Build the context-aware NeuMF model.

    Two branches share the user and item inputs: a matrix-factorization
    branch (dot product of user and item embeddings) and an MLP branch
    (embeddings concatenated with the context vector, then dense layers of
    8, 4 and 2 units). Both outputs are concatenated and fed to a single
    ReLU output unit.

    NOTE(review): no activation is passed to the three hidden Dense layers,
    so they use the Keras default - confirm this is intended.

    Parameters:
        n_users: number of users; user embeddings have n_users + 1 rows.
        n_items: number of items; item embeddings have n_items + 1 rows.
        n_context: length of the context vector fed to the model.
        n_latent_factors_user: width of the MLP user embedding.
        n_latent_factors_item: width of the MLP item embedding.
        n_latent_factors_mf: width of both MF embeddings.

    Returns:
        An uncompiled ``keras.Model`` taking ``[user, item, context]`` as inputs.
    """
    # One id per sample for user/item, plus the context vector
    item_input = Input(shape=[1], name='item')
    user_input = Input(shape=[1], name='user')
    context_input = Input(shape=(n_context, ), name='context')

    # --- MF branch: embeddings of equal width combined by a dot product ---
    mf_item_vec = Flatten(name='flatten_item_MF')(
        Embedding(n_items + 1, n_latent_factors_mf, name='item_embedding_MF')(item_input)
    )
    mf_user_vec = Flatten(name='flatten_user_MF')(
        Embedding(n_users + 1, n_latent_factors_mf, name='user_embedding_MF')(user_input)
    )
    mf_out = tf.keras.layers.Dot(axes=1)([mf_user_vec, mf_item_vec])

    # --- MLP branch: embeddings plus context, densely mixed ---
    mlp_item_vec = Flatten(name='flatten_item_MLP')(
        Embedding(n_items + 1, n_latent_factors_item, name='item_embedding_MLP')(item_input)
    )
    mlp_user_vec = Flatten(name='flatten_user_MLP')(
        Embedding(n_users + 1, n_latent_factors_user, name='user_embedding_MLP')(user_input)
    )
    mlp_out = Concatenate(name='user_item_context_MLP')([mlp_item_vec, mlp_user_vec, context_input])
    mlp_out = Dense(8, name='fully_connected_1')(mlp_out)
    mlp_out = Dense(4, name='fully_connected_2')(mlp_out)
    mlp_out = Dense(2, name='fully_connected_3')(mlp_out)

    # Merge both branches and predict with a single unit
    merged = Concatenate(name='MF_MLP')([mlp_out, mf_out])
    prediction = Dense(1, activation='relu', name='Activation')(merged)

    return keras.Model([user_input, item_input, context_input], prediction)

In [None]:
# Build and compile the NeuMF model fed with the explicit context features.
ecam_neumf = neumf(n_users, n_items, n_context, n_latent_factors_user, n_latent_factors_item, n_latent_factors_mf)
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
opt = keras.optimizers.Adam(learning_rate=0.0005)
ecam_neumf.compile(optimizer=opt, loss='mean_absolute_error', metrics=['mae', 'mse'])

#ecam_neumf.summary()
#tf.keras.utils.plot_model(ecam_neumf)

In [None]:
# Cross-validate ECAM NeuMF on the explicit context columns and record its scores
rmse, mae = kfold_train(
    model=ecam_neumf, epochs=15, batch_size=128, verbose=False,
    df=df, x_labels=x_labels, y_label=y_label, kf=kf,
    using_context=True, context_labels=context_labels,
)
models_eval_metrics['ECAM NEUMF'] = [rmse, mae]
f'k-fold RMSE = {rmse}     k-fold MAE = {mae}'

### UCAM NCF
Multi-layer perceptron with latent context

In [None]:
# Build and compile the NCF model fed with the latent context features.
ucam_ncf = ncf(n_users, n_items, n_latent_context, n_latent_factors_user, n_latent_factors_item)
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
# NOTE(review): every other model in this file is compiled with 0.0005;
# confirm that 0.005 is intentional here.
opt = keras.optimizers.Adam(learning_rate=0.005)
ucam_ncf.compile(optimizer=opt, loss='mean_absolute_error', metrics=['mae', 'mse'])

#ucam_ncf.summary()
#tf.keras.utils.plot_model(ucam_ncf)

In [None]:
# Cross-validate UCAM NCF on the latent context columns and record its scores
rmse, mae = kfold_train(
    model=ucam_ncf, epochs=15, batch_size=128, verbose=False,
    df=df, x_labels=x_labels, y_label=y_label, kf=kf,
    using_context=True, context_labels=latent_context_labels,
)
models_eval_metrics['UCAM NCF'] = [rmse, mae]
f'k-fold RMSE = {rmse}     k-fold MAE = {mae}'

### UCAM NeuMF
Multi-layer perceptron + dot product with latent context

In [None]:
# Build and compile the NeuMF model fed with the latent context features.
ucam_neumf = neumf(n_users, n_items, n_latent_context, n_latent_factors_user, n_latent_factors_item, n_latent_factors_mf)
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
opt = keras.optimizers.Adam(learning_rate=0.0005)
ucam_neumf.compile(optimizer=opt, loss='mean_absolute_error', metrics=['mae', 'mse'])

In [None]:
# Cross-validate UCAM NeuMF on the latent context columns and record its scores
rmse, mae = kfold_train(
    model=ucam_neumf, epochs=15, batch_size=128, verbose=False,
    df=df, x_labels=x_labels, y_label=y_label, kf=kf,
    using_context=True, context_labels=latent_context_labels,
)
models_eval_metrics['UCAM NEUMF'] = [rmse, mae]
f'k-fold RMSE = {rmse}     k-fold MAE = {mae}'

## Performance comparison

In [None]:
# Collect the recorded scores: each entry of models_eval_metrics is [RMSE, MAE]
models_name = list(models_eval_metrics)
n_models = len(models_name) # number of different models
rmse = [scores[0] for scores in models_eval_metrics.values()]
mae = [scores[1] for scores in models_eval_metrics.values()]

index = np.arange(n_models)
bar_width = 0.30

# Readable chart on dark editor. The style is selected before the figure is
# created, and by whichever name this matplotlib version provides (the
# seaborn styles were renamed with a 'seaborn-v0_8-' prefix).
for style_name in ('seaborn-v0_8-ticks', 'seaborn-ticks'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

plt.figure(figsize=(10,6))

# MAE bar (shifted right by one bar width so it sits next to the RMSE bar)
rect1 = plt.bar(index + bar_width, mae, bar_width,
                color='b',
                label='MAE')

# RMSE bar
rect2 = plt.bar(index, rmse, bar_width,
                color='#ff7b00',
                label='RMSE')

plt.xlabel('Models')
plt.ylabel('Scores')
plt.title('Prediction results')
plt.xticks(index + bar_width/2, models_name) # labels centred between the two bars
plt.legend()
plt.grid(True)
plt.savefig('prediction_results.png')
plt.show()

# Distinct loop names so the rmse/mae lists are not overwritten
for name, model_rmse, model_mae in zip(models_name, rmse, mae):
    print(f"Name: {name}      \t      RMSE: {model_rmse}      \t      MAE: {model_mae}")