# CARS recommender system
Implementation of the deep neural network (NN) model described in the paper
[*Context-Aware Recommendations Based on Deep Learning Frameworks*](https://dl.acm.org/doi/10.1145/3386243).

Datasets:
- frappe


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split # to split dataset
from sklearn.metrics import * # evaluation metrics
import tensorflow as tf
from tensorflow import keras
from keras.layers import Dense
from keras.layers import BatchNormalization
from keras.layers import Dropout
from keras.layers import Input
from keras.layers import Embedding
from keras.layers import Flatten
from keras.layers import Concatenate
from keras.optimizers import Adam
import matplotlib.pyplot as plt # for creating visualisations

In [None]:
# Read the frappe dataset; the file is tab-delimited despite the .csv suffix.
df = pd.read_csv(
    'frappe/frappe.csv',
    delimiter='\t',
)
df

## Dataset preprocessing

In [None]:
# Replace the raw usage counts in `cnt` with their base-10 logarithm.
df['cnt'] = np.log10(df['cnt'])

# delete columns that are not needed
df = df.drop(columns=['homework', 'cost'])

# Keep this expression last: a notebook cell only displays the value of its
# final expression, so a summary string placed mid-cell is silently discarded.
f"frequency range is {df['cnt'].min()} to {df['cnt'].max()}"

In [None]:
# Keep only rows whose city is not 0 and whose weather is not 'unknown',
# then renumber the remaining rows from 0.
df = df.loc[(df['city'] != 0) & (df['weather'] != 'unknown')].reset_index(drop=True)

# Re-label users and items as consecutive integer ids starting at 0
# (ids are handed out in order of first appearance).
df['user'] = pd.factorize(df['user'])[0]
df['item'] = pd.factorize(df['item'])[0]

df

In [None]:
# Context columns fed to the model; each one is re-encoded below as the
# integer code of its pandas category.
context_labels = ['daytime', 'weekday', 'isweekend', 'weather', 'country', 'city']
for label in context_labels:
    as_category = df[label].astype('category')
    df[label] = as_category.cat.codes.values

In [None]:
# train and test datasets (80% / 20%).
# A fixed seed keeps the split, and every metric computed from it, identical
# across kernel restarts; without it each run evaluates on different rows.
train_x, test_x = train_test_split(df, test_size=0.2, random_state=42)

# train and test context features (pop removes each column from train_x / test_x)
train_context = pd.concat([train_x.pop(x) for x in context_labels], axis=1)
test_context = pd.concat([test_x.pop(x) for x in context_labels], axis=1)

# train and test labels; after this, train_x / test_x hold only the remaining columns
train_y = train_x.pop('cnt')
test_y = test_x.pop('cnt')

f"train_x: {train_x.shape}   train_y: {train_y.shape}   train_context: {train_context.shape}    test_x: {test_x.shape}   test_y: {test_y.shape}     test_context:   {test_context.shape}"

In [None]:
def min_max_norm(df):
    """Rescale every column of ``df`` to the [0, 1] range.

    Each column is shifted by its minimum and divided by its range
    (max - min). A column whose values are all equal has a range of 0; it is
    mapped to 0 everywhere instead of producing NaN from a 0/0 division.

    Args:
        df: pandas DataFrame (or Series) of numeric values.

    Returns:
        A new object of the same shape holding the rescaled values.
    """
    lo = df.min()
    span = df.max() - lo
    # Swap a zero range for 1 so constant columns divide cleanly to 0.
    span = np.where(span == 0, 1, span)
    return (df - lo) / span

# Fit the min-max scaling on the training split only and reuse it for the test
# split, so the same raw context value maps to the same model input in both.
# (Scaling each split by its own min/max would shift the test features whenever
# the two splits do not share the same extremes.)
context_min = train_context.min()
# A column that is constant in the training split has range 0; use 1 to avoid 0/0.
context_range = (train_context.max() - context_min).replace(0, 1)

test_context = (test_context - context_min) / context_range
train_context = (train_context - context_min) / context_range
train_context

In [None]:
# Width of each user / item embedding vector.
n_latent_factors = 5

# Number of distinct users and items, and number of context columns.
n_users = len(df['user'].unique())
n_items = len(df['item'].unique())
n_context = len(context_labels)
f'Number of users: {n_users}      Number of apps: {n_items}     Number of context features: {n_context}'

## ECAM NCF

In [None]:
def ecam_ncf(hidden_activation='relu'):
    """Build the (uncompiled) ECAM NCF Keras model.

    The network embeds the user id and the item id, applies dropout to both
    embeddings, concatenates them with the context vector and passes the
    result through Dense layers of width 8, 4 and 2 (with batch normalisation
    after the first two) before a single-unit ReLU output.

    Reads the module-level ``n_users``, ``n_items``, ``n_context`` and
    ``n_latent_factors`` values at call time.

    Args:
        hidden_activation: Activation used by the three hidden Dense layers.
            Pass ``None`` for purely linear hidden layers.

    Returns:
        A ``keras.Model`` taking ``[user, item, context]`` inputs (in that
        order) and producing one prediction per row.
    """
    # inputs
    item_input = Input(shape=[1], name='item')
    user_input = Input(shape=[1], name='user')
    context_input = Input(shape=(n_context,), name='context')

    # Item embedding
    # NOTE(review): the table has n_items + 1 rows; if ids run 0..n_items-1 the
    # extra row is simply unused - confirm against the id encoding upstream.
    item_embedding_mlp = Embedding(n_items + 1, n_latent_factors, name='item_embedding')(item_input)
    item_vec_mlp = Flatten(name='flatten_item')(item_embedding_mlp)
    item_vec_mlp = Dropout(0.2)(item_vec_mlp)

    # User embedding
    user_embedding_mlp = Embedding(n_users + 1, n_latent_factors, name='user_embedding')(user_input)
    user_vec_mlp = Flatten(name='flatten_user')(user_embedding_mlp)
    user_vec_mlp = Dropout(0.2)(user_vec_mlp)

    # Concat user embedding, item embedding and context vector
    concat = Concatenate(name='user_item')([item_vec_mlp, user_vec_mlp, context_input])

    # dense layers
    # A non-linearity is needed between the layers: stacked Dense and
    # BatchNormalization layers without one compose into a single affine map,
    # so the extra depth would add no modelling capacity.
    dense = Dense(8, activation=hidden_activation, name='fully_connected_1')(concat)
    batch_1 = BatchNormalization()(dense)
    dense_2 = Dense(4, activation=hidden_activation, name='fully_connected_2')(batch_1)
    batch_2 = BatchNormalization()(dense_2)
    dense_3 = Dense(2, activation=hidden_activation, name='fully_connected_3')(batch_2)

    # Output: ReLU keeps the prediction non-negative
    pred_mlp = Dense(1, activation='relu', name='Activation')(dense_3)

    # make and build the model
    return keras.Model([user_input, item_input, context_input], pred_mlp)

In [None]:
# `ecam_ncf` is rebound below from the builder function to the built model.
# Stash the builder first so this cell can be re-run: without it, a second run
# would try to call the model object instead of the builder.
if not isinstance(ecam_ncf, keras.Model):
    build_ecam_ncf = ecam_ncf
ecam_ncf = build_ecam_ncf()

# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by current Keras releases.
opt = keras.optimizers.Adam(learning_rate=0.005)
ecam_ncf.compile(optimizer=opt, loss='mean_absolute_error', metrics=['mae', 'mse'])

ecam_ncf.summary()
tf.keras.utils.plot_model(ecam_ncf)

In [None]:
# Train on the training split; inputs are passed in the order user, item, context.
history = ecam_ncf.fit(
    x=[train_x.user, train_x.item, train_context],
    y=train_y,
    batch_size=64,
    epochs=15,
    verbose=1,
)

In [None]:
def plot_loss(history):
    """Draw the per-epoch training loss on the current matplotlib axes.

    Args:
        history: Object exposing a ``history`` dict with a ``'loss'`` entry,
            such as the value returned by the model's ``fit`` call.
    """
    axes = plt.gca()
    axes.plot(history.history['loss'], label='loss')
    # Fixed y-range of 0..1 for the loss curve.
    axes.set_ylim([0, 1])
    axes.set_xlabel('Epoch')
    axes.set_ylabel('Error')
    axes.legend()
    axes.grid(True)


plot_loss(history)

In [None]:
# Predict on the test split and flatten the model output to a 1-D array.
pred_y = ecam_ncf.predict(
    x=[test_x.user, test_x.item, test_context],
).flatten()

In [None]:
# Predicted vs. true values on square axes; the straight line marks y = x.
lims = [0, 5]
a = plt.axes(aspect='equal')
a.scatter(test_y, pred_y)
a.set_xlabel('True Values')
a.set_ylabel('Predictions')
a.set_xlim(lims)
a.set_ylim(lims)
_ = a.plot(lims, lims)

In [None]:
# Test-set error metrics.
# RMSE is taken as the square root of the MSE rather than via the `squared`
# keyword of mean_squared_error, which is deprecated and absent from recent
# scikit-learn releases.
mse = mean_squared_error(test_y, pred_y)
rmse = mse ** 0.5
mae = mean_absolute_error(test_y, pred_y)
f'RMSE = {rmse}    MAE = {mae}    MSE = {mse}'