## Deep Learning for Content-Based Filtering

Implement content-based filtering using a neural network to build a recommender system for movies.

## 1 - Packages

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Layer
from tensorflow.keras.models import Model
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from numpy import genfromtxt
import csv
import tabulate

## 2 - Preparing data

In [2]:
def _read_header_row(path):
    """Return the first CSV row of the text file at `path` as a list of strings."""
    with open(path, newline='') as f:
        return list(csv.reader(f))[0]


def load_data(data_dir='./data'):
    """Load the training arrays and their column names from `data_dir`.

    Args:
        data_dir: Directory containing ``items.csv``, ``users.csv``,
            ``ratings.csv``, ``item_features.txt`` and ``user_features.txt``.
            Defaults to ``'./data'``, the location that used to be hard-coded,
            so existing ``load_data()`` calls behave as before.

    Returns:
        A 5-tuple ``(item_train, user_train, y_train, item_features,
        user_features)``: the first three are arrays parsed by
        ``numpy.genfromtxt`` with ``delimiter=','``; the last two are the
        first CSV row of the corresponding ``*_features.txt`` file.

    Raises:
        OSError: If one of the files cannot be opened.
        IndexError: If a ``*_features.txt`` file contains no rows.
    """
    import os  # local import: only needed to build the file paths

    item_train = genfromtxt(os.path.join(data_dir, 'items.csv'), delimiter=',')
    user_train = genfromtxt(os.path.join(data_dir, 'users.csv'), delimiter=',')
    y_train = genfromtxt(os.path.join(data_dir, 'ratings.csv'), delimiter=',')

    item_features = _read_header_row(os.path.join(data_dir, 'item_features.txt'))
    user_features = _read_header_row(os.path.join(data_dir, 'user_features.txt'))

    return item_train, user_train, y_train, item_features, user_features

# Pull the raw arrays and the column-name lists into the notebook namespace.
(item_train, user_train, y_train,
 item_features, user_features) = load_data()

In [3]:
# Show the item column names; column 0 ('movie id') is the one dropped
# before the item array is fed to the network.
print(item_features)

['movie id', 'year', 'ave rating', 'Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Horror', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller']


In [4]:
# Show the user column names; the first three ('user id', 'rating count',
# 'rating ave') are the ones dropped before the user array is fed to the network.
print(user_features)

['user id', 'rating count', 'rating ave', 'Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Horror', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller']


In [5]:
# Standardise the item and user feature matrices. The fitted scalers are kept
# because the same transforms must be applied to new inputs at prediction time.
#
# NOTE(review): the scalers are fit on the full arrays *before* the train/test
# split in the next cell, so test-row statistics leak into the scaling.
# NOTE(review): this cell overwrites its own inputs; re-running it re-fits the
# scalers on already-scaled data. Run it exactly once per kernel.
scalerItem = StandardScaler()
item_train = scalerItem.fit_transform(item_train)

scalerUser = StandardScaler()
user_train = scalerUser.fit_transform(user_train)

# Map the ratings onto [-1, 1] as a single-column 2-D array.
scaler = MinMaxScaler((-1, 1))
y_train = scaler.fit_transform(y_train.reshape(-1, 1))

In [6]:
# Split all three arrays in ONE call. train_test_split applies the same row
# indices to every array it is given, so items, users and ratings stay aligned
# by construction, and a length mismatch raises instead of silently pairing
# the wrong rows (three separate calls only agreed because the seed and the
# row counts happened to match).
# With equal-length inputs this yields the same 80/20 split as before.
(item_train, item_test,
 user_train, user_test,
 y_train, y_test) = train_test_split(
    item_train, user_train, y_train,
    train_size=0.80, shuffle=True, random_state=1,
)

## 3 - Neural Network for content-based filtering

In [7]:
# Width of each network input. The leading columns are excluded because the
# training call slices them off: user_train[:, 3:] drops 'user id',
# 'rating count' and 'rating ave'; item_train[:, 1:] drops 'movie id'.
num_user_features = user_train.shape[1] - 3
num_item_features = item_train.shape[1] - 1
# Size of the embedding vector produced by both the user and the item network.
num_outputs = 32

class L2NormalizationLayer(Layer):
    """Keras layer that L2-normalises its input along axis 1."""

    def call(self, inputs):
        """Return `inputs` scaled to unit L2 norm along axis 1."""
        return tf.linalg.l2_normalize(inputs, axis=1)

# Seed TensorFlow before any layer is created.
# NOTE(review): presumably this is meant to make weight initialisation
# repeatable; if so, the order in which layers are built below matters.
tf.random.set_seed(1)
# User tower: maps user features to a num_outputs-dimensional vector.
user_NN = tf.keras.models.Sequential([  
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_outputs, activation='linear'),
])

# Item tower: same architecture as the user tower, separate weights.
item_NN = tf.keras.models.Sequential([   
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_outputs, activation='linear'), 
])

# Run the user features through the user tower and normalise to unit length.
input_user = tf.keras.layers.Input(shape=(num_user_features,))
vu = user_NN(input_user)
vu = L2NormalizationLayer()(vu)

# Same for the item features and the item tower.
input_item = tf.keras.layers.Input(shape=(num_item_features,))
vm = item_NN(input_item)
vm = L2NormalizationLayer()(vm)

# Dot product of two unit vectors lies in [-1, 1], the same range the
# ratings were scaled to with MinMaxScaler((-1, 1)).
output = tf.keras.layers.Dot(axes=1)([vu, vm])

# Two-input model: [user features, item features] -> predicted scaled rating.
model = Model([input_user, input_item], output)

model.summary()




In [8]:
# Re-seed TensorFlow, then attach the optimiser and the loss to the model.
tf.random.set_seed(1)
opt = keras.optimizers.Adam(learning_rate=0.01)
cost_fn = tf.keras.losses.MeanSquaredError()
model.compile(loss=cost_fn, optimizer=opt)

In [None]:
# Train on the feature columns only: the first 3 user columns and the first
# item column are sliced off, matching the input widths the model was built with.
# NOTE(review): user_test / item_test / y_test are not used by any cell visible
# here; passing them as validation_data would surface overfitting. Confirm intent.
tf.random.set_seed(1)
model.fit(
    x=[user_train[:, 3:], item_train[:, 1:]],
    y=y_train,
    epochs=30,
)

Epoch 1/30
1455/1455 ━━━━━━━━━━━━━━━━━━━━ 4s 2ms/step - loss: 0.1295
Epoch 2/30
1455/1455 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - loss: 0.1173
Epoch 3/30
1455/1455 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - loss: 0.1151
Epoch 4/30
1455/1455 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - loss: 0.1138
Epoch 5/30
1455/1455 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - loss: 0.1128
Epoch 6/30
1455/1455 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - loss: 0.1116
Epoch 7/30
1455/1455 ━━━━━━━━━━━━━━━━━━━━ 4s 2ms/step - loss: 0.1107
Epoch 8/30
1455/1455 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - loss: 0.1100
Epoch 9/30
1455/1455 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - loss: 0.1092
Epoch 10/30
1455/1455 ━━━━━━━━━━━━━━━━━━━━ (remaining output truncated)

## 4 - Predictions

**Predictions for a new user**

In [None]:
# Hand-built feature row for a user who is not in the training data.
# The values are listed in the same order as the user feature columns
# printed earlier: id, rating count, rating average, then one value per genre.
new_user_id = 5000
new_rating_count = 3
new_rating_ave = 1.0

# Per-genre values (presumably the user's average rating per genre; confirm).
# Comedy, Romance and Sci-Fi are set high, everything else low.
new_action = 1.0
new_adventure = 1
new_animation = 1
new_childrens = 1
new_comedy = 5
new_crime = 1
new_documentary = 1
new_drama = 1
new_fantasy = 1
new_horror = 1
new_mystery = 1
new_romance = 5
new_scifi = 5
new_thriller = 1

# Single-row 2-D array so it can be tiled and passed through the user scaler.
user_vec = np.array([[
    new_user_id, new_rating_count, new_rating_ave,
    new_action, new_adventure, new_animation, new_childrens,
    new_comedy, new_crime, new_documentary,
    new_drama, new_fantasy, new_horror, new_mystery,
    new_romance, new_scifi, new_thriller,
]])

In [None]:
# Candidate items to score for the new user. This array is later passed
# through scalerItem, so it presumably shares the column layout of items.csv.
item_vecs = genfromtxt(
    './data/item_vecs.csv',
    delimiter=',',
)

In [None]:
def gen_user_vecs(user_vec, num_items):
    """Stack `num_items` copies of `user_vec` on top of each other.

    Args:
        user_vec: Array-like user feature row(s); passed to ``np.tile``.
        num_items: Number of vertical repetitions.

    Returns:
        ``np.tile(user_vec, (num_items, 1))``. For a ``(1, d)`` input this is
        a ``(num_items, d)`` array with the same row repeated.
    """
    repetitions = (num_items, 1)
    return np.tile(user_vec, repetitions)

In [None]:
# One copy of the new user's row per candidate item, so both model inputs
# have the same number of rows.
user_vecs = gen_user_vecs(user_vec, len(item_vecs))

In [None]:
def predict(user_vecs, item_vecs, scalerItem, scalerUser, scaler, model,
            user_start=3, item_start=1):
    """Score every (user row, item row) pair and return unscaled predictions.

    Args:
        user_vecs: Raw (unscaled) user feature rows, one per item row.
        item_vecs: Raw (unscaled) item feature rows.
        scalerItem: Fitted scaler with ``transform``; applied to `item_vecs`.
        scalerUser: Fitted scaler with ``transform``; applied to `user_vecs`.
        scaler: Fitted target scaler with ``inverse_transform``; maps model
            outputs back to the original target scale.
        model: Object with ``predict([user_inputs, item_inputs])``.
        user_start: Index of the first user column fed to the model; earlier
            columns are dropped. Defaults to 3, the previously hard-coded value.
        item_start: Index of the first item column fed to the model; earlier
            columns are dropped. Defaults to 1, the previously hard-coded value.

    Returns:
        The result of ``scaler.inverse_transform`` applied to the model output.

    Side effects:
        Prints a warning line if any unscaled prediction is negative; the
        predictions are still returned.
    """
    scaled_user_vecs = scalerUser.transform(user_vecs)
    scaled_item_vecs = scalerItem.transform(item_vecs)

    # Drop the leading non-feature columns, mirroring the slicing used when
    # the model was trained.
    y_p = model.predict([scaled_user_vecs[:, user_start:],
                         scaled_item_vecs[:, item_start:]])

    # Undo the target scaling so predictions are on the original target scale.
    y_pu = scaler.inverse_transform(y_p)

    if np.any(y_pu < 0):
        print("Error, expected all positive predictions")

    return y_pu

In [None]:
# Predicted ratings (original rating scale) for the new user, one per item row.
p = predict(
    user_vecs, item_vecs,
    scalerItem, scalerUser, scaler,
    model,
)

In [None]:
# Display the array returned by predict() as the cell output.
p