In [None]:
# We used code from this tutorial:
# https://github.com/susanli2016/Machine-Learning-with-Python/blob/master/Collaborative%20Filtering%20Model%20with%20TensorFlow.ipynb
# And we also used code from this tutorial:
# https://medium.com/@connectwithghosh/recommender-system-on-the-movielens-using-an-autoencoder-using-tensorflow-in-python-f13d3e8d600d
# We then combined and adapted the code from both tutorials to build a recommender that returns a top-10 list of movies for a new user without retraining the model.

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as MSE
import json

In [None]:
# Load the ratings table. Malformed rows are skipped with a warning rather than
# aborting the read. `on_bad_lines='warn'` is the replacement for
# `error_bad_lines=False` (deprecated in pandas 1.3, removed in pandas 2.0).
rating = pd.read_csv('data/ratings.csv', on_bad_lines='warn', encoding='utf-8')

In [None]:
# Load the movie metadata table. Malformed rows are skipped with a warning rather
# than aborting the read. `on_bad_lines='warn'` is the replacement for
# `error_bad_lines=False` (deprecated in pandas 1.3, removed in pandas 2.0).
movie = pd.read_csv("data/movies.csv", on_bad_lines='warn', encoding='utf-8')

In [None]:
# Attach the movie columns to every rating row, joining on the shared `movieId` key.
movie_rating = rating.merge(movie, on='movieId', how='inner')

In [None]:
# Columns removed from the merged rating table in the next cell.
cols = ["timestamp"]

In [None]:
# Remove the columns listed in `cols` from the merged table.
movie_rating = movie_rating.drop(columns=cols)

In [None]:
# Number of ratings each title received, as a two-column frame (title, rating).
numrate_movie = (
    movie_rating
    .groupby("title")["rating"]
    .count()
    .reset_index()
)

In [None]:
# The counted "rating" column is really a per-title rating count; name it accordingly.
numrate_movie = numrate_movie.rename(columns={"rating": "ratecount_movie"})

In [None]:
# Keep only titles that were rated at least 20 times.
numrate_movie = numrate_movie[numrate_movie["ratecount_movie"] >= 20]

In [None]:
# Restrict the rating rows to the titles that passed the 20-rating threshold
# (the inner join drops every other title).
ratings20plus = numrate_movie.merge(movie_rating, on='title', how='inner')

In [None]:
# Number of ratings each user gave among the retained titles, as (userId, rating).
numrate_user = (
    ratings20plus
    .groupby("userId")["rating"]
    .count()
    .reset_index()
)

In [None]:
# The counted "rating" column is really a per-user rating count; name it accordingly.
numrate_user = numrate_user.rename(columns={"rating": "ratecount_user"})

In [None]:
# Keep only users who rated at least 20 of the retained titles.
numrate_user = numrate_user[numrate_user["ratecount_user"] >= 20]

In [None]:
# Restrict the rating rows to the users that passed the 20-rating threshold
# (the inner join drops every other user).
ur20plus = ratings20plus.merge(numrate_user, on="userId", how="inner")

In [None]:
# Rescale the rating column with a MinMaxScaler (default feature range).
# `scaler` stays fitted on the raw ratings after this cell.
scaler = MinMaxScaler()
ur20plus['rating'] = ur20plus['rating'].values.astype(float)

# The scaler expects a 2-D array, hence the single-column reshape.
raw_rating_column = ur20plus['rating'].values.reshape(-1, 1)
rating_scaled = pd.DataFrame(scaler.fit_transform(raw_rating_column))

# Assigned by index alignment: `rating_scaled` carries a fresh 0..n-1 index.
ur20plus['rating'] = rating_scaled

In [None]:
# A (userId, title) pair must be unique before pivoting; keep the first occurrence.
ur20plus = ur20plus.drop_duplicates(subset=['userId', 'title'])

# One row per user, one column per title; titles a user did not rate become 0.
user_movie_matrix = (
    ur20plus
    .pivot(index='userId', columns='title', values='rating')
    .fillna(0)
)

In [None]:
# Hold out 20% of the users for evaluation. The fixed `random_state` makes the
# split identical on every run; without it, each kernel restart trained and
# evaluated on a different set of users. (Weight initialisation is still random.)
X_train, X_test = train_test_split(user_movie_matrix, train_size=0.8, random_state=42)

In [None]:
# Rebind `tf` (imported as plain `tensorflow` in the import cell) to the
# TensorFlow 1.x compatibility API and disable TF2 behavior. The cells below
# build the model with TF1 constructs (tf.placeholder, tf.Session), so this cell
# must run before any of them.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

In [None]:
# Autoencoder dimensions: one input and one output unit per distinct title,
# with a single 256-unit hidden layer in between.
num_input = ur20plus['title'].nunique()
n_nodes_inpl = n_nodes_outl = num_input
n_nodes_hl1 = 256

# Each weight matrix has one extra row: the graph appends a constant column of
# ones to the layer input, so that row serves as the bias.
hidden_1_layer_vals = {
    'weights': tf.Variable(tf.random_normal([n_nodes_inpl + 1, n_nodes_hl1])),
}
output_layer_vals = {
    'weights': tf.Variable(tf.random_normal([n_nodes_hl1 + 1, n_nodes_outl])),
}

In [None]:
def _with_bias_column(tensor):
    """Return (ones, tensor with the ones appended as a last column).

    The column of ones has one entry per row of `tensor`, so the last row of the
    following weight matrix acts as a bias term.
    """
    ones = tf.fill([tf.shape(tensor)[0], 1], 1.0)
    return ones, tf.concat([tensor, ones], 1)


# Encoder: rating rows -> sigmoid hidden layer.
input_layer = tf.placeholder('float', [None, num_input])
input_layer_const, input_layer_concat = _with_bias_column(input_layer)
layer_1 = tf.nn.sigmoid(
    tf.matmul(input_layer_concat, hidden_1_layer_vals['weights'])
)

# Decoder: hidden layer -> linear reconstruction of the rating row.
layer1_const, layer_concat = _with_bias_column(layer_1)
output_layer = tf.matmul(layer_concat, output_layer_vals['weights'])

# Loss: mean squared error between the reconstruction and the target rows,
# minimised with Adagrad.
output_true = tf.placeholder('float', [None, num_input])
meansq = tf.reduce_mean(tf.square(output_layer - output_true))
learn_rate = 0.1
optimizer = tf.train.AdagradOptimizer(learn_rate).minimize(meansq)

In [None]:
# Start a session and give every tf.Variable its initial value.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# Training hyper-parameters.
batch_size, hm_epochs = 100, 200
tot_images = len(X_train)  # number of training rows

In [None]:
# Train the autoencoder: every mini-batch is fed both as the input and as the
# reconstruction target.

# Ceiling division, so the trailing partial batch is trained on too. Truncating
# division skipped those rows entirely and ran zero batches when X_train had
# fewer rows than batch_size.
n_batches = -(-tot_images // batch_size)

for epoch in range(hm_epochs):
    epoch_loss = 0  # summed mini-batch MSE of the current epoch

    for i in range(n_batches):
        epoch_x = X_train[i * batch_size:(i + 1) * batch_size]
        _, c = sess.run([optimizer, meansq],
                        feed_dict={input_layer: epoch_x,
                                   output_true: epoch_x})
        epoch_loss += c

# Reconstructions of the train and test rows from the final weights. These are
# pure forward passes and only the last result was ever kept, so they run once
# after training instead of once per epoch.
output_train = sess.run(output_layer, feed_dict={input_layer: X_train})
output_test = sess.run(output_layer, feed_dict={input_layer: X_test})
        

In [None]:
# POST /get_recommended
def _recommend_for_single_rating(title, raw_rating, top_n=10):
    """Return the `top_n` best-scored titles for a new user with a single rating.

    The new user is represented as one input row: 0 for every title except
    `title`, which holds the scaled rating (e.g. "Aliens (1986)" rated 5/5).
    The trained autoencoder reconstructs that row; the rated title is removed
    and the remaining titles are ranked by reconstructed score. No retraining
    takes place.

    The network output for a row depends only on that row, so scoring this one
    row gives the same values as scoring the whole user matrix with the new
    user appended, at a fraction of the cost.

    Parameters
    ----------
    title : str
        Title rated by the new user. Must be a column of `user_movie_matrix`.
    raw_rating : float
        Rating on the original (unscaled) scale.
    top_n : int
        Number of titles to return.

    Returns
    -------
    list of str
        Titles ordered from highest to lowest predicted score.

    Raises
    ------
    ValueError
        If `title` is not one of the titles the model was trained on.
    """
    known_titles = user_movie_matrix.columns
    if title not in known_titles:
        # An unknown title has no input unit; previously it surfaced as a
        # shape-mismatch error inside sess.run.
        raise ValueError("Unknown movie title: {!r}".format(title))

    # Reuse the scaler fitted on the training ratings. Re-fitting it with the
    # request's rating would rescale every rating whenever the new value lies
    # outside the fitted range, so clamp the value into that range instead.
    lowest, highest = float(scaler.data_min_[0]), float(scaler.data_max_[0])
    clamped_rating = min(max(raw_rating, lowest), highest)
    scaled_rating = scaler.transform([[clamped_rating]])[0, 0]

    # Single-row user profile in the same column order as the training matrix.
    profile = pd.Series(0.0, index=known_titles)
    profile.loc[title] = scaled_rating

    scores = sess.run(output_layer,
                      feed_dict={input_layer: profile.values.reshape(1, -1)})[0]

    ranked = (
        pd.Series(scores, index=known_titles)
        .drop(title)  # never recommend the movie the user just rated
        .sort_values(ascending=False)
    )
    return ranked.head(top_n).index.tolist()


# NOTE(review): REQUEST is not defined in this notebook; presumably the serving
# layer injects it as a JSON string whose "body" field is itself JSON - confirm.
req = json.loads(REQUEST)
res = json.loads(req['body'])
movie_name = str(res['movie'])
# float() keeps half-star ratings such as 4.5 (int() truncated them). The
# separate name keeps the `movie_rating` DataFrame from being overwritten.
user_rating = float(res['rating'])

recommended_result = _recommend_for_single_rating(movie_name, user_rating)
print (recommended_result)