In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import os
PATH = './'
os.environ['CUDA_VISIBLE_DEVICES'] = "-1"

In [2]:
# filter: drop 0 > rating > 5 data and data without user rated
ratings = pd.read_csv(PATH + 'ratings.csv')
ratings['userId'] = ratings['userId'].fillna('')
ratings = ratings.drop(ratings[ratings.rating > 5].index)
ratings = ratings.drop(ratings[ratings.rating < 0].index)
ratings = ratings.drop(ratings[ratings.userId == ''].index)
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1.0,1.0,4.0,964982703
1,1.0,3.0,4.0,964981247
2,1.0,6.0,4.0,964982224
3,1.0,47.0,5.0,964983815
4,1.0,50.0,5.0,964982931
...,...,...,...,...
100831,610.0,166534.0,4.0,1493848402
100832,610.0,168248.0,5.0,1493850091
100833,610.0,168250.0,5.0,1494273047
100834,610.0,168252.0,5.0,1493846352


In [3]:
movies = pd.read_csv(PATH + 'movies.csv')
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [116]:
user_enc = LabelEncoder()
ratings['user'] = user_enc.fit_transform(ratings['userId'].values)
n_users = ratings['user'].nunique()
item_enc = LabelEncoder()
ratings['movie'] = item_enc.fit_transform(ratings['movieId'].values)
n_movies = ratings['movie'].nunique()
ratings['rating'] = ratings['rating'].values.astype(np.float32)
min_rating = min(ratings['rating'])
max_rating = max(ratings['rating'])
n_users, n_movies, min_rating, max_rating

(590, 9696, 0.5, 5.0)

In [7]:
# split training and testing: 0.95 and 0.05
X = ratings[['user', 'movie']].values
y = ratings['rating'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=69)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((94080, 2), (4952, 2), (94080,), (4952,))

In [8]:
n_factors = 50
X_train_array = [X_train[:, 0], X_train[:, 1]]
X_test_array = [X_test[:, 0], X_test[:, 1]]

# Collaborative Filtering model training

In [None]:
from tensorflow import keras 

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Reshape, Dot
from keras.layers.embeddings import Embedding
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

In [10]:
from keras.layers import Add, Activation, Lambda
class EmbeddingLayer:
    def __init__(self, n_items, n_factors):
        self.n_items = n_items
        self.n_factors = n_factors
    
    def __call__(self, x):
        x = Embedding(self.n_items, self.n_factors, embeddings_initializer='he_normal',
                      embeddings_regularizer=l2(1e-6))(x)
        x = Reshape((self.n_factors,))(x)
        return x

In [26]:
from keras.layers import Concatenate, Dense, Dropout
def RecommenderNet(n_users, n_movies, n_factors, min_rating, max_rating):
    user = Input(shape=(1,))
    u = EmbeddingLayer(n_users, n_factors)(user)
    
    movie = Input(shape=(1,))
    m = EmbeddingLayer(n_movies, n_factors)(movie)
    
    x = Concatenate()([u, m])
    x = Dropout(0.05)(x)
    
    x = Dense(10, kernel_initializer='he_normal')(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)
    
    x = Dense(1, kernel_initializer='he_normal')(x)
    x = Activation('sigmoid')(x)
    x = Lambda(lambda x: x * (max_rating - min_rating) + min_rating)(x)
    model = Model(inputs=[user, movie], outputs=x)
    opt = Adam(learning_rate=0.001)
    model.compile(loss='mean_squared_error', optimizer=opt)
    return model

In [27]:
model = RecommenderNet(n_users, n_movies, n_factors, min_rating, max_rating)
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 input_4 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 embedding_2 (Embedding)        (None, 1, 50)        29500       ['input_3[0][0]']                
                                                                                                  
 embedding_3 (Embedding)        (None, 1, 50)        484800      ['input_4[0][0]']                
                                                                                            

In [28]:
history = model.fit(x=X_train_array, y=y_train, batch_size=128, epochs=10,
                    verbose=1, validation_data=(X_test_array, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [29]:
# deployment as 
import tensorflow
tensorflow.saved_model.save(model, "./model/")

INFO:tensorflow:Assets written to: ./model/assets


In [109]:
user_id = 100
ct_based_movie_recommendation = run_content_based_recommendation(user_id)

In [113]:
ct_based_movie_recommendation

[1030,
 7,
 6666,
 36363,
 11,
 76301,
 522,
 1551,
 2065,
 18,
 19,
 119828,
 42004,
 23,
 26,
 2586,
 2076,
 30,
 31,
 36,
 74789,
 3108,
 39,
 32296,
 553,
 3114,
 43,
 40,
 1586,
 2099,
 52,
 1076,
 1079,
 55,
 58,
 4157,
 2110,
 64,
 65,
 577,
 68,
 69,
 150596,
 75,
 588,
 82,
 142420,
 2133,
 88,
 1625,
 2137,
 1627,
 94,
 96,
 97,
 2150,
 1127,
 616,
 1129,
 106,
 1126,
 2662,
 1645,
 2160,
 117,
 118,
 1655,
 78467,
 1667,
 4243,
 26776,
 1689,
 156,
 101025,
 165,
 133802,
 3754,
 1196,
 5291,
 1711,
 694,
 695,
 50872,
 185,
 1210,
 195,
 2253,
 110297,
 2265,
 3807,
 4321,
 54001,
 2294,
 247,
 762,
 1278,
 257,
 260,
 8454,
 780,
 2321,
 3358,
 288,
 3873,
 89898,
 810,
 303,
 2872,
 313,
 1337,
 43836,
 316,
 5952,
 102720,
 837,
 1356,
 335,
 5456,
 849,
 1889,
 867,
 66915,
 355,
 1382,
 875,
 33138,
 888,
 384,
 4993,
 33669,
 390,
 904,
 910,
 96655,
 107406,
 5522,
 410,
 421,
 2470,
 4523,
 2478,
 1459,
 950,
 952,
 1465,
 442,
 2488,
 965,
 455,
 138702,
 479,
 552

In [111]:
x = [user_id for _ in range(len(ct_based_movie_recommendation))]
x2 = [movie_id for movie_id in ct_based_movie_recommendation]

In [21]:
model_input = [np.array(x), np.array(x2)]

In [25]:
len(model_input[0])

107

In [30]:
model.input, model.output

([<KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'input_3')>,
  <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'input_4')>],
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'lambda_1')>)

In [32]:
if len(arr) > 3:
    arr[-3:]

array([4.616461], dtype=float32)

In [56]:
x = [0,0,0]

In [59]:
x[-4:]

[0, 0, 0]