In [6]:
#importing the required libraries
import numpy as np
import pandas as pd
import pickle
from models import matrix_factorization_utilities
import scipy.sparse as sp
from scipy.sparse.linalg import svds
from sklearn.metrics import mean_squared_error
from math import sqrt

#import libraries
import keras
from keras.layers import Embedding, Reshape, concatenate
from keras.models import Sequential
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint

In [11]:
# Reading the ratings data
ratings = pd.read_csv('Dataset/ratings.csv')

In [12]:
#Just taking the required columns
ratings = ratings[['userId', 'movieId','rating']]

In [32]:
#reading the movies dataset
movie_list = pd.read_csv('Dataset/movies.csv')

In [41]:
# Couting no of unique users and movies
len(ratings.userId.unique()), len(ratings.movieId.unique())

(7120, 14026)

In [42]:
# Assigning a unique value to each user and movie in range 0,no_of_users and 0,no_of_movies respectively.
ratings.userId = ratings.userId.astype('category').cat.codes.values
ratings.movieId = ratings.movieId.astype('category').cat.codes.values

In [43]:
# Splitting the data into train and test.
train, test = train_test_split(ratings, test_size=0.2)

In [45]:
train.head

<bound method NDFrame.head of          userId  movieId  rating
176221     1190     1879     4.0
1016384    6880     6385     3.5
430586     2927    11411     3.0
712568     4746      373     5.0
342531     2319     5307     2.5
...         ...      ...     ...
716818     4770      580     3.0
1038673    7039     6940     5.0
117804      786     2444     4.0
323413     2208     2913     2.5
440064     2989      338     4.0

[838860 rows x 3 columns]>

In [46]:
test.head

<bound method NDFrame.head of         userId  movieId  rating
370522    2511      337     4.0
755659    5035     8642     2.5
32080      250      962     4.0
516534    3491     6147     4.5
59511      421     7111     4.0
...        ...      ...     ...
647074    4312     1318     4.0
547760    3672     1780     3.0
517004    3497    12029     2.5
108602     740     8546     4.5
539133    3628     6145     2.5

[209715 rows x 3 columns]>

In [47]:
n_users, n_movies = len(ratings.userId.unique()), len(ratings.movieId.unique())

In [111]:
# Returns a neural network model which does recommendation
#def neural_network_model(n_latent_factors_user, n_latent_factors_movie):
    
movie_input = keras.layers.Input(shape=[1],name='Item')
movie_embedding = keras.layers.Embedding(n_movies + 1, 13, name='Movie-Embedding')(movie_input)
# 13 yerinen_latent_factors_movie
movie_vec = keras.layers.Flatten(name='FlattenMovies')(movie_embedding)
movie_vec = keras.layers.Dropout(0.2)(movie_vec)


user_input = keras.layers.Input(shape=[1],name='User')
user_vec = keras.layers.Flatten(name='FlattenUsers')(keras.layers.Embedding(n_users + 1, 10,name='User-Embedding')(user_input))
#10 yerine  n_latent_factors_user
user_vec = keras.layers.Dropout(0.2)(user_vec)


concat = keras.layers.concatenate([movie_vec, user_vec],name='Concat')
concat_dropout = keras.layers.Dropout(0.2)(concat)
dense = keras.layers.Dense(100,name='FullyConnected')(concat)
dropout_1 = keras.layers.Dropout(0.2,name='Dropout')(dense)
dense_2 = keras.layers.Dense(50,name='FullyConnected-1')(concat)
dropout_2 = keras.layers.Dropout(0.2,name='Dropout')(dense_2)
dense_3 = keras.layers.Dense(20,name='FullyConnected-2')(dense_2)
dropout_3 = keras.layers.Dropout(0.2,name='Dropout')(dense_3)
dense_4 = keras.layers.Dense(10,name='FullyConnected-3', activation='relu')(dense_3)


result = keras.layers.Dense(1, activation='relu',name='Activation')(dense_4)
adam = Adam(lr=0.005)
model = keras.Model([user_input, movie_input], result)
model.compile(optimizer=adam,loss= 'mean_absolute_error')
    
history_neural_network = model.fit([train.userId, train.movieId], train.rating, epochs=5, verbose=0)    
    
    
    
    #return model

In [112]:
model.summary()

Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Item (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
User (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
Movie-Embedding (Embedding)     (None, 1, 13)        182351      Item[0][0]                       
__________________________________________________________________________________________________
User-Embedding (Embedding)      (None, 1, 10)        71210       User[0][0]                       
____________________________________________________________________________________________

In [115]:
y_hat = np.round(model.predict([test.userId, test.movieId]),0)
y_true = test.rating

In [116]:
mean_absolute_error(y_true, y_hat)

0.6584602913477815