In [1]:
#importing the required libraries
import numpy as np
import pandas as pd
import pickle
from models import matrix_factorization_utilities
import scipy.sparse as sp
from scipy.sparse.linalg import svds
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn.model_selection import train_test_split

#import libraries
import keras
from keras.layers import Embedding, Reshape, concatenate
from keras.models import Sequential
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint

In [2]:
# Reading the ratings data
ratings = pd.read_csv('Dataset/ratings.csv')

In [3]:
#Just taking the required columns
ratings = ratings[['userId', 'movieId','rating']]

In [4]:
#reading the movies dataset
movie_list = pd.read_csv('Dataset/movies.csv')

In [5]:
# Couting no of unique users and movies
len(ratings.userId.unique()), len(ratings.movieId.unique())

(7120, 14026)

In [6]:
# Assigning a unique value to each user and movie in range 0,no_of_users and 0,no_of_movies respectively.
ratings.userId = ratings.userId.astype('category').cat.codes.values
ratings.movieId = ratings.movieId.astype('category').cat.codes.values

In [7]:
# Splitting the data into train and test.
train, test = train_test_split(ratings, test_size=0.2)

In [8]:
train.head

<bound method NDFrame.head of          userId  movieId  rating
670362     4449    10316     2.5
348172     2351      541     2.0
801291     5347     1314     3.0
287834     1971     1691     2.0
1040947    7050     6259     4.0
...         ...      ...     ...
164440     1112     3398     4.5
594241     3979     3818     5.0
552086     3701     2534     3.0
846873     5652    10384     4.5
36284       279     5161     3.5

[838860 rows x 3 columns]>

In [9]:
test.head

<bound method NDFrame.head of         userId  movieId  rating
195199    1327     3188     4.0
892658    5972    11971     4.5
13656      115     2134     3.0
435286    2954      545     5.0
815811    5442     6691     3.5
...        ...      ...     ...
218909    1514     3031     3.0
160915    1083        0     4.0
115480     774     7450     2.0
155834    1043       49     5.0
500565    3378      498     4.0

[209715 rows x 3 columns]>

In [10]:
n_users, n_movies = len(ratings.userId.unique()), len(ratings.movieId.unique())

In [12]:
# Returns a neural network model which does recommendation
#def neural_network_model(n_latent_factors_user, n_latent_factors_movie):
    
movie_input = keras.layers.Input(shape=[1],name='Item')
movie_embedding = keras.layers.Embedding(n_movies + 1, 50, name='Movie-Embedding')(movie_input)
# 13 yerinen_latent_factors_movie
movie_vec = keras.layers.Flatten(name='FlattenMovies')(movie_embedding)
movie_vec = keras.layers.Dropout(0.2)(movie_vec)


user_input = keras.layers.Input(shape=[1],name='User')
user_vec = keras.layers.Flatten(name='FlattenUsers')(keras.layers.Embedding(n_users + 1, 20,name='User-Embedding')(user_input))
#10 yerine  n_latent_factors_user
user_vec = keras.layers.Dropout(0.2)(user_vec)


concat = keras.layers.concatenate([movie_vec, user_vec],name='Concat')
concat_dropout = keras.layers.Dropout(0.2)(concat)
dense = keras.layers.Dense(100,name='FullyConnected')(concat)
dropout_1 = keras.layers.Dropout(0.2,name='Dropout')(dense)
dense_2 = keras.layers.Dense(50,name='FullyConnected-1')(concat)
dropout_2 = keras.layers.Dropout(0.2,name='Dropout')(dense_2)
dense_3 = keras.layers.Dense(20,name='FullyConnected-2')(dense_2)
dropout_3 = keras.layers.Dropout(0.2,name='Dropout')(dense_3)
dense_4 = keras.layers.Dense(10,name='FullyConnected-3', activation='relu')(dense_3)


result = keras.layers.Dense(1, activation='relu',name='Activation')(dense_4)
adam = Adam(lr=0.001)
model = keras.Model([user_input, movie_input], result)
model.compile(optimizer=adam,loss= 'mean_absolute_error')

In [13]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Item (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
User (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
Movie-Embedding (Embedding)     (None, 1, 50)        701350      Item[0][0]                       
__________________________________________________________________________________________________
User-Embedding (Embedding)      (None, 1, 20)        142420      User[0][0]                       
____________________________________________________________________________________________

In [14]:
history_neural_network = model.fit([train.userId, train.movieId], train.rating, epochs=200, validation_data=0.1,verbose=1)

Epoch 1/200
 1759/26215 [=>............................] - ETA: 1:42 - loss: 1.1261

KeyboardInterrupt: 

In [115]:
y_hat = np.round(model.predict([test.userId, test.movieId]),0)
y_true = test.rating

In [116]:
mean_absolute_error(y_true, y_hat)

0.6584602913477815