In [1]:
import numpy as np
import pandas as pd
import warnings
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Concatenate, Embedding, Dense, Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

# Blanket filter: suppresses every warning category from every module for the
# rest of the session, including pandas/TensorFlow deprecation notices.
warnings.filterwarnings('ignore')

In [2]:
def load_data(movie_path, rating_path):
    """Read the movie and rating CSV files and drop exact duplicate rows.

    Parameters
    ----------
    movie_path : path or file-like accepted by ``pd.read_csv``
        Location of the movie table.
    rating_path : path or file-like accepted by ``pd.read_csv``
        Location of the rating table.

    Returns
    -------
    tuple of (pd.DataFrame, pd.DataFrame)
        ``(movie, rating)``, each de-duplicated. The original row index is
        kept (it is not reset after duplicates are removed).
    """
    movie = pd.read_csv(movie_path).drop_duplicates()
    rating = pd.read_csv(rating_path).drop_duplicates()
    return movie, rating

def data_prep(movie_data, rating_data, min_ratings=1000):
    """Join ratings with movie metadata and keep only frequently rated movies.

    Parameters
    ----------
    movie_data : pd.DataFrame
        Movie table; must contain a ``movieId`` column.
    rating_data : pd.DataFrame
        Rating table; must contain ``movieId`` and ``userId`` columns.
    min_ratings : int, default 1000
        A movie is kept only when it has strictly more than this many
        non-null ``userId`` entries in the joined table.

    Returns
    -------
    pd.DataFrame
        The inner join of ``rating_data`` and ``movie_data`` on ``movieId``,
        restricted to the movies that pass the ``min_ratings`` threshold.
        Row index values of the joined frame are preserved.
    """
    data = rating_data.merge(movie_data, on='movieId', how='inner')

    # Only the userId count is needed, so count that single column rather
    # than aggregating every column of the joined frame.
    ratings_per_movie = data.groupby('movieId')['userId'].count()
    popular_ids = ratings_per_movie.index[ratings_per_movie > min_ratings]

    return data[data['movieId'].isin(popular_ids)]

# Location of the raw CSV files, relative to the notebook's working directory.
data_dir = './Data/archive (1)/'
rating_path = f'{data_dir}rating.csv'
movie_path = f'{data_dir}movie.csv'

# Load both tables, then build the merged, popularity-filtered ratings frame.
movie_data, rating_data = load_data(movie_path, rating_path)
data = data_prep(movie_data, rating_data)

In [3]:
# Preview the first rows of the merged, filtered ratings frame.
data.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,2,3.5,2005-04-02 23:53:47,Jumanji (1995),Adventure|Children|Fantasy
1,5,2,3.0,1996-12-25 15:26:09,Jumanji (1995),Adventure|Children|Fantasy
2,13,2,3.0,1996-11-27 08:19:02,Jumanji (1995),Adventure|Children|Fantasy
3,29,2,3.0,1996-06-23 20:36:14,Jumanji (1995),Adventure|Children|Fantasy
4,34,2,3.0,1996-10-28 13:29:44,Jumanji (1995),Adventure|Children|Fantasy


In [4]:
# Convert each identifier column to a pandas Categorical (in place) and store
# its integer category codes in a companion ``new_*`` column. The pairs are
# processed in this order so the new columns are appended in the same order.
for raw_col, code_col in [
    ('userId', 'new_userid'),
    ('movieId', 'new_movieid'),
    ('genres', 'new_genres'),
    ('title', 'new_title'),
]:
    data[raw_col] = pd.Categorical(data[raw_col])
    data[code_col] = data[raw_col].cat.codes

In [5]:
# Preview the frame again now that the integer-coded columns have been added.
data.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres,new_userid,new_movieid,new_genres,new_title
0,1,2,3.5,2005-04-02 23:53:47,Jumanji (1995),Adventure|Children|Fantasy,0,1,262,1523
1,5,2,3.0,1996-12-25 15:26:09,Jumanji (1995),Adventure|Children|Fantasy,4,1,262,1523
2,13,2,3.0,1996-11-27 08:19:02,Jumanji (1995),Adventure|Children|Fantasy,12,1,262,1523
3,29,2,3.0,1996-06-23 20:36:14,Jumanji (1995),Adventure|Children|Fantasy,28,1,262,1523
4,34,2,3.0,1996-10-28 13:29:44,Jumanji (1995),Adventure|Children|Fantasy,33,1,262,1523


In [6]:
# Keep only the integer-coded model inputs and the rating target, then drop
# rows that are exact duplicates across those four columns.
model_columns = ['new_userid', 'new_movieid', 'rating', 'new_genres']
df = data[model_columns].drop_duplicates()

In [16]:
# Number of distinct users, movies and genre strings; used as the row count
# (input_dim) of each embedding table.
U = df['new_userid'].nunique()
M = df['new_movieid'].nunique()
G = df['new_genres'].nunique()
k = 20  # embedding width shared by all three tables

# One scalar integer input per example for each of the three features.
# NOTE(review): `id` shadows the Python builtin; the name is kept unchanged
# because other cells may reference it.
id = Input(shape = (1,))
movie = Input(shape = (1,))
gen = Input(shape = (1,))

emb_id = Embedding(U, k)(id)
emb_movie = Embedding(M, k)(movie)
emb_genre = Embedding(G, k)(gen)

# Embedding output has a length-1 sequence axis; flatten to (batch, k).
flatten_id = Flatten()(emb_id)
flatten_movie = Flatten()(emb_movie)
flatten_genre = Flatten()(emb_genre)

emb = Concatenate()([flatten_id, flatten_movie, flatten_genre])
dense = Dense(64, activation = 'relu')(emb)
dense_1 = Dense(1)(dense)

# The model must end at the single-unit regression head (`dense_1`), not the
# 64-unit hidden layer, so each example yields one predicted rating that the
# MSE loss can compare against the scalar target.
model = Model(inputs = [id, movie, gen], outputs = dense_1)
model.compile(optimizer = 'Adam',metrics = ['MeanSquaredError'] ,loss = 'mse')

In [17]:
# Hold out 30% of the rows; the fixed random_state makes the split repeatable.
train_data, test_data = train_test_split(df, test_size=0.3, random_state=0)

# Model inputs, in the order the model's input list expects them.
feature_cols = ['new_userid', 'new_movieid', 'new_genres']

userid_train, movieid_train, gen_train = [train_data[col].values for col in feature_cols]
train_y = train_data['rating'].values

userid_test, movieid_test, gen_test = [test_data[col].values for col in feature_cols]
test_y = test_data['rating'].values

# The held-out split is passed as validation data, so Keras reports its
# loss/metric after every epoch.
hist = model.fit(
    x=[userid_train, movieid_train, gen_train],
    y=train_y,
    validation_data=([userid_test, movieid_test, gen_test], test_y),
    batch_size=2048,
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
