### Recommender System using Collaborative Filtering Model using Neural Networks

In [14]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder #Label Encoding for User and Movie Ids

from keras.models import Model
from keras.layers import Input, Embedding, Flatten, concatenate, Dense
from keras.optimizers import Adam # Adam optimizer

In [15]:
df_movies = pd.read_csv('movies.csv').head(62424)
df_ratings = pd.read_csv('ratings.csv').head(1048576)

In [16]:
#Merging movies and their ratings given
dfMerged = pd.merge(df_ratings, df_movies, on='movieId')

In [17]:
dfMerged

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,296,5.0,1147880044,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
1,3,296,5.0,1439474476,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
2,4,296,4.0,1573938898,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
3,5,296,4.0,830786155,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
4,7,296,4.0,835444730,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
...,...,...,...,...,...,...
1048571,7036,150858,2.5,1471038060,Cougar Hunting (2011),Comedy|Romance
1048572,7036,166480,3.0,1481035733,Eliminators (2016),Action|Thriller
1048573,7036,188931,3.5,1565119103,Birdsong (2012),Drama|War
1048574,7036,203799,1.0,1562442903,Cold Blood (2019),Action|Thriller


In [18]:
# Label encoding of IDs
userEncoder = LabelEncoder()
movieEncoder = LabelEncoder()

In [19]:
dfMerged.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,296,5.0,1147880044,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
1,3,296,5.0,1439474476,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
2,4,296,4.0,1573938898,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
3,5,296,4.0,830786155,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
4,7,296,4.0,835444730,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller


In [20]:
dfMerged['userEncoded'] = userEncoder.fit_transform(dfMerged['userId'])
dfMerged['movieEncoded'] = movieEncoder.fit_transform(dfMerged['movieId'])

In [21]:
dfMerged

Unnamed: 0,userId,movieId,rating,timestamp,title,genres,userEncoded,movieEncoded
0,1,296,5.0,1147880044,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller,0,289
1,3,296,5.0,1439474476,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller,2,289
2,4,296,4.0,1573938898,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller,3,289
3,5,296,4.0,830786155,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller,4,289
4,7,296,4.0,835444730,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller,6,289
...,...,...,...,...,...,...,...,...
1048571,7036,150858,2.5,1471038060,Cougar Hunting (2011),Comedy|Romance,7035,18306
1048572,7036,166480,3.0,1481035733,Eliminators (2016),Action|Thriller,7035,19420
1048573,7036,188931,3.5,1565119103,Birdsong (2012),Drama|War,7035,21364
1048574,7036,203799,1.0,1562442903,Cold Blood (2019),Action|Thriller,7035,22120


In [22]:
# Splitting for training and testing, random state = 10, for portable results
train_data, test_data = train_test_split(dfMerged, test_size=0.3) 

In [23]:
# Defining the Neural Network
def recommendationModel(Vu, Vm, embeddSize=50):
    # User
    userInput = Input(shape=(1,), name="userInput") # Taking the user Input
    userEmbed = Embedding(input_dim=Vu, output_dim=embeddSize, input_length=1)(userInput) # User data embedding based on embed size - Hidden layer
    userEmbed = Flatten()(userEmbed) # Flatten the userEmbed
    
    # Movie (similar)
    movieInput = Input(shape=(1,), name="movieInput") # Taking the movie Input
    movieEmbed = Embedding(input_dim=Vm, output_dim=embeddSize, input_length=1)(movieInput) # movie data embedding based on embed size - Hidden layer
    movieEmbed = Flatten()(movieEmbed) # Flatten the movieEmbed
    
    # Concatenate the embeddings
    concat = concatenate([userEmbed, movieEmbed])
    
    # Fully connected Dense layers
    D1 = Dense(128, activation='relu')(concat) # This signifies previous layer
    D2 = Dense(64, activation='relu')(D1)
    D3 = Dense(32, activation='relu')(D2)
    D4 = Dense(16, activation='relu')(D3)
    
    # Output layer for 0-5 rating
    output = Dense(1, activation='linear')(D4)
    
    # Model definition
    model = Model([userInput, movieInput], output)
    model.compile(optimizer=Adam(), loss='mean_squared_error')
    
    return model


In [24]:
# Acquire the unique users and movies
Vu = dfMerged['userEncoded'].nunique()
Vm = dfMerged['movieEncoded'].nunique()

In [25]:
# Create the model
model = recommendationModel(Vu, Vm)
model.summary

<bound method Model.summary of <keras.src.engine.functional.Functional object at 0x00000191B871B070>>

In [30]:
from keras.callbacks import ModelCheckpoint

# Create a ModelCheckpoint callback
checkpoint_path = "best_model.h5"
checkpoint_callback = ModelCheckpoint(checkpoint_path, monitor='val_loss', save_best_only=True, mode='min', verbose=0)


model.fit([train_data['userEncoded'], train_data['movieEncoded']], train_data['rating'],
          validation_data=([test_data['userEncoded'], test_data['movieEncoded']], test_data['rating']), epochs=10, batch_size=64, verbose=1,callbacks=checkpoint_callback)



Epoch 1/10
Epoch 2/10
    4/11469 [..............................] - ETA: 3:26 - loss: 0.6760

  saving_api.save_model(


Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x191db4bf7c0>

## Getting a sample prediction

In [33]:
# Example: Get predictions for user with user_id = 1 and movie_id = 100
from keras.models import load_model
user_id = 1
movie_id = 10

# Encode user and movie IDs
user_encoded = userEncoder.transform([user_id])[0]
movie_encoded = movieEncoder.transform([movie_id])[0]
model = load_model('best_model.h5')
# Make predictions using the trained model
prediction = model.predict([np.array([user_encoded]), np.array([movie_encoded])], verbose=0)[0][0]
movieTitle = df_movies.loc[df_movies['movieId'] == movie_id,'title'].values[0]
print(f"Predicted rating for user {user_id} and movie \"{movieTitle}\": {prediction: 0.3f}", end=" ")
for i in range(int(prediction)):
    print("⭐", end=" ")

Predicted rating for user 1 and movie "GoldenEye (1995)":  3.529 ⭐ ⭐ ⭐ 