<a href="https://colab.research.google.com/github/Mayur01-gg/Movie_Recommender/blob/main/movie_recommender_deep_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🎥 Movie Recommender System using Deep Learning (Keras + MovieLens 100K)

In [1]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.models import Model
from keras.layers import Input, Embedding, Flatten, Dot, Dense
from keras.optimizers import Adam
import keras

In [2]:
# Step 2: Load Data
!wget https://files.grouplens.org/datasets/movielens/ml-100k.zip
!unzip -q ml-100k.zip

ratings = pd.read_csv('ml-100k/u.data', sep='\t', names=['user_id', 'movie_id', 'rating', 'timestamp'])
movies = pd.read_csv('ml-100k/u.item', sep='|', encoding='latin-1', header=None, usecols=[0, 1])
movies.columns = ['movie_id', 'title']

--2025-04-20 13:10:26--  https://files.grouplens.org/datasets/movielens/ml-100k.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4924029 (4.7M) [application/zip]
Saving to: ‘ml-100k.zip’


2025-04-20 13:10:27 (12.3 MB/s) - ‘ml-100k.zip’ saved [4924029/4924029]



In [3]:
# Step 3: Preprocess Data
n_users = ratings.user_id.nunique()
n_movies = ratings.movie_id.nunique()

user_to_index = {user_id: idx for idx, user_id in enumerate(ratings.user_id.unique())}
movie_to_index = {movie_id: idx for idx, movie_id in enumerate(ratings.movie_id.unique())}

ratings['user_idx'] = ratings['user_id'].map(user_to_index)
ratings['movie_idx'] = ratings['movie_id'].map(movie_to_index)

In [4]:
# Step 4: Train/Test Split
train, test = train_test_split(ratings, test_size=0.2, random_state=42)

In [5]:
# Step 5: Build Deep Learning Model
embedding_size = 50

user_input = Input(shape=(1,))
user_embedding = Embedding(n_users, embedding_size)(user_input)
user_vec = Flatten()(user_embedding)

movie_input = Input(shape=(1,))
movie_embedding = Embedding(n_movies, embedding_size)(movie_input)
movie_vec = Flatten()(movie_embedding)

dot_product = Dot(axes=1)([user_vec, movie_vec])
output = Dense(1, activation='linear')(dot_product)

model = Model([user_input, movie_input], output)
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

model.summary()

In [6]:
# Step 6: Train the Model
history = model.fit(
    [train.user_idx, train.movie_idx],
    train.rating,
    epochs=10,
    verbose=1,
    validation_data=([test.user_idx, test.movie_idx], test.rating)
)

Epoch 1/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - loss: 8.4418 - val_loss: 1.0038
Epoch 2/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - loss: 0.9346 - val_loss: 0.9153
Epoch 3/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - loss: 0.8187 - val_loss: 0.8873
Epoch 4/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - loss: 0.7124 - val_loss: 0.8771
Epoch 5/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - loss: 0.6102 - val_loss: 0.8856
Epoch 6/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - loss: 0.5021 - val_loss: 0.9261
Epoch 7/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - loss: 0.3917 - val_loss: 0.9647
Epoch 8/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - loss: 0.3091 - val_loss: 1.0199
Epoch 9/10
[1m2500

In [7]:
# Step 7: Evaluate the Model
mse = model.evaluate([test.user_idx, test.movie_idx], test.rating)
print(f"Test MSE: {mse:.4f}, RMSE: {np.sqrt(mse):.4f}")

[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 1.0977
Test MSE: 1.0985, RMSE: 1.0481


In [9]:
# Step 8: Recommend Movies for a User
def recommend_movies(user_id, top_n=5):
    user_idx = user_to_index[user_id]
    movie_indices = np.array([i for i in range(n_movies)])
    predictions = model.predict([np.full(n_movies, user_idx), movie_indices], verbose=0)

    top_indices = predictions.reshape(-1).argsort()[::-1][:top_n]
    recommended_movie_ids = [list(movie_to_index.keys())[i] for i in top_indices]

    return movies[movies['movie_id'].isin(recommended_movie_ids)]

# Example usage
user_id_example = ratings.user_id.sample(1).values[0]
print(f"\n🎬 Recommended movies for User {user_id_example}:\n")
print(recommend_movies(user_id_example))


🎬 Recommended movies for User 763:

      movie_id                                              title
56          57                                      Priest (1994)
319        320  Paradise Lost: The Child Murders at Robin Hood...
424        425                                 Bob Roberts (1992)
511        512                             Wings of Desire (1987)
1366      1367                                       Faust (1994)
