<a href="https://colab.research.google.com/github/MihirDesh/MovieRec/blob/main/MovieRecommender.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [45]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer
from keras.models import Model
from keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

In [30]:
movies = pd.read_csv('movies.csv')
ratings = pd.read_csv('ratings.csv')

In [32]:
user_encoder = LabelEncoder()
movie_encoder = LabelEncoder()

In [47]:
ratings['user_id'] = user_encoder.fit_transform(ratings['userId'])
ratings['movie_id'] = movie_encoder.fit_transform(ratings['movieId'])

In [76]:
mlb = MultiLabelBinarizer()
genres_encoded = mlb.fit_transform(movies['genres'].apply(lambda x: x.split('|')))
genres_df = pd.DataFrame(genres_encoded, columns=mlb.classes_)
print(genres_df)

      (no genres listed)  Action  Adventure  Animation  Children  Comedy  \
0                      0       0          1          1         1       1   
1                      0       0          1          0         1       0   
2                      0       0          0          0         0       1   
3                      0       0          0          0         0       1   
4                      0       0          0          0         0       1   
...                  ...     ...        ...        ...       ...     ...   
9737                   0       1          0          1         0       1   
9738                   0       0          0          1         0       1   
9739                   0       0          0          0         0       0   
9740                   0       1          0          1         0       0   
9741                   0       0          0          0         0       1   

      Crime  Documentary  Drama  Fantasy  Film-Noir  Horror  IMAX  Musical  \
0        

In [77]:
data = pd.merge(ratings, genres_df, left_on='movie_id', right_index=True)
print(data)

        userId  movieId  rating   timestamp  user_id  movie_id  \
0            1        1     4.0   964982703        0         0   
516          5        1     4.0   847434962        4         0   
874          7        1     4.5  1106635946        6         0   
1434        15        1     2.5  1510577970       14         0   
1667        17        1     4.5  1305696483       16         0   
...        ...      ...     ...         ...      ...       ...   
100820     610   160341     2.5  1479545749      609      9307   
100821     610   160527     4.5  1479544998      609      9312   
100823     610   160836     3.0  1493844794      609      9324   
100827     610   163937     3.5  1493848789      609      9371   
100828     610   163981     3.5  1493850155      609      9372   

        (no genres listed)  Action  Adventure  Animation  ...  Film-Noir  \
0                        0       0          1          1  ...          0   
516                      0       0          1          

In [50]:
train_data, test_data = train_test_split(data, test_size = 0.2, random_state = 42)

In [51]:
num_users = len(user_encoder.classes_)
num_movies = len(movie_encoder.classes_)
num_genres = len(mlb.classes_)
embedding_size = 50

In [52]:
user_input = Input(shape=(1,))
movie_input = Input(shape=(1,))
genre_input = Input(shape=(num_genres,))

In [53]:
user_embedding = Embedding(input_dim = num_users, output_dim = embedding_size)(user_input)
movie_embedding = Embedding(input_dim = num_movies, output_dim = embedding_size)(movie_input)

In [54]:
user_vec = Flatten()(user_embedding)
movie_vec = Flatten()(movie_embedding)

In [55]:
concat = Concatenate()([user_vec, movie_vec, genre_input])
dense1 = Dense(128, activation = 'relu')(concat)
output = Dense(1)(dense1)

In [56]:
model = Model(inputs = [user_input, movie_input, genre_input], outputs = output)
model.compile(optimizer = Adam(learning_rate = 0.001), loss = 'mean_squared_error')

In [57]:
early_stopping = EarlyStopping(patience = 2, verbose = 1)

In [58]:
model.fit([train_data['user_id'], train_data['movie_id'], train_data[mlb.classes_]], train_data['rating'], batch_size = 64, epochs = 10, validation_split = 0.1, callbacks = [early_stopping])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 5: early stopping


<keras.callbacks.History at 0x7b2f6fb22020>

In [59]:
loss = model.evaluate([test_data['user_id'], test_data['movie_id'], test_data[mlb.classes_]],
                      test_data['rating'])
print(f'Test Loss: {loss}')

Test Loss: 0.7559601068496704


In [72]:
def get_movie_recommendations(user_id, num_recommendations=5):
    user_movies = data[data['user_id'] == user_id]['movie_id'].unique()
    all_movies = np.arange(num_movies)
    unrated_movies = np.setdiff1d(all_movies, user_movies)
    user_ids = np.array([user_id] * len(unrated_movies))

    genre_input = np.array([test_data[mlb.classes_].iloc[user_id]] * len(unrated_movies))

    predicted_ratings = model.predict([user_ids, unrated_movies, genre_input])

    user_liked_genres = data[data['user_id'] == user_id]['genres']
    recommended_movies = []
    for movie_id, rating in zip(unrated_movies, predicted_ratings):
        movie_genres = movies[movies['movie_id'] == movie_id]['genres'].values[0]
        if any(genre in movie_genres for genre in user_liked_genres):
            recommended_movies.append((movie_id, rating))

    recommended_movies.sort(key=lambda x: x[1], reverse=True)
    recommended_movie_ids = [movie_id for movie_id, _ in recommended_movies[:num_recommendations]]
    recommended_movie_titles = movie_encoder.inverse_transform(recommended_movie_ids)

    return recommended_movie_titles

In [82]:
user_id = 1
recommendations = get_movie_recommendations(user_id)
print(f"Recommended movies for userId {user_id}:")
for movie_id in recommendations:
  print(movies.loc[movies['movie_id'] == movie_id, 'title'].values[0])



KeyError: ignored