In [38]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns          


In [39]:
# Read the per-user ratings table (userId, movieId, rating, timestamp) and
# preview its first rows.
ratings_file = 'ratings.csv'
df_ratings = pd.read_csv(filepath_or_buffer=ratings_file)
df_ratings.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,16,4.0,1217897793
1,1,24,1.5,1217895807
2,1,32,4.0,1217896246
3,1,47,4.0,1217896556
4,1,50,4.0,1217896523


In [40]:
# Read the movie catalogue (movieId, title, genres) and preview its first rows.
movies_file = 'movies.csv'
df_movies = pd.read_csv(filepath_or_buffer=movies_file)
df_movies.head(n=5)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [41]:
# Attach title/genres to every rating row; the default inner join keeps only
# ratings whose movieId also appears in the movie catalogue.
movie_data = df_ratings.merge(df_movies, on='movieId')
movie_data.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,16,4.0,1217897793,Casino (1995),Crime|Drama
1,9,16,4.0,842686699,Casino (1995),Crime|Drama
2,12,16,1.5,1144396284,Casino (1995),Crime|Drama
3,24,16,4.0,963468757,Casino (1995),Crime|Drama
4,29,16,3.0,836820223,Casino (1995),Crime|Drama


In [42]:
# Reshape the long ratings table into a users x movies grid.
# Movies a user never rated come out of the pivot as NaN and are stored as 0.
user_item_matrix = (
    movie_data
    .pivot(index='userId', columns='movieId', values='rating')
    .fillna(value=0)
)


In [43]:
# Show the user x movie rating grid (rows: userId, columns: movieId).
user_item_matrix

movieId,1,2,3,4,5,6,7,8,9,10,...,144482,144656,144976,146344,146656,146684,146878,148238,148626,149532
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,5.0,0.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
664,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
665,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
666,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [44]:
# Cosine similarity between every pair of users: the dot product of their
# rating rows divided by the product of the two rows' Euclidean norms.
user_norms = np.linalg.norm(user_item_matrix, axis=1)
user_similarity = (
    np.dot(user_item_matrix, user_item_matrix.T)
    / (user_norms[:, np.newaxis] * user_norms[np.newaxis, :])
)


In [45]:
# Show the user-user similarity matrix built from the rating rows.
user_similarity

array([[1.        , 0.10111327, 0.21004361, ..., 0.2386603 , 0.27821676,
        0.15347851],
       [0.10111327, 1.        , 0.11555911, ..., 0.05142323, 0.03590744,
        0.06481608],
       [0.21004361, 0.11555911, 1.        , ..., 0.08094014, 0.15894346,
        0.10964798],
       ...,
       [0.2386603 , 0.05142323, 0.08094014, ..., 1.        , 0.12325229,
        0.15836814],
       [0.27821676, 0.03590744, 0.15894346, ..., 0.12325229, 1.        ,
        0.11095479],
       [0.15347851, 0.06481608, 0.10964798, ..., 0.15836814, 0.11095479,
        1.        ]])

In [46]:
def predict_rating(user_item_matrix, user_similarity):
    """Predict ratings as a similarity-weighted average over all users.

    For each user, every item's score is the sum of all users' ratings for
    that item weighted by their similarity to the target user, divided by
    the sum of the absolute similarity weights of that user.

    Parameters
    ----------
    user_item_matrix : array-like, shape (n_users, n_items)
        Rating matrix with one row per user.
    user_similarity : array-like, shape (n_users, n_users)
        Pairwise user similarity weights; row ``i`` holds the weights used
        to build the predictions for user ``i``.

    Returns
    -------
    numpy.ndarray, shape (n_users, n_items)
        Predicted scores. Rows whose similarity weights sum to zero are
        returned as all zeros instead of NaN.
    """
    ratings = np.asarray(user_item_matrix, dtype=float)
    similarity = np.asarray(user_similarity, dtype=float)

    weighted_sum = similarity @ ratings
    total_weight = np.abs(similarity).sum(axis=1, keepdims=True)

    # A user with no non-zero similarity to anyone would otherwise produce
    # 0/0 = NaN (plus a RuntimeWarning). Leave those rows at 0 so that later
    # sorting and ranking of the predictions stays well-defined.
    predictions = np.zeros_like(weighted_sum)
    np.divide(weighted_sum, total_weight, out=predictions, where=total_weight != 0)
    return predictions


In [47]:
# Score every (user, movie) pair from the raw rating array and the
# user-user similarity matrix.
predicted_ratings = predict_rating(
    user_item_matrix=user_item_matrix.values,
    user_similarity=user_similarity,
)


In [48]:
# Re-attach the userId / movieId labels so predictions can be looked up by id.
predicted_df = pd.DataFrame(
    data=predicted_ratings,
    index=user_item_matrix.index,
    columns=user_item_matrix.columns,
)

In [49]:
def recommend_movies(user_id, predicted_df, n=5, ratings=None):
    """Return the top-``n`` movie ids for a user, skipping movies already rated.

    Parameters
    ----------
    user_id : hashable
        Label of the user's row in ``predicted_df``.
    predicted_df : pandas.DataFrame
        Predicted scores with one row per user and one column per movie id.
    n : int, default 5
        Maximum number of movie ids to return.
    ratings : pandas.DataFrame, optional
        Observed ratings with ``userId`` and ``movieId`` columns, used to
        find what the user has already rated. When omitted, the
        notebook-level ``movie_data`` frame is used.

    Returns
    -------
    list
        Movie ids ordered from highest to lowest predicted score.

    Raises
    ------
    KeyError
        If ``user_id`` is not a row label of ``predicted_df``.
    """
    if ratings is None:
        # Backward-compatible default: earlier callers relied on the global.
        ratings = movie_data

    rated_movies = ratings.loc[ratings['userId'] == user_id, 'movieId'].to_numpy()
    ranked_movies = predicted_df.loc[user_id].sort_values(ascending=False).index

    # Vectorised membership test instead of a per-movie scan of a Python list;
    # masking after the sort keeps the ranked order intact.
    unseen_movies = ranked_movies[~ranked_movies.isin(rated_movies)]
    return unseen_movies[:n].tolist()

In [51]:
# Example: fetch and print the five best unseen movies for one user.
user_id = 3
recommendations = recommend_movies(user_id=user_id, predicted_df=predicted_df, n=5)

# One heading line, then one line per recommended movie id.
print(f'\nTop 5 Movie Recommendations for User {user_id}:')
for movie in recommendations:
    print(f'Movie {movie}')


Top 5 Movie Recommendations for User 3:
Movie 318
Movie 110
Movie 47
Movie 364
Movie 260
