In [1]:
# Hand-written sample of a movie catalogue, kept as CSV text
# (header row followed by one movie per line).
movie_data = (
    "movieId,title,genres\n"
    "1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy\n"
    "2,Jumanji (1995),Adventure|Children|Fantasy\n"
    "3,Grumpier Old Men (1995),Comedy|Romance\n"
    "4,Waiting to Exhale (1995),Comedy|Drama|Romance\n"
    "5,Father of the Bride Part II (1995),Comedy\n"
)

# Write the sample to disk; the context manager closes the handle for us.
with open('movies.csv', mode='w') as movies_file:
    movies_file.write(movie_data)

print("movies.csv file created successfully.")

movies.csv file created successfully.


In [2]:
import os

# Sanity check: report whether movies.csv appears among the entries of the
# current working directory.
print(
    "movies.csv is present in the current directory."
    if 'movies.csv' in os.listdir('.')
    else "movies.csv is not found in the current directory."
)

movies.csv is present in the current directory.


In [3]:
# Hand-written sample of user ratings, kept as CSV text
# (header row followed by one rating per line).
ratings_data = (
    "userId,movieId,rating,timestamp\n"
    "1,1,4.0,964982703\n"
    "1,3,4.0,964981247\n"
    "1,6,4.0,964982224\n"
    "1,47,5.0,964983815\n"
    "1,50,5.0,964982931\n"
    "2,1,4.0,964982703\n"
    "2,3,4.0,964981247\n"
    "2,6,4.0,964982224\n"
    "2,47,5.0,964983815\n"
    "2,50,5.0,964982931\n"
)

# Write the sample to disk; the context manager closes the handle for us.
with open('ratings.csv', mode='w') as ratings_file:
    ratings_file.write(ratings_data)

print("ratings.csv file created successfully.")

ratings.csv file created successfully.


In [4]:
import os

# Sanity check: report whether ratings.csv appears among the entries of the
# current working directory.
print(
    "ratings.csv is present in the current directory."
    if 'ratings.csv' in os.listdir('.')
    else "ratings.csv is not found in the current directory."
)

ratings.csv is present in the current directory.


In [5]:
import pandas as pd

# Load the movie catalogue and preview its first five rows.
movies = pd.read_csv(filepath_or_buffer='movies.csv')
print(movies.iloc[:5])

# Load the ratings and preview them the same way.
ratings = pd.read_csv(filepath_or_buffer='ratings.csv')
print(ratings.iloc[:5])

   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  
   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931


In [6]:
# Attach movie metadata to every rating. pd.merge defaults to an inner join,
# so ratings whose movieId is missing from `movies` are dropped here.
data = pd.merge(ratings, movies, on='movieId')

# One row per user, one column per title (pivot_table averages duplicates).
# A movie the user has not rated comes out of the pivot as NaN; replace it
# with 0 so the matrix is fully numeric. Without this, scikit-learn's input
# validation rejects the matrix and NaN propagates through any dot product
# taken over it. .fillna(0) is used rather than pivot_table(fill_value=0)
# to keep the float dtype of the ratings.
user_movie_matrix = data.pivot_table(
    index='userId', columns='title', values='rating'
).fillna(0)
print(user_movie_matrix.head())

title   Grumpier Old Men (1995)  Toy Story (1995)
userId                                           
1                           4.0               4.0
2                           4.0               4.0


In [7]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between the rows of user_movie_matrix (one row
# per userId), so entry [i, j] compares the i-th and j-th users in row order.
user_similarity = cosine_similarity(X=user_movie_matrix)

# Preview at most the top-left 5x5 corner of the similarity matrix.
print(user_similarity[0:5, 0:5])

[[1. 1.]
 [1. 1.]]


In [8]:
import numpy as np

def get_recommendations(user_id, user_movie_matrix, user_similarity, n_recommendations=5):
    """Recommend unrated movies to a user from similarity-weighted ratings.

    Every movie is scored as the sum of all users' ratings for it, each
    weighted by that user's similarity to ``user_id``, divided by the total
    absolute similarity. The target user's own row takes part in the average.
    Movies the target user has already rated (rating > 0) are removed before
    the highest scores are selected.

    Parameters
    ----------
    user_id
        Index label of the target user in ``user_movie_matrix``.
    user_movie_matrix : pandas.DataFrame
        Ratings with one row per user and one column per movie. Missing
        ratings may be NaN or 0; both are treated as "not rated".
    user_similarity : array-like of shape (n_users, n_users)
        Similarity scores whose rows follow the row order of
        ``user_movie_matrix``.
    n_recommendations : int, default 5
        Maximum number of movies to return.

    Returns
    -------
    pandas.Series or None
        Predicted scores indexed by movie, highest first. Empty when there is
        nothing left to recommend. ``None`` (after printing a message) when
        ``user_id`` is not a row of ``user_movie_matrix``.
    """
    if user_id not in user_movie_matrix.index:
        print(f"User {user_id} not found in the user_movie_matrix.")
        return None

    user_ratings = user_movie_matrix.loc[user_id]

    # The similarity matrix is positional while user ids are labels. Translate
    # the label into its row position instead of assuming ids are 1..N.
    user_position = user_movie_matrix.index.get_loc(user_id)
    similar_users = np.asarray(user_similarity, dtype=float)[user_position]

    # Count a missing rating as 0 so a single NaN cannot turn a movie's whole
    # weighted sum into NaN.
    ratings = user_movie_matrix.fillna(0).to_numpy(dtype=float)
    weighted_sum_ratings = similar_users @ ratings

    total_similarity = np.abs(similar_users).sum()
    if total_similarity == 0:
        # No similar users: nothing to base a recommendation on. Returning an
        # empty result avoids a 0/0 division and its NaN scores.
        return pd.Series(dtype=float, index=user_movie_matrix.columns[:0])

    weighted_avg_ratings_series = pd.Series(
        weighted_sum_ratings / total_similarity,
        index=user_movie_matrix.columns,
    )

    # NaN > 0 is False, so unrated movies stay eligible for recommendation.
    already_rated = user_ratings[user_ratings > 0].index
    recommendations = weighted_avg_ratings_series.drop(already_rated)

    return recommendations.nlargest(n_recommendations)

# Users to request recommendations for.
user_ids = [1, 2, 3, 4]

for user_id in user_ids:
    recommendations = get_recommendations(user_id, user_movie_matrix, user_similarity)
    # get_recommendations reports an unknown user itself and returns None,
    # so there is nothing further to print for that id.
    if recommendations is None:
        continue
    print(f"Top recommendations for user {user_id}:\n", recommendations)

Top recommendations for user 1:
 Series([], dtype: float64)
Top recommendations for user 2:
 Series([], dtype: float64)
User 3 not found in the user_movie_matrix.
User 4 not found in the user_movie_matrix.
