In [33]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

ratings = pd.read_csv('ratings.csv')
movies = pd.read_csv('movies.csv')

In [34]:
new_matrix = ratings.pivot_table(index='user_id', columns='movie_id', values='rating')
print(new_matrix)

movie_id   10   20   30
user_id                
1         4.0  3.5  NaN
2         4.5  NaN  2.0
3         5.0  NaN  NaN


In [35]:
fill_missing_ratings0 = new_matrix.fillna(0)
print(fill_missing_ratings0)
     

movie_id   10   20   30
user_id                
1         4.0  3.5  0.0
2         4.5  0.0  2.0
3         5.0  0.0  0.0


In [36]:
user_cos_similarity = cosine_similarity(fill_missing_ratings0)
print(fill_missing_ratings0, "\n")
print(user_cos_similarity)

movie_id   10   20   30
user_id                
1         4.0  3.5  0.0
2         4.5  0.0  2.0
          5.0  0.0  0.0

[[1.         0.68771327 0.75257669]
 [0.68771327 1.         0.91381155]
 [0.75257669 0.91381155 1.        ]]


In [37]:
df_user_similarity = pd.DataFrame(user_cos_similarity, index=new_matrix.index, columns=new_matrix.index)
print(df_user_similarity)

user_id         1         2         3
user_id                              
1        1.000000  0.687713  0.752577
2        0.687713  1.000000  0.913812
3        0.752577  0.913812  1.000000


In [38]:
def recommend_movies(user_id, top_num=5):
    if user_id not in df_user_similarity.index:
        print("User not found.")
        return []

    similarity_scores = df_user_similarity.loc[user_id]

    similar_users = similarity_scores.sort_values(ascending=False)[1:top_num + 1]
    similar_user_ids = similar_users.index
    similar_users_ratings = ratings[ratings['user_id'].isin(similar_user_ids)]
    user_movies = ratings[ratings['user_id'] == user_id]['movie_id']
    unseen_movies = similar_users_ratings[~similar_users_ratings['movie_id'].isin(user_movies)]

    movie_scores = unseen_movies.groupby('movie_id')['rating'].mean()

    recommended_movies_list = movies[movies['movie_id'].isin(movie_scores.index)].copy()
    recommended_movies_list['predicted_rating'] = recommended_movies_list['movie_id'].map(movie_scores)

    recommended_movies_list = recommended_movies_list.sort_values(by='predicted_rating', ascending=False)
    recommended_movies_list.reset_index(drop=True, inplace=True)
    recommended_movies_list.index += 1

    return recommended_movies_list[['title', 'genres', 'predicted_rating']]

recommend_movies(3)

Unnamed: 0,title,genres,predicted_rating
1,Inception (2010),Action|Thriller,3.5
2,The Notebook (2004),Drama|Romance,2.0


In [39]:
def predict_rating(user_id, movie_id, ratings_matrix, similarity_matrix):
    if movie_id not in ratings_matrix.columns or user_id not in ratings_matrix.index:
        return None

    user_similarities = similarity_matrix.loc[user_id]
    
    movie_ratings = ratings_matrix[movie_id]
    mask = movie_ratings.notna() 
    filtered_similarities = user_similarities[mask]
    filtered_ratings = movie_ratings[mask]
    
    
    if filtered_similarities.sum() == 0:
        return None

    weighted_sum = (filtered_similarities * filtered_ratings).sum()
    similarity_sum = filtered_similarities.sum()
    return weighted_sum / similarity_sum

In [40]:
from sklearn.metrics import mean_squared_error
import numpy as np

actual = []
predicted = []

sample_data = ratings

for _, row in sample_data.iterrows():
    user = int(row['user_id'])     
    movie = int(row['movie_id'])  
    true_rating = row['rating']

    pred_rating = predict_rating(user, movie, new_matrix, df_user_similarity)

    if pred_rating is not None:
        actual.append(true_rating)
        predicted.append(pred_rating)

rmse = np.sqrt(mean_squared_error(actual, predicted))
print(f"\nUser Base RMSE score: {rmse:.4f}")


User Base RMSE score: 0.2862
