In [2]:
# Import Pandas
# The Pandas library to load and manipulate data.
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import pairwise_distances
# the code imports the Pandas library using the import statement and assigns it the alias pd

# Load Movies data
movies_data = pd.read_csv("ml-latest-small/movies.csv")
# it loads a CSV file called movies.csv using the pd.read_csv() function and assigns it to a variable called movies_data.


# Load Links data
links_data = pd.read_csv("ml-latest-small/links.csv")

# Load ratings data
ratings_data = pd.read_csv("ml-latest-small/ratings.csv")

# Load tags data
tags_data = pd.read_csv("ml-latest-small/tags.csv")


In [3]:
ratings_data['rating'].unique()

array([4. , 5. , 3. , 2. , 1. , 4.5, 3.5, 2.5, 0.5, 1.5])

In [15]:


# Create a user-item matrix
user_item_matrix = ratings_data.pivot(index='userId', columns='movieId', values='rating').fillna(0)

# Calculate user-user similarity using Pearson correlation
user_similarity = 1 - pairwise_distances(user_item_matrix, metric='correlation')

# Prediction function with correct rating scaling
def predict_rating(user_id, item_id, k=5):
    user_ratings = user_item_matrix.loc[user_id]
    item_rated_by_user = user_ratings[item_id]

    if item_rated_by_user != 0:
        return item_rated_by_user

    similar_users = np.argpartition(user_similarity[user_id], -k)[-k:]

    weighted_sum = 0
    similarity_sum = 0
    for neighbor_id in similar_users:
        neighbor_rating = user_item_matrix.loc[neighbor_id, item_id]
        neighbor_mean = user_item_matrix.loc[neighbor_id].mean()

        if neighbor_rating != 0:
            similarity = user_similarity[user_id, neighbor_id]
            weighted_sum += similarity * (neighbor_rating - neighbor_mean)
            similarity_sum += abs(similarity)

    if similarity_sum == 0:
        return user_ratings.mean()
    else:
        user_mean = user_ratings[user_ratings != 0].mean()
        prediction = user_mean + weighted_sum / similarity_sum

        # Scale the prediction to the 0-5 range
        prediction = max(0, min(5, prediction))
        return prediction



# Recommendation function
def recommend_movies(user_id, top_n=10):
    unrated_movies = user_item_matrix.loc[user_id][user_item_matrix.loc[user_id] == 0]

    recommendations = []
    for item_id in unrated_movies.index:
        predicted_rating = predict_rating(user_id, item_id)
        recommendations.append((item_id, predicted_rating))

    recommendations.sort(key=lambda x: x[1], reverse=True)
    top_recommendations = recommendations[:top_n]

    recommended_movie_ids = [x[0] for x in top_recommendations]
    recommended_ratings = [x[1] for x in top_recommendations]

    recommended_movies = movies_data[movies_data['movieId'].isin(recommended_movie_ids)].copy()
    recommended_movies['PredictedRating'] = recommended_ratings

    return recommended_movies

# Example: Recommend movies for user 1
user_id = 5
recommended_movies_df = recommend_movies(user_id, top_n=10)
print(recommended_movies_df)









    movieId                                      title  \
2         3                    Grumpier Old Men (1995)   
5         6                                Heat (1995)   
9        10                           GoldenEye (1995)   
10       11             American President, The (1995)   
15       16                              Casino (1995)   
19       20                         Money Train (1995)   
22       23                           Assassins (1995)   
31       32  Twelve Monkeys (a.k.a. 12 Monkeys) (1995)   
43       47                Seven (a.k.a. Se7en) (1995)   
47       52                    Mighty Aphrodite (1995)   

                                genres  PredictedRating  
2                       Comedy|Romance                5  
5                Action|Crime|Thriller                5  
9            Action|Adventure|Thriller                5  
10                Comedy|Drama|Romance                5  
15                         Crime|Drama                5  
19  Action|Co