In [82]:
# Import Pandas
# The Pandas library to load and manipulate data.
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import pairwise_distances
# the code imports the Pandas library using the import statement and assigns it the alias pd

# Load Movies data
movies_data = pd.read_csv("ml-latest-small/movies.csv")
# it loads a CSV file called movies.csv using the pd.read_csv() function and assigns it to a variable called movies_data.


# Load Links data
links_data = pd.read_csv("ml-latest-small/links.csv")

# Load ratings data
ratings_data = pd.read_csv("ml-latest-small/ratings.csv")

# Load tags data
tags_data = pd.read_csv("ml-latest-small/tags.csv")


In [None]:
ratings_data['rating'].unique()

In [None]:


# Calculate the Pearson correlation between users
def pearson_correlation(user1, user2):
    # Get ratings for common movies
    common_movies = set(user1.index) & set(user2.index)
    if not common_movies:
        return 0  # No common movies, correlation is 0

    user1_ratings = user1[common_movies]
    user2_ratings = user2[common_movies]

    # Calculate the Pearson correlation
    mean_user1 = user1_ratings.mean()
    mean_user2 = user2_ratings.mean()
    numerator = sum((user1_ratings - mean_user1) * (user2_ratings - mean_user2))
    denominator = np.sqrt(sum((user1_ratings - mean_user1) ** 2) * sum((user2_ratings - mean_user2) ** 2))
    if denominator == 0:
        return 0  # Handle division by zero
    else:
        return numerator / denominator



In [None]:
def predict_rating(user_id, item_id, k=5):
    user = ratings_data[ratings_data['userId'] == user_id]
    item_rated_by_user = user[user['movieId'] == item_id]

    if not item_rated_by_user.empty:
        # The active user has already rated the item, no need to predict
        return item_rated_by_user['rating'].values[0]

    # Find k nearest neighbors
    neighbors = []
    for index, row in ratings_data.iterrows():
        if row['userId'] != user_id:
            similarity = pearson_correlation(user, ratings_data[ratings_data['userId'] == row['userId']])
            neighbors.append((row['userId'], similarity))

    neighbors.sort(key=lambda x: x[1], reverse=True)
    neighbors = neighbors[:k]

    weighted_sum = 0
    similarity_sum = 0
    for neighbor_id, similarity in neighbors:
        neighbor = ratings_data[ratings_data['userId'] == neighbor_id]
        neighbor_rating = neighbor[neighbor['movieId'] == item_id]['rating'].values
        if neighbor_rating.size == 0:
            continue  # Skip neighbors who haven't rated the item
        neighbor_rating = neighbor_rating[0]
        neighbor_mean = neighbor['rating'].mean()
        weighted_sum += similarity * (neighbor_rating - neighbor_mean)
        similarity_sum += abs(similarity)

    if similarity_sum == 0:
        return user['rating'].mean()
    else:
        user_mean = user['rating'].mean()
        prediction = user_mean + weighted_sum / similarity_sum
        return prediction


In [None]:

# Movie recommendation function
def recommend_movies(user_id, top_n=10):
    user = ratings_data[ratings_data['userId'] == user_id]
    unrated_movies = movies_data[~movies_data['movieId'].isin(user['movieId'])]
    unrated_movies['predicted_rating'] = unrated_movies['movieId'].apply(lambda x: predict_rating(user_id, x))
    recommended_movies = unrated_movies.sort_values(by='predicted_rating', ascending=False).head(top_n)
    return recommended_movies[['movieId', 'title', 'predicted_rating']]

# Example: Recommend movies for user 1
user_id = 1
recommended_movies = recommend_movies(user_id, top_n=10)
print(recommended_movies)

In [83]:


# Create a user-item matrix
user_item_matrix = ratings_data.pivot(index='userId', columns='movieId', values='rating').fillna(0)

# Calculate user-user similarity using Pearson correlation
user_similarity = 1 - pairwise_distances(user_item_matrix, metric='correlation')

# Common prediction function
def predict_rating(user_id, item_id, k=5):
    user_ratings = user_item_matrix.loc[user_id]
    item_rated_by_user = user_ratings[item_id]

    if item_rated_by_user != 0:
        # The active user has already rated the item, no need to predict
        return item_rated_by_user

    # Find k nearest neighbors
    similar_users = user_similarity[user_id]
    similar_users = np.argsort(similar_users, axis=0)[::-1][1:k+1]
    
    weighted_sum = 0
    similarity_sum = 0
    for neighbor_id in similar_users:
        neighbor_rating = user_item_matrix.iloc[neighbor_id][item_id]
        neighbor_mean = np.mean(user_item_matrix.iloc[neighbor_id][user_item_matrix.iloc[neighbor_id] != 0])
        if neighbor_rating != 0:
            similarity = user_similarity[user_id][neighbor_id]
            weighted_sum += similarity * (neighbor_rating - neighbor_mean)
            similarity_sum += abs(similarity)

    if similarity_sum == 0:
        return user_ratings.mean()
    else:
        user_mean = np.mean(user_ratings[user_ratings != 0])
        prediction = user_mean + weighted_sum / similarity_sum
        return prediction

# Movie recommendation function
def recommend_movies(user_id, top_n=10):
    user_ratings = user_item_matrix.loc[user_id]
    unrated_movies = user_ratings[user_ratings == 0]

    recommended_movies = []
    for item_id in unrated_movies.index:
        predicted_rating = predict_rating(user_id, item_id)
        recommended_movies.append((item_id, predicted_rating))

    recommended_movies.sort(key=lambda x: x[1], reverse=True)
    top_movies = recommended_movies[:top_n]

    return movies_data[movies_data['movieId'].isin([x[0] for x in top_movies])]

# Example: Recommend movies for user 1
user_id = 1
recommended_movies = recommend_movies(user_id, top_n=10)
print(recommended_movies)


      movieId                                            title  \
613       778                             Trainspotting (1996)   
922      1221                   Godfather: Part II, The (1974)   
1284     1704                         Good Will Hunting (1997)   
2036     2712                            Eyes Wide Shut (1999)   
3194     4306                                     Shrek (2001)   
3562     4878                              Donnie Darko (2001)   
5166     8368  Harry Potter and the Prisoner of Azkaban (2004)   
6331    48780                             Prestige, The (2006)   
7010    68157                      Inglourious Basterds (2009)   
7693    89745                             Avengers, The (2012)   

                                                 genres  
613                                  Comedy|Crime|Drama  
922                                         Crime|Drama  
1284                                      Drama|Romance  
2036                             Drama|My