In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [None]:
# Load the raw ratings table from CSV; the path is relative to the notebook's working directory.
ratings_data = pd.read_csv('./data/ratings.csv')

In [3]:
# Display the column labels of the loaded ratings table.
ratings_data.columns

Index(['_id', 'movie_id', 'rating_val', 'user_id'], dtype='object')

In [None]:
# Build the user x movie rating matrix: one row per user, one column per movie.
# pivot_table aggregates repeated (user, movie) pairs with its default aggfunc (mean).
user_item_matrix = (
    ratings_data
    .pivot_table(values='rating_val', index='user_id', columns='movie_id')
    .fillna(0)  # a missing rating is encoded as 0 (keeping NaN would allow a more sophisticated approach)
)

# Transposing puts movies on the rows, so the cosine similarity is computed movie-to-movie.
item_similarity = cosine_similarity(user_item_matrix.T)

# Re-attach the movie ids on both axes so similarities can be looked up by label.
item_similarity_df = pd.DataFrame(
    item_similarity,
    index=user_item_matrix.columns,
    columns=user_item_matrix.columns,
)
item_similarity_df.shape

(5000, 5000)

In [8]:
def predict_ratings(user_id, top_n=10, ratings=None, similarity=None):
    """Recommend unrated items for one user with item-based collaborative filtering.

    Every item the user has not rated is scored with the similarity-weighted
    average of the ratings the user did give::

        score(i) = sum_j sim(j, i) * r_j / sum_j sim(j, i)

    where ``j`` runs over the items the user rated. A rating that is not
    strictly positive (0 or NaN) counts as "not rated". When the similarity
    weights of an item sum to zero its score is 0.0. No minimum-support or
    shrinkage term is applied, so an item that is similar to a single rated
    item simply inherits that item's rating.

    Parameters
    ----------
    user_id : hashable
        Row label of the user in ``ratings``.
    top_n : int or None, optional
        Number of recommendations to return. It is applied as a slice, so
        ``None`` returns every unrated item.
    ratings : pandas.DataFrame, optional
        Users x items rating matrix. Defaults to the notebook-level
        ``user_item_matrix``.
    similarity : pandas.DataFrame, optional
        Items x items similarity matrix, labelled on both axes with the item
        ids used as columns of ``ratings``. Defaults to the notebook-level
        ``item_similarity_df``.

    Returns
    -------
    list of (item, numpy.float64)
        Highest score first; ties keep the column order of ``ratings``.

    Raises
    ------
    KeyError
        If ``user_id`` is not a row label of ``ratings``.
    """
    # Fall back to the matrices built earlier in the notebook.
    if ratings is None:
        ratings = user_item_matrix
    if similarity is None:
        similarity = item_similarity_df

    if user_id not in ratings.index:
        raise KeyError(f"user_id {user_id!r} is not present in the ratings matrix")

    user_ratings = ratings.loc[user_id]
    has_rating = (user_ratings > 0).to_numpy()
    rated_items = user_ratings.index[has_rating]
    unrated_items = user_ratings.index[~has_rating]

    # Rows: items the user rated; columns: candidate (unrated) items.
    sim_block = similarity.loc[rated_items, unrated_items].to_numpy(dtype=float)
    rated_values = user_ratings.loc[rated_items].to_numpy(dtype=float)

    # One matrix product scores every candidate at once.
    weighted_sums = rated_values @ sim_block
    weight_totals = sim_block.sum(axis=0)

    # Candidates with no similarity mass keep a float 0.0 (never a Python int),
    # so every returned score supports the same NumPy scalar methods.
    scores = np.zeros(len(unrated_items), dtype=float)
    np.divide(weighted_sums, weight_totals, out=scores, where=weight_totals != 0)

    # A stable sort on the negated scores gives descending order while keeping
    # tied items in their original column order.
    order = np.argsort(-scores, kind="stable")[:top_n]
    labels = list(unrated_items)
    return [(labels[i], scores[i]) for i in order]

In [6]:
# Preview the first rows of the ratings table.
ratings_data.head()

Unnamed: 0,_id,movie_id,rating_val,user_id
0,5fc57c5d6758f6963451a063,loving-2016,7,deathproof
1,5fc57c5d6758f6963451a172,the-social-network,10,deathproof
2,5fc57c5d6758f6963451a0a0,saw-iii,7,deathproof
3,5fc57c5d6758f6963451a0e0,the-girl-on-the-train-2016,8,deathproof
4,5fc57c5d6758f6963451a11d,yes-man,6,deathproof


In [12]:
# Print the top recommendations for one example user as "<movie> -- <score>".
user_id = "deathproof"
recommendations = predict_ratings(user_id)
for movie_id, predicted_rating in recommendations:
    # np.round works on NumPy scalars and plain Python numbers alike, whereas
    # the `.round` method exists only on NumPy scalars.
    print(f"{movie_id} -- {np.round(predicted_rating, 2)}")

el-mesero -- 9.0
talk-sex -- 7.59
christmas-at-the-ranch -- 7.57
crazy-fist -- 7.57
the-moment-you-fall-in-love -- 7.46
ive-always-liked-you -- 7.3
pursuit-2022 -- 7.11
midsommar-directors-cut -- 7.0
the-institute-2022 -- 7.0
the-kashmir-files -- 7.0
