In [17]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: Build the movie ratings dataset
# A small in-memory sample stands in for a real ratings file; the i-th entries
# of the three lists together describe one (user, movie, rating) observation.
ratings_dict = {
    "user_id": [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4],
    "movie_id": [101, 102, 103, 101, 102, 104, 101, 103, 104, 102, 103, 104],
    "rating": [5, 4, 3, 4, 5, 3, 3, 5, 4, 5, 4, 2],
}

ratings_df = pd.DataFrame(data=ratings_dict)
print("Ratings DataFrame:", ratings_df, sep="\n")

# Step 2: Pivot the long table into a user-item matrix
# Rows are users, columns are movies; a movie the user has not rated is NaN.
user_movie_matrix = ratings_df.pivot_table(
    values="rating",
    index="user_id",
    columns="movie_id",
)
print("\nUser-Movie Rating Matrix:", user_movie_matrix, sep="\n")

# Step 3: Measure how alike every pair of users is (cosine similarity)
# Missing ratings are replaced with 0 before the similarity is computed.
user_movie_matrix_filled = user_movie_matrix.fillna(value=0)
user_similarity = cosine_similarity(user_movie_matrix_filled)

# Wrap the raw array in a DataFrame labelled by user id on both axes so that
# similarities can be looked up by user id.
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=user_movie_matrix.index,
    columns=user_movie_matrix.index,
)
print("\nUser Similarity Matrix:", user_similarity_df, sep="\n")

# Step 4: Predict ratings for each user-movie pair
# Prediction formula: weighted sum of ratings by similar users / sum of similarities

def predict_ratings(user_id, movie_id, similarity_df=None, rating_matrix=None):
    """Predict a user's rating for a movie from the ratings of other users.

    The prediction is the similarity-weighted average of the ratings that
    the *other* users gave the movie::

        sum(sim(user, v) * rating(v, movie)) / sum(sim(user, v))

    The target user is excluded from the neighbours: otherwise, when the
    user has already rated the movie, their own rating would enter the
    average with self-similarity 1.0 and bias the result towards it.

    Args:
        user_id: Label of the target user in the similarity matrix.
        movie_id: Column label of the movie in the rating matrix.
        similarity_df: Optional user-by-user similarity DataFrame, labelled
            by user id on both axes. Defaults to the module-level
            ``user_similarity_df``.
        rating_matrix: Optional user-by-movie rating DataFrame with NaN for
            missing ratings. Defaults to the module-level
            ``user_movie_matrix``.

    Returns:
        The predicted rating as a float, or 0.0 when no other user has rated
        the movie or the neighbours' similarities sum to zero.

    Raises:
        KeyError: If ``user_id`` is not in the similarity matrix or
            ``movie_id`` is not a column of the rating matrix.
    """
    # Fall back to the notebook's global matrices when none are supplied.
    if similarity_df is None:
        similarity_df = user_similarity_df
    if rating_matrix is None:
        rating_matrix = user_movie_matrix

    # Similarity of the target user to every user.
    user_sim_scores = similarity_df.loc[user_id]

    # Ratings of this movie from users who actually rated it, minus the
    # target user (errors="ignore" covers the case where they never rated it).
    neighbour_ratings = (
        rating_matrix[movie_id].dropna().drop(labels=user_id, errors="ignore")
    )
    if neighbour_ratings.empty:
        return 0.0

    # Select similarities in the same order as the ratings so the dot
    # product pairs each rating with its own weight.
    neighbour_sims = user_sim_scores.loc[neighbour_ratings.index]
    total_similarity = neighbour_sims.sum()
    if total_similarity == 0:
        # No usable signal; avoid dividing by zero.
        return 0.0

    weighted_sum = np.dot(neighbour_sims.to_numpy(), neighbour_ratings.to_numpy())
    return float(weighted_sum / total_similarity)

# Example: estimate how user 1 would rate movie 104, a movie user 1 has no
# rating for in the sample data.
predicted_rating = predict_ratings(1, 104)
print(f"\nPredicted rating for User 1 on Movie 104: {predicted_rating:.2f}")

# Step 5: Recommend movies to a given user
def recommend_movies(user_id, num_recommendations=3):
    """Return the best-predicted movies among those the user has not rated.

    Args:
        user_id: Row label of the user in ``user_movie_matrix``.
        num_recommendations: Length of the slice taken from the ranked list.

    Returns:
        A list of ``(movie_id, predicted_rating)`` tuples ordered from the
        highest to the lowest predicted rating.
    """
    # Movies whose entry in this user's row is missing are the candidates.
    ratings_row = user_movie_matrix.loc[user_id]
    candidates = ratings_row[ratings_row.isna()].index

    # Score every candidate with the rating predictor.
    scored = [
        (candidate, predict_ratings(user_id, candidate))
        for candidate in candidates
    ]

    # Highest predicted rating first; ties keep their original order.
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)

    return ranked[:num_recommendations]

# Example: ask for up to three recommendations for user 1.
recommended_movies = recommend_movies(1, 3)
print(f"\nTop movie recommendations for User 1: {recommended_movies}")
    

Ratings DataFrame:
    user_id  movie_id  rating
0         1       101       5
1         1       102       4
2         1       103       3
3         2       101       4
4         2       102       5
5         2       104       3
6         3       101       3
7         3       103       5
8         3       104       4
9         4       102       5
10        4       103       4
11        4       104       2

User-Movie Rating Matrix:
movie_id  101  102  103  104
user_id                     
1         5.0  4.0  3.0  NaN
2         4.0  5.0  NaN  3.0
3         3.0  NaN  5.0  4.0
4         NaN  5.0  4.0  2.0

User Similarity Matrix:
user_id         1         2         3         4
user_id                                        
1        1.000000  0.800000  0.600000  0.674619
2        0.800000  1.000000  0.480000  0.653537
3        0.600000  0.480000  1.000000  0.590292
4        0.674619  0.653537  0.590292  1.000000

Predicted rating for User 1 on Movie 104: 2.96

Top movie recommendations fo