# In [2]:  (notebook cell prompt)
# Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
from math import sqrt

# Step 2: Load Dataset
# NOTE: both paths are absolute and machine-specific -- point them at your
# local copy of the ml-100k files before running.
ratings_path = "C:/Users/harsh/OneDrive/Desktop/Projects/Internship Projects/SkillBit/implementation/ml-100k/ml-100k/u.data"
movies_path = "C:/Users/harsh/OneDrive/Desktop/Projects/Internship Projects/SkillBit/implementation/ml-100k/ml-100k/u.item"

# Ratings file: tab-separated; column names are supplied here.
ratings = pd.read_csv(
    ratings_path,
    sep="\t",
    names=["user_id", "movie_id", "rating", "timestamp"],
)
# Movies file: pipe-separated, latin-1 encoded; only the first two columns
# (movie id and title) are read.
movies = pd.read_csv(
    movies_path,
    sep="|",
    encoding="latin-1",
    usecols=[0, 1],
    names=["movie_id", "title"],
)

# Attach a title to each rating row by joining on movie_id.
data = ratings.merge(movies, on="movie_id")

# Step 3: Create User-Item Matrix
# One row per user, one column per movie title, cells hold the rating.
# Cells with no rating are set to 0 (for simplicity), so downstream code
# cannot tell "unrated" apart from a literal 0.
user_item_matrix = pd.pivot_table(
    data,
    values='rating',
    index='user_id',
    columns='title',
).fillna(0)

# Step 4: Calculate Similarity
# Pairwise cosine similarity between the users' rating rows.
user_similarity = cosine_similarity(user_item_matrix)
# Label both axes with the user ids so similarities can be looked up by id.
user_similarity_df = pd.DataFrame(
    user_similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)

# Step 5: Prediction Function
def predict_ratings(user_id):
    """Predict a score for every movie for one user from all users' ratings.

    Each score is the similarity-weighted average of the ratings in
    ``user_item_matrix``, using the ``user_id`` column of
    ``user_similarity_df`` as the weights.

    Args:
        user_id: Label of the target user; must be a column of
            ``user_similarity_df``.

    Returns:
        numpy.ndarray: One predicted score per column of
        ``user_item_matrix``, in column order. All zeros when every
        similarity weight for the user is zero.

    Raises:
        KeyError: If ``user_id`` is not present in ``user_similarity_df``.

    Note:
        Missing ratings are counted as 0 in the weighted average, so they
        pull every score toward 0. The result is suited to ranking movies
        rather than to being read as a calibrated rating.
    """
    similarities = user_similarity_df[user_id].to_numpy(dtype=float)
    ratings = user_item_matrix.fillna(0).to_numpy(dtype=float)

    # Weighted sum of ratings: one value per movie column.
    weighted_ratings = similarities @ ratings
    sum_of_weights = np.abs(similarities).sum()

    # With no usable weights the average is undefined (0/0 -> NaN plus a
    # RuntimeWarning); report "no evidence" as zeros instead.
    if sum_of_weights == 0:
        return np.zeros_like(weighted_ratings)

    return weighted_ratings / sum_of_weights

# Step 6: Get Recommendations
def recommend_movies(user_id, num_recommendations=5):
    """Return the top-scoring movies that the user has not rated yet.

    Args:
        user_id: Row label of the target user in ``user_item_matrix``.
        num_recommendations: Maximum number of movies to return.

    Returns:
        pandas.DataFrame: Columns ``'Movie Title'`` and
        ``'Predicted Rating'``, ordered from highest to lowest score.
    """
    # Score every movie, labelled by title.
    scores = pd.Series(predict_ratings(user_id), index=user_item_matrix.columns)

    # A stored rating above 0 means the user has already rated the movie.
    own_ratings = user_item_matrix.loc[user_id]
    seen_titles = own_ratings.loc[own_ratings > 0].index

    # Drop the rated titles, then keep the best of what is left.
    best = scores.drop(index=seen_titles).nlargest(num_recommendations)

    return pd.DataFrame({
        'Movie Title': best.index,
        'Predicted Rating': best.values,
    })


# Example: Recommend 5 movies for user with ID 1
# Target user ID and the number of recommendations to display.
user_id, num_recommendations = 1, 5

recommendations = recommend_movies(user_id, num_recommendations=num_recommendations)
print("Top Movie Recommendations for User", user_id)
print(recommendations)



# Output:
# Top Movie Recommendations for User 1
#                               Movie Title  Predicted Rating
# 0                 Schindler's List (1993)          2.037720
# 1       E.T. the Extra-Terrestrial (1982)          1.873322
# 2  One Flew Over the Cuckoo's Nest (1975)          1.794487
# 3             English Patient, The (1996)          1.742308
# 4                           Scream (1996)          1.694651
