In [1]:
#importing neccessary libraries
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [5]:
# Load datasets directly from uploaded files
def load_movie_data():
    movies = pd.read_csv("C:\\Users\\krush\\Downloads\\codetech_ml\\codetech task-4\\movies.csv")
    ratings = pd.read_csv("C:\\Users\\krush\\Downloads\\codetech_ml\\codetech task-4\\ratings.csv")
    return movies, ratings

In [7]:
# Create the user-item matrix from ratings
def create_user_movie_matrix(ratings):
    return ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)

In [9]:
# Compute user similarity using cosine similarity
def train_collaborative_filter(user_movie_matrix):
    user_similarity = cosine_similarity(user_movie_matrix)
    return pd.DataFrame(user_similarity, index=user_movie_matrix.index, columns=user_movie_matrix.index)

In [11]:
# Generate top N recommendations for a given user
def get_recommendations(user_id, user_similarity, user_movie_matrix, n_recommendations=5):
    similar_users = user_similarity[user_id].sort_values(ascending=False)[1:6]
    user_ratings = user_movie_matrix.loc[user_id]
    unwatched_movies = user_ratings[user_ratings == 0].index
    
    recommendations = []
    for movie in unwatched_movies:
        pred_rating = 0
        sim_sum = 0
        for similar_user in similar_users.index:
            sim = similar_users[similar_user]
            rating = user_movie_matrix.loc[similar_user, movie]
            pred_rating += sim * rating
            sim_sum += sim
        pred_rating = pred_rating / sim_sum if sim_sum != 0 else 0
        recommendations.append((movie, pred_rating))
    
    return sorted(recommendations, key=lambda x: x[1], reverse=True)[:n_recommendations]


In [13]:
# Evaluate model using MSE, RMSE, and MAE
def evaluate_model(user_similarity, user_movie_matrix):
    mse_scores = []
    mae_scores = []
    rmse_scores = []
    
    for user in user_movie_matrix.index:
        actual = user_movie_matrix.loc[user]
        pred = np.zeros(len(actual))
        
        for i, movie_id in enumerate(actual.index):
            if actual.iloc[i] > 0:
                similar_users = user_similarity[user].sort_values(ascending=False)[1:6]
                pred_score = 0
                sim_sum = 0
                for sim_user in similar_users.index:
                    sim = similar_users[sim_user]
                    rating = user_movie_matrix.loc[sim_user, movie_id]
                    pred_score += sim * rating
                    sim_sum += sim
                pred[i] = pred_score / sim_sum if sim_sum != 0 else 0
                 # Filter out unrated items
                mask = actual > 0
                if mask.sum() > 0:
                    mse_scores.append(mean_squared_error(actual[mask], pred[mask]))
                    mae_scores.append(mean_absolute_error(actual[mask], pred[mask]))
                    rmse_scores.append(np.sqrt(mean_squared_error(actual[mask], pred[mask])))
                        
                return {
                            'MSE': np.mean(mse_scores),
                            'RMSE': np.mean(rmse_scores),
                            'MAE': np.mean(mae_scores)
                        }


In [15]:
# Main execution
if __name__ == "__main__":
    movies, ratings = load_movie_data()
    user_movie_matrix = create_user_movie_matrix(ratings)
    user_similarity = train_collaborative_filter(user_movie_matrix)

    sample_user_id = user_movie_matrix.index[0]
    recommendations = get_recommendations(sample_user_id, user_similarity, user_movie_matrix)

    metrics = evaluate_model(user_similarity, user_movie_matrix)

    #Evaluation Metrics
    print("Evaluation Metrics:")
    for metric, score in metrics.items():
        print(f"{metric}: {score:.4f}")
    #movie recommendations
    print("\nTop 5 Movie Recommendations:")
    for movie_id, score in recommendations:
        movie_title = movies[movies['movieId'] == movie_id]['title'].values[0]
        print(f"{movie_title}: {score:.2f}")
    

Evaluation Metrics:
MSE: 19.6479
RMSE: 4.4326
MAE: 4.3570

Top 5 Movie Recommendations:
Aliens (1986): 4.80
Hunt for Red October, The (1990): 4.30
Blade Runner (1982): 4.00
Terminator 2: Judgment Day (1991): 4.00
Die Hard (1988): 4.00
