<a href="https://colab.research.google.com/github/IJ-Jany/dmproject1/blob/main/project1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
#PROJECT 1

# 1. Setup and Data Load
import pandas as pd
import numpy as np

# Colab-only step: expose Google Drive under /content/drive so the CSVs are readable.
from google.colab import drive
drive.mount('/content/drive')

# Read the two input tables from Drive: the ratings file and the movies file.
ratings_df = pd.read_csv(
    "/content/drive/MyDrive/Data mining/ratings.csv"
)
movies_df = pd.read_csv(
    "/content/drive/MyDrive/Data mining/movies (1).csv"
)

# 2. Build the user x movie rating table (one row per userId, one column per movieId).
# Cells with no rating are filled with 0.
user_movie_matrix = pd.pivot_table(
    ratings_df,
    values="rating",
    index="userId",
    columns="movieId",
    fill_value=0,
)

# 3. Pairwise Pearson correlation between movie columns.
# NOTE(review): because missing ratings were filled with 0 above, they enter the
# correlation as if they were real ratings of 0 -- confirm this is intended.
movie_similarity_matrix = user_movie_matrix.corr(method="pearson")

# Q1: Recommend similar movies to a given movie (ID=3)
def recommend_similar_movies(movie_id, top_n=5):
    """Return the movies most correlated with ``movie_id``.

    Reads the module-level ``movie_similarity_matrix`` (movie x movie
    correlations) and ``movies_df`` (must contain a ``movieId`` column).

    Parameters
    ----------
    movie_id : label
        Column label of the query movie in ``movie_similarity_matrix``.
    top_n : int, default 5
        Maximum number of similar movies to return. Values <= 0 yield an
        empty result.

    Returns
    -------
    pandas.DataFrame or str
        The matching rows of ``movies_df`` plus a ``similarity`` column,
        ordered from most to least similar with a fresh 0..n-1 index.
        If ``movie_id`` is not a column of the similarity matrix, a
        human-readable message string is returned instead.
    """
    if movie_id not in movie_similarity_matrix.columns:
        return f"Movie ID {movie_id} not found in the similarity matrix."

    # Remove the query movie by label instead of assuming it sorts first:
    # any other movie with correlation 1.0 ties with it, so position 0 is not
    # guaranteed to be the movie itself.
    similarity_scores = (
        movie_similarity_matrix[movie_id]
        .drop(labels=movie_id, errors="ignore")
        .dropna()
        .sort_values(ascending=False, kind="mergesort")  # stable order for ties
    )
    # Clamp so a negative top_n gives an empty selection rather than a
    # "all but the last n" slice.
    top_similar = similarity_scores.iloc[:max(top_n, 0)]

    top_movies = movies_df[movies_df["movieId"].isin(top_similar.index)].copy()
    top_movies["similarity"] = top_movies["movieId"].map(top_similar)

    ranked = top_movies.sort_values(by="similarity", ascending=False, kind="mergesort")
    return ranked.reset_index(drop=True)

# Q1 driver: print the five movies most similar to movie ID 3.
print("\nQ1: Top 5 Movies Similar to Movie ID 3:")
print(recommend_similar_movies(movie_id=3, top_n=5))

# Q2: collect every rating row that belongs to one chosen user.
selected_user_id = 47
is_selected_user = ratings_df["userId"].eq(selected_user_id)
user_rated_movies = ratings_df.loc[is_selected_user]

print(f"\nQ2: Movies rated by User {selected_user_id}:")
print(user_rated_movies)

# Q3: pick the rating row with the largest rating for that user.
# idxmax returns the label of the first maximum, so ties resolve to the
# earliest such row.
if user_rated_movies.empty:
    print(f"\nQ3: No ratings found for User {selected_user_id}.")
else:
    best_row_label = user_rated_movies["rating"].idxmax()
    top_rated_movie = user_rated_movies.loc[best_row_label]
    print(f"\nQ3: Highest Rated Movie by User {selected_user_id}:")
    print(top_rated_movie)

# Q4: movies listed in movies_df that the user has no rating row for.
rated_movie_ids = set(user_rated_movies["movieId"])
all_movie_ids = set(movies_df["movieId"])
unrated_movie_ids = all_movie_ids.difference(rated_movie_ids)

is_unrated = movies_df["movieId"].isin(unrated_movie_ids)
unrated_movies_df = movies_df.loc[is_unrated]

print(f"\nQ4: Sample of Movies NOT Rated by User {selected_user_id}:")
print(unrated_movies_df.head())

# Q5: Recommend highest average-rated movies not seen by the user
def recommend_top_avg_movies(user_id, top_n=5, min_ratings=1):
    """Recommend the best-rated movies that ``user_id`` has not rated yet.

    Reads the module-level ``ratings_df`` (columns ``userId``, ``movieId``,
    ``rating``) and ``movies_df`` (columns ``movieId``, ``title``).

    Parameters
    ----------
    user_id : label
        User whose already-rated movies are excluded.
    top_n : int, default 5
        Number of recommendations to return.
    min_ratings : int, default 1
        Minimum number of (non-missing) ratings a movie needs to be eligible.
        Raise this to stop movies with a single perfect rating from
        dominating the ranking. With the default, any movie with at least
        one rating is eligible.

    Returns
    -------
    pandas.DataFrame
        Columns ``movieId``, ``title`` and ``rating`` (the mean rating over
        all users), sorted from highest to lowest mean. Movies with equal
        means keep their relative order from ``movies_df``.
    """
    rated_ids = set(ratings_df.loc[ratings_df["userId"] == user_id, "movieId"])

    # Mean rating and number of ratings per movie; the count drives the
    # optional popularity threshold.
    rating_stats = ratings_df.groupby("movieId")["rating"].agg(["mean", "count"])
    eligible = rating_stats["count"] >= min_ratings
    avg_ratings = rating_stats.loc[eligible, "mean"].rename("rating")

    unseen_movies = movies_df[~movies_df["movieId"].isin(rated_ids)].copy()
    # Inner merge: movies without an eligible average are dropped.
    unseen_movies = unseen_movies.merge(avg_ratings, on="movieId")

    # A stable sort keeps tie order predictable (many movies share a 5.0 mean).
    ranked = unseen_movies.sort_values(by="rating", ascending=False, kind="mergesort")
    return ranked.head(top_n)[["movieId", "title", "rating"]]

# Q5 driver: print the heading, then the top-5 average-rating picks for the selected user.
print(f"\nQ5: Top 5 Movie Recommendations for User {selected_user_id} Based on Avg Ratings:")
q5_recommendations = recommend_top_avg_movies(user_id=selected_user_id, top_n=5)
print(q5_recommendations)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

Q1: Top 5 Movies Similar to Movie ID 3:
   movieId                        title                         genres  \
0     3450        Grumpy Old Men (1993)                         Comedy   
1      762            Striptease (1996)                   Comedy|Crime   
2      788  Nutty Professor, The (1996)  Comedy|Fantasy|Romance|Sci-Fi   
3      542            Son in Law (1993)           Comedy|Drama|Romance   
4       65              Bio-Dome (1996)                         Comedy   

   similarity  
0    0.415447  
1    0.408148  
2    0.388505  
3    0.378602  
4    0.371938  

Q2: Movies rated by User 47:
      userId  movieId  rating   timestamp
6918      47       31     3.0  1496205717
6919      47       47     3.0  1496209354
6920      47       62     3.0  1496205312
6921      47      141     2.0  1496208714
6922      47      147     3.5  1496205843
...    