<a href="https://colab.research.google.com/github/Mofidulhaque/DataMiningLab/blob/main/Nibir20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np

In [2]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Load datasets
ratings_data = pd.read_csv("/content/drive/MyDrive/heart/ratings.csv")
movies_data = pd.read_csv("/content/drive/MyDrive/heart/movies.csv")

In [4]:

# Display raw data
ratings_data.head()
movies_data.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [5]:

# Step 1: Create movie-to-movie similarity matrix
user_movie_pivot = ratings_data.pivot_table(index='userId', columns='movieId', values='rating')
similarity_matrix = user_movie_pivot.corr(method='pearson')  # Using Pearson correlation
similarity_matrix.head()


movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.330978,0.487109,1.0,0.310971,0.106465,0.208402,0.968246,0.095913,-0.021409,...,,,,,,,,,,
2,0.330978,1.0,0.419564,,0.562791,0.16351,0.430261,0.415227,0.27735,0.016626,...,,,,,,,,,,
3,0.487109,0.419564,1.0,,0.602266,0.345069,0.554088,0.333333,0.458591,-0.050276,...,,,,,,,,,,
4,1.0,,,1.0,0.654654,,0.203653,,,0.870388,...,,,,,,,,,,
5,0.310971,0.562791,0.602266,0.654654,1.0,0.291302,0.609119,0.555556,0.319173,0.218263,...,,,,,,,,,,


In [6]:

# Step 2: Movie recommendation based on a given movie
def get_similar_movies(target_movie_id, num_recommendations=5):
    if target_movie_id not in similarity_matrix:
        return "Selected movie not found in the dataset."

    similarity_scores = similarity_matrix[target_movie_id].dropna()
    top_matches = similarity_scores.sort_values(ascending=False)[1:num_recommendations+1]

    top_movies = movies_data[movies_data["movieId"].isin(top_matches.index)][["movieId", "title"]]
    return top_movies

In [7]:

# Example: Recommend movies similar to movieId = 1
top_recommendations = get_similar_movies(1, num_recommendations=5)
top_recommendations

Unnamed: 0,movieId,title
1467,1992,Child's Play 2 (1990)
2648,3545,Cabaret (1972)
4158,5988,Quicksilver (1986)
4190,6041,Amen. (2002)
6141,43919,Date Movie (2006)


In [8]:

selected_user = int(input("Enter your user ID: "))
user_rated = ratings_data[ratings_data['userId'] == selected_user]
user_rated.head()

Enter your user ID: 20


Unnamed: 0,userId,movieId,rating,timestamp
2977,20,2,3.0,1054038313
2978,20,8,1.0,1054038422
2979,20,13,4.0,1054038425
2980,20,34,4.0,1054038093
2981,20,48,5.0,1054038357


In [9]:

# Step 3: Find the highest-rated movie by the user
fav_movie = user_rated.loc[user_rated['rating'].idxmax()]
fav_movie

Unnamed: 0,2981
userId,20.0
movieId,48.0
rating,5.0
timestamp,1054038000.0


In [10]:

# Step 4: Identify movies not rated by the user
all_movie_ids = set(movies_data["movieId"])
rated_by_user = set(user_rated["movieId"])
not_rated_yet = all_movie_ids - rated_by_user

unseen_movies = movies_data[movies_data["movieId"].isin(not_rated_yet)]
unseen_movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
5,6,Heat (1995),Action|Crime|Thriller


In [11]:
# Step 5: Recommend movies not rated by the user, sorted by average rating
def recommend_unseen_top_movies(user_id, num_movies=5):
    user_history = ratings_data[ratings_data["userId"] == user_id]
    movies_rated = set(user_history["movieId"])
    full_list = set(movies_data["movieId"])
    movies_left = full_list - movies_rated

    candidate_movies = movies_data[movies_data["movieId"].isin(movies_left)]

    avg_movie_scores = ratings_data.groupby("movieId")["rating"].mean()

    final_recommendations = candidate_movies.merge(
        avg_movie_scores, on="movieId", how="left"
    ).sort_values(by="rating", ascending=False).head(num_movies)

    return final_recommendations[["movieId", "title", "rating"]]

In [12]:

user_id = int(input("Enter your user ID: "))
final_suggestions = recommend_unseen_top_movies(user_id=user_id, num_movies=10)
final_suggestions

Enter your user ID: 20


Unnamed: 0,movieId,title,rating
6168,50999,"Ugly Duckling and Me!, The (2006)",5.0
6855,70451,Max Manus (2008),5.0
6838,69860,Eichmann (2007),5.0
6817,69469,Garfield's Pet Force (2009),5.0
7675,95311,Presto (2008),5.0
7666,95175,Front of the Class (2008),5.0
9326,173963,Empties (2007),5.0
9321,173619,Fugitives (1986),5.0
9319,173355,Travels of an Ant (1983),5.0
9392,179135,Blue Planet II (2017),5.0
