<a href="https://colab.research.google.com/github/Dimple155/MyTasks/blob/main/Bharat_Intern_Task1_Movie_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
!pip install scikit-surprise



In [21]:
#required libraries
from surprise import Dataset, Reader
from surprise.model_selection import train_test_split
from surprise import KNNBasic
from surprise import SVD
from surprise import accuracy
import pandas as pd

In [22]:
# Define the Reader
reader = Reader(line_format='user item rating timestamp', sep='\t')

# Load the MovieLens dataset using the defined Reader
data = Dataset.load_builtin('ml-100k')

# Convert the Surprise dataset to a Pandas DataFrame
df = pd.DataFrame(data.raw_ratings, columns=['user_id', 'item_id', 'rating', 'timestamp'])

# Display the head of the DataFrame
print(df.head())

  user_id item_id  rating  timestamp
0     196     242     3.0  881250949
1     186     302     3.0  891717742
2      22     377     1.0  878887116
3     244      51     2.0  880606923
4     166     346     1.0  886397596


In [23]:
# Split the dataset into training and testing sets
trainset, testset = train_test_split(data, test_size=0.2)

In [24]:
from surprise import get_dataset_dir
import os

# Function to load movie titles
def load_movie_titles():
    file_path = os.path.join(get_dataset_dir(), 'ml-100k/ml-100k/u.item')
    movie_titles = {}
    with open(file_path, 'r', encoding='ISO-8859-1') as f:
        for line in f:
            parts = line.split('|')
            movie_id = parts[0]
            title = parts[1]
            movie_titles[movie_id] = title
    return movie_titles

In [25]:
# Function to get top-N recommendations with movie titles
def get_top_n_recommendations_with_titles(model, testset, movie_titles, user_id, n=5):
    user_ratings = [item for item in testset if item[0] == user_id]
    predictions = model.test(user_ratings)
    top_n = sorted(predictions, key=lambda x: x.est, reverse=True)[:n]
    top_n_with_titles = [(movie_titles[str(recommendation.iid)], recommendation.est) for recommendation in top_n]
    return top_n_with_titles


In [26]:
# Load movie titles
movie_titles = load_movie_titles()

# Choose a user for whom you want to get recommendations
user_id_to_recommend = '1'#int(input("Enter User ID:"))

In [27]:
# Collaborative Filtering with kNN
sim_options = {'name': 'cosine', 'user_based': True}
knn_model = KNNBasic(sim_options=sim_options)
knn_model.fit(trainset)
knn_predictions = knn_model.test(testset)
knn_rmse = accuracy.rmse(knn_predictions)
print(f'kNN RMSE: {knn_rmse}')

Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 1.0172
kNN RMSE: 1.0171617833582838


In [28]:
# Get top-N recommendations for the chosen user using kNN model
knn_top_n_recommendations = get_top_n_recommendations_with_titles(knn_model, testset, movie_titles, user_id_to_recommend)

# Print recommendations with movie titles
print(f'Top-5 Recommendations for User {user_id_to_recommend} using kNN:')
for title, estimated_rating in knn_top_n_recommendations:
    print(f'Movie Title: {title}, Estimated Rating: {estimated_rating}')


Top-5 Recommendations for User 1 using kNN:
Movie Title: Cinema Paradiso (1988), Estimated Rating: 4.526774715351243
Movie Title: Taxi Driver (1976), Estimated Rating: 4.448622922785968
Movie Title: Room with a View, A (1986), Estimated Rating: 4.299695585181687
Movie Title: Graduate, The (1967), Estimated Rating: 4.251122227579801
Movie Title: Mr. Smith Goes to Washington (1939), Estimated Rating: 4.224858250118751


In [29]:
# Collaborative Filtering with SVD
svd_model = SVD()
svd_model.fit(trainset)
svd_predictions = svd_model.test(testset)
svd_rmse = accuracy.rmse(svd_predictions)
print(f'SVD RMSE: {svd_rmse}')

RMSE: 0.9432
SVD RMSE: 0.9432233724171681


In [30]:
# Get top-N recommendations for the chosen user using SVD model
svd_top_n_recommendations = get_top_n_recommendations_with_titles(svd_model, testset, movie_titles, user_id_to_recommend)

# Print recommendations with movie titles
print(f'\nTop-5 Recommendations for User {user_id_to_recommend} using SVD:')
for title, estimated_rating in svd_top_n_recommendations:
    print(f'Movie Title: {title}, Estimated Rating: {estimated_rating}')


Top-5 Recommendations for User 1 using SVD:
Movie Title: Cinema Paradiso (1988), Estimated Rating: 4.4598245662031015
Movie Title: Belle de jour (1967), Estimated Rating: 4.364476245813141
Movie Title: Clerks (1994), Estimated Rating: 4.289420631041578
Movie Title: Mr. Smith Goes to Washington (1939), Estimated Rating: 4.24068634192067
Movie Title: Alien (1979), Estimated Rating: 4.1972717467370515
