<a href="https://colab.research.google.com/github/Abhinavmandve/Recommender_System_for_Movie_Ratings/blob/main/Recommender_System_for_Movie_Ratings_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Import required libraries
!pip install scikit-surprise
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp39-cp39-linux_x86_64.whl size=3195818 sha256=534b821bda619a23ce3430eacd45a62a8e5b24f2fddda917b685469485bab459
  Stored in directory: /root/.cache/pip/wheels/c6/3a/46/9b17b3512bdf283c6cb84f59929cdd5199d4e754d596d22784
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.3


In [4]:
# Read the MovieLens 100k ratings file (tab-separated); the column names are
# supplied explicitly through `names`, so no row is consumed as a header.
names = ['user_id', 'item_id', 'rating', 'timestamp']
url = "https://files.grouplens.org/datasets/movielens/ml-100k/u.data"
df = pd.read_csv(
    url,
    sep='\t',
    header=None,
    names=names,
)

In [5]:
# Keep only movies, and then users, that have at least the minimum number of
# ratings.
# NOTE: the user filter runs on the already movie-filtered frame, so a movie can
# end up with fewer than `min_movie_ratings` rows afterwards. Because `df` is
# overwritten, re-running this cell filters the already-filtered frame again.
min_movie_ratings = 50
min_user_ratings = 50
# transform('size') broadcasts each group's row count back onto its rows, so
# the filter is a single vectorized boolean mask instead of a Python lambda
# invoked once per group. Row order and index are unchanged.
df = df[df.groupby('item_id')['item_id'].transform('size') >= min_movie_ratings]
df = df[df.groupby('user_id')['user_id'].transform('size') >= min_user_ratings]

In [6]:
# Wrap the (user, item, rating) columns in a Surprise Dataset; the Reader
# declares the rating scale as 1 to 5.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(
    df.loc[:, ['user_id', 'item_id', 'rating']],
    reader,
)

In [7]:
# Hold out 25% of the ratings as the test set. The fixed random_state makes
# the split, and therefore the reported RMSE, repeatable across runs.
trainset, testset = train_test_split(data, test_size=0.25, random_state=42)

In [8]:
# Configure the collaborative-filtering model: biased SVD with 100 factors
# and a fixed random_state.
model = SVD(
    n_factors=100,
    biased=True,
    random_state=42,
)

In [9]:
# Fit the SVD model on the training split. The call is the cell's last
# expression, so its return value (the SVD object) is shown as the output.
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f1664594730>

In [10]:
# Run the fitted model over the held-out test set and keep the predictions
# for evaluation in the next cell.
predictions = model.test(testset)

In [11]:
# Report the root-mean-square error of the test-set predictions. rmse() both
# prints the score and returns it, hence the two outputs under this cell.
accuracy.rmse(predictions)

RMSE: 0.9079


0.9078675233991291

# get_recommendations function

In [12]:
# Read the MovieLens 100k ratings file again (repeats the earlier load cell),
# which resets `df` to the unfiltered ratings. Column names are supplied
# explicitly through `names`, so no row is consumed as a header.
names = ['user_id', 'item_id', 'rating', 'timestamp']
url = "https://files.grouplens.org/datasets/movielens/ml-100k/u.data"
df = pd.read_csv(
    url,
    sep='\t',
    header=None,
    names=names,
)

In [13]:
# Keep only movies, and then users, that have at least the minimum number of
# ratings.
# NOTE: the user filter runs on the already movie-filtered frame, so a movie can
# end up with fewer than `min_movie_ratings` rows afterwards. Because `df` is
# overwritten, re-running this cell filters the already-filtered frame again.
min_movie_ratings = 50
min_user_ratings = 50
# transform('size') broadcasts each group's row count back onto its rows, so
# the filter is a single vectorized boolean mask instead of a Python lambda
# invoked once per group. Row order and index are unchanged.
df = df[df.groupby('item_id')['item_id'].transform('size') >= min_movie_ratings]
df = df[df.groupby('user_id')['user_id'].transform('size') >= min_user_ratings]

# Wrap the (user, item, rating) columns in a Surprise Dataset; the Reader
# declares the rating scale as 1 to 5.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['user_id', 'item_id', 'rating']], reader)

In [14]:
# Hold out 25% of the ratings as the test set. The fixed random_state makes
# the split repeatable, so the model trained below sees the same ratings on
# every run.
trainset, testset = train_test_split(data, test_size=0.25, random_state=42)

In [15]:
# Configure the collaborative-filtering model: biased SVD with 100 factors
# and a fixed random_state.
model = SVD(
    n_factors=100,
    biased=True,
    random_state=42,
)

# Fit on the training split. As the cell's last expression, the returned SVD
# object is displayed as the cell output.
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f163b7d36d0>

In [16]:
# Get movie recommendations for a given user
def get_recommendations(user_id, num_recommendations=10, max_candidates=500,
                        seed=None, ratings=None, algo=None):
    """Return the highest predicted-rating movies the user has not rated yet.

    Args:
        user_id: User id as it appears in the ``user_id`` column.
        num_recommendations: Maximum number of (movie, rating) pairs returned.
        max_candidates: Upper bound on how many unrated movies are scored.
            When more are available, a random subset of this size is scored,
            so the result can miss the overall best movie. Pass ``None`` to
            score every unrated movie.
        seed: Optional seed for the candidate sampling. ``None`` draws from
            the global ``random`` state (so ``random.seed`` still applies).
        ratings: DataFrame with ``user_id`` and ``item_id`` columns. Defaults
            to the notebook-level ``df``.
        algo: Fitted model exposing ``predict(uid, iid)`` whose result has an
            ``est`` attribute. Defaults to the notebook-level ``model``.

    Returns:
        A list of ``(movie_id, predicted_rating)`` tuples sorted by predicted
        rating, highest first. The list is empty when ``num_recommendations``
        is not positive or when the user has no unrated movies.
    """
    # Imported locally so the function does not rely on a later cell having
    # already executed ``import random``.
    import random

    if ratings is None:
        ratings = df
    if algo is None:
        algo = model

    # A negative count would otherwise slice off the tail instead of
    # returning nothing.
    if num_recommendations <= 0:
        return []

    all_movies = set(ratings['item_id'].unique().tolist())
    # .loc[mask, column] selects rows and column in one step (no chained indexing)
    seen = set(ratings.loc[ratings['user_id'] == user_id, 'item_id'].tolist())
    # Sorted so a given seed always samples from the same ordering, and so
    # ties in predicted rating resolve deterministically when nothing is sampled.
    candidates = sorted(all_movies - seen)

    # Score only a random subset when there are too many candidates
    if max_candidates is not None and len(candidates) > max_candidates:
        rng = random if seed is None else random.Random(seed)
        candidates = rng.sample(candidates, max_candidates)

    scored = [
        (movie_id, algo.predict(user_id, movie_id).est)
        for movie_id in candidates
    ]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:num_recommendations]

In [18]:
import random

# get_recommendations draws its candidate subset from the global `random`
# state; seeding it here makes the printed list repeatable across runs.
random.seed(42)

# Example usage: Get 10 movie recommendations for user 42
recommendations = get_recommendations(42, num_recommendations=10)
for movie_id, rating in recommendations:
    print(f"Movie ID: {movie_id}, Predicted Rating: {rating}")

Movie ID: 22, Predicted Rating: 4.793605910148957
Movie ID: 114, Predicted Rating: 4.769533659464468
Movie ID: 408, Predicted Rating: 4.720281438899912
Movie ID: 169, Predicted Rating: 4.631806221793472
Movie ID: 302, Predicted Rating: 4.53592171240483
Movie ID: 513, Predicted Rating: 4.394414194649806
Movie ID: 527, Predicted Rating: 4.390770206731454
Movie ID: 257, Predicted Rating: 4.322371605953737
Movie ID: 166, Predicted Rating: 4.314432548385652
Movie ID: 285, Predicted Rating: 4.306156373374457
