In [None]:
# Environment setup: replace whatever NumPy is installed with the pinned
# 1.26.4 release, then install/upgrade scikit-surprise (imported as `surprise`).
# NOTE(review): presumably the NumPy pin is required because scikit-surprise's
# compiled extensions do not work with NumPy 2.x — confirm before removing it.
!pip uninstall -y numpy
!pip install numpy==1.26.4
!pip install --upgrade scikit-surprise

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split, cross_validate
from surprise import accuracy
import requests
import io
import os
import zipfile


In [None]:
def download_and_extract_movielens():
    """Download and unpack the MovieLens 100K archive into the working directory.

    Nothing is downloaded when a path named ``ml-100k`` already exists in the
    current working directory. Otherwise the zip archive is fetched over HTTP,
    held in memory, and extracted into the current working directory.

    Raises:
        requests.exceptions.Timeout: the server did not respond in time.
        requests.exceptions.HTTPError: the server answered with a 4xx/5xx status.
        zipfile.BadZipFile: the downloaded payload is not a valid zip archive.
    """
    if os.path.exists('ml-100k'):
        print("The dataset already exists. Download skipped.")
        return

    print("Downloading MovieLens 100K dataset...")
    url = "https://files.grouplens.org/datasets/movielens/ml-100k.zip"

    # Without a timeout, requests waits indefinitely on an unresponsive server,
    # which would freeze the notebook cell.
    response = requests.get(url, timeout=60)
    # Fail with a clear HTTP error instead of handing an error page to ZipFile.
    response.raise_for_status()

    # The context manager closes the archive handle even if extraction fails.
    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
        archive.extractall()
    print("Movielens 100K dataset downloaded and extracted successfully.")

In [None]:
# Make sure the raw files are on disk before reading them.
download_and_extract_movielens()

# u.data is read as tab-separated values with no header row; the column names
# are supplied here.
ratings_df = pd.read_csv(
    'ml-100k/u.data',
    sep='\t',
    names=['user_id', 'item_id', 'rating', 'timestamp'],
)

# Quick sanity summary of what was loaded.
n_users = ratings_df['user_id'].nunique()
n_movies = ratings_df['item_id'].nunique()
lowest_rating = ratings_df['rating'].min()
highest_rating = ratings_df['rating'].max()

print(f"Dataset shape: {ratings_df.shape}")
print(f"Number of unique users: {n_users}")
print(f"Number of unique movies: {n_movies}")
print(f"Range of ratings: {lowest_rating} to {highest_rating}")

Downloading MovieLens 100K dataset...
Movielens 100K dataset downloaded and extracted successfully.
Dataset shape: (100000, 4)
Number of unique users: 943
Number of unique movies: 1682
Range of ratings: 1 to 5


In [None]:
# Surprise only needs the (user, item, rating) triples; the timestamp is dropped.
rating_triples = ratings_df[['user_id', 'item_id', 'rating']]

# The reader declares the rating bounds (1 to 5) used when loading the frame.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(rating_triples, reader)

# Hold out 20% of the ratings for evaluation; the fixed seed keeps the split
# the same on every run.
trainset, testset = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit an SVD matrix-factorisation model on the training split. The fixed
# random_state seeds the model so repeated runs give the same fit.
model = SVD(n_factors=20, lr_all=0.01, reg_all=0.01, n_epochs=20, random_state=42)
model.fit(trainset)

# Score the held-out ratings.
predictions = model.test(testset)

# accuracy.rmse / accuracy.mae print their own "RMSE: ..." / "MAE: ..." lines by
# default, which duplicated the report below; verbose=False keeps one copy.
rmse = accuracy.rmse(predictions, verbose=False)
mae = accuracy.mae(predictions, verbose=False)

print(f"Test RMSE: {rmse:.4f}")
print(f"Test MAE: {mae:.4f}")

RMSE: 0.9576
MAE:  0.7455
Test RMSE: 0.9576
Test MAE: 0.7455


# Predicting movie ratings for a user

In [None]:
def get_movie_names():
    """Load the movie id/title lookup table from ``ml-100k/u.item``.

    Only the first two pipe-separated fields of each line are read, and the
    file is decoded as latin-1.

    Returns:
        pandas.DataFrame: one row per line of the file, with columns
        ``item_id`` and ``title``.
    """
    read_options = {
        'sep': '|',
        'encoding': 'latin-1',
        'header': None,            # the file has no header row
        'usecols': [0, 1],         # keep just the id and the title fields
        'names': ['item_id', 'title'],
    }
    return pd.read_csv('ml-100k/u.item', **read_options)

In [None]:
def recommend_movies(user_id, n=10):
    """Return the ``n`` highest-predicted unrated movies for one user.

    Relies on three names defined elsewhere in the notebook: the fitted
    ``model``, the ``ratings_df`` ratings table, and ``get_movie_names()``.

    Args:
        user_id: id of the user, as it appears in ``ratings_df['user_id']``.
        n: how many recommendations to keep (default 10).

    Returns:
        pandas.DataFrame: columns ``item_id``, ``predicted_rating`` and
        ``title``, ordered from highest to lowest predicted rating.
    """
    movies_df = get_movie_names()
    catalogue_ids = movies_df['item_id'].unique()

    # Candidates are every catalogue movie the user has no rating for.
    is_this_user = ratings_df['user_id'] == user_id
    seen_ids = ratings_df.loc[is_this_user, 'item_id'].values
    candidate_ids = np.setdiff1d(catalogue_ids, seen_ids)

    # Score each candidate with the trained model's estimated rating.
    scored = [
        (movie_id, model.predict(user_id, movie_id).est)
        for movie_id in candidate_ids
    ]

    # Best estimates first, then keep only the first n.
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
    top_n = pd.DataFrame(ranked[:n], columns=['item_id', 'predicted_rating'])

    # Attach the titles for the selected movies.
    return top_n.merge(movies_df, on='item_id')

In [None]:
# Example: recommendations for a single user. The list length is defined once
# so the heading below always matches the number of rows requested.
user_id = 41
top_n = 10
recommendations = recommend_movies(user_id, n=top_n)

print(f"\nTop {top_n} recommended movies for user {user_id}:")
print(recommendations[['title', 'predicted_rating']])


Top 10 recommended movies for user 41:
                                               title  predicted_rating
0                              Close Shave, A (1995)          4.642751
1                         Usual Suspects, The (1995)          4.630781
2                          North by Northwest (1959)          4.507565
3                             Full Monty, The (1997)          4.476879
4  Wallace & Gromit: The Best of Aardman Animatio...          4.466863
5                              Third Man, The (1949)          4.399962
6                Once Upon a Time in the West (1969)          4.381188
7                                Sunset Blvd. (1950)          4.339992
8                                  Persuasion (1995)          4.338851
9                              Paths of Glory (1957)          4.328396
