In [4]:
# 🧠 Task 4: Movie Recommendation System using SVD (MovieLens 100k)

# 🛠️ Fix NumPy version compatibility for scikit-surprise
!pip install numpy==1.23.5
!pip install scikit-surprise --no-binary :all:

Collecting scikit-surprise
  Using cached scikit_surprise-1.1.4.tar.gz (154 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl size=2469550 sha256=a55ebad8afb326db270ad081f122d84551f899d047bdfb90c7d35685b4316357
  Stored in directory: /root/.cache/pip/wheels/2a/8f/6e/7e2899163e2d85d8266daab4aa1cdabec7a6c56f83c015b5af
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.4


In [5]:
# 🔄 Restart the runtime manually after running the two install commands above
# Then run the rest of this code ⬇️

In [6]:
# ✅ Imports (run after restart)
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split, cross_validate
from surprise import accuracy
import pandas as pd
from collections import defaultdict

In [7]:
# Load the built-in MovieLens 100k ratings dataset.
# NOTE: if the dataset is not cached locally, Surprise asks interactively
# whether to download it (see the [Y/n] prompt in the output below), so an
# unattended run on a fresh machine will wait for input here.
data = Dataset.load_builtin('ml-100k')

Dataset ml-100k could not be found. Do you want to download it? [Y/n] Y
Trying to download dataset from https://files.grouplens.org/datasets/movielens/ml-100k.zip...
Done! Dataset ml-100k has been saved to /root/.surprise_data/ml-100k


In [8]:
# Train-test split: hold out 20% of the ratings as the test set.
# random_state is fixed so the same split is produced on every run.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

In [9]:
# Fit the SVD matrix-factorization algorithm on the training split.
# random_state seeds the model's random initialization; without it every
# Restart & Run All produces a slightly different model and different metrics.
model = SVD(random_state=42)
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7ab5babf3010>

In [10]:
# Predict a rating for every entry of the held-out test set using the
# fitted model; the resulting list feeds both the metrics and get_top_n.
predictions = model.test(testset)

In [11]:
# Evaluate the test-set predictions. Each call prints its metric (see the
# output below) and its return value is stored in rmse / mae.
rmse = accuracy.rmse(predictions)
mae = accuracy.mae(predictions)

RMSE: 0.9351
MAE:  0.7366


In [12]:
# Cross-validation scores: 5 splits over the full dataset, measuring RMSE and MAE.
# verbose=True prints the per-fold summary table; the returned dict of
# per-fold scores and fit/test timings is displayed as the cell's value.
cross_validate(model, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9373  0.9292  0.9358  0.9395  0.9327  0.9349  0.0036  
MAE (testset)     0.7373  0.7304  0.7373  0.7429  0.7366  0.7369  0.0040  
Fit time          1.12    1.46    1.46    1.14    1.12    1.26    0.17    
Test time         0.21    0.18    0.11    0.26    0.12    0.18    0.06    


{'test_rmse': array([0.93727208, 0.92921718, 0.93580162, 0.93953175, 0.93271871]),
 'test_mae': array([0.73728495, 0.73035889, 0.73725511, 0.74289437, 0.73659386]),
 'fit_time': (1.120807409286499,
  1.4640130996704102,
  1.4642961025238037,
  1.1360220909118652,
  1.1243417263031006),
 'test_time': (0.2103714942932129,
  0.17505431175231934,
  0.11185789108276367,
  0.2628357410430908,
  0.12025046348571777)}

In [13]:
# Recommend top 5 movies for a given user
def get_top_n(predictions, n=5):
    """Group predictions by user and keep each user's ``n`` highest estimates.

    Args:
        predictions: Iterable of 5-field records unpacked as
            ``(uid, iid, true_r, est, details)``. Only ``uid``, ``iid`` and
            ``est`` are used.
        n: Maximum number of items to keep per user. Must not be negative.

    Returns:
        A ``defaultdict(list)`` mapping each ``uid`` to a list of
        ``(iid, est)`` pairs, sorted by ``est`` in descending order and at
        most ``n`` long. Because it is a defaultdict, indexing it with an
        unknown ``uid`` returns (and inserts) an empty list.

    Raises:
        ValueError: If ``n`` is negative. A negative slice bound would
            otherwise silently drop the lowest-rated items instead of
            limiting the list length.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    top_n = defaultdict(list)
    for uid, iid, _true_r, est, _details in predictions:
        top_n[uid].append((iid, est))

    # Only values of existing keys are replaced, so the dict's size does not
    # change while it is being iterated.
    for uid, user_ratings in top_n.items():
        # sorted() is stable: items with equal estimates keep their input order.
        ranked = sorted(user_ratings, key=lambda pair: pair[1], reverse=True)
        top_n[uid] = ranked[:n]
    return top_n

# Build the per-user top-5 lists from the test-set predictions.
# NOTE(review): `predictions` comes from model.test(testset), so it only
# covers held-out (user, item) pairs that the users actually rated. The lists
# are therefore the best-scored rated items, not unseen-movie recommendations.
# Consider predicting on trainset.build_anti_testset() instead; confirm intent.
top_n_recommendations = get_top_n(predictions, n=5)


In [14]:
# Print the recommended movie IDs for one user.
# The user id is kept in a single constant (a string key, as in the lookup)
# so the header and the lookup cannot drift apart.
TARGET_USER_ID = '196'

# .get() avoids inserting an empty entry into the defaultdict when the user
# has no predictions; the header reports how many items are actually listed.
user_recommendations = top_n_recommendations.get(TARGET_USER_ID, [])
print(f"Top {len(user_recommendations)} movie IDs recommended for user {TARGET_USER_ID}:")
for movie_id, rating in user_recommendations:
    print(f"Movie ID: {movie_id}, Predicted Rating: {rating:.2f}")

Top 5 movie IDs recommended for user 196:
Movie ID: 173, Predicted Rating: 4.35
Movie ID: 153, Predicted Rating: 4.30
Movie ID: 286, Predicted Rating: 4.06
Movie ID: 116, Predicted Rating: 3.95
Movie ID: 70, Predicted Rating: 3.67
