In [1]:
import pandas as pd
import numpy as np

from surprise.dataset import Dataset
from surprise.reader import Reader
from surprise.prediction_algorithms import SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

In [2]:
!pip uninstall -y numpy
!pip install numpy==1.26.4

Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
Collecting numpy==1.26.4
  Using cached numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.0 MB)
Installing collected packages: numpy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pytensor 2.36.3 requires numpy>=2.0, but you have numpy 1.26.4 which is incompatible.
rasterio 1.5.0 requires numpy>=2, but you have numpy 1.26.4 which is incompatible.
opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
shap 0.50.0 requires numpy>=2, but you have numpy 1.26.4 which is incompatible.
jaxlib 0.7.2 requires numpy>=2.0, but you have numpy 1.26.4 which is incompat

In [3]:
data_dict = {
    "user_id": [1,1,1,2,2,2,3,3,3,4,4,4],
    "item_id": [101,102,103,101,102,104,101,103,104,102,103,104],
    "rating":  [5,4,3,4,5,2,2,5,4,4,3,5]
}

df = pd.DataFrame(data_dict)
df


Unnamed: 0,user_id,item_id,rating
0,1,101,5
1,1,102,4
2,1,103,3
3,2,101,4
4,2,102,5
5,2,104,2
6,3,101,2
7,3,103,5
8,3,104,4
9,4,102,4


In [4]:
reader = Reader(rating_scale=(1,5))
data = Dataset.load_from_df(
    df[['user_id', 'item_id', 'rating']],
    reader
)


In [5]:
trainset, testset = train_test_split(data, test_size=0.25, random_state=42)


In [6]:
model = SVD()
model.fit(trainset)


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7c846eb37ec0>

In [7]:
predictions = model.test(testset)

accuracy.rmse(predictions)
accuracy.mae(predictions)


RMSE: 1.3909
MAE:  1.1285


1.1284636357918851

In [8]:
def recommend(user_id, item_list, n=3):
    predictions = []
    for item in item_list:
        pred = model.predict(user_id, item)
        predictions.append((item, pred.est))

    predictions.sort(key=lambda x: x[1], reverse=True)
    return predictions[:n]


In [9]:
items = df['item_id'].unique()
recommend(1, items)


[(102, 4.182471098464654), (101, 4.1456340028206), (104, 3.994147379324673)]

The recommendation system predicts ratings for unseen userâ€“item pairs.
Items with the highest predicted ratings are recommended to the user.
Matrix factorization captures latent user preferences and item features.