In [None]:
from google.colab import drive
# Attach Google Drive to the Colab filesystem; the ratings CSV is read from
# under this mount point later in the notebook.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Location of the ratings export on the mounted Drive.
RATINGS_CSV_PATH = "/content/drive/MyDrive/ratings.csv"

# Load every column as-is; column selection happens when the Surprise
# dataset is built.
ratings_data = pd.read_csv(RATINGS_CSV_PATH)


In [None]:
# Preview the first five rows to sanity-check the load.
# NOTE(review): the preview shows an "Unnamed: 0" column that mirrors the row
# index -- presumably the CSV was written with its index; confirm and consider
# reading with index_col=0.
ratings_data.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [None]:
!pip install surprise

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl.metadata (327 bytes)
Collecting scikit-surprise (from surprise)
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/154.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357283 sha256=1ca51688dd59a262c6c6e8734e78ff49ba63c15c0fbf552cee60a15abcea0297
  Stored in directory: /root/.cach

In [None]:
from surprise import Dataset
from surprise import Reader

# Take the rating scale from the observed ratings instead of hard-coding it.
rating_column = ratings_data['rating']
min_rating = rating_column.min()
max_rating = rating_column.max()

# The Reader tells Surprise the bounds of the rating scale.
reader = Reader(rating_scale=(min_rating, max_rating))

# Surprise's load_from_df takes exactly three columns: user, item, rating
# (in that order).
interaction_columns = ['userId', 'movieId', 'rating']
data = Dataset.load_from_df(ratings_data[interaction_columns], reader)

In [None]:
from surprise.model_selection import train_test_split
# Hold out 25% of the ratings for evaluation. The split is random, so fix the
# seed: without it every Restart & Run All produces a different train/test
# partition and therefore different predictions downstream.
SPLIT_SEED = 42
trainset, testset = train_test_split(data, test_size=0.25, random_state=SPLIT_SEED)
print(type(trainset))

<class 'surprise.trainset.Trainset'>


In [None]:
from surprise import KNNWithMeans
from surprise import Dataset
from surprise import Reader



# User-based neighbourhood model: similarities are computed between users
# using the cosine measure.
similarity_config = {
    'name': 'cosine',
    'user_based': True,
}
algo = KNNWithMeans(k=50, sim_options=similarity_config)

# Fit on the training split (this is where the similarity matrix is computed).
algo.fit(trainset)

# Score every held-out (user, item, rating) triple in the test split.
predictions = algo.test(testset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


In [None]:
# Show the first ten held-out triples; each testset entry unpacks as
# (user id, item id, true rating).
for user_id, item_id, true_rating in testset[:10]:
    print(f"User {user_id} rated item {item_id} with a rating of {true_rating}")

User 426 rated item 318 with a rating of 3.5
User 608 rated item 2699 with a rating of 1.5
User 462 rated item 508 with a rating of 2.5
User 111 rated item 37384 with a rating of 5.0
User 610 rated item 100159 with a rating of 4.5
User 563 rated item 59333 with a rating of 3.0
User 370 rated item 5989 with a rating of 3.5
User 414 rated item 6374 with a rating of 2.5
User 287 rated item 4954 with a rating of 1.5
User 448 rated item 4564 with a rating of 3.0


In [None]:
# Print the first ten predictions in their human-readable form
# (true rating r_ui next to the estimate est).
for pred in predictions[:10]:
    print(pred)

user: 426        item: 318        r_ui = 3.50   est = 4.46   {'actual_k': 50, 'was_impossible': False}
user: 608        item: 2699       r_ui = 1.50   est = 2.45   {'actual_k': 37, 'was_impossible': False}
user: 462        item: 508        r_ui = 2.50   est = 3.61   {'actual_k': 50, 'was_impossible': False}
user: 111        item: 37384      r_ui = 5.00   est = 3.18   {'actual_k': 9, 'was_impossible': False}
user: 610        item: 100159     r_ui = 4.50   est = 3.50   {'was_impossible': True, 'reason': 'User and/or item is unknown.'}
user: 563        item: 59333      r_ui = 3.00   est = 2.52   {'actual_k': 2, 'was_impossible': False}
user: 370        item: 5989       r_ui = 3.50   est = 3.78   {'actual_k': 50, 'was_impossible': False}
user: 414        item: 6374       r_ui = 2.50   est = 2.41   {'actual_k': 2, 'was_impossible': False}
user: 287        item: 4954       r_ui = 1.50   est = 3.07   {'actual_k': 6, 'was_impossible': False}
user: 448        item: 4564       r_ui = 3.00   est 

In [None]:
# Inspect the raw Prediction tuples (full-precision estimates and details).
# Only a slice is displayed: the full list holds one entry per test rating and
# dumping it all would flood the notebook output.
predictions[:10]

[Prediction(uid=426, iid=318, r_ui=3.5, est=4.461255047468936, details={'actual_k': 50, 'was_impossible': False}),
 Prediction(uid=608, iid=2699, r_ui=1.5, est=2.449181487399427, details={'actual_k': 37, 'was_impossible': False}),
 Prediction(uid=462, iid=508, r_ui=2.5, est=3.6064185569278155, details={'actual_k': 50, 'was_impossible': False}),
 Prediction(uid=111, iid=37384, r_ui=5.0, est=3.175672570661468, details={'actual_k': 9, 'was_impossible': False}),
 Prediction(uid=610, iid=100159, r_ui=4.5, est=3.5012429423354092, details={'was_impossible': True, 'reason': 'User and/or item is unknown.'}),
 Prediction(uid=563, iid=59333, r_ui=3.0, est=2.5248936950817393, details={'actual_k': 2, 'was_impossible': False}),
 Prediction(uid=370, iid=5989, r_ui=3.5, est=3.781921181955255, details={'actual_k': 50, 'was_impossible': False}),
 Prediction(uid=414, iid=6374, r_ui=2.5, est=2.413851986418083, details={'actual_k': 2, 'was_impossible': False}),
 Prediction(uid=287, iid=4954, r_ui=1.5, est=