# Lab-7 - Collaborative filtering

## Before you start

- Notebook tasks can be done individually or in a group of two
- Please save notebooks with outputs filled in - this will speed up the checking process
- Send notebooks with solutions via email:
  - To: michal.wojcik@doctorate.put.poznan.pl
  - Subject format example: [IR] Lab 7 - Jan Kowalski 123456, Anna Nowak 789012
  - Attach: notebook file
- **Deadline** - 14 days after the class
- All of the tasks require implementation - complete the code
- The number of points for each task is next to the command


## Dataset

Data source: https://grouplens.org/datasets/movielens/ (File [ml-100k.zip](https://files.grouplens.org/datasets/movielens/ml-100k.zip))

- 943 users
- 1682 items
- 100000 ratings (1-5 scale)

# Imports

#### *Surprise* package

"Surprise is a Python scikit for building and analyzing recommender systems that deal with explicit rating data."

The package will be used in several places. Link to the documentation: https://surpriselib.com/

In [1]:
%pip install surprise

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import pandas as pd
from sklearn.metrics import mean_squared_error
import numpy as np
from surprise import AlgoBase, SlopeOne, KNNBasic, NormalPredictor, SVD
from surprise import Dataset
from surprise import Reader
from surprise import accuracy

pd.options.display.max_columns = 50

# Read data

In [3]:
train_set_path = 'data/ua.base'
test_set_path = 'data/ua.test'
sets_column_names = ['User_ID', 'Item_ID', 'Rating', 'Timestamp']

items_path = 'data/u.item'
items_column_names = [
    'Item_ID', 'Movie_Title', 'Release_Date', 'Video_Release_Date', 
    'IMDb_URL', 'Unknown', 'Action', 'Adventure', 'Animation', 
    'Childrens', 'Comedy', 'Crime', 'Documentary', 'Drama', 
    'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery', 
    'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'
]

users_path = 'data/u.user'
users_column_names=['User_ID', 'Age', 'Gender', 'Occupation', 'Zip_Code']

train_set = pd.read_csv(train_set_path, sep='\t', names=sets_column_names, encoding = "ISO-8859-1")
display(train_set)

test_set = pd.read_csv(test_set_path, sep='\t', names=sets_column_names, encoding = "ISO-8859-1")
display(test_set)

items = pd.read_csv(items_path, sep='|', names=items_column_names, encoding = "ISO-8859-1")
display(items)

users = pd.read_csv(users_path, sep='|', names=users_column_names, encoding = "ISO-8859-1")
display(users)

Unnamed: 0,User_ID,Item_ID,Rating,Timestamp
0,1,1,5,874965758
1,1,2,3,876893171
2,1,3,4,878542960
3,1,4,3,876893119
4,1,5,3,889751712
...,...,...,...,...
90565,943,1047,2,875502146
90566,943,1074,4,888640250
90567,943,1188,3,888640250
90568,943,1228,3,888640275


Unnamed: 0,User_ID,Item_ID,Rating,Timestamp
0,1,20,4,887431883
1,1,33,4,878542699
2,1,61,4,878542420
3,1,117,3,874965739
4,1,155,2,878542201
...,...,...,...,...
9425,943,232,4,888639867
9426,943,356,4,888639598
9427,943,570,1,888640125
9428,943,808,4,888639868


Unnamed: 0,Item_ID,Movie_Title,Release_Date,Video_Release_Date,IMDb_URL,Unknown,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1677,1678,Mat' i syn (1997),06-Feb-1998,,http://us.imdb.com/M/title-exact?Mat%27+i+syn+...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1678,1679,B. Monkey (1998),06-Feb-1998,,http://us.imdb.com/M/title-exact?B%2E+Monkey+(...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0
1679,1680,Sliding Doors (1998),01-Jan-1998,,http://us.imdb.com/Title?Sliding+Doors+(1998),0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0
1680,1681,You So Crazy (1994),01-Jan-1994,,http://us.imdb.com/M/title-exact?You%20So%20Cr...,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0


Unnamed: 0,User_ID,Age,Gender,Occupation,Zip_Code
0,1,24,M,technician,85711
1,2,53,F,other,94043
2,3,23,M,writer,32067
3,4,24,M,technician,43537
4,5,33,F,other,15213
...,...,...,...,...,...
938,939,26,F,student,33319
939,940,32,M,administrator,02215
940,941,20,M,student,97229
941,942,48,F,librarian,78209


In [4]:
users_agg = train_set[['User_ID', 'Rating']].groupby('User_ID').agg(['mean', 'count'])
display(users_agg.sort_values([('Rating', 'mean')]))
display(users_agg.sort_values([('Rating', 'count')]))

Unnamed: 0_level_0,Rating,Rating
Unnamed: 0_level_1,mean,count
User_ID,Unnamed: 1_level_2,Unnamed: 2_level_2
181,1.489412,425
405,1.839065,727
445,2.000000,125
774,2.074766,214
685,2.100000,10
...,...,...
583,4.647059,17
225,4.647059,17
507,4.708333,48
849,4.846154,13


Unnamed: 0_level_0,Rating,Rating
Unnamed: 0_level_1,mean,count
User_ID,Unnamed: 1_level_2,Unnamed: 2_level_2
202,2.700000,10
441,3.700000,10
685,2.100000,10
34,3.800000,10
36,4.000000,10
...,...,...
276,3.474409,508
450,3.860377,530
13,3.089457,626
655,2.909630,675


In [5]:
items_agg = train_set[['Item_ID', 'Rating']].groupby('Item_ID').agg(['mean', 'count'])
display(items_agg.sort_values([('Rating', 'mean')]))
display(items_agg.sort_values([('Rating', 'count')]))

Unnamed: 0_level_0,Rating,Rating
Unnamed: 0_level_1,mean,count
Item_ID,Unnamed: 1_level_2,Unnamed: 2_level_2
1486,1.0,1
1548,1.0,1
1343,1.0,1
830,1.0,1
1617,1.0,1
...,...,...
1656,5.0,1
1189,5.0,3
1122,5.0,1
1293,5.0,2


Unnamed: 0_level_0,Rating,Rating
Unnamed: 0_level_1,mean,count
Item_ID,Unnamed: 1_level_2,Unnamed: 2_level_2
1682,3.000000,1
1571,1.000000,1
1570,1.000000,1
1569,1.000000,1
1568,1.000000,1
...,...,...
286,3.692500,400
258,3.791262,412
181,4.011390,439
100,4.148984,443


# Helpers

In [6]:
def print_true_value(dataset, user_id, item_id):
    true_df = dataset[(dataset['User_ID'] == user_id) & (dataset['Item_ID'] == item_id)]
    if len(true_df):
        print("TRUE VALUE:", true_df.iloc[0]['Rating'])
    else:
        print("TRUE VALUE: UNKNOWN")

# Models

### Task 0 [3p]

- Convert dataframes (*train_set* and *test_set*) to a form that will allow you to perform *fit* and *predict* on models from the *Surprise* package **[2p]**
- Test the solution on a model that performs random predictions for the user $289$ and item $815$ **[1p]**

In [7]:
# TODO - load dataframes to Surprise framework train_set and test_set form [2p]

# Hint:
# Reader - https://surprise.readthedocs.io/en/stable/reader.html?highlight=reader#surprise.reader.Reader
# Dataset - https://surprise.readthedocs.io/en/stable/dataset.html?highlight=Dataset#surprise.dataset.Dataset
# load_from_df - https://surprise.readthedocs.io/en/stable/dataset.html?highlight=build_full_trainset#surprise.dataset.Dataset.load_from_df
# build_full_trainset - https://surprise.readthedocs.io/en/stable/dataset.html?highlight=build_full_trainset#surprise.dataset.DatasetAutoFolds.build_full_trainset
# build_testset - https://surprise.readthedocs.io/en/stable/trainset.html#surprise.Trainset.build_testset

reader = Reader(rating_scale=(1, 5))

train_data = Dataset.load_from_df(train_set[['User_ID', 'Item_ID', 'Rating']], reader)
train_set_surprise = train_data.build_full_trainset()

test_data = Dataset.load_from_df(test_set[['User_ID', 'Item_ID', 'Rating']], reader)
test_set_surprise = test_data.build_full_trainset().build_testset()


In [8]:
# TODO - test the solution with the random model [1p]

# Hint:
# NormalPredictor - https://surprise.readthedocs.io/en/stable/basic_algorithms.html#surprise.prediction_algorithms.random_pred.NormalPredictor

user_id = 289
item_id = 815

model = NormalPredictor()
model.fit(train_set_surprise)

predictions = model.test(test_set_surprise)
print("NORMAL PREDICTOR")
print_true_value(test_set, user_id, item_id)
print("PREDICTED VALUE:", predictions[0].est)
print("RMSE:", accuracy.rmse(predictions))


NORMAL PREDICTOR
TRUE VALUE: 3
PREDICTED VALUE: 5
RMSE: 1.5146
RMSE: 1.514627608097809


#### Symbols

- $r_{ui}$ - Rating given by the user $u$ to item $i$
- $\hat{r}_{ui}$ - Prediction of the rating given by the user $u$ to item $i$
- $\mu_u$ - Average rating provided by the user $u$ to all of the rated items
- $R_i(u)$ - Set of items that was rated by $u$ and was rated by at least one other user which also rated item $i$
- $U_{i}$ - A set of users who rated item $i$
- $U_{ij}$ - A set of users who rated both item $i$ and $j$
- $I_{u}$ - A set of items which was rated by $u$
- $I_{uv}$ - A set of items which was rated both by $u$ and $v$
- $\text{dev}(i, j)$ - Average difference in ratings between items $i$ and $j$

## 1) Average rating per Item

**General idea:**

The user $u$ will rate the item $i$ according to the average rating for $i$ provided by other users:

$$
\hat{r}_{ui} = \frac{\sum\limits_{v \in U_{i}} r_{vi}}{|U_{i}|}
$$

### Task 1 [4p]

- Implement the code that executes the above algorithm **[3p]**
  - Use *train_set* for training and predict the *test_set* ratings
- Calculate the root mean squared error (RMSE) on the *test_set* predictions **[1p]**

In [9]:
def item_by_average_rating_by_other_users(user_id, item_id):
    item_df = train_set[(train_set['Item_ID'] == item_id) & (train_set['User_ID'] != user_id)]
    if len(item_df):
        return item_df['Rating'].mean()
    else:
        return train_set['Rating'].mean()
    
# measure 
predictions = []
for _, row in test_set.iterrows():
    predictions.append(item_by_average_rating_by_other_users(row['User_ID'], row['Item_ID']))

# check the RMSE value
print("NORMAL PREDICTOR RMSE:", np.sqrt(mean_squared_error(test_set['Rating'], predictions)))




NORMAL PREDICTOR RMSE: 1.0417647969439812


## 2) Slope One

**General idea:**

In order to predict the user $u$ rating for the item $i$, check how any other item ($j$) was rated by $u$ and what was the average difference between the ratings of items $i$ and $j$ among users who rated both of them.

To generalize the above approach to all users and items that are available in the dataset, and then aggregate the estimates into one prediction, it is worth using the following formulas.

$$
\hat{r}_{ui} = \mu_u + \frac{1}{|R_i(u)|}\sum\limits_{j \in R_i(u)} \text{dev}(i, j)
$$

$$
\text{dev}(i, j) = \frac{1}{|U_{ij}|}\sum\limits_{u \in U_{ij}} r_{ui} - r_{uj}
$$

**Algorithm:**

To estimate unknown $\hat{r}_{ui}$:

- Find all items that was rated by the user $u$
- For each of item ($j$):
  - find all of the users ($U_{ij}$) that rated both items ($i$ and $j$)
  - if $U_{ij} \neq \varnothing$, then calculate average difference ($\text{dev}(i, j)$) between ratings for item $i$ and $j$
- calculate average of each $\text{dev}(i, j)$ and add it to the average rating for the user $u$ ($\mu_u$) to get the final estimation

**Note:** This approach differs from the one presented on the lecture - here we use the arithmetic mean instead of the weighted one.

Details: https://surprise.readthedocs.io/en/stable/slope_one.html#surprise.prediction_algorithms.slope_one.SlopeOne

### Task 2 [7p]

- Implement a function that will allow you to make a prediction for a single user-item pair, based on available ratings, on the basis of a data set **[4p]**
- Predict the user $70$ rating prediction for the item $50$ **[1p]**
    - Use data from train_set for prediction
    - Print the actual rating value from test_set for the mentioned user-item pair
- Verify the result with the *Surprise* package model **[1p]**
- Calculate RMSE for the *test_set* using the *Surprise* package **[1p]**

In [10]:
def slope_one(dataset, user_id, item_id):
    # Filter out ratings by the current user
    user_ratings = dataset[dataset['User_ID'] == user_id]

    # Initialize a list to store individual predictions
    sub_pred = []

    for _, row in user_ratings.iterrows():
        # Find users who rated both the current item and the target item
        users_who_rated_both = dataset[(dataset['Item_ID'] == row['Item_ID']) & (dataset['User_ID'] != user_id)]
        target_ratings = dataset[(dataset['Item_ID'] == item_id) & (dataset['User_ID'].isin(users_who_rated_both['User_ID']))]

        # Calculate the deviation for users who rated both items
        if not target_ratings.empty and not users_who_rated_both.empty:
            deviation = (target_ratings['Rating'] - users_who_rated_both['Rating']).mean()
            if not np.isnan(deviation):
                sub_pred.append(row['Rating'] + deviation)

    # Calculate the final prediction
    if sub_pred:
        return np.mean(sub_pred)
    else:
        # If no prediction could be made, return the user's average rating
        return user_ratings['Rating'].mean()

In [11]:
# TODO - Make a prediction based on the train_set data [1p]

user_id = 70
item_id = 50

print_true_value(test_set, user_id, item_id)
print(slope_one(train_set, user_id, item_id))

TRUE VALUE: 4


3.446280991735537


In [12]:
# TODO - fit and predict the SlopeOne model from the surprise package on the loaded data and check if you get the same result [1p]

model = SlopeOne()
model.fit(train_set_surprise)

predictions = model.test(test_set_surprise)
# MSE
print("SLOPE ONE MSE:", accuracy.rmse(predictions))

RMSE: 0.9649
SLOPE ONE MSE: 0.9648840238100149


## 3) k-NN user-based

**General idea:**

User $u$ will rate item $i$ in a similar way as people who rated other items similarly to $u$. Therefore, it is necessary to select those users for whom:
- the rating for the item $i$ is known
- it is possible to evaluate the similarity to the user $u$

The similarity between users is measurable, e.g.:

$$
\text{cosine\_sim}(u, v) = \frac{\sum\limits_{i \in I_{uv}} r_{ui} \cdot r_{vi}}{\sqrt{\sum\limits_{i \in I_{uv}} r_{ui}^2} \cdot \sqrt{\sum\limits_{i \in I_{uv}} r_{vi}^2}}

$$

$$
\text{msd\_distance}(u, v) = \frac{1}{|I_{uv}|} \cdot \sum\limits_{i \in I_{uv}} (r_{ui} - r_{vi})^2

$$

$$
\text{msd\_sim}(u, v) = \frac{1}{\text{msd\_distance}(u, v) + 1}

$$

The parameter $k$ must be chosen, which indicates how many nearest neighbors (NN) are being considered. In the basic approach, each of the neighbors *votes* for the rating they gave for the item $i$, and the weight of their vote is the similarity measure to user $u$.

$$
\hat{r}_{ui} = \frac{
\sum\limits_{v \in N^k_i(u)} \text{sim}(u, v) \cdot r_{vi}}
{\sum\limits_{v \in N^k_i(u)} \text{sim}(u, v)}
$$

Details: https://surprise.readthedocs.io/en/stable/knn_inspired.html#surprise.prediction_algorithms.knns.KNNBasic

### Task 3 [7p]

- Implement a function that will allow you to make a prediction for a single user-item pair, based on available ratings, on the basis of a data set **[4p]**
    - Use mean squared difference (MSD) to estimate the similarity between the users
- Predict the user $262$ rating prediction for the item $1147$ **[1p]**
    - Use data from *train_set* for prediction
    - Print the actual rating value from *test_set* for the mentioned user-item pair
- Verify the result with the *Surprise* package model **[1p]**
- Calculate RMSE for the *test_set* using the *Surprise* package **[1p]**

In [13]:
def k_nn(dataset, user_id, item_id, k=40):
    # Filter out users who have rated the target item
    users_who_rated_item = dataset[dataset['Item_ID'] == item_id]
    
    target_ratings = dataset[dataset['User_ID'] == user_id]

    # Dictionary to store MSD values for each user
    user_msd = {}

    # Iterate through each user who rated the target item
    for other_user_id in users_who_rated_item['User_ID'].unique():
        if other_user_id != user_id:
            # Find common items between the current user and the target user
            common_items = dataset[(dataset['User_ID'] == other_user_id) & (dataset['Item_ID'].isin(target_ratings['Item_ID']))]

            # Join to get both ratings for each common item
            common_with_ratings = common_items.merge(target_ratings, on='Item_ID')
            common_with_ratings['rating_diff'] = common_with_ratings['Rating_x'] - common_with_ratings['Rating_y']

            # Calculate the Mean Squared Difference (MSD)
            msd = np.mean(common_with_ratings['rating_diff'] ** 2)
            user_msd[other_user_id] = 1 / (1 + msd)

    # Sort the MSD values in descending order
    sorted_msd = sorted(user_msd.items(), key=lambda x: x[1], reverse=True)
    
    # return weighted average
    return sum([dataset[(dataset['User_ID'] == user_id) & (dataset['Item_ID'] == item_id)]['Rating'].mean() * weight for user_id, weight in sorted_msd[:k]]) / sum([weight for _, weight in sorted_msd[:k]])

In [14]:
# TODO - Make a prediction based on the train_set data [1p]

user_id = 262
item_id = 1147

print_true_value(test_set, user_id, item_id)
print(k_nn(train_set, user_id, item_id))

TRUE VALUE: 4
3.702814685918104


In [15]:
algo = KNNBasic()
algo.fit(trainset=train_set_surprise)
predictions = algo.test(testset=test_set_surprise)

print_true_value(test_set, user_id, item_id)
print("PREDICTED VALUE:", algo.predict(user_id, item_id).est)
print("RMSE:", accuracy.rmse(predictions))

Computing the msd similarity matrix...
Done computing similarity matrix.
TRUE VALUE: 4
PREDICTED VALUE: 3.702814685918104
RMSE: 0.9963
RMSE: 0.9963111213563114


## Matrix factorization (SVD inspired)

**General idea:**

The user ratings of items are influenced by *latent factors*, which are unknown. It is possible to find such values of these factors that make it possible to reproduce the resulting matrix. Stochastic Gradient Descent (SGD) can be used for this purpose.

In the basic version of the algorithm, we are looking for two matrices $p$ and $q^T$, the dot product of which will recreate known user ratings (and also calculate predictions for unknowns):

$$
\hat{r}_{ui} = p_uq_{i}^T
$$

To find the values of the $p$ and $q$ matrices, the error for known ratings should be minimized:

$$
e_{ui} = \frac{(r_{ui} - \hat{r}_{ui})^{2}}{2}
$$

$$
e_{ui}' = r_{ui} - \hat{r}_{ui}
$$

So at each step we take a different known user rating for the item, compute the error, and then update the $p$ and $q$ matrices, thus minimizing the error.

$$
p_{u}' = p_{u} + \gamma \cdot e_{ui}' \cdot q_{i} \\
q_{i}' = q_{i} + \gamma \cdot e_{ui}' \cdot p_{u}
$$

Where $\gamma$ is *learning rate*.

After the optimization step, predictions for unknown evaluations can be obtained using the dot product for the $p$ and $q^T$ matrices.

More details about the algorithm and the more sophisticated version:
https://surprise.readthedocs.io/en/stable/matrix_factorization.html

### Task 4 [9p]

- Implement a function that optimizes $p$ and $q$ matrices with the SGD algorithm **[6p]**
    - Measure and print the mean error for the entire training set for each epoch to see if it is decreasing
    - Return a matrix that contains predictions for all user-item pairs
- Check the real and predicted rating values for the user $619$ and item $332$ **[1p]**
    - Use data from *train_set* for prediction
    - Print the actual rating value from *test_set* for the mentioned user-item pair
- Check the prediction using the *Surprise* SVD model for the same pair **[1p]**
- Calculate RMSE using the *Surprise* package **[1p]**

**Hint:** In this task, in addition to the correctness of implementation, the achieved results are also assessed. Compare your RMSE against the RMSE of the *Surprise* model. If your initial error value is large, slowly converging, or your algorithm is performing poorly compared to SVD, take a look at the questions below:

- How many factors and epochs are used by default in the reference model? What is the learning rate value? (*see documentation*)
- Is the indexing correct? Note that the *UserId* and *ItemId* indexes start at 1 and that there may be some missing data. Adding *useless* vectors in $p$ and $q$ for non-existent Users and Items may be helpful to make the indexing and implementation simpler.
- Display and multiply any two vectors from the initial matrices $p$ and $q$. Does the score match the rating range you want to achieve? If not, how should you change the initial matrices?


In [16]:
def matrix_factorization(dataset, epochs=20, learning_rate=0.01, factors=25):
    user_matrix = np.random.rand(dataset['User_ID'].nunique(), factors)
    item_matrix = np.random.rand(dataset['Item_ID'].nunique(), factors)

    user_item_matrix = dataset.pivot_table(index='User_ID', columns='Item_ID', values='Rating').fillna(0).values

    for epoch in range(epochs):
        total_error = 0
        for user_id in range(user_matrix.shape[0]):
            for item_id in range(item_matrix.shape[0]):
                if user_item_matrix[user_id, item_id] > 0:
                    error_user_item = user_item_matrix[user_id, item_id] - np.dot(user_matrix[user_id, :], item_matrix[item_id, :].T)

                    user_matrix[user_id, :] += learning_rate * (2 * error_user_item * item_matrix[item_id, :])
                    item_matrix[item_id, :] += learning_rate * (2 * error_user_item * user_matrix[user_id, :])

                    total_error += error_user_item ** 2

        mean_error = total_error / dataset.shape[0]
        print("Epoch:", epoch, "Error:", mean_error)

    return np.dot(user_matrix, item_matrix.T)


    

In [17]:
# TODO - Make a prediction based on the matrix [1p]

user_id = 619
item_id = 332

print_true_value(test_set, user_id, item_id)
u_i_matrix = matrix_factorization(train_set, factors=75)
print(u_i_matrix[user_id, item_id])

TRUE VALUE: 4
Epoch: 0 Error: 4.681677565542154
Epoch: 1 Error: 1.7975072407963568
Epoch: 2 Error: 0.8364891849805807
Epoch: 3 Error: 0.6098656815649243
Epoch: 4 Error: 0.4860037125208051
Epoch: 5 Error: 0.40302971847325525
Epoch: 6 Error: 0.3432055025408247
Epoch: 7 Error: 0.29823031410113554
Epoch: 8 Error: 0.2633470793265785
Epoch: 9 Error: 0.23559856987463562
Epoch: 10 Error: 0.21305409948898293
Epoch: 11 Error: 0.19440687086457867
Epoch: 12 Error: 0.17874600260408366
Epoch: 13 Error: 0.16542001646475213
Epoch: 14 Error: 0.15395168547775132
Epoch: 15 Error: 0.14398375201286087
Epoch: 16 Error: 0.13524382865501078
Epoch: 17 Error: 0.12752115483870155
Epoch: 18 Error: 0.12065061032156359
Epoch: 19 Error: 0.11450128174053906
3.2005817485684904


In [18]:
method = SVD()
method.fit(train_set_surprise)
predictions = method.test(test_set_surprise)

print_true_value(test_set, user_id, item_id)
print("PREDICTED VALUE:", method.predict(user_id, item_id).est)

accuracy.rmse(predictions)

TRUE VALUE: 4
PREDICTED VALUE: 3.372132673367684
RMSE: 0.9548


0.954808768067656