In [54]:
from IPython.display import display
import gzip
import json
import pandas as pd
import re
import numpy as np
import matplotlib.pyplot as plt
import sklearn.metrics as metrics

from sklearn.neighbors import NearestNeighbors
from scipy.spatial.distance import correlation, cosine
import ipywidgets as widgets
from IPython.display import display, clear_output
from sklearn.metrics import pairwise_distances
from sklearn.metrics import mean_squared_error
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_absolute_error
from math import sqrt
import sys, os
from contextlib import contextmanager
import matplotlib
import matplotlib.pyplot as plt

In [2]:
def get_needed_fields(json_line):
    """Project one parsed book record onto the fields this notebook uses.

    Args:
        json_line: Mapping decoded from a single JSON line of the books dump.

    Returns:
        A new dict holding only the selected keys, in the order listed below.

    Raises:
        KeyError: If any of the selected keys is absent from ``json_line``.
    """
    wanted = (
        "book_id",
        "title",
        "ratings_count",
        "url",
        "image_url",
        "average_rating",
        "authors",
        "publisher",
    )
    return {field: json_line[field] for field in wanted}

In [3]:
def get_books_df(min_rating_count=1000, path="inputs/goodreads_books.json.gz"):
    """Load the gzipped, line-delimited JSON book dump into a cleaned DataFrame.

    Args:
        min_rating_count: Only books whose ``ratings_count`` is strictly greater
            than this value are kept.
        path: Location of the gzipped dump; defaults to the notebook's input file.

    Returns:
        DataFrame with the fields selected by ``get_needed_fields``. Titles are
        reduced to lowercase ASCII letters/digits/spaces with runs of whitespace
        collapsed, rows whose title becomes empty are dropped, and
        ``ratings_count`` / ``book_id`` are converted to numeric dtypes.
    """
    parsed_books = []
    with gzip.open(path, "rb") as f:
        # Iterating the file yields one JSON document per line.
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines instead of failing in json.loads
            needed_fields = get_needed_fields(json.loads(line))
            try:
                ratings_count = int(needed_fields["ratings_count"])
            except (TypeError, ValueError):
                # Missing (None) or non-numeric counts cannot be compared; skip the book.
                continue
            if ratings_count > min_rating_count:
                parsed_books.append(needed_fields)

    books = pd.DataFrame.from_dict(parsed_books)
    books["ratings_count"] = pd.to_numeric(books["ratings_count"])
    # Raw strings keep the regex escapes intact ("\s" is an invalid escape in a normal literal).
    books["title"] = (
        books["title"]
        .str.replace(r"[^a-zA-Z0-9 ]", "", regex=True)
        .str.lower()
        .str.replace(r"\s+", " ", regex=True)
    )
    # .copy() so the column assignment below works on an independent frame, not a slice.
    books = books[books["title"].str.len() > 0].copy()
    books["book_id"] = pd.to_numeric(books["book_id"])
    return books

In [4]:
# Load the book catalogue, keeping only books with more than 1000 ratings.
books_df = get_books_df(1000)

In [5]:
def get_books_csv_map(path="inputs/book_id_map.csv"):
    """Read the two-column id mapping file into a dict.

    Args:
        path: CSV file whose first line is a header and whose remaining lines
            are ``<csv_id>,<book_id>``; defaults to the notebook's input file.

    Returns:
        Dict mapping ``int(csv_id)`` to ``int(book_id)``.

    Raises:
        ValueError: If a non-blank line does not hold exactly two integer fields.
    """
    csv_book_mapping = {}
    with open(path, "r") as f:
        next(f, None)  # skip the header line (no-op on an empty file)
        for line in f:
            line = line.strip()
            if not line:
                continue  # a blank or trailing empty line is not a record
            csv_id, book_id = line.split(",")
            csv_book_mapping[int(csv_id)] = int(book_id)
    return csv_book_mapping

In [6]:
# Lookup from the ids used in the interactions CSV to the book ids used in books_df.
books_csv_ids_map = get_books_csv_map()

In [7]:
def get_my_rated_books_df(books_df, path="inputs/my_rated_books.csv"):
    """Load my own ratings and attach the catalogue columns to them.

    Args:
        books_df: Book catalogue containing a ``book_id`` column.
        path: CSV file with a header line followed by ``<book_id>,<rating>``
            rows; defaults to the notebook's input file.

    Returns:
        Inner merge of ``books_df`` with the ratings on ``book_id``: one row per
        rated book that exists in the catalogue, with an added ``rating`` column.

    Raises:
        ValueError: If a non-blank line does not hold exactly two integer fields.
    """
    rated_books = []
    with open(path) as f:
        next(f, None)  # skip the header line
        for line in f:
            line = line.strip()
            if not line:
                continue  # ignore blank / trailing empty lines
            book_id, rating = line.split(",")
            rated_books.append({"book_id": int(book_id), "rating": int(rating)})
    # Explicit columns keep the merge key present even when no rating was read.
    books = pd.DataFrame(rated_books, columns=["book_id", "rating"], dtype=int)
    return books_df.merge(books, how="inner", on="book_id")

In [8]:
# My own ratings joined onto the catalogue; rated books absent from books_df are dropped by the inner merge.
my_rate_books_df = get_my_rated_books_df(books_df)

In [9]:
def get_filtered_users(my_rate_books_df, same_books_ratio, path="inputs/goodreads_interactions.csv", csv_ids_map=None):
    """Find users who interacted with enough of the books I rated.

    Args:
        my_rate_books_df: Frame with a ``book_id`` column listing my rated books.
        same_books_ratio: Divisor applied to ``len(my_rate_books_df)``; a user is
            kept when their number of shared books is strictly greater than
            ``len(my_rate_books_df) / same_books_ratio``.
        path: Interactions CSV (header line, then five comma-separated fields
            per row, of which the first two are user id and CSV book id).
        csv_ids_map: Mapping from CSV book id to real book id. When omitted the
            module-level ``books_csv_ids_map`` is used.

    Returns:
        Set of integer user ids that pass the overlap threshold.

    Raises:
        ZeroDivisionError: If ``same_books_ratio`` is zero.
    """
    if csv_ids_map is None:
        csv_ids_map = books_csv_ids_map
    book_set = set(my_rate_books_df["book_id"])
    # Computed once up front so an invalid ratio fails before the file is scanned.
    min_shared = len(my_rate_books_df) / same_books_ratio

    shared_counts = {}
    with open(path) as f:
        next(f, None)  # skip the header line
        for line in f:
            line = line.strip()
            if not line:
                continue
            user_id, csv_id, _, _, _ = line.split(",")
            book_id = csv_ids_map.get(int(csv_id))
            # Ids missing from the map yield None; skip them instead of crashing in int(None).
            if book_id is None or book_id not in book_set:
                continue
            user = int(user_id)
            shared_counts[user] = shared_counts.get(user, 0) + 1
    return {user for user, count in shared_counts.items() if count > min_shared}

In [10]:
# Keep users whose count of shared books exceeds len(my_rate_books_df) / 1.5.
filtered_users = get_filtered_users(my_rate_books_df, 1.5)

In [11]:
def get_interactions_df(filtered_users, path="inputs/goodreads_interactions.csv", csv_ids_map=None):
    """Collect every interaction of the selected users into a DataFrame.

    Args:
        filtered_users: Container of integer user ids to keep (a set gives
            constant-time membership tests).
        path: Interactions CSV (header line, then five comma-separated fields
            per row: user id, CSV book id, an ignored field, rating, an ignored field).
        csv_ids_map: Mapping from CSV book id to real book id. When omitted the
            module-level ``books_csv_ids_map`` is used.

    Returns:
        DataFrame with integer columns ``user_id``, ``book_id`` and ``rating``.
        The columns are present even when no row matches.
    """
    if csv_ids_map is None:
        csv_ids_map = books_csv_ids_map

    records = []
    with open(path) as f:
        next(f, None)  # skip the header line
        for line in f:
            line = line.strip()
            if not line:
                continue
            user_id, csv_id, _, rating, _ = line.split(",")
            user = int(user_id)
            if user not in filtered_users:
                continue  # cheap rejection before any id lookup
            book_id = csv_ids_map.get(int(csv_id))
            if book_id is None:
                continue  # no real book id known for this CSV id
            records.append({"user_id": user, "book_id": int(book_id), "rating": int(rating)})

    return pd.DataFrame(records, columns=["user_id", "book_id", "rating"], dtype=int)

In [12]:
# Second pass over the interactions file: gather every interaction of the retained users.
interactions_df_initial = get_interactions_df(filtered_users)

In [13]:
# Display the collected interactions (user_id, book_id, rating).
interactions_df_initial

Unnamed: 0,user_id,book_id,rating
0,520,13609836,2
1,520,11521040,5
2,520,301082,4
3,520,19501,0
4,520,7654769,4
...,...,...,...
2878502,442052,33249079,0
2878503,442052,33509085,0
2878504,442052,34511923,0
2878505,442052,20898092,0


In [14]:
# Append my own ratings under user_id 1, the id the later cells use for "me"
# (recommend_book call, svd.predict, evaluate_predictions).
# NOTE(review): this assumes user_id 1 does not already occur in interactions_df_initial — confirm.
# One concat instead of one per row: concatenating inside a loop copies the whole frame every time.
interactions_df = pd.concat(
    [
        interactions_df_initial,
        my_rate_books_df[["book_id", "rating"]].assign(user_id=1)[["user_id", "book_id", "rating"]],
    ],
    ignore_index=True,
)

In [15]:
# Order the rows by user_id and rebuild a fresh 0..n-1 index.
interactions_df = interactions_df.sort_values(by="user_id").reset_index(drop=True)

In [16]:
# User x book rating matrix: one row per user_id, one column per book_id.
# Cells without an interaction are filled with 0; pivot raises if a (user_id, book_id) pair repeats.
R_df = interactions_df.pivot(index = 'user_id', columns ='book_id', values = 'rating').fillna(0)
R_df

book_id,1,2,3,4,5,6,7,8,10,11,...,36465981,36465991,36466338,36468395,36472650,36475156,36479398,36490593,36491811,36496896
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
520,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1033,0.0,0.0,4.0,0.0,4.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2614,5.0,5.0,4.0,0.0,5.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3556,5.0,5.0,5.0,0.0,5.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
439717,4.0,4.0,4.0,0.0,5.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
440351,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
440975,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
441524,5.0,5.0,5.0,0.0,5.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
def get_df_maps(R_df):
    """Build the lookups between real ids and 1-based matrix positions.

    Args:
        R_df: Rating matrix whose index holds user ids and whose columns hold
            book ids.

    Returns:
        Tuple ``(user_index_map, book_index_map)`` where ``user_index_map`` maps
        each index label to its 1-based row position and ``book_index_map`` maps
        each 1-based column position to its column label. Note the two maps
        point in opposite directions.
    """
    # Enumerate the axis labels directly; iterrows() would build a full-width
    # Series for every row only to discard it.
    user_index_map = {user_id: pos for pos, user_id in enumerate(R_df.index, start=1)}
    book_index_map = {pos: book_id for pos, book_id in enumerate(R_df.columns, start=1)}
    return user_index_map, book_index_map

In [18]:
# user_index_map: user_id -> 1-based row position; book_index_map: 1-based column position -> book_id.
user_index_map, book_index_map = get_df_maps(R_df)

In [19]:
# Replace the user_id labels by consecutive positions 1..n_users, keeping the axis name 'user_id'.
R_df_indexed_aux = R_df.reset_index(drop=True)
R_df_indexed_aux.index = pd.Index(np.arange(1, len(R_df_indexed_aux) + 1), name='user_id')

In [20]:
# Replace the book_id column labels by consecutive positions 1..n_books, keeping the axis name 'book_id'.
# Relabelling the column axis directly avoids transposing the whole matrix twice.
R_df_indexed = R_df_indexed_aux.set_axis(
    pd.Index(np.arange(1, R_df_indexed_aux.shape[1] + 1), name='book_id'),
    axis=1,
)

In [21]:
# Preview the matrix after both axes were relabelled with 1-based positions.
R_df_indexed.head()

book_id,1,2,3,4,5,6,7,8,9,10,...,572373,572374,572375,572376,572377,572378,572379,572380,572381,572382
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,4.0,0.0,4.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5.0,5.0,4.0,0.0,5.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,5.0,5.0,5.0,0.0,5.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
# NumPy array of the rating matrix (rows: users, columns: books) for the linear-algebra steps.
R = R_df_indexed.values

In [23]:
# Per-user mean over every book column, including the 0 placeholders of books without an interaction.
user_ratings_mean = np.mean(R, axis = 1)

In [24]:
# Centre each user's row on that user's mean (reshaped to a column so it broadcasts across books).
R_demeaned = R - user_ratings_mean.reshape(-1, 1)

In [25]:
# Begin Singular Value Decomposition

In [26]:
from scipy.sparse.linalg import svds

In [27]:
# Truncated SVD of the centred matrix, keeping k = 50 singular values/vectors.
U, sigma, Vt = svds(R_demeaned, k = 50)

In [28]:
# Inspect the singular values returned by svds (a 1-D array at this point).
sigma

array([140.91535155, 141.53706224, 141.84039625, 143.3921085 ,
       144.28480692, 144.76712467, 145.98892599, 146.57709607,
       147.42821175, 149.41048828, 151.25706501, 153.80291565,
       154.93612858, 156.22630022, 157.69088482, 159.41032812,
       161.52443051, 163.68219688, 165.17333073, 168.3860154 ,
       170.74580432, 174.48363642, 176.61305207, 177.08324262,
       180.6916608 , 183.84214897, 184.34350692, 184.85592803,
       188.26762261, 189.40731189, 195.54423995, 197.58186132,
       199.93727547, 201.81162362, 203.1579043 , 207.06279295,
       210.34299915, 213.66657598, 213.97642498, 216.22876981,
       225.10159554, 232.85587041, 248.68999073, 260.36606746,
       267.82000411, 288.63151303, 303.2798551 , 328.55871602,
       338.77693738, 761.10203326])

In [29]:
# Expand the 1-D singular values into a diagonal matrix for the matrix product below.
# NOTE: rebinding `sigma` makes this cell non-idempotent — applied to a 2-D array, np.diag
# extracts the diagonal, so running the cell a second time turns `sigma` back into a vector.
sigma = np.diag(sigma)

In [30]:
# Inspect the diagonal singular-value matrix.
sigma

array([[140.91535155,   0.        ,   0.        , ...,   0.        ,
          0.        ,   0.        ],
       [  0.        , 141.53706224,   0.        , ...,   0.        ,
          0.        ,   0.        ],
       [  0.        ,   0.        , 141.84039625, ...,   0.        ,
          0.        ,   0.        ],
       ...,
       [  0.        ,   0.        ,   0.        , ..., 328.55871602,
          0.        ,   0.        ],
       [  0.        ,   0.        ,   0.        , ...,   0.        ,
        338.77693738,   0.        ],
       [  0.        ,   0.        ,   0.        , ...,   0.        ,
          0.        , 761.10203326]])

In [31]:
# Low-rank reconstruction (U · Σ · Vᵀ) of the centred matrix, then add each user's mean back to their row.
all_user_predicted_ratings = U @ sigma @ Vt + user_ratings_mean[:, np.newaxis]

In [32]:
# Inspect the reconstructed rating matrix.
all_user_predicted_ratings

array([[-2.39731052e-02,  1.08799059e-01,  5.87487512e-01, ...,
         1.52417041e-04,  1.52417041e-04,  1.52417041e-04],
       [ 9.06948134e-01,  1.02273717e+00,  2.04833120e+00, ...,
        -3.81896702e-04, -3.81896702e-04, -3.81896702e-04],
       [ 2.43314899e+00,  2.49556153e+00,  3.23779567e+00, ...,
        -4.72692080e-04, -4.72692080e-04, -4.72692080e-04],
       ...,
       [ 7.67145655e-02,  6.86864013e-02,  4.46611315e-02, ...,
         2.61300402e-03,  2.61300402e-03,  2.61300402e-03],
       [ 4.37850619e+00,  4.22973975e+00,  4.72758354e+00, ...,
         1.04019287e-03,  1.04019287e-03,  1.04019287e-03],
       [ 6.59151075e-01,  6.55124670e-01,  8.22243564e-01, ...,
         8.15318197e-05,  8.15318197e-05,  8.15318197e-05]])

In [33]:
# Wrap the predictions in a DataFrame: columns reuse the positional book labels of R_df_indexed,
# rows get the default 0-based index (row position = 1-based user position - 1).
preds_df = pd.DataFrame(all_user_predicted_ratings, columns = R_df_indexed.columns)

In [34]:
# Display the predicted ratings table.
preds_df

book_id,1,2,3,4,5,6,7,8,9,10,...,572373,572374,572375,572376,572377,572378,572379,572380,572381,572382
0,-0.023973,0.108799,0.587488,-0.024290,0.070034,0.117275,-0.000137,-0.056169,-0.004708,-0.000819,...,-0.000152,-0.000152,0.000152,0.000152,0.000152,0.000152,0.000152,0.000152,0.000152,0.000152
1,0.906948,1.022737,2.048331,0.051450,1.073831,1.130127,-0.004148,0.231917,0.320629,0.207886,...,-0.001009,-0.001009,-0.000382,-0.000382,-0.000382,-0.000382,-0.000382,-0.000382,-0.000382,-0.000382
2,2.433149,2.495562,3.237796,0.033373,2.632467,2.544486,-0.010429,0.313937,0.239090,-0.094630,...,-0.001032,-0.001032,-0.000473,-0.000473,-0.000473,-0.000473,-0.000473,-0.000473,-0.000473,-0.000473
3,4.214655,3.974341,3.666383,0.000306,4.305515,4.195017,0.000030,0.245061,0.145806,0.240698,...,0.002489,0.002489,0.001275,0.001275,0.001275,0.001275,0.001275,0.001275,0.001275,0.001275
4,5.179415,4.788934,4.730368,0.032198,4.966915,5.121368,-0.004693,0.178912,0.001737,0.087538,...,0.000554,0.000554,-0.000143,-0.000143,-0.000143,-0.000143,-0.000143,-0.000143,-0.000143,-0.000143
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
620,5.509005,4.974715,4.895439,0.027540,5.275102,5.395156,-0.011910,0.241733,0.213683,0.558140,...,0.000408,0.000408,-0.000318,-0.000318,-0.000318,-0.000318,-0.000318,-0.000318,-0.000318,-0.000318
621,1.353628,1.581234,2.188640,0.112474,1.793683,1.724539,-0.017370,0.516455,0.311182,0.038844,...,0.007848,0.007848,0.005608,0.005608,0.005608,0.005608,0.005608,0.005608,0.005608,0.005608
622,0.076715,0.068686,0.044661,0.001199,0.082819,0.073234,0.000299,0.000848,-0.004662,0.002592,...,0.002743,0.002743,0.002613,0.002613,0.002613,0.002613,0.002613,0.002613,0.002613,0.002613
623,4.378506,4.229740,4.727584,0.000742,4.479955,4.437509,-0.003279,0.163978,0.113292,0.369338,...,0.001904,0.001904,0.001040,0.001040,0.001040,0.001040,0.001040,0.001040,0.001040,0.001040


In [35]:
def recommend_book(predictions_df, user_id, books_df, interactions_df, user_index_map, book_index_map, num_recommendations=10):
    """Recommend the unseen books with the highest predicted rating for a user.

    Args:
        predictions_df: Predicted ratings, one row per user in matrix order and
            one column per positional book label.
        user_id: Real id of the user to recommend for.
        books_df: Book catalogue with a ``book_id`` column.
        interactions_df: Frame with ``user_id``, ``book_id`` and ``rating`` columns.
        user_index_map: Maps a real user id to its 1-based row position.
        book_index_map: Maps a positional book label to the real book id.
        num_recommendations: Maximum number of books to return.

    Returns:
        Tuple ``(user_full, recommendations)``: the user's interactions merged
        with the catalogue and sorted by rating (descending), and the catalogue
        rows of the recommended books ordered by predicted rating (descending).

    Raises:
        KeyError: If ``user_id`` is not present in ``user_index_map``.
    """
    # The map is 1-based while iloc is 0-based.
    user_row_number = user_index_map[user_id] - 1

    # Name the series and its index explicitly so the merge below does not depend on
    # predictions_df having a default row index or a columns axis called 'book_id'.
    user_predictions = (
        predictions_df.iloc[user_row_number]
        .rename(index=book_index_map)
        .rename('Predictions')
        .rename_axis('book_id')
        .sort_values(ascending=False)
    )

    # Get the user's data and merge in the book information.
    user_data = interactions_df[interactions_df.user_id == user_id]
    user_full = (
        user_data
        .merge(books_df, how='left', on='book_id')
        .sort_values(['rating'], ascending=False)
    )

    print('Utilizatorul {0} a oferit deja ratinguri pentru {1} de cărți.'.format(user_id, user_full.shape[0]))
    print('Se recomandă {0} de cărți cu cele mai mari ratinguri prezise.'.format(num_recommendations))

    # Only books the user has no interaction with are candidates.
    unseen_books = books_df[~books_df['book_id'].isin(user_full['book_id'])]
    recommendations = (
        unseen_books
        .merge(user_predictions.reset_index(), how='left', on='book_id')
        .sort_values('Predictions', ascending=False)
        .drop(columns='Predictions')  # helper column, removed by name rather than by position
        .iloc[:num_recommendations]
    )
    return user_full, recommendations
    

In [36]:
# Top-20 recommendations for user_id 1, the id under which my own ratings were appended.
already_rated, recommendations = recommend_book(preds_df, 1, books_df, interactions_df, user_index_map, book_index_map, 20)

Utilizatorul 1 a oferit deja ratinguri pentru 25 de cărți.
Se recomandă 20 de cărți cu cele mai mari ratinguri prezise.


In [37]:
# Display the recommended books.
recommendations

Unnamed: 0,book_id,title,ratings_count,url,image_url,average_rating,authors,publisher
73014,13546173,the assassin and the underworld throne of glas...,17857,https://www.goodreads.com/book/show/13546173-t...,https://images.gr-assets.com/books/1336062984m...,4.37,"[{'author_id': '3433047', 'role': ''}]",Bloomsbury USA Childrens
23920,18243700,the assassins blade throne of glass 0105,57264,https://www.goodreads.com/book/show/18243700-t...,https://images.gr-assets.com/books/1384362444m...,4.48,"[{'author_id': '3433047', 'role': ''}]",Bloomsbury Childrens
41034,13623150,destroy me shatter me 15,47724,https://www.goodreads.com/book/show/13623150-d...,https://images.gr-assets.com/books/1340398466m...,4.27,"[{'author_id': '4637539', 'role': ''}]",Harper
62759,23437156,six of crows six of crows 1,102183,https://www.goodreads.com/book/show/23437156-s...,https://images.gr-assets.com/books/1500689111m...,4.46,"[{'author_id': '4575289', 'role': ''}]",Henry Holt and Company
20274,2767052,the hunger games the hunger games 1,4899965,https://www.goodreads.com/book/show/2767052-th...,https://images.gr-assets.com/books/1447303603m...,4.34,"[{'author_id': '153394', 'role': ''}]",Scholastic Press
68145,10507293,the selection the selection 1,530867,https://www.goodreads.com/book/show/10507293-t...,https://images.gr-assets.com/books/1322103400m...,4.15,"[{'author_id': '2987125', 'role': ''}]",HarperTeen
66090,10194157,shadow and bone the grisha 1,143005,https://www.goodreads.com/book/show/10194157-s...,https://images.gr-assets.com/books/1339533695m...,4.05,"[{'author_id': '4575289', 'role': ''}]",Henry Holt and Company
56042,16248068,the elite the selection 2,242242,https://www.goodreads.com/book/show/16248068-t...,https://images.gr-assets.com/books/1391454595m...,4.02,"[{'author_id': '2987125', 'role': ''}]",HarperTeen
88200,13335037,divergent divergent 1,1962813,https://www.goodreads.com/book/show/13335037-d...,https://images.gr-assets.com/books/1328559506m...,4.23,"[{'author_id': '4039811', 'role': ''}]",Katherine Tegen Books
83238,9275658,legend legend 1,309165,https://www.goodreads.com/book/show/9275658-le...,https://images.gr-assets.com/books/1501368160m...,4.19,"[{'author_id': '4342215', 'role': ''}]",Putnam Juvenile


In [38]:
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate
from collections import defaultdict

from surprise import Dataset, SVD
from surprise.model_selection import KFold

In [39]:
# Reader() is created with its default settings (no explicit rating_scale is passed).
# NOTE(review): the interactions preview earlier in this notebook contains rows with rating 0;
# confirm whether 0 means "no rating given" and, if so, whether those rows should be excluded before training.
reader = Reader()

# Wrap the (user_id, book_id, rating) triples in a Surprise Dataset.
data = Dataset.load_from_df(interactions_df[['user_id','book_id', 'rating']], reader)

In [40]:
# Fix the seed so the randomly initialised factors, and therefore every estimate and
# metric below, are reproducible across kernel restarts.
svd = SVD(random_state=42)

In [41]:
# Compute the RMSE of the SVD algorithm.
# cross_validate(svd, data, measures=['RMSE'], cv = 5, verbose = True)

In [42]:
# cross_validate(svd, data, measures=['mae'], cv = 5, verbose = True)

In [43]:
# Fit on every rating in `data` (no hold-out split); this includes the ratings stored under user_id 1.
trainset = data.build_full_trainset()
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x237ae598490>

In [44]:
# Evaluation plan:
# 1. trainset = data.build_full_trainset()
# 2. svd.fit(trainset)
# 3. testset = data.build_full_testset()
#    testset = some ratings from some users for some book_ids
# 4. Build an array with the actual ratings from the testset.
# 5. Build the same array, but replace the values with svd.predict(user_x, book_id_y).
# 6. Apply the same computation as in the cells below.

In [45]:
# Spot-check: estimated rating of user_id 1 for book_id 7171637.
svd.predict(1, 7171637)

Prediction(uid=1, iid=7171637, r_ui=None, est=4.74894194115784, details={'was_impossible': False})

In [46]:
def evaluate_predictions(svd, my_rate_books_df, metric="svd", user_id=1, threshold=3.5):
    """Compare a model's estimates with known ratings and print summary metrics.

    Prints RMSE, MAE, precision, recall and F1. An item is "relevant" when its
    actual rating is strictly greater than ``threshold`` and "recommended" when
    its estimated rating is strictly greater than ``threshold``. The ``k`` shown
    in the printed lines is the number of relevant items.

    Args:
        svd: Fitted model exposing ``predict(user_id, book_id)`` whose result
            has an ``est`` attribute.
        my_rate_books_df: Frame with ``book_id`` and ``rating`` columns.
        metric: Label inserted into the printed lines.
        user_id: User whose estimates are requested (defaults to 1).
        threshold: Rating above which an item counts as relevant/recommended.

    Returns:
        Dict mapping each ``book_id`` to ``(estimated_rating, actual_rating)``.

    Raises:
        ValueError: If ``my_rate_books_df`` has no rows.
    """
    book_ids = list(my_rate_books_df["book_id"])
    actual_ratings = list(my_rate_books_df["rating"])
    if not book_ids:
        raise ValueError("my_rate_books_df must contain at least one rated book")

    # One predict call per book; the estimate feeds both the metrics and the returned dict.
    estimates = [svd.predict(user_id, book_id).est for book_id in book_ids]
    predictions_to_ret = {
        book_id: (est, rating)
        for book_id, est, rating in zip(book_ids, estimates, actual_ratings)
    }

    # Plain arrays make the reductions explicit; np.mean on a DataFrame reduces
    # along different axes depending on the pandas version.
    target = np.asarray(actual_ratings, dtype=float)
    prediction = np.asarray(estimates, dtype=float)
    errors = prediction - target

    # 1. Compute RMSE
    RS = float(np.sqrt(np.mean(errors ** 2)))
    print("Valoarea RMSE pentru predictia de tip {} este: {}".format(metric, RS))

    # 2. Compute MAE
    mae = float(np.mean(np.abs(errors)))
    print("Valoarea MAE pentru predictia de tip {} este: {}".format(metric, mae))

    # Relevant: actual rating strictly above the threshold.
    relevant = target > threshold
    # Recommended: estimated rating strictly above the threshold.
    recommended = prediction > threshold

    k = int(np.count_nonzero(relevant))
    recommended_count = int(np.count_nonzero(recommended))
    hits = int(np.count_nonzero(relevant & recommended))

    # Each ratio falls back to 0.0 when its denominator is zero instead of raising.
    precision_at_k = hits / recommended_count if recommended_count else 0.0
    print("Precision at k={}, for prediction using metric {}, is: {}".format(k, metric, precision_at_k))

    recall_at_k = hits / k if k else 0.0
    print("Recall at k={} for prediction using metric {}, is: {}".format(k, metric, recall_at_k))

    if precision_at_k + recall_at_k:
        f1_score_at_k = 2 * precision_at_k * recall_at_k / (precision_at_k + recall_at_k)
    else:
        f1_score_at_k = 0.0
    print("F1 Score at k={}, for prediction using metric {}, is: {}".format(k, metric, f1_score_at_k))
    return predictions_to_ret
    

In [47]:
a = evaluate_predictions(svd, my_rate_books_df)

Valoarea RMSE pentru predictia de tip svd este: 0.7168688850255858
Valoarea MAE pentru predictia de tip svd este: 0.6117986274039983
Precision at k=22, for prediction using metric svd, is: 0.9130434782608695
Recall at k=22 for prediction using metric svd, is: 0.9545454545454546
F1 Score at k=22, for prediction using metric svd, is: 0.9333333333333332
