In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
# Colab-only: mount Google Drive at /content/drive so later cells can read files stored under MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd drive/MyDrive/Colab Notebooks/Masters Python Workbooks/Redone Python Workbooks
!pip install scikit-surprise
from surprise.model_selection import GridSearchCV
from surprise import SVD
from collections import defaultdict
from surprise import Dataset
from surprise import Reader
%cd drive/MyDrive/Colab Notebooks/Masters Python Workbooks
books = pd.read_csv("Books.csv")
ratings = pd.read_csv("Ratings.csv")
users = pd.read_csv("Users.csv")
# Every fill below assigns its result back to the column. Calling fillna/replace/interpolate
# with inplace=True on a *selected* column is chained assignment: it only reaches the parent
# frame when pandas hands back a view, and does nothing once Copy-on-Write is enabled.

# Missing authors get a fixed placeholder.
books["Book-Author"] = books["Book-Author"].fillna("Edinburgh Financial Publishing")

# We can replace the publisher name in these cases as well

# The publisher of 'Tyrant Moon' as per Amazon and FantasticFiction.com is 'Mundania Press LLC'
books['Publisher'] = books['Publisher'].fillna("Mundania Press LLC")

# The publisher of 'Finders Keepers' as per GoodReads.com and Amazon is 'Bantam'
# NOTE(review): the fill above already replaced *every* missing Publisher, so this statement
# has nothing left to change. To give 'Finders Keepers' its own publisher, fill by a title/ISBN
# mask before the blanket fill - TODO confirm the intended rows.
books['Publisher'] = books['Publisher'].fillna("Bantam")

# Replacing the 0 aged users with nans so that they can be replaced in the next step
users['Age'] = users['Age'].replace(0, np.nan)

# Linear interpolation of age; forward direction only, so NaNs before the first valid age stay NaN.
users['Age'] = users['Age'].interpolate(method='linear', limit_direction='forward')

# Keep users aged <= 100. The comparison is False for NaN, so rows whose Age is still
# missing after interpolation are dropped here as well.
users = users[users.Age <= 100]
# Outer-join books -> ratings -> users so no row from any source is lost at this stage,
# then discard the cover-image URL columns.
book_rating = pd.merge(books, ratings, on='ISBN', how='outer')
book_rating = pd.merge(book_rating, users, on='User-ID', how='outer')
book_rating = book_rating.drop(['Image-URL-S', 'Image-URL-M', 'Image-URL-L'], axis=1)

# Number of non-null Book-Rating entries per user and per book.
temp = book_rating[['ISBN', 'User-ID', 'Book-Rating']]
counts_user = temp.groupby('User-ID')['Book-Rating'].count().to_frame('rating_users_ct')
counts_book = temp.groupby('ISBN')['Book-Rating'].count().to_frame('rating_books_ct')
book_rating = pd.merge(book_rating, counts_book, on='ISBN', how='left')
book_rating = pd.merge(book_rating, counts_user, on='User-ID', how='left')

# Split "city, state, country" once and keep the first three pieces; any further
# comma-separated parts are ignored, and whitespace around the pieces is left untouched.
location_parts = book_rating['Location'].str.split(",", expand=True)
book_rating['City'] = location_parts[0]
book_rating['State'] = location_parts[1]
book_rating['Country'] = location_parts[2]
book_rating = book_rating.drop('Location', axis=1)
df = book_rating.copy()

# Some rows carry a publisher name in 'Year-Of-Publication' (presumably CSV rows whose
# columns shifted - TODO confirm); drop them so the column can be cast to int below.
index_year = df[(df['Year-Of-Publication'] == 'DK Publishing Inc') | (df['Year-Of-Publication'] == 'Gallimard')].index
df = df.drop(index_year)
df = df.dropna(subset=['ISBN'])
df = df.dropna(subset=['Book-Title'])

# Dropping null values in User-ID as there could be no results if the user ID is not present
df = df.dropna(subset=['User-ID'])

# Drop every row that still has a missing value in ANY column (author's note: the rows
# with a missing location are only about 0.3% of the dataset).
df = df.dropna()
df['Year-Of-Publication'] = df['Year-Of-Publication'].astype(int)

# One of the city names is '&#304;stanbul' which will be replaced with 'Istanbul'.
# Assigned back instead of inplace=True on the selected column, which is chained assignment.
df['City'] = df['City'].replace('&#304;stanbul', 'Istanbul')

# Distinct books whose rating count lies in [568, 2502].
# NOTE(review): the thresholds are hard-coded; their origin is not visible here.
most_popular_books = df[(df['rating_books_ct'] >= 568) & (df['rating_books_ct'] <= 2502.0)]
most_popular_books = most_popular_books[['ISBN', 'Book-Title', 'Book-Author', 'Publisher', 'rating_books_ct']]
most_popular_books = most_popular_books.drop_duplicates()
label_encoder = LabelEncoder()
columns = ['Book-Author','Book-Title','Year-Of-Publication','Publisher','State','Country']

# df1 mirrors df, but every categorical column is replaced by integer label codes.
df1 = df.copy()
# The publication year is encoded as a category, so it is turned into strings first.
df1['Year-Of-Publication'] = df1['Year-Of-Publication'].astype(str)
# The same encoder object is refitted per column; after the loop it is fitted on 'Country'.
encode_order = ['City', 'State', 'Book-Author', 'Book-Title',
                'Year-Of-Publication', 'Publisher', 'Country']
for name in encode_order:
    df1[name] = label_encoder.fit_transform(df1[name])

# Columns fed to the scaler (label codes plus the numeric columns), in this order.
feature_columns = ['Book-Title','Book-Author','Year-Of-Publication','Publisher','Book-Rating',
                   'Age','rating_books_ct','rating_users_ct','City','State','Country']
numerical_df = df1[feature_columns]

# Standardise each column; the result gets a fresh default index and the same column names.
scaler = StandardScaler()
df_scaled = pd.DataFrame(scaler.fit_transform(numerical_df), columns=feature_columns)

# df is the dataset to use from Part A


import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# model with k = 8
# n_init is pinned to 10 (the default of the scikit-learn release this notebook ran on) so
# newer releases, where the default became 'auto', produce the same clustering.
kmeans = KMeans(n_clusters=8, max_iter=50, n_init=10, random_state=0)
kmeans.fit(df_scaled)
df_8 = df.copy()
# assign the label
df_8['cluster_id'] = kmeans.labels_


def _cluster_frame(cluster_id):
    """Return the rows of df_8 in one cluster, with a binary Book-Rating and duplicates removed.

    The subset is copied before it is modified, so the assignment never targets a slice of
    df_8. Book-Rating is mapped exactly as before: values >= 1 become 0, everything else
    becomes 1.
    NOTE(review): this sends rated books to 0 and unrated ones to 1 - confirm that this
    direction is intended.
    drop_duplicates() returns a new frame; its result is returned here (a bare call that
    ignores the result leaves the frame unchanged).
    """
    frame = df_8[df_8['cluster_id'] == cluster_id].copy()
    frame['Book-Rating'] = (~(frame['Book-Rating'] >= 1)).astype('int64')
    return frame.drop_duplicates()


# One frame per cluster. They stay separate global variables because later cells look up
# their names through get_df_name().
# Author's note: a cluster in which every book was rated ends up with Book-Rating all 0,
# which is why its similarity RMSE comes out as NaN.
df_8_0 = _cluster_frame(0)
df_8_1 = _cluster_frame(1)
df_8_2 = _cluster_frame(2)
df_8_3 = _cluster_frame(3)
df_8_4 = _cluster_frame(4)
df_8_5 = _cluster_frame(5)
df_8_6 = _cluster_frame(6)
df_8_7 = _cluster_frame(7)

print("Ready for collaborative filtering!")

/content/drive/MyDrive/Colab Notebooks/Masters Python Workbooks/Redone Python Workbooks
Collecting scikit-surprise
  Downloading scikit-surprise-1.1.1.tar.gz (11.8 MB)
[K     |████████████████████████████████| 11.8 MB 5.1 MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp37-cp37m-linux_x86_64.whl size=1617762 sha256=035cba4c66ab8c9cc5a1559fe921e58b7178398151c1b86d00c74aa10c502f55
  Stored in directory: /root/.cache/pip/wheels/76/44/74/b498c42be47b2406bd27994e16c5188e337c657025ab400c1c
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.1
[Errno 2] No such file or directory: 'drive/MyDrive/Colab Notebooks/Masters Python Workbooks'
/content/drive/MyDrive/Colab Notebooks/Masters Python Workbooks/Redone Python Workbooks
Ready for collaborative filtering!


In [None]:
def get_df_name(df):
    """Return the name of a global variable that refers to ``df``.

    Matching is by identity (``is``), not by equality. When several globals refer to the
    same object, the first one in ``globals()`` order whose name does not start with an
    underscore wins; underscore names are used only when nothing else matches. IPython
    binds recent cell outputs to ``_``, ``__`` and ``_N``, so without this preference a
    frame that was just displayed could be reported as ``_``.

    Args:
        df: the object to look up (any object works, not only DataFrames).

    Returns:
        The variable name as a string.

    Raises:
        IndexError: if no global variable refers to ``df``.
    """
    # Snapshot the items so the namespace cannot change size while it is being scanned.
    matches = [name for name, value in tuple(globals().items()) if value is df]
    if not matches:
        raise IndexError("no global variable refers to the given object")
    public = [name for name in matches if not name.startswith('_')]
    return (public or matches)[0]
# Print the variable name and shape of every cluster frame.
# The collection is called `cluster_frames`, not `list`: rebinding `list` would hide the
# builtin for every later cell in the session.
cluster_frames = [df_8_0,df_8_1,df_8_2,df_8_3,df_8_4,df_8_5,df_8_6,df_8_7]
for i in cluster_frames:
  print(get_df_name(i),": ",i.shape)

df_8_0 :  (184337, 14)
df_8_1 :  (197017, 14)
df_8_2 :  (172700, 14)
df_8_3 :  (242014, 14)
df_8_4 :  (14619, 14)
df_8_5 :  (3789, 14)
df_8_6 :  (179247, 14)
df_8_7 :  (34436, 14)


### Function for user similarity

In [None]:
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.preprocessing import MinMaxScaler
from numpy import *

def user_similarity(df_features, original_df):
  """Print an RMSE for user-based collaborative-filtering scores on one frame.

  Args:
    df_features: rating matrix with one row per User-ID and one column per ISBN, as built
      by the pivot_table(..., fill_value=0) calls in this notebook.
    original_df: the long-format frame the matrix was built from ('User-ID', 'ISBN',
      'Book-Rating' columns). It must be bound to a global variable, because its name is
      looked up with get_df_name() for the printed message.

  Returns:
    None. The result is printed as "The RMSE of <name>: <value>".
  """
  # Centre every row on its own mean.
  row_mean = np.nanmean(df_features, axis=1)
  df_subtracted = (df_features.T - row_mean).T

  # User similarity matrix: cosine similarity of the centred rows; NaN and negative
  # similarities are treated as "no similarity".
  user_correlation = 1 - pairwise_distances(df_subtracted.fillna(0), metric='cosine')
  user_correlation[np.isnan(user_correlation)] = 0
  user_correlation[user_correlation < 0] = 0

  # Similarity-weighted scores, kept only where the input matrix itself is non-zero.
  user_predicted_ratings = np.dot(user_correlation, df_features.fillna(0))
  user_final_rating = np.multiply(user_predicted_ratings, df_features)

  # Non-positive scores become NaN; the remaining ones are rescaled per column to [1, 5].
  X = user_final_rating[user_final_rating > 0]
  y = MinMaxScaler(feature_range=(1, 5)).fit_transform(X)

  # Reference values: same layout as df_features, but NaN where there is no rating row.
  df_ = original_df.pivot_table(index='User-ID', columns='ISBN', values='Book-Rating')

  # Finding total non-NaN value
  total_non_nan = np.count_nonzero(~np.isnan(y))
  # np.nansum is called explicitly: a bare sum(sum(...)) only works on a DataFrame while
  # `from numpy import *` happens to have replaced the builtin `sum`.
  squared_error = np.nansum(((df_ - y) ** 2).to_numpy())
  rmse = (squared_error / total_non_nan) ** 0.5
  print("The RMSE of {}: {}".format(get_df_name(original_df), rmse))

### Breaking df_8_0, df_8_1, df_8_2, df_8_3, df_8_6 and df_8_7 into chunks (df_8_4 and df_8_5 are used whole)

In [None]:
# Split cluster 0 into 8 row-wise chunks with a single np.array_split call.
# Every chunk keeps its own global name because user_similarity() prints that name.
(df_8_0_1, df_8_0_2, df_8_0_3, df_8_0_4,
 df_8_0_5, df_8_0_6, df_8_0_7, df_8_0_8) = np.array_split(df_8_0, 8)

# One User-ID x ISBN rating matrix per chunk; pairs without a rating row are filled with 0.
(df_8_features_0_1, df_8_features_0_2, df_8_features_0_3, df_8_features_0_4,
 df_8_features_0_5, df_8_features_0_6, df_8_features_0_7, df_8_features_0_8) = (
    chunk.pivot_table(index='User-ID', columns='ISBN', values='Book-Rating', fill_value=0)
    for chunk in (df_8_0_1, df_8_0_2, df_8_0_3, df_8_0_4,
                  df_8_0_5, df_8_0_6, df_8_0_7, df_8_0_8)
)

user_similarity(df_8_features_0_1, df_8_0_1)
user_similarity(df_8_features_0_2, df_8_0_2)
user_similarity(df_8_features_0_3, df_8_0_3)
user_similarity(df_8_features_0_4, df_8_0_4)
user_similarity(df_8_features_0_5, df_8_0_5)
user_similarity(df_8_features_0_6, df_8_0_6)
user_similarity(df_8_features_0_7, df_8_0_7)
user_similarity(df_8_features_0_8, df_8_0_8)

The RMSE of df_8_0_1: 1.3903417369936435
The RMSE of df_8_0_2: 1.2826966351697549
The RMSE of df_8_0_3: 1.274620167919725
The RMSE of df_8_0_4: 1.3813766555039064
The RMSE of df_8_0_5: 1.3848030385661698
The RMSE of df_8_0_6: 1.3101272888489446
The RMSE of df_8_0_7: 1.4890799519633944
The RMSE of df_8_0_8: 1.1416165057075442


In [None]:
# df_8_1
# Split cluster 1 into 9 row-wise chunks with a single np.array_split call.
# Every chunk keeps its own global name because user_similarity() prints that name.
(df_8_1_1, df_8_1_2, df_8_1_3, df_8_1_4, df_8_1_5,
 df_8_1_6, df_8_1_7, df_8_1_8, df_8_1_9) = np.array_split(df_8_1, 9)

# One User-ID x ISBN rating matrix per chunk; pairs without a rating row are filled with 0.
(df_8_features_1_1, df_8_features_1_2, df_8_features_1_3, df_8_features_1_4, df_8_features_1_5,
 df_8_features_1_6, df_8_features_1_7, df_8_features_1_8, df_8_features_1_9) = (
    chunk.pivot_table(index='User-ID', columns='ISBN', values='Book-Rating', fill_value=0)
    for chunk in (df_8_1_1, df_8_1_2, df_8_1_3, df_8_1_4, df_8_1_5,
                  df_8_1_6, df_8_1_7, df_8_1_8, df_8_1_9)
)

user_similarity(df_8_features_1_1, df_8_1_1)
user_similarity(df_8_features_1_2, df_8_1_2)
user_similarity(df_8_features_1_3, df_8_1_3)
user_similarity(df_8_features_1_4, df_8_1_4)
user_similarity(df_8_features_1_5, df_8_1_5)
user_similarity(df_8_features_1_6, df_8_1_6)
user_similarity(df_8_features_1_7, df_8_1_7)
user_similarity(df_8_features_1_8, df_8_1_8)
user_similarity(df_8_features_1_9, df_8_1_9)

The RMSE of df_8_1_1: 1.6317267070485357
The RMSE of df_8_1_2: 1.6619548617298827
The RMSE of df_8_1_3: 1.6177843735029438
The RMSE of df_8_1_4: 1.6765153346417463
The RMSE of df_8_1_5: 1.734685059758937
The RMSE of df_8_1_6: 1.7134032424500667
The RMSE of df_8_1_7: 1.7339910742585884
The RMSE of df_8_1_8: 1.7957978870815337
The RMSE of df_8_1_9: 1.7447708186688644


In [None]:
# df_8_2
# Split cluster 2 into 8 row-wise chunks with a single np.array_split call.
# Every chunk keeps its own global name because user_similarity() prints that name.
(df_8_2_1, df_8_2_2, df_8_2_3, df_8_2_4,
 df_8_2_5, df_8_2_6, df_8_2_7, df_8_2_8) = np.array_split(df_8_2, 8)

# One User-ID x ISBN rating matrix per chunk; pairs without a rating row are filled with 0.
(df_8_features_2_1, df_8_features_2_2, df_8_features_2_3, df_8_features_2_4,
 df_8_features_2_5, df_8_features_2_6, df_8_features_2_7, df_8_features_2_8) = (
    chunk.pivot_table(index='User-ID', columns='ISBN', values='Book-Rating', fill_value=0)
    for chunk in (df_8_2_1, df_8_2_2, df_8_2_3, df_8_2_4,
                  df_8_2_5, df_8_2_6, df_8_2_7, df_8_2_8)
)

user_similarity(df_8_features_2_1, df_8_2_1)
user_similarity(df_8_features_2_2, df_8_2_2)
user_similarity(df_8_features_2_3, df_8_2_3)
user_similarity(df_8_features_2_4, df_8_2_4)
user_similarity(df_8_features_2_5, df_8_2_5)
user_similarity(df_8_features_2_6, df_8_2_6)
user_similarity(df_8_features_2_7, df_8_2_7)
user_similarity(df_8_features_2_8, df_8_2_8)

The RMSE of df_8_2_1: 1.800832097261892
The RMSE of df_8_2_2: 1.7079902374458846
The RMSE of df_8_2_3: 1.7018292264416077
The RMSE of df_8_2_4: 1.7119086450922871
The RMSE of df_8_2_5: 1.744765237622467
The RMSE of df_8_2_6: 1.7218083443920218
The RMSE of df_8_2_7: 1.812787517646766
The RMSE of df_8_2_8: 1.8384630955173438


In [None]:
# df_8_3
# Split cluster 3 into 12 row-wise chunks with a single np.array_split call.
# Every chunk keeps its own global name because user_similarity() prints that name.
(df_8_3_1, df_8_3_2, df_8_3_3, df_8_3_4, df_8_3_5, df_8_3_6,
 df_8_3_7, df_8_3_8, df_8_3_9, df_8_3_10, df_8_3_11, df_8_3_12) = np.array_split(df_8_3, 12)

# One User-ID x ISBN rating matrix per chunk; pairs without a rating row are filled with 0.
(df_8_features_3_1, df_8_features_3_2, df_8_features_3_3, df_8_features_3_4,
 df_8_features_3_5, df_8_features_3_6, df_8_features_3_7, df_8_features_3_8,
 df_8_features_3_9, df_8_features_3_10, df_8_features_3_11, df_8_features_3_12) = (
    chunk.pivot_table(index='User-ID', columns='ISBN', values='Book-Rating', fill_value=0)
    for chunk in (df_8_3_1, df_8_3_2, df_8_3_3, df_8_3_4, df_8_3_5, df_8_3_6,
                  df_8_3_7, df_8_3_8, df_8_3_9, df_8_3_10, df_8_3_11, df_8_3_12)
)

user_similarity(df_8_features_3_1, df_8_3_1)
user_similarity(df_8_features_3_2, df_8_3_2)
user_similarity(df_8_features_3_3, df_8_3_3)
user_similarity(df_8_features_3_4, df_8_3_4)
user_similarity(df_8_features_3_5, df_8_3_5)
user_similarity(df_8_features_3_6, df_8_3_6)
user_similarity(df_8_features_3_7, df_8_3_7)
user_similarity(df_8_features_3_8, df_8_3_8)
user_similarity(df_8_features_3_9, df_8_3_9)
user_similarity(df_8_features_3_10, df_8_3_10)
user_similarity(df_8_features_3_11, df_8_3_11)
user_similarity(df_8_features_3_12, df_8_3_12)

The RMSE of df_8_3_1: nan
The RMSE of df_8_3_2: nan
The RMSE of df_8_3_3: nan
The RMSE of df_8_3_4: nan
The RMSE of df_8_3_5: nan
The RMSE of df_8_3_6: nan
The RMSE of df_8_3_7: nan
The RMSE of df_8_3_8: nan
The RMSE of df_8_3_9: nan
The RMSE of df_8_3_10: nan
The RMSE of df_8_3_11: nan
The RMSE of df_8_3_12: nan


In [None]:
# Clusters 4 and 5 are evaluated whole (no chunking).
# Shared pivot arguments: User-ID rows, ISBN columns, missing pairs filled with 0.
pivot_spec = dict(index='User-ID', columns='ISBN', values='Book-Rating', fill_value=0)

df_8_features_4 = pd.pivot_table(df_8_4, **pivot_spec)
user_similarity(df_8_features_4, df_8_4)

df_8_features_5 = pd.pivot_table(df_8_5, **pivot_spec)
user_similarity(df_8_features_5, df_8_5)

The RMSE of df_8_4: 2.3191155376917765
The RMSE of df_8_5: 0.0


In [None]:
# df_8_7
# Split cluster 7 into 6 row-wise chunks with a single np.array_split call and keep the first 3.
# NOTE(review): chunks 4-6 are never evaluated, so only the first half of this cluster is
# scored - confirm whether that is intentional.
df_8_7_1, df_8_7_2, df_8_7_3 = np.array_split(df_8_7, 6)[:3]

# One User-ID x ISBN rating matrix per chunk; pairs without a rating row are filled with 0.
df_8_features_7_1, df_8_features_7_2, df_8_features_7_3 = (
    chunk.pivot_table(index='User-ID', columns='ISBN', values='Book-Rating', fill_value=0)
    for chunk in (df_8_7_1, df_8_7_2, df_8_7_3)
)

user_similarity(df_8_features_7_1, df_8_7_1)
user_similarity(df_8_features_7_2, df_8_7_2)
user_similarity(df_8_features_7_3, df_8_7_3)

The RMSE of df_8_7_1: 0.0
The RMSE of df_8_7_2: 0.0
The RMSE of df_8_7_3: 0.4902690161969607


In [None]:
# df_8_6
# Split cluster 6 into 9 row-wise chunks with a single np.array_split call.
# Every chunk keeps its own global name because user_similarity() prints that name.
(df_8_6_1, df_8_6_2, df_8_6_3, df_8_6_4, df_8_6_5,
 df_8_6_6, df_8_6_7, df_8_6_8, df_8_6_9) = np.array_split(df_8_6, 9)

# One User-ID x ISBN rating matrix per chunk; pairs without a rating row are filled with 0.
(df_8_features_6_1, df_8_features_6_2, df_8_features_6_3, df_8_features_6_4, df_8_features_6_5,
 df_8_features_6_6, df_8_features_6_7, df_8_features_6_8, df_8_features_6_9) = (
    chunk.pivot_table(index='User-ID', columns='ISBN', values='Book-Rating', fill_value=0)
    for chunk in (df_8_6_1, df_8_6_2, df_8_6_3, df_8_6_4, df_8_6_5,
                  df_8_6_6, df_8_6_7, df_8_6_8, df_8_6_9)
)

user_similarity(df_8_features_6_1, df_8_6_1)
user_similarity(df_8_features_6_2, df_8_6_2)
user_similarity(df_8_features_6_3, df_8_6_3)
user_similarity(df_8_features_6_4, df_8_6_4)
user_similarity(df_8_features_6_5, df_8_6_5)
user_similarity(df_8_features_6_6, df_8_6_6)
user_similarity(df_8_features_6_7, df_8_6_7)
user_similarity(df_8_features_6_8, df_8_6_8)
user_similarity(df_8_features_6_9, df_8_6_9)

The RMSE of df_8_6_1: 1.852064213485157
The RMSE of df_8_6_2: 1.814177557040058
The RMSE of df_8_6_3: 1.723642640093394
The RMSE of df_8_6_4: 1.7857262974366728
The RMSE of df_8_6_5: 1.8055102453713128
The RMSE of df_8_6_6: 1.8171730986985184
The RMSE of df_8_6_7: 1.7442587219429915
The RMSE of df_8_6_8: 1.8943253270901526
The RMSE of df_8_6_9: 1.8700758513049036


### Item Similarity

In [None]:
def item_similarity(df_features, original_df):
  """Print an RMSE for item-based collaborative-filtering scores on one frame.

  Args:
    df_features: rating matrix with one row per ISBN and one column per User-ID, i.e. the
      transpose of the User-ID x ISBN pivot (callers in this notebook pass `pivot.T`).
    original_df: the long-format frame the matrix was built from ('User-ID', 'ISBN',
      'Book-Rating' columns). It must be bound to a global variable, because its name is
      looked up with get_df_name() for the printed message.

  Returns:
    None. The result is printed as "<name> (Item) RMSE: <value>".
  """
  # Centre every row (item) on its own mean.
  row_mean = np.nanmean(df_features, axis=1)
  df_subtracted = (df_features.T - row_mean).T

  # Item similarity matrix: cosine similarity of the centred rows; NaN and negative
  # similarities are treated as "no similarity".
  item_correlation = 1 - pairwise_distances(df_subtracted.fillna(0), metric='cosine')
  item_correlation[np.isnan(item_correlation)] = 0
  item_correlation[item_correlation < 0] = 0

  # Transpose back to User-ID x ISBN and weight by item similarity.
  item_predicted_ratings = np.dot((df_features.fillna(0).T), item_correlation)
  # Keep scores only where the User-ID x ISBN rating matrix itself is non-zero.
  dummy_df = original_df.pivot_table(index='User-ID', columns='ISBN', values='Book-Rating').fillna(0)
  item_final_rating = np.multiply(item_predicted_ratings, dummy_df)

  # Non-positive scores become NaN; the remaining ones are rescaled per column to [1, 5].
  X = item_final_rating[item_final_rating > 0]
  y = MinMaxScaler(feature_range=(1, 5)).fit_transform(X)

  # Reference values: User-ID x ISBN, NaN where there is no rating row.
  df_ = original_df.pivot_table(index='User-ID', columns='ISBN', values='Book-Rating')

  # Finding total non-NaN value
  total_non_nan = np.count_nonzero(~np.isnan(y))
  # np.nansum is called explicitly: a bare sum(sum(...)) only works on a DataFrame while
  # `from numpy import *` happens to have replaced the builtin `sum`.
  squared_error = np.nansum(((df_ - y) ** 2).to_numpy())
  rmse = (squared_error / total_non_nan) ** 0.5
  print("{} (Item) RMSE: {}".format(get_df_name(original_df), rmse))

In [None]:
# df_8_0
# Split cluster 0 into 8 row-wise chunks with a single np.array_split call.
# Every chunk keeps its own global name because item_similarity() prints that name.
(df_8_0_1, df_8_0_2, df_8_0_3, df_8_0_4,
 df_8_0_5, df_8_0_6, df_8_0_7, df_8_0_8) = np.array_split(df_8_0, 8)

# ISBN x User-ID matrix per chunk (the transposed pivot), as item_similarity() expects.
(df_8_features_0_1, df_8_features_0_2, df_8_features_0_3, df_8_features_0_4,
 df_8_features_0_5, df_8_features_0_6, df_8_features_0_7, df_8_features_0_8) = (
    chunk.pivot_table(index='User-ID', columns='ISBN', values='Book-Rating', fill_value=0).T
    for chunk in (df_8_0_1, df_8_0_2, df_8_0_3, df_8_0_4,
                  df_8_0_5, df_8_0_6, df_8_0_7, df_8_0_8)
)

item_similarity(df_8_features_0_1, df_8_0_1)
item_similarity(df_8_features_0_2, df_8_0_2)
item_similarity(df_8_features_0_3, df_8_0_3)
item_similarity(df_8_features_0_4, df_8_0_4)
item_similarity(df_8_features_0_5, df_8_0_5)
item_similarity(df_8_features_0_6, df_8_0_6)
item_similarity(df_8_features_0_7, df_8_0_7)
item_similarity(df_8_features_0_8, df_8_0_8)

df_8_0_1 (Item) RMSE: 1.4266060361809145
df_8_0_2 (Item) RMSE: 1.284965134119493
df_8_0_3 (Item) RMSE: 1.3009457064253322
df_8_0_4 (Item) RMSE: 1.4498210983818949
df_8_0_5 (Item) RMSE: 1.4177250644005575
df_8_0_6 (Item) RMSE: 1.3353657132081092
df_8_0_7 (Item) RMSE: 1.3632771571034557


In [None]:
# df_8_1
# The chunks and their transposed matrices are built in this cell. item_similarity() needs
# ISBN x User-ID input, so it must not reuse the untransposed df_8_features_1_* matrices
# left behind by the user-similarity cell.
(df_8_1_1, df_8_1_2, df_8_1_3, df_8_1_4, df_8_1_5,
 df_8_1_6, df_8_1_7, df_8_1_8, df_8_1_9) = np.array_split(df_8_1, 9)

# ISBN x User-ID matrix per chunk (the transposed pivot).
(df_8_features_1_1, df_8_features_1_2, df_8_features_1_3, df_8_features_1_4, df_8_features_1_5,
 df_8_features_1_6, df_8_features_1_7, df_8_features_1_8, df_8_features_1_9) = (
    chunk.pivot_table(index='User-ID', columns='ISBN', values='Book-Rating', fill_value=0).T
    for chunk in (df_8_1_1, df_8_1_2, df_8_1_3, df_8_1_4, df_8_1_5,
                  df_8_1_6, df_8_1_7, df_8_1_8, df_8_1_9)
)

item_similarity(df_8_features_1_1, df_8_1_1)
item_similarity(df_8_features_1_2, df_8_1_2)
item_similarity(df_8_features_1_3, df_8_1_3)
item_similarity(df_8_features_1_4, df_8_1_4)
item_similarity(df_8_features_1_5, df_8_1_5)
item_similarity(df_8_features_1_6, df_8_1_6)
item_similarity(df_8_features_1_7, df_8_1_7)
item_similarity(df_8_features_1_8, df_8_1_8)
item_similarity(df_8_features_1_9, df_8_1_9)

df_8_1_1 (Item) RMSE: 1.7365637800416691
df_8_1_2 (Item) RMSE: 1.6796470800788021
df_8_1_3 (Item) RMSE: 1.6144406871475379
df_8_1_4 (Item) RMSE: 1.6654928778589029
df_8_1_5 (Item) RMSE: 1.669254633407995
df_8_1_6 (Item) RMSE: 1.7079826948432653
df_8_1_7 (Item) RMSE: 1.6227879668172467
df_8_1_8 (Item) RMSE: 1.5569101007727197
df_8_1_9 (Item) RMSE: 1.3963636698894986


In [None]:
# df_8_2
# Split cluster 2 into 8 row-wise chunks with a single np.array_split call.
# Every chunk keeps its own global name because item_similarity() prints that name.
(df_8_2_1, df_8_2_2, df_8_2_3, df_8_2_4,
 df_8_2_5, df_8_2_6, df_8_2_7, df_8_2_8) = np.array_split(df_8_2, 8)

# ISBN x User-ID matrix per chunk (the transposed pivot), as item_similarity() expects.
(df_8_features_2_1, df_8_features_2_2, df_8_features_2_3, df_8_features_2_4,
 df_8_features_2_5, df_8_features_2_6, df_8_features_2_7, df_8_features_2_8) = (
    chunk.pivot_table(index='User-ID', columns='ISBN', values='Book-Rating', fill_value=0).T
    for chunk in (df_8_2_1, df_8_2_2, df_8_2_3, df_8_2_4,
                  df_8_2_5, df_8_2_6, df_8_2_7, df_8_2_8)
)

item_similarity(df_8_features_2_1, df_8_2_1)
item_similarity(df_8_features_2_2, df_8_2_2)
item_similarity(df_8_features_2_3, df_8_2_3)
item_similarity(df_8_features_2_4, df_8_2_4)
item_similarity(df_8_features_2_5, df_8_2_5)
item_similarity(df_8_features_2_6, df_8_2_6)
item_similarity(df_8_features_2_7, df_8_2_7)
item_similarity(df_8_features_2_8, df_8_2_8)

df_8_2_1 (Item) RMSE: 1.7580028418374625
df_8_2_2 (Item) RMSE: 1.673693354484108
df_8_2_3 (Item) RMSE: 1.6992836488551442
df_8_2_4 (Item) RMSE: 1.6895605301362826
df_8_2_5 (Item) RMSE: 1.6977817002162043
df_8_2_6 (Item) RMSE: 1.650787637751714
df_8_2_7 (Item) RMSE: 1.590052555225532
df_8_2_8 (Item) RMSE: 1.4405576265664495


In [None]:
# df_8_3
# Split cluster 3 into 12 row-wise chunks with a single np.array_split call.
# Every chunk keeps its own global name because item_similarity() prints that name.
(df_8_3_1, df_8_3_2, df_8_3_3, df_8_3_4, df_8_3_5, df_8_3_6,
 df_8_3_7, df_8_3_8, df_8_3_9, df_8_3_10, df_8_3_11, df_8_3_12) = np.array_split(df_8_3, 12)

# ISBN x User-ID matrix per chunk (the transposed pivot), as item_similarity() expects.
(df_8_features_3_1, df_8_features_3_2, df_8_features_3_3, df_8_features_3_4,
 df_8_features_3_5, df_8_features_3_6, df_8_features_3_7, df_8_features_3_8,
 df_8_features_3_9, df_8_features_3_10, df_8_features_3_11, df_8_features_3_12) = (
    chunk.pivot_table(index='User-ID', columns='ISBN', values='Book-Rating', fill_value=0).T
    for chunk in (df_8_3_1, df_8_3_2, df_8_3_3, df_8_3_4, df_8_3_5, df_8_3_6,
                  df_8_3_7, df_8_3_8, df_8_3_9, df_8_3_10, df_8_3_11, df_8_3_12)
)

item_similarity(df_8_features_3_1, df_8_3_1)
item_similarity(df_8_features_3_2, df_8_3_2)
item_similarity(df_8_features_3_3, df_8_3_3)
item_similarity(df_8_features_3_4, df_8_3_4)
item_similarity(df_8_features_3_5, df_8_3_5)
item_similarity(df_8_features_3_6, df_8_3_6)
item_similarity(df_8_features_3_7, df_8_3_7)
item_similarity(df_8_features_3_8, df_8_3_8)
item_similarity(df_8_features_3_9, df_8_3_9)
item_similarity(df_8_features_3_10, df_8_3_10)
item_similarity(df_8_features_3_11, df_8_3_11)
item_similarity(df_8_features_3_12, df_8_3_12)

df_8_3_1 (Item) RMSE: nan
df_8_3_2 (Item) RMSE: nan
df_8_3_3 (Item) RMSE: nan
df_8_3_4 (Item) RMSE: nan
df_8_3_5 (Item) RMSE: nan
df_8_3_6 (Item) RMSE: nan
df_8_3_7 (Item) RMSE: nan
df_8_3_8 (Item) RMSE: nan
df_8_3_9 (Item) RMSE: nan
df_8_3_10 (Item) RMSE: nan
df_8_3_11 (Item) RMSE: nan


In [None]:
# Cluster 4 is evaluated whole: build the User-ID x ISBN pivot (missing pairs -> 0) and
# transpose it to ISBN x User-ID before scoring.
df_8_features_4 = pd.pivot_table(
    df_8_4, index='User-ID', columns='ISBN', values='Book-Rating', fill_value=0
).transpose()
item_similarity(df_8_features_4, df_8_4)

df_8_4 (Item) RMSE: 1.782038973902171


In [None]:
# Cluster 5 is evaluated whole: build the User-ID x ISBN pivot (missing pairs -> 0) and
# transpose it to ISBN x User-ID before scoring.
df_8_features_5 = pd.pivot_table(
    df_8_5, index='User-ID', columns='ISBN', values='Book-Rating', fill_value=0
).transpose()
item_similarity(df_8_features_5, df_8_5)

df_8_5 (Item) RMSE: 0.0


In [None]:
# df_8_7
# Split cluster 7 into 6 row-wise chunks with a single np.array_split call and keep the first 3.
# NOTE(review): chunks 4-6 are never evaluated, so only the first half of this cluster is
# scored - confirm whether that is intentional.
df_8_7_1, df_8_7_2, df_8_7_3 = np.array_split(df_8_7, 6)[:3]

# ISBN x User-ID matrix per chunk (the transposed pivot), as item_similarity() expects.
df_8_features_7_1, df_8_features_7_2, df_8_features_7_3 = (
    chunk.pivot_table(index='User-ID', columns='ISBN', values='Book-Rating', fill_value=0).T
    for chunk in (df_8_7_1, df_8_7_2, df_8_7_3)
)

item_similarity(df_8_features_7_1, df_8_7_1)
item_similarity(df_8_features_7_2, df_8_7_2)
item_similarity(df_8_features_7_3, df_8_7_3)

df_8_7_1 (Item) RMSE: nan
df_8_7_2 (Item) RMSE: 0.0
df_8_7_3 (Item) RMSE: 0.0


In [None]:
# df_8_6
# Split cluster 6 into 9 row-wise chunks with a single np.array_split call.
# Every chunk keeps its own global name because item_similarity() prints that name.
(df_8_6_1, df_8_6_2, df_8_6_3, df_8_6_4, df_8_6_5,
 df_8_6_6, df_8_6_7, df_8_6_8, df_8_6_9) = np.array_split(df_8_6, 9)

# ISBN x User-ID matrix per chunk (the transposed pivot), as item_similarity() expects.
(df_8_features_6_1, df_8_features_6_2, df_8_features_6_3, df_8_features_6_4, df_8_features_6_5,
 df_8_features_6_6, df_8_features_6_7, df_8_features_6_8, df_8_features_6_9) = (
    chunk.pivot_table(index='User-ID', columns='ISBN', values='Book-Rating', fill_value=0).T
    for chunk in (df_8_6_1, df_8_6_2, df_8_6_3, df_8_6_4, df_8_6_5,
                  df_8_6_6, df_8_6_7, df_8_6_8, df_8_6_9)
)

item_similarity(df_8_features_6_1, df_8_6_1)
item_similarity(df_8_features_6_2, df_8_6_2)
item_similarity(df_8_features_6_3, df_8_6_3)
item_similarity(df_8_features_6_4, df_8_6_4)
item_similarity(df_8_features_6_5, df_8_6_5)
item_similarity(df_8_features_6_6, df_8_6_6)
item_similarity(df_8_features_6_7, df_8_6_7)
item_similarity(df_8_features_6_8, df_8_6_8)
item_similarity(df_8_features_6_9, df_8_6_9)

df_8_6_1 (Item) RMSE: 1.7621188747987002
df_8_6_2 (Item) RMSE: 1.7316403582269422
df_8_6_3 (Item) RMSE: 1.7011068126385376
df_8_6_4 (Item) RMSE: 1.784821358422622
df_8_6_5 (Item) RMSE: 1.6743870475765539
df_8_6_6 (Item) RMSE: 1.739047804367934
df_8_6_7 (Item) RMSE: 1.6871796520294329
df_8_6_8 (Item) RMSE: 1.6089326889475632
df_8_6_9 (Item) RMSE: 1.4376458046772116


### SVD + Cross validation

In [None]:
# Print the variable name and shape of every cluster frame before the SVD runs.
# The collection is called `cluster_frames`, not `list`: rebinding `list` would hide the
# builtin for every later cell in the session.
cluster_frames = [df_8_0,df_8_1,df_8_2,df_8_3,df_8_4,df_8_5,df_8_6,df_8_7]
for i in cluster_frames:
  print(get_df_name(i),": ",i.shape)

df_8_0 :  (184337, 14)
df_8_1 :  (197017, 14)
df_8_2 :  (172700, 14)
df_8_3 :  (242014, 14)
df_8_4 :  (14619, 14)
df_8_5 :  (3789, 14)
df_8_6 :  (179247, 14)
df_8_7 :  (34436, 14)


In [None]:
# Book-Rating in the cluster frames was binarised to 0/1 earlier in this notebook, so the
# Reader must declare that range. Surprise clips every prediction to rating_scale; with a
# scale starting at 1 no prediction could ever equal a true rating of 0.
reader = Reader(rating_scale=(0, 1))
data = Dataset.load_from_df(df_8_0[["User-ID", "ISBN", "Book-Rating"]], reader)
from surprise.model_selection import GridSearchCV

# 5 x 5 x 4 = 100 hyper-parameter combinations, each scored by 3-fold cross-validated RMSE.
param_grid = {
    "n_epochs": [1, 5, 10, 15, 20],
    "lr_all": [0.002, 0.005, 0.009, 0.01, 0.05],
    "reg_all": [0.2, 0.4, 0.6, 1]
}
gs = GridSearchCV(SVD, param_grid, measures=["rmse"], cv=3)

gs.fit(data)

# Best (lowest) mean RMSE over the grid.
print(gs.best_score["rmse"])

0.636167644443412


In [None]:
# Grid-search SVD hyper-parameters on df_8_1 and report the best
# cross-validated RMSE. GridSearchCV, SVD, Dataset and Reader all come from the
# notebook's setup cell, so the per-cell re-import has been dropped.

# Surprise clips every prediction into the Reader's rating_scale, so the scale
# must cover the ratings actually present. A hard-coded lower bound of 1 forces
# a floor error onto any rating below 1 (e.g. 0-valued ratings). The bound is
# therefore widened only when the data needs it; frames whose ratings are all
# >= 1 keep the original (1, 10) scale.
lowest_rating = min(1, df_8_1["Book-Rating"].min())
reader = Reader(rating_scale=(lowest_rating, 10))
data = Dataset.load_from_df(df_8_1[["User-ID", "ISBN", "Book-Rating"]], reader)

# 5 x 5 x 4 = 100 parameter combinations, each evaluated with 3-fold CV.
param_grid = {
    "n_epochs": [1, 5, 10, 15, 20],
    "lr_all": [0.002, 0.005, 0.009, 0.01, 0.05],
    "reg_all": [0.2, 0.4, 0.6, 1],
}
gs = GridSearchCV(SVD, param_grid, measures=["rmse"], cv=3)
gs.fit(data)

# Best (lowest) mean RMSE over the folds across the whole grid.
print(gs.best_score["rmse"])

0.26722810121204543


In [None]:
# Grid-search SVD hyper-parameters on df_8_2 and report the best
# cross-validated RMSE. GridSearchCV, SVD, Dataset and Reader all come from the
# notebook's setup cell, so the per-cell re-import has been dropped.

# Surprise clips every prediction into the Reader's rating_scale, so the scale
# must cover the ratings actually present. A hard-coded lower bound of 1 forces
# a floor error onto any rating below 1 (e.g. 0-valued ratings). The bound is
# therefore widened only when the data needs it; frames whose ratings are all
# >= 1 keep the original (1, 10) scale.
lowest_rating = min(1, df_8_2["Book-Rating"].min())
reader = Reader(rating_scale=(lowest_rating, 10))
data = Dataset.load_from_df(df_8_2[["User-ID", "ISBN", "Book-Rating"]], reader)

# 5 x 5 x 4 = 100 parameter combinations, each evaluated with 3-fold CV.
param_grid = {
    "n_epochs": [1, 5, 10, 15, 20],
    "lr_all": [0.002, 0.005, 0.009, 0.01, 0.05],
    "reg_all": [0.2, 0.4, 0.6, 1],
}
gs = GridSearchCV(SVD, param_grid, measures=["rmse"], cv=3)
gs.fit(data)

# Best (lowest) mean RMSE over the folds across the whole grid.
print(gs.best_score["rmse"])

0.2953232554476546


In [None]:
# Grid-search SVD hyper-parameters on df_8_3 and report the best
# cross-validated RMSE. GridSearchCV, SVD, Dataset and Reader all come from the
# notebook's setup cell, so the per-cell re-import has been dropped.

# Surprise clips every prediction into the Reader's rating_scale, so the scale
# must cover the ratings actually present. With the previous hard-coded (1, 10)
# scale this cell recorded a best RMSE of exactly 1.0, which is what clipping
# produces when the true ratings are 0 and every prediction is floored at 1.
# NOTE(review): that reading of the 1.0 is inferred from the output, not from
# inspecting df_8_3 - confirm with df_8_3["Book-Rating"].value_counts().
# The lower bound is widened only when the data needs it; a frame whose ratings
# are all >= 1 keeps the original (1, 10) scale.
lowest_rating = min(1, df_8_3["Book-Rating"].min())
reader = Reader(rating_scale=(lowest_rating, 10))
data = Dataset.load_from_df(df_8_3[["User-ID", "ISBN", "Book-Rating"]], reader)

# 5 x 5 x 4 = 100 parameter combinations, each evaluated with 3-fold CV.
param_grid = {
    "n_epochs": [1, 5, 10, 15, 20],
    "lr_all": [0.002, 0.005, 0.009, 0.01, 0.05],
    "reg_all": [0.2, 0.4, 0.6, 1],
}
gs = GridSearchCV(SVD, param_grid, measures=["rmse"], cv=3)
gs.fit(data)

# Best (lowest) mean RMSE over the folds across the whole grid.
print(gs.best_score["rmse"])

1.0


In [None]:
# Grid-search SVD hyper-parameters on df_8_4 and report the best
# cross-validated RMSE. GridSearchCV, SVD, Dataset and Reader all come from the
# notebook's setup cell, so the per-cell re-import has been dropped.

# Surprise clips every prediction into the Reader's rating_scale, so the scale
# must cover the ratings actually present. A hard-coded lower bound of 1 forces
# a floor error onto any rating below 1 (e.g. 0-valued ratings). The bound is
# therefore widened only when the data needs it; frames whose ratings are all
# >= 1 keep the original (1, 10) scale.
lowest_rating = min(1, df_8_4["Book-Rating"].min())
reader = Reader(rating_scale=(lowest_rating, 10))
data = Dataset.load_from_df(df_8_4[["User-ID", "ISBN", "Book-Rating"]], reader)

# 5 x 5 x 4 = 100 parameter combinations, each evaluated with 3-fold CV.
param_grid = {
    "n_epochs": [1, 5, 10, 15, 20],
    "lr_all": [0.002, 0.005, 0.009, 0.01, 0.05],
    "reg_all": [0.2, 0.4, 0.6, 1],
}
gs = GridSearchCV(SVD, param_grid, measures=["rmse"], cv=3)
gs.fit(data)

# Best (lowest) mean RMSE over the folds across the whole grid.
print(gs.best_score["rmse"])

0.6424460338480628


In [None]:
# Grid-search SVD hyper-parameters on df_8_5 and report the best
# cross-validated RMSE. GridSearchCV, SVD, Dataset and Reader all come from the
# notebook's setup cell, so the per-cell re-import has been dropped.

# Surprise clips every prediction into the Reader's rating_scale, so the scale
# must cover the ratings actually present. A hard-coded lower bound of 1 forces
# a floor error onto any rating below 1 (e.g. 0-valued ratings). The bound is
# therefore widened only when the data needs it; frames whose ratings are all
# >= 1 keep the original (1, 10) scale.
lowest_rating = min(1, df_8_5["Book-Rating"].min())
reader = Reader(rating_scale=(lowest_rating, 10))
data = Dataset.load_from_df(df_8_5[["User-ID", "ISBN", "Book-Rating"]], reader)

# 5 x 5 x 4 = 100 parameter combinations, each evaluated with 3-fold CV.
param_grid = {
    "n_epochs": [1, 5, 10, 15, 20],
    "lr_all": [0.002, 0.005, 0.009, 0.01, 0.05],
    "reg_all": [0.2, 0.4, 0.6, 1],
}
gs = GridSearchCV(SVD, param_grid, measures=["rmse"], cv=3)
gs.fit(data)

# Best (lowest) mean RMSE over the folds across the whole grid.
print(gs.best_score["rmse"])

0.5821109379650294


In [None]:
# Grid-search SVD hyper-parameters on df_8_6 and report the best
# cross-validated RMSE. GridSearchCV, SVD, Dataset and Reader all come from the
# notebook's setup cell, so the per-cell re-import has been dropped.

# Surprise clips every prediction into the Reader's rating_scale, so the scale
# must cover the ratings actually present. A hard-coded lower bound of 1 forces
# a floor error onto any rating below 1 (e.g. 0-valued ratings). The bound is
# therefore widened only when the data needs it; frames whose ratings are all
# >= 1 keep the original (1, 10) scale.
lowest_rating = min(1, df_8_6["Book-Rating"].min())
reader = Reader(rating_scale=(lowest_rating, 10))
data = Dataset.load_from_df(df_8_6[["User-ID", "ISBN", "Book-Rating"]], reader)

# 5 x 5 x 4 = 100 parameter combinations, each evaluated with 3-fold CV.
param_grid = {
    "n_epochs": [1, 5, 10, 15, 20],
    "lr_all": [0.002, 0.005, 0.009, 0.01, 0.05],
    "reg_all": [0.2, 0.4, 0.6, 1],
}
gs = GridSearchCV(SVD, param_grid, measures=["rmse"], cv=3)
gs.fit(data)

# Best (lowest) mean RMSE over the folds across the whole grid.
print(gs.best_score["rmse"])

0.28864342170660756


In [None]:
# Grid-search SVD hyper-parameters on df_8_7 and report the best
# cross-validated RMSE. GridSearchCV, SVD, Dataset and Reader all come from the
# notebook's setup cell, so the per-cell re-import has been dropped.

# Surprise clips every prediction into the Reader's rating_scale, so the scale
# must cover the ratings actually present. A hard-coded lower bound of 1 forces
# a floor error onto any rating below 1 (e.g. 0-valued ratings). The bound is
# therefore widened only when the data needs it; frames whose ratings are all
# >= 1 keep the original (1, 10) scale.
lowest_rating = min(1, df_8_7["Book-Rating"].min())
reader = Reader(rating_scale=(lowest_rating, 10))
data = Dataset.load_from_df(df_8_7[["User-ID", "ISBN", "Book-Rating"]], reader)

# 5 x 5 x 4 = 100 parameter combinations, each evaluated with 3-fold CV.
param_grid = {
    "n_epochs": [1, 5, 10, 15, 20],
    "lr_all": [0.002, 0.005, 0.009, 0.01, 0.05],
    "reg_all": [0.2, 0.4, 0.6, 1],
}
gs = GridSearchCV(SVD, param_grid, measures=["rmse"], cv=3)
gs.fit(data)

# Best (lowest) mean RMSE over the folds across the whole grid.
print(gs.best_score["rmse"])

0.661831499518203
