In [1]:
import pandas as pd # data processing
import numpy as np # linear algebra
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the datasets
bk = pd.read_csv("/content/Books.csv", sep=",",error_bad_lines=False)
rt = pd.read_csv("/content/Ratings.csv")
user = pd.read_csv("/content/Users.csv")

In [3]:
# Merge the datasets based on common columns
merged_df = bk.merge(rt, on='ISBN', how='inner')
merged_df = merged_df.merge(user, on='User-ID', how='inner')

In [4]:
columns_to_drop = ['Image-URL-M', 'Image-URL-L', 'Location', 'Age']
merged_df = merged_df.drop(columns=columns_to_drop)

In [5]:
pip install scikit-surprise

Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/772.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.4/772.0 kB[0m [31m4.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3156234 sha256=ef0f4143411de220fcd7be832b576fc2a66e611100b5e6c1e6c4bcb747f142da
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-

In [6]:
print(merged_df.head())

         ISBN                                         Book-Title  \
0  0195153448                                Classical Mythology   
1  0002005018                                       Clara Callan   
2  0060973129                               Decision in Normandy   
3  0374157065  Flu: The Story of the Great Influenza Pandemic...   
4  0393045218                             The Mummies of Urumchi   

            Book-Author  Year-Of-Publication                   Publisher  \
0    Mark P. O. Morford                 2002     Oxford University Press   
1  Richard Bruce Wright                 2001       HarperFlamingo Canada   
2          Carlo D'Este                 1991             HarperPerennial   
3      Gina Bari Kolata                 1999        Farrar Straus Giroux   
4       E. J. W. Barber                 1999  W. W. Norton &amp; Company   

                                         Image-URL-S  User-ID  Book-Rating  
0  http://images.amazon.com/images/P/0195153448.0...     

In [7]:
# Calculate popularity scores for each book (e.g., based on average ratings or number of ratings)
popularity_scores = merged_df.groupby('User-ID')['Book-Rating'].mean().reset_index()
popularity_scores.columns = ['User-ID', 'popularity_score']

In [8]:
# Sort books by popularity score in descending order
popularity_scores = popularity_scores.sort_values(by='popularity_score', ascending=False)

In [9]:
# Top-N recommendation evaluation using your entire dataset
N = 10  # Number of top recommendations to evaluate

In [10]:
# Calculate precision and recall for the top-N recommended books
def evaluate_popular_filtering(top_recommendations, data):
    relevant_books = set(data['User-ID'])
    recommended_books = set(top_recommendations['User-ID'][:N])

    # Calculate precision and recall
    precision = len(recommended_books.intersection(relevant_books)) / N
    recall = len(recommended_books.intersection(relevant_books)) / len(relevant_books)

    return precision, recall

precision, recall = evaluate_popular_filtering(popularity_scores, merged_df)
print(f"Precision: {precision}, Recall: {recall}")

Precision: 1.0, Recall: 0.00011690300557627337


Collaborative Filtering evaluation Recommender System

In [11]:
import pandas as pd
from surprise import Dataset, Reader
from surprise.model_selection import train_test_split
from surprise import SVD  # Example collaborative filtering algorithm

In [12]:
# Create a Reader object specifying the rating scale
reader = Reader(rating_scale=(1, 5))

In [18]:
merged_df.head(2)

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,User-ID,Book-Rating
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,2,0
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,8,5


In [20]:
# Load the Pandas DataFrame into a Surprise Dataset
data = Dataset.load_from_df(merged_df[['User-ID', 'ISBN', 'Book-Rating']], reader)


In [21]:
# Split the data into train and test sets
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

In [22]:
# Build the full training set (convert DatasetAutoFolds to Dataset)
trainset = data.build_full_trainset()

In [23]:
# Train a collaborative filtering model (SVD in this case)
model = SVD()
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7fbc67fd1870>

In [24]:
# Make predictions on the test set
predictions = model.test(testset)

In [25]:
# Calculate RMSE (Root Mean Square Error) as a measure of prediction accuracy
from surprise import accuracy
rmse = accuracy.rmse(predictions)

RMSE: 2.1733


In [26]:
# Calculate MAE (Mean Absolute Error) as another measure of accuracy
mae = accuracy.mae(predictions)

MAE:  1.7539
