## Collaborative filtering

In [1]:
import pandas as pd
from surprise import Dataset
from surprise import Reader
from surprise import SVD
from surprise.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

In [2]:
# Location of the ratings CSV, relative to the notebook's working directory.
# The loading cell reads the `user_id`, `book_id` and `rating` columns from it.
ratings_path = "Ratings.csv"

In [3]:
# Train an SVD collaborative-filtering model on the ratings file and turn its
# test-set predictions into binary "liked / not liked" labels for scoring.

# One seed shared by the train/test split AND the model. SVD is fitted from a
# random initialisation, so seeding only the split leaves the metrics
# different on every Restart & Run All.
SEED = 42

# Ratings at or above this value are treated as the positive class (1);
# everything below is the negative class (0).
RELEVANCE_THRESHOLD = 3

# Load the ratings data into a DataFrame
ratings_df = pd.read_csv(ratings_path)

# Initialize the reader with the rating scale used by the data (1 to 5)
reader = Reader(rating_scale=(1, 5))

# Load the dataset; Surprise expects the columns in (user, item, rating) order
data = Dataset.load_from_df(ratings_df[['user_id', 'book_id', 'rating']], reader)

# Split the dataset into training (80%) and testing (20%) sets
trainset, testset = train_test_split(data, test_size=0.2, random_state=SEED)

# Train the model using Singular Value Decomposition (SVD)
model = SVD(random_state=SEED)
model.fit(trainset)

# Make predictions on the testing set
predictions = model.test(testset)

# Pull the true rating (r_ui) and the estimated rating (est) out of each prediction
ground_truth = [prediction.r_ui for prediction in predictions]  # actual ratings
predicted_ratings = [prediction.est for prediction in predictions]  # predicted ratings

# Convert the ratings to binary classes using the shared threshold
ground_truth_classes = [int(rating >= RELEVANCE_THRESHOLD) for rating in ground_truth]
predicted_classes = [int(rating >= RELEVANCE_THRESHOLD) for rating in predicted_ratings]

In [4]:
# Compute the confusion matrix (rows = actual class, columns = predicted class).
# The labels are passed explicitly so the result is always a 2x2 matrix in the
# order [0, 1]; without them the label set is inferred from the data and the
# matrix shrinks if one class never appears in either list.
cm = confusion_matrix(ground_truth_classes, predicted_classes, labels=[0, 1])

print(f"Confusion Matrix:\n{cm}")

Confusion Matrix:
[[  3523  12915]
 [  6007 173907]]


In [5]:
# Score the binarized predictions against the binarized ground truth.
# The scorers run in the order precision, recall, F1 and are called with
# their default settings.
precision, recall, f1 = (
    scorer(ground_truth_classes, predicted_classes)
    for scorer in (precision_score, recall_score, f1_score)
)

# Report each metric rounded to two decimal places
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")

Precision: 0.93
Recall: 0.97
F1-score: 0.95
