In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt

# Read the three Book-Crossing tables from the working directory.
# The files are expected to be named 'books.csv', 'ratings.csv' and
# 'users.csv'; they are decoded as latin-1.
books, ratings, users = (
    pd.read_csv(csv_name, encoding='latin-1')
    for csv_name in ('books.csv', 'ratings.csv', 'users.csv')
)

# Peek at the first rows of every table to see which columns are available
print(books.head(), ratings.head(), users.head(), sep='\n')


In [None]:
# Step 1: keep only users who have submitted at least 200 ratings
user_rating_count = ratings.groupby('user_id').size()
users_to_keep = user_rating_count.loc[user_rating_count.ge(200)].index
is_active_user = ratings['user_id'].isin(users_to_keep)
filtered_ratings = ratings.loc[is_active_user]

# Step 2: among those users' ratings, keep only books rated at least 100 times
# (the count is taken AFTER the user filter, so it only includes active users)
book_rating_count = filtered_ratings.groupby('book_id').size()
books_to_keep = book_rating_count.loc[book_rating_count.ge(100)].index
is_popular_book = filtered_ratings['book_id'].isin(books_to_keep)
filtered_ratings = filtered_ratings.loc[is_popular_book]

# Report how many rows/columns survive both filters
print(f"Filtered ratings data: {filtered_ratings.shape}")


In [None]:
# Load the semicolon-separated book and rating files into typed dataframes.
# NOTE(review): `books_filename` and `ratings_filename` are not defined in any
# cell visible here — confirm they are set before this cell runs, otherwise a
# fresh "Restart & Run All" stops with a NameError.
# NOTE(review): `df_books` / `df_ratings` are not read by the other visible
# cells, which work on `books` / `ratings` instead — confirm this cell is needed.

# Books: the file's own header row is replaced by isbn / title / author (all text).
df_books = pd.read_csv(
    books_filename,
    sep=";",
    encoding="ISO-8859-1",
    header=0,
    names=['isbn', 'title', 'author'],
    usecols=['isbn', 'title', 'author'],
    dtype=dict.fromkeys(['isbn', 'title', 'author'], 'str'),
)

# Ratings: the file's own header row is replaced by user / isbn / rating.
df_ratings = pd.read_csv(
    ratings_filename,
    sep=";",
    encoding="ISO-8859-1",
    header=0,
    names=['user', 'isbn', 'rating'],
    usecols=['user', 'isbn', 'rating'],
    dtype={'user': 'int32', 'isbn': 'str', 'rating': 'float32'},
)

In [None]:
# Build the user x book ratings matrix: one row per user_id, one column per
# book_id, cell = rating (pivot_table averages duplicates by default).
# User/book pairs without a rating come out as NaN and are replaced by 0.
user_item_matrix = (
    filtered_ratings
    .pivot_table(values='rating', index='user_id', columns='book_id')
    .fillna(0)
)

# (number of users, number of books)
print(user_item_matrix.shape)


In [None]:
# Step 4: fit a nearest-neighbour model over books.
# The matrix is transposed so that every sample is one book (its vector of
# user ratings); neighbours are ranked by cosine distance and 6 are requested
# per query.
knn = NearestNeighbors(
    metric='cosine',
    algorithm='auto',
    n_neighbors=6,
).fit(user_item_matrix.T)

# Function to get recommendations based on a book title
def get_recommends(book_title):
    """Return the books whose rating vectors are closest to ``book_title``.

    The title is resolved to a ``book_id`` through ``books``; that book's
    column of ``user_item_matrix`` is then used to query ``knn``.

    Parameters
    ----------
    book_title : str
        Exact title to look up in ``books['title']``.

    Returns
    -------
    list
        ``[book_title, [[neighbour_title, distance], ...]]`` with neighbours
        in the order returned by ``knn.kneighbors``. The queried book itself
        is never included, and the list holds at most one entry fewer than
        the number of neighbours the model returns.

    Raises
    ------
    IndexError
        If no row of ``books`` carries this title.
    KeyError
        If the title exists but none of its ``book_id`` values is a column
        of ``user_item_matrix``.
    """
    # A title may map to several book_ids (e.g. different editions).
    candidate_ids = books.loc[books['title'] == book_title, 'book_id']
    if candidate_ids.empty:
        raise IndexError(f"No book titled {book_title!r} found in `books`")

    # Use the first id that is actually a column of the ratings matrix rather
    # than blindly taking the first catalogue row, which may not be in it.
    ids_in_matrix = candidate_ids[candidate_ids.isin(user_item_matrix.columns)]
    if ids_in_matrix.empty:
        raise KeyError(
            f"{book_title!r} is in `books` but has no column in `user_item_matrix`"
        )
    book_id = ids_in_matrix.iloc[0]

    # Query with the book's rating vector as a single-row 2-D array.
    query_vector = user_item_matrix[book_id].to_numpy().reshape(1, -1)
    distances, indices = knn.kneighbors(query_vector)

    # Historically the first neighbour was assumed to be the query book and
    # dropped, so the result size is "neighbours returned minus one".
    max_results = len(indices[0]) - 1

    recommended_books = []
    for distance, column_pos in zip(distances[0], indices[0]):
        neighbour_id = user_item_matrix.columns[column_pos]
        # Skip the query book wherever it appears: on distance ties it is not
        # guaranteed to be the first neighbour.
        if neighbour_id == book_id:
            continue
        neighbour_title = books.loc[books['book_id'] == neighbour_id, 'title'].iloc[0]
        recommended_books.append([neighbour_title, distance])

    return [book_title, recommended_books[:max_results]]


In [None]:
# Smoke-test the recommender on one known title and show the raw result
sample_title = "The Queen of the Damned (Vampire Chronicles (Paperback))"
recommended_books = get_recommends(sample_title)
print(recommended_books)
