<a href="https://colab.research.google.com/github/Hdhsjfjdsj/Hdhsjfjdsj/blob/main/book_recommendation_knn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt


In [None]:
# Download and extract the dataset
!wget https://cdn.freecodecamp.org/project-data/books/book-crossings.zip
!unzip book-crossings.zip
# Define filenames for the data
books_filename = 'BX-Books.csv'
ratings_filename = 'BX-Book-Ratings.csv'

--2024-08-22 15:26:17--  https://cdn.freecodecamp.org/project-data/books/book-crossings.zip
Resolving cdn.freecodecamp.org (cdn.freecodecamp.org)... 172.67.70.149, 104.26.3.33, 104.26.2.33, ...
Connecting to cdn.freecodecamp.org (cdn.freecodecamp.org)|172.67.70.149|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 26085508 (25M) [application/zip]
Saving to: ‘book-crossings.zip.1’


2024-08-22 15:26:17 (122 MB/s) - ‘book-crossings.zip.1’ saved [26085508/26085508]

Archive:  book-crossings.zip
replace BX-Book-Ratings.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
# Parser options shared by both CSV files: ISO-8859-1 text, semicolon
# separated, with the file's own header row replaced by the names given below.
csv_format = dict(encoding="ISO-8859-1", sep=";", header=0)

# Books catalogue: every kept column is read as a string
book_columns = ['isbn', 'title', 'author']
df_books = pd.read_csv(
    books_filename,
    names=book_columns,
    usecols=book_columns,
    dtype={column: 'str' for column in book_columns},
    **csv_format,
)

# Ratings: the dtype mapping doubles as the ordered list of column names
rating_dtypes = {'user': 'int32', 'isbn': 'str', 'rating': 'float32'}
df_ratings = pd.read_csv(
    ratings_filename,
    names=list(rating_dtypes),
    usecols=list(rating_dtypes),
    dtype=rating_dtypes,
    **csv_format,
)

In [None]:
# Minimum number of ratings a user / a book must have to be kept
MIN_USER_RATINGS = 200
MIN_BOOK_RATINGS = 100

# Merge the books and ratings data (inner join: ratings without a catalogue
# entry are dropped)
merged_df = pd.merge(df_ratings, df_books, on='isbn')

# Count ratings per user and per book on the SAME unfiltered frame. Counting
# books only after the user filter would require 100 ratings from heavy users
# alone, making the book threshold depend on the order of the two filters.
user_counts = merged_df['user'].value_counts()
book_counts = merged_df['isbn'].value_counts()

active_users = user_counts[user_counts >= MIN_USER_RATINGS].index
popular_books = book_counts[book_counts >= MIN_BOOK_RATINGS].index

# Keep only ratings made by active users on popular books
combined_df = merged_df[
    merged_df['user'].isin(active_users) & merged_df['isbn'].isin(popular_books)
]

# Create a pivot table of ratings: one row per title, one column per user.
# pivot_table averages duplicate (title, user) pairs; missing ratings become 0.
pivot_df = combined_df.pivot_table(index='title', columns='user', values='rating').fillna(0)

# Convert the pivot table to a sparse matrix
rating_matrix = csr_matrix(pivot_df.values)



In [None]:
# Brute-force nearest-neighbour search using cosine distance between the rows
# of the rating matrix; fit() returns the estimator itself, so it can be chained.
model_knn = NearestNeighbors(algorithm='brute', metric='cosine').fit(rating_matrix)
model_knn



In [None]:
def get_recommends(book="", n_recommendations=5):
    """
    Get book recommendations based on a given book title.

    Looks the title up in the module-level ``pivot_df`` index, queries the
    module-level ``model_knn`` with that book's row of ``rating_matrix`` and
    returns the closest other books.

    Parameters:
    book (str): The title of the book for which recommendations are to be made.
    n_recommendations (int): Maximum number of recommended books to return
        (default 5).

    Returns:
    list: ``[book, [[title, distance], ...]]`` - the input title followed by a
        list of recommended titles with their distances, in the order the
        model returned them.

    Raises:
    KeyError: If ``book`` is not a title in ``pivot_df.index``.
    ValueError: If ``n_recommendations`` is negative.
    """
    if n_recommendations < 0:
        raise ValueError("n_recommendations must be non-negative")
    if book not in pivot_df.index:
        raise KeyError(f"Book title not found in the ratings matrix: {book!r}")

    # Find the row position of the book title
    idx = pivot_df.index.get_loc(book)

    # Ask for one extra neighbour because the query book is itself one of the
    # fitted rows; never ask for more neighbours than there are rows.
    n_neighbors = min(n_recommendations + 1, rating_matrix.shape[0])
    distances, indices = model_knn.kneighbors(rating_matrix[idx], n_neighbors=n_neighbors)

    # Drop the query book by row position rather than assuming it is the first
    # neighbour: on a distance tie another book may be listed before it.
    recommended_books = [
        [pivot_df.index[neighbor_idx], distance]
        for neighbor_idx, distance in zip(indices.flatten(), distances.flatten())
        if neighbor_idx != idx
    ][:n_recommendations]

    return [book, recommended_books]



In [None]:
# Input the book title from the user
book_title = input("Enter a book title for recommendations: ")

# Get and display the recommendations. get_recommends raises KeyError when the
# title is not in the rating matrix; report that instead of leaving the cell
# with a traceback.
try:
    recommendations = get_recommends(book_title)
except KeyError:
    print(f"No book titled '{book_title}' was found in the filtered ratings data.")
else:
    print(f"\nRecommendations for '{recommendations[0]}':")
    for title, distance in recommendations[1]:
        print(f"Book: {title}, Distance: {distance:.4f}")
