<a href="https://colab.research.google.com/github/OnyangoOmondie97/book_recommendation/blob/main/Book_recomendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Cell 1:
import pandas as pd
from sklearn.neighbors import NearestNeighbors

# Fetch the goodbooks-10k book metadata and the user ratings
books = pd.read_csv("https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/books.csv")
ratings = pd.read_csv("https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/ratings.csv")

# Attach the book metadata to every rating row (inner join on book_id)
df = ratings.merge(books, on='book_id')

# Ratings per user and per book, both counted on the merged, unfiltered data
user_counts = df['user_id'].value_counts()
book_counts = df['book_id'].value_counts()

# Keep only ratings made by users with at least 200 ratings
# on books with at least 100 ratings
active_users = user_counts.loc[user_counts >= 200].index
popular_books = book_counts.loc[book_counts >= 100].index
df = df[df['user_id'].isin(active_users) & df['book_id'].isin(popular_books)]


In [None]:
# Cell 2:
# Build the book x user rating matrix; (book, user) pairs with no rating become 0
pivot_table = df.pivot_table(values='rating', index='book_id', columns='user_id', fill_value=0)

# Capture the numeric ratings before the non-numeric 'title' column is attached
rating_matrix = pivot_table.to_numpy()

# Adding the 'title' column to the pivot_table, aligned on the book_id index
pivot_table['title'] = books.set_index('book_id')['title']

# Brute-force cosine nearest-neighbours model fitted on the rating columns only
knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=6)
knn.fit(rating_matrix)


In [None]:
# Cell 3:
def get_recommends(book_title):
    """Return the books whose rating patterns are closest to ``book_title``.

    Relies on two notebook-level objects: ``pivot_table`` (one row per book,
    a ``'title'`` column plus the rating columns) and ``knn``, the
    nearest-neighbours model fitted on those rating columns.

    Parameters
    ----------
    book_title : str
        Exact title to look up in ``pivot_table['title']``.

    Returns
    -------
    list or str
        ``[book_title, [[title, distance], ...]]`` with up to five
        neighbours, in the order returned by ``knn.kneighbors``; or the
        "not found" message string when the title is not in the table.
    """
    titles = pivot_table['title']

    # Locate the book by *position*. The table is indexed by book_id labels,
    # whereas .iloc and the indices returned by kneighbors are positional, so
    # a label taken from pivot_table.index must not be fed to .iloc.
    is_match = (titles == book_title).to_numpy(dtype=bool, na_value=False)
    match_positions = is_match.nonzero()[0]
    if match_positions.size == 0:
        return f"Book '{book_title}' not found in the dataset."
    book_pos = int(match_positions[0])

    # Build the (1, n_features) query from that single row only, instead of
    # copying the whole matrix with drop('title', axis=1) on every call.
    query = pivot_table.iloc[book_pos].drop('title').to_numpy(dtype=float).reshape(1, -1)

    # Ask for six neighbours: the queried book itself plus five recommendations
    distances, indices = knn.kneighbors(query, n_neighbors=6)

    # Exclude the queried book by position rather than assuming it is always
    # the first hit (a book with an identical rating vector can tie with it).
    recommended_books = [
        [titles.iloc[pos], float(dist)]
        for pos, dist in zip(indices.ravel(), distances.ravel())
        if pos != book_pos
    ][:5]

    return [book_title, recommended_books]


In [None]:
# Cell 4:
# Smoke test: request recommendations for a single title.
# NOTE(review): the saved output of this cell is the "not found" message, so this
# exact title string is apparently absent from pivot_table['title'] -- use a title
# that is present in the table to actually exercise the neighbour lookup.
get_recommends("The Queen of the Damned (Vampire Chronicles (Paperback))")


"Book 'The Queen of the Damned (Vampire Chronicles (Paperback))' not found in the dataset."