 --- Section 1: Import Required Libraries ---


In [77]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt


 --- Section 2: Load and Preprocess Data ---


In [78]:
# Load the datasets
df_books = pd.read_csv('C://Users//theki//Desktop//topics project//Books.csv')
df_ratings = pd.read_csv('C://Users//theki//Desktop//topics project//Ratings.csv')

# Retain only relevant columns from the books dataset
df_books = df_books[['ISBN', 'Book-Title', 'Book-Author']]
df_books.head()



Unnamed: 0,ISBN,Book-Title,Book-Author
0,195153448,Classical Mythology,Mark P. O. Morford
1,2005018,Clara Callan,Richard Bruce Wright
2,60973129,Decision in Normandy,Carlo D'Este
3,374157065,Flu: The Story of the Great Influenza Pandemic of 1918 and the Search for the Virus That Caused It,Gina Bari Kolata
4,393045218,The Mummies of Urumchi,E. J. W. Barber


In [79]:
# Check for missing values and remove them from the books dataset
df_books.isnull().sum()
df_books.dropna(inplace=True)


In [80]:
# Check the shapes of the datasets
print("Shape of books dataset:", df_books.shape)
print("Shape of ratings dataset:", df_ratings.shape)

Shape of books dataset: (271358, 3)
Shape of ratings dataset: (1149780, 3)


In [81]:
df_ratings = pd.read_csv('C://Users//theki//Desktop//topics project//Ratings.csv')
df_ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


 --- Section 3: Filter Infrequent Users and Books ---

In [82]:
# Filter users who have given less than 200 ratings
user_ratings_count = df_ratings['User-ID'].value_counts()
users_to_remove = user_ratings_count[user_ratings_count < 200].index
df_ratings_rm = df_ratings[~df_ratings['User-ID'].isin(users_to_remove)]
print("Shape after removing users with less than 200 ratings:", df_ratings_rm.shape)

# Filter books that have received less than 100 ratings
book_ratings_count = df_ratings['ISBN'].value_counts()
books_to_remove = book_ratings_count[book_ratings_count < 100].index
df_ratings_rm = df_ratings_rm[~df_ratings_rm['ISBN'].isin(books_to_remove)]
print("Shape after removing books with less than 100 ratings:", df_ratings_rm.shape)


Shape after removing users with less than 200 ratings: (527556, 3)
Shape after removing books with less than 100 ratings: (49781, 3)


 --- Section 4: Verify Specific Books in Filtered Dataset ---


In [83]:
books_to_check = [
    "Where the Heart Is (Oprah's Book Club (Paperback))",
    "I'll Be Seeing You",
    "The Weight of Water",
    "The Surgeon",
    "I Know This Much Is True"
]

for book in books_to_check:
    count = df_ratings_rm['ISBN'].isin(df_books[df_books['Book-Title'] == book]['ISBN']).sum()
    print(f"Ratings for '{book}': {count}")

Ratings for 'Where the Heart Is (Oprah's Book Club (Paperback))': 183
Ratings for 'I'll Be Seeing You': 75
Ratings for 'The Weight of Water': 49
Ratings for 'The Surgeon': 57
Ratings for 'I Know This Much Is True': 77


 --- Section 5: Create User-Item Matrix ---


In [84]:
# Create a user-item matrix and fill missing values with 0
df = df_ratings_rm.pivot_table(index=['User-ID'], columns=['ISBN'], values='Book-Rating').fillna(0).T

# Replace ISBN with book titles for better readability
df.index = df.join(df_books.set_index('ISBN'))['Book-Title']
df = df.sort_index()

# Verify the user-item matrix for a specific book
sample_book = "The Queen of the Damned (Vampire Chronicles (Paperback))"
print(df.loc[sample_book][:5])


User-ID
254     0.0
2276    0.0
2766    0.0
2977    0.0
3363    0.0
Name: The Queen of the Damned (Vampire Chronicles (Paperback)), dtype: float64


 --- Section 6: Train Nearest Neighbors Model ---


In [85]:
model = NearestNeighbors(metric='cosine', algorithm='brute')
model.fit(df.values)

 --- Section 7: Define Book Recommendation Function ---


In [86]:
def recommend(title=""):
    
    #Get book recommendations based on the provided title.
    
    try:
        book_vector = df.loc[title]
    except KeyError:
        print(f"The book '{title}' does not exist in the dataset.")
        return None

    # Find the 6 nearest neighbors
    distances, indices = model.kneighbors([book_vector.values], n_neighbors=6)

    # Prepare the recommended books DataFrame
    recommended_books = pd.DataFrame({
        'title': df.iloc[indices[0]].index.values,
        'distance': distances[0]
    }).sort_values(by='distance', ascending=False).head(5)

    return [title, recommended_books.values.tolist()]


 --- Section 8: Test the Recommendation System ---


In [87]:
books = recommend("The Queen of the Damned (Vampire Chronicles (Paperback))")
print(f"Recommendations for '{books[0]}':")
for rec in books[1]:
    print(f"- {rec[0]} (Distance: {rec[1]:.2f})")


Recommendations for 'The Queen of the Damned (Vampire Chronicles (Paperback))':
- Catch 22 (Distance: 0.79)
- The Witching Hour (Lives of the Mayfair Witches) (Distance: 0.74)
- Interview with the Vampire (Distance: 0.73)
- The Tale of the Body Thief (Vampire Chronicles (Paperback)) (Distance: 0.54)
- The Vampire Lestat (Vampire Chronicles, Book II) (Distance: 0.52)


In [88]:
books = recommend("Where the Heart Is (Oprah's Book Club (Paperback))")
print(f"Recommendations for '{books[0]}':")
for rec in books[1]:
    print(f"- {rec[0]} (Distance: {rec[1]:.2f})")

Recommendations for 'Where the Heart Is (Oprah's Book Club (Paperback))':
- I'll Be Seeing You (Distance: 0.80)
- The Weight of Water (Distance: 0.77)
- The Surgeon (Distance: 0.77)
- I Know This Much Is True (Distance: 0.77)
- The Lovely Bones: A Novel (Distance: 0.72)


In [89]:
books = recommend("The Surgeon")
print(f"Recommendations for '{books[0]}':")
for rec in books[1]:
    print(f"- {rec[0]} (Distance: {rec[1]:.2f})")

Recommendations for 'The Surgeon':
- Where the Heart Is (Oprah's Book Club (Paperback)) (Distance: 0.77)
- Seinlanguage (Distance: 0.77)
- Manhattan Hunt Club (Distance: 0.77)
- Last Man Standing (Distance: 0.77)
- The Honk and Holler Opening Soon (Distance: 0.76)


In [90]:
books = recommend("The Honk and Holler Opening Soon")
print(f"Recommendations for '{books[0]}':")
for rec in books[1]:
    print(f"- {rec[0]} (Distance: {rec[1]:.2f})")

Recommendations for 'The Honk and Holler Opening Soon':
- A Lesson Before Dying (Vintage Contemporaries (Paperback)) (Distance: 0.76)
- The Virgin Suicides (Distance: 0.76)
- The Sigma Protocol (Distance: 0.75)
- Big Cherry Holler: A Big Stone Gap Novel (Ballantine Reader's Circle) (Distance: 0.74)
- The Woman Next Door (Distance: 0.73)
