In [None]:

import pandas as pd
from sklearn.neighbors import NearestNeighbors
import numpy as np

!wget https://cdn.freecodecamp.org/project-data/books/book-crossings.zip

!unzip book-crossings.zip

# Names of the CSV files extracted from book-crossings.zip.
books_filename = 'BX-Books.csv'
ratings_filename = 'BX-Book-Ratings.csv'

# Book metadata: only the ISBN, title and author columns are kept, all read
# as strings.
df_books = pd.read_csv(
    books_filename,
    encoding="ISO-8859-1",
    sep=";",
    header=0,
    names=['isbn', 'title', 'author'],
    usecols=['isbn', 'title', 'author'],
    dtype={'isbn': 'str', 'title': 'str', 'author': 'str'}
)

# One row per (user, book) rating. This must read the ratings file that the
# archive actually contains (ratings_filename); the archive has no
# 'BX-Ratings.csv'.
df_ratings = pd.read_csv(
    ratings_filename,
    encoding="ISO-8859-1",
    sep=";",
    header=0,
    names=['user', 'isbn', 'rating'],
    usecols=['user', 'isbn', 'rating'],
    dtype={'user': 'int32', 'isbn': 'str', 'rating': 'float32'}
)


# Rating-count thresholds: users and books with fewer ratings than these are
# dropped before the similarity model is built.
MIN_USER_RATINGS = 200
MIN_BOOK_RATINGS = 100

# Both counts are taken from the unfiltered ratings. Counting books only after
# the user filter has been applied measures each book against a much smaller
# pool of ratings and discards far more titles than the thresholds describe.
user_counts = df_ratings['user'].value_counts()
book_counts = df_ratings['isbn'].value_counts()
users_to_keep = user_counts[user_counts >= MIN_USER_RATINGS].index
books_to_keep = book_counts[book_counts >= MIN_BOOK_RATINGS].index
df_ratings = df_ratings[
    df_ratings['user'].isin(users_to_keep) & df_ratings['isbn'].isin(books_to_keep)
]

# One row per book (ISBN) and one column per user; a missing rating becomes 0.
# Books must be the rows because the model below searches for neighbouring
# rows, and the neighbours we want are books, not users.
ratings_pivot = df_ratings.pivot_table(index='isbn', columns='user', values='rating').fillna(0)

# Cosine distance between the per-book rating vectors.
knn = NearestNeighbors(metric='cosine', algorithm='brute')
knn.fit(ratings_pivot.values)


def get_recommends(book_title):
    """Return the books whose rating vectors are closest to ``book_title``.

    Parameters
    ----------
    book_title : str
        Exact title to look up in ``df_books``.

    Returns
    -------
    list or str
        ``[book_title, [[title, distance], ...]]`` with up to five neighbours,
        nearest first, where ``distance`` is the cosine distance reported by
        ``knn``. If no edition of the title has a row in ``ratings_pivot``,
        a "not in the dataset" message string is returned instead.
    """
    wanted = 5

    # A title may be listed under several ISBNs; use the first edition that
    # actually has a row in the rating matrix rather than the first listed.
    title_isbns = df_books.loc[df_books['title'] == book_title, 'isbn']
    rated_isbns = title_isbns[title_isbns.isin(ratings_pivot.index)]
    if rated_isbns.empty:
        return f"The book '{book_title}' is not in the dataset."
    book_isbn = rated_isbns.iloc[0]

    # Ask for one extra neighbour because the query book is itself part of
    # the fitted data; never ask for more rows than the matrix holds.
    query = ratings_pivot.loc[[book_isbn]].values
    n_neighbors = min(wanted + 1, len(ratings_pivot))
    distances, indices = knn.kneighbors(query, n_neighbors=n_neighbors)

    recommendations = []
    for distance, row_position in zip(distances.flatten(), indices.flatten()):
        neighbor_isbn = ratings_pivot.index[row_position]
        # Skip the query book by identity, not by position: with tied
        # distances it is not guaranteed to come back first.
        if neighbor_isbn == book_isbn:
            continue
        neighbor_titles = df_books.loc[df_books['isbn'] == neighbor_isbn, 'title']
        # A rated ISBN may have no entry in the book table; fall back to the
        # ISBN itself instead of raising.
        label = neighbor_titles.iloc[0] if not neighbor_titles.empty else neighbor_isbn
        recommendations.append([label, distance])

    return [book_title, recommendations[:wanted]]


print(get_recommends("The Queen of the Damned (Vampire Chronicles (Paperback))"))


--2024-09-27 10:21:07--  https://cdn.freecodecamp.org/project-data/books/book-crossings.zip
Resolving cdn.freecodecamp.org (cdn.freecodecamp.org)... 104.26.2.33, 172.67.70.149, 104.26.3.33, ...
Connecting to cdn.freecodecamp.org (cdn.freecodecamp.org)|104.26.2.33|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 26085508 (25M) [application/zip]
Saving to: ‘book-crossings.zip.1’


2024-09-27 10:21:08 (85.8 MB/s) - ‘book-crossings.zip.1’ saved [26085508/26085508]

Archive:  book-crossings.zip
replace BX-Book-Ratings.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: BX-Book-Ratings.csv     
replace BX-Books.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: BX-Books.csv            
replace BX-Users.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: BX-Users.csv            
The book 'The Queen of the Damned (Vampire Chronicles (Paperback))' is not in the dataset.


In [None]:

# Case-insensitive substring search over the book titles; rows with a missing
# title are treated as non-matches (na=False).
vampire_titles = df_books.loc[df_books['title'].str.contains('Vampire', case=False, na=False)]
queen_titles = df_books.loc[df_books['title'].str.contains('Queen', case=False, na=False)]

# sep="\n" places the heading and the Series on separate lines, exactly as
# two consecutive print calls would.
print("Titles containing 'Vampire':", vampire_titles['title'], sep="\n")
print("\nTitles containing 'Queen':", queen_titles['title'], sep="\n")


Titles containing 'Vampire':
608        The Vampire Lestat (Vampire Chronicles, Book II)
998              Meet the Stars of Buffy the Vampire Slayer
1133        Vittorio the Vampire: New Tales of the Vampires
1761      Cerulean Sins: An Anita Blake, Vampire Hunter ...
1764                Blackwood Farm (The Vampire Chronicles)
                                ...                        
266235           The Mark of the Moderately Vicious Vampire
266999                    VAMPIRE IN LOVE : VAMPIRE IN LOVE
267249             Pilare the Vampire: The Untold Existence
268393                                    Fangoria Vampires
268471             Song of the Vampire (An Avon Flare Book)
Name: title, Length: 520, dtype: object

Titles containing 'Queen':
1540                      COURTNEY LOVE: THE QUEEN OF NOISE
1691                                             Snow Queen
2071      Guenevere, Queen of the Summer Country (Guenev...
2073                               The Beet Queen : A Novel
252

In [None]:

book_title = "The Queen of the Damned (Vampire Chronicles (Paperback))"

# Two-step diagnosis: first confirm the title exists in the book table, then
# check whether its (first) ISBN appears anywhere in the current df_ratings.
if book_title not in df_books['title'].values:
    print(f"The book '{book_title}' is not in the dataset.")
else:
    isbn = df_books.loc[df_books['title'] == book_title, 'isbn'].iloc[0]
    print(f"ISBN for '{book_title}': {isbn}")

    if isbn in df_ratings['isbn'].values:
        print(f"The ISBN '{isbn}' is present in the ratings dataset.")
    else:
        print(f"The ISBN '{isbn}' is not found in the ratings dataset.")


ISBN for 'The Queen of the Damned (Vampire Chronicles (Paperback))': 0345351525
The ISBN '0345351525' is not found in the ratings dataset.


In [None]:

# Titles mentioning either keyword (case-insensitive; missing titles never match).
related_books = df_books.loc[
    df_books['title'].str.contains('Vampire|Queen', case=False, na=False)
]

# Of those, keep only the books whose ISBN occurs in df_ratings.
related_books_with_ratings = related_books.loc[
    related_books['isbn'].isin(df_ratings['isbn'].values)
]

# sep="\n" reproduces the heading-then-table layout of two separate prints.
print("Related books with ratings:", related_books_with_ratings[['title', 'isbn']], sep="\n")


Related books with ratings:
                                                 title        isbn
608   The Vampire Lestat (Vampire Chronicles, Book II)  0345313860
2526                        Interview with the Vampire  0345337662


In [None]:
from sklearn.neighbors import NearestNeighbors
import numpy as np

# Users as rows, books (ISBN) as columns; a missing rating becomes 0.
# pivot_table is used instead of pivot because pivot raises on a repeated
# (user, isbn) pair, while pivot_table averages the repeats.
user_item_matrix = df_ratings.pivot_table(index='user', columns='isbn', values='rating').fillna(0)

# Book-to-book similarity needs one row per book, so the model is fitted on
# the transposed matrix. Fitting on user rows would return similar users.
item_user_matrix = user_item_matrix.T

model_knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=6)
model_knn.fit(item_user_matrix.values)


def get_recommends(book_title, fallback_pattern='Vampire|Queen'):
    """Recommend books rated similarly to ``book_title``.

    Parameters
    ----------
    book_title : str
        Exact title to look up in ``df_books``.
    fallback_pattern : str, optional
        Regular expression matched case-insensitively against titles when
        ``book_title`` has no rated edition. Defaults to the pattern this
        notebook used for its example query.

    Returns
    -------
    list
        ``[book_title, [[title, distance], ...]]``. When the book has a row in
        the rating matrix, up to five neighbours are returned, nearest first,
        with their cosine distances. Otherwise the list holds the rated books
        whose title matches ``fallback_pattern``, each paired with ``None``
        because no distance can be computed.
    """
    wanted = 5
    rated_index = item_user_matrix.index

    # A title may be listed under several ISBNs; use the first edition that
    # actually has ratings. An unknown title simply yields no candidates
    # instead of raising IndexError.
    title_isbns = df_books.loc[df_books['title'] == book_title, 'isbn']
    rated_isbns = title_isbns[title_isbns.isin(rated_index)]

    if rated_isbns.empty:
        related_books = df_books[df_books['title'].str.contains(fallback_pattern, case=False, na=False)]
        related_books_with_ratings = related_books[related_books['isbn'].isin(rated_index)]
        return [book_title, [[title, None] for title in related_books_with_ratings['title']]]

    isbn = rated_isbns.iloc[0]

    # Look the row up by ISBN label; a row label of df_ratings is not a
    # position in the rating matrix. One extra neighbour is requested because
    # the query book is part of the fitted data, capped at the number of rows.
    query = item_user_matrix.loc[[isbn]].values
    n_neighbors = min(wanted + 1, len(item_user_matrix))
    distances, indices = model_knn.kneighbors(query, n_neighbors=n_neighbors)

    recommendations = []
    for distance, row_position in zip(distances.flatten(), indices.flatten()):
        neighbor_isbn = rated_index[row_position]
        # Skip the query book by identity; with tied distances it is not
        # guaranteed to be returned first.
        if neighbor_isbn == isbn:
            continue
        neighbor_titles = df_books.loc[df_books['isbn'] == neighbor_isbn, 'title']
        # A rated ISBN may be absent from the book table; show the ISBN then.
        recommended_book = neighbor_titles.iloc[0] if not neighbor_titles.empty else neighbor_isbn
        recommendations.append([recommended_book, distance])

    return [book_title, recommendations[:wanted]]


recommendations = get_recommends("The Queen of the Damned (Vampire Chronicles (Paperback))")
print(recommendations)


['The Queen of the Damned (Vampire Chronicles (Paperback))', [['The Vampire Lestat (Vampire Chronicles, Book II)', None], ['Interview with the Vampire', None]]]
