In [1]:
# import libraries (you may add additional imports but you may not have to)
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt

In [2]:

# File names of the books and ratings CSV inputs
books_filename, ratings_filename = 'BX-Books.csv', 'BX-Book-Ratings.csv'

In [3]:
# Read both CSV files into dataframes.
# The file's own header row (row 0) is replaced by the names given here,
# and only those three columns are kept from each file.
books_df = pd.read_csv(
    books_filename,
    sep=';',
    encoding='ISO-8859-1',
    header=0,
    names=['isbn', 'title', 'author'],
    usecols=['isbn', 'title', 'author'],
    dtype=dict(isbn='str', title='str', author='str'),
)

ratings_df = pd.read_csv(
    ratings_filename,
    sep=';',
    encoding='ISO-8859-1',
    header=0,
    names=['user', 'isbn', 'rating'],
    usecols=['user', 'isbn', 'rating'],
    dtype=dict(user='int32', isbn='str', rating='float32'),
)

In [4]:
# Users with more than 200 ratings
user_counts = ratings_df['user'].value_counts()
filtered_users = user_counts.loc[user_counts.gt(200)].index

# Books (ISBNs) with more than 100 ratings
book_counts = ratings_df['isbn'].value_counts()
filtered_books = book_counts.loc[book_counts.gt(100)].index

# Keep only ratings where both the user and the book passed the thresholds.
# NOTE: this rebinds ratings_df, so re-running the cell filters the
# already-filtered frame again.
ratings_df = ratings_df.loc[
    ratings_df['user'].isin(filtered_users)
    & ratings_df['isbn'].isin(filtered_books)
]

# Book-by-user matrix: one row per ISBN, one column per user,
# with 0 where a user has no rating for a book
user_book_matrix = (
    ratings_df
    .pivot(values='rating', index='isbn', columns='user')
    .fillna(0)
)

In [5]:
# Display the ratings that remain after the user/book filtering
ratings_df

Unnamed: 0,user,isbn,rating
1456,277427,002542730X,10.0
1469,277427,0060930535,0.0
1471,277427,0060934417,0.0
1474,277427,0061009059,9.0
1484,277427,0140067477,0.0
...,...,...,...
1147304,275970,0804111359,0.0
1147436,275970,140003065X,0.0
1147439,275970,1400031346,0.0
1147440,275970,1400031354,0.0


In [6]:
# Display the pivoted matrix (rows are ISBNs, columns are user ids)
user_book_matrix

user,254,2276,2766,2977,3363,4017,4385,6242,6251,6323,...,274004,274061,274301,274308,274808,275970,277427,277478,277639,278418
isbn,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
002542730X,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0
0060008032,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0060096195,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
006016848X,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0060173289,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1573227331,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1573229326,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1573229571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1592400876,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# Fit a brute-force nearest-neighbours model with cosine distance.
# Each row of user_book_matrix (one ISBN's ratings across users) is a sample.
model_knn = NearestNeighbors(algorithm='brute', metric='cosine')
model_knn.fit(user_book_matrix)

In [19]:
def get_recommends(book_title, books_df, model_knn, user_book_matrix, n_recommendations=5):
    """Recommend books whose rating vectors are closest to ``book_title``.

    Args:
        book_title: Exact title to look up in ``books_df['title']``.
        books_df: DataFrame with at least ``isbn`` and ``title`` columns.
        model_knn: Fitted neighbours model exposing
            ``kneighbors(X, n_neighbors=...)``. It is assumed to have been
            fitted on the rows of ``user_book_matrix`` in the same order.
        user_book_matrix: DataFrame indexed by ISBN, one row per book.
        n_recommendations: Maximum number of recommendations to return.
            Defaults to 5, which matches the previously hard-coded behaviour.

    Returns:
        ``[book_title, [[title, distance], ...]]`` with the recommendations
        ordered from largest to smallest distance, or an explanatory string
        when the title is unknown or none of its ISBNs is in the matrix.

    Raises:
        ValueError: If ``n_recommendations`` is negative.
    """
    if n_recommendations < 0:
        raise ValueError("n_recommendations must be non-negative")

    # All ISBNs (editions) sharing the requested title
    title_isbns = books_df.loc[books_df['title'] == book_title, 'isbn']
    if title_isbns.empty:
        return f"Book title '{book_title}' not found in the dataset."

    # A title may map to several ISBNs; use the first one that actually has a
    # row in the matrix instead of giving up when the very first one does not.
    rated_isbns = title_isbns[title_isbns.isin(user_book_matrix.index)]
    if rated_isbns.empty:
        return f"Book ISBN '{title_isbns.iloc[0]}' not found in the user-book matrix."
    book_isbn = rated_isbns.iloc[0]
    book_index = user_book_matrix.index.get_loc(book_isbn)

    # Ask for one extra neighbour because the query book is normally its own
    # nearest neighbour; never ask for more rows than the matrix contains.
    n_neighbors = min(n_recommendations + 1, len(user_book_matrix))
    query = user_book_matrix.iloc[book_index].values.reshape(1, -1)
    distances, indices = model_knn.kneighbors(query, n_neighbors=n_neighbors)

    # Drop the query book wherever it appears (with tied distances it is not
    # guaranteed to be first), then keep at most n_recommendations entries.
    neighbours = [
        (idx, dist)
        for idx, dist in zip(indices[0], distances[0])
        if idx != book_index
    ][:n_recommendations]
    neighbour_isbns = [user_book_matrix.index[idx] for idx, _ in neighbours]

    # One ISBN -> title lookup for all neighbours; the first title wins when an
    # ISBN is listed more than once.
    known = books_df.loc[books_df['isbn'].isin(neighbour_isbns), ['isbn', 'title']]
    title_by_isbn = known.drop_duplicates('isbn').set_index('isbn')['title']

    # Rated books may be missing from books_df; fall back to the ISBN itself
    # rather than raising IndexError.
    recommended_books = [
        [title_by_isbn.get(isbn, isbn), dist]
        for isbn, (_, dist) in zip(neighbour_isbns, neighbours)
    ]

    # Largest distance first, as in the original implementation
    recommended_books.sort(key=lambda rec: rec[1], reverse=True)

    return [book_title, recommended_books]

In [20]:
# Example: fetch the recommendations for one title and print the raw result
books = get_recommends(
    "Where the Heart Is (Oprah's Book Club (Paperback))",
    books_df,
    model_knn,
    user_book_matrix,
)
print(books)

# Check function for the recommender
def test_book_recommendation():
    """Print whether get_recommends returns the expected neighbours.

    Calls ``get_recommends`` with the notebook-level ``books_df``,
    ``model_knn`` and ``user_book_matrix`` and compares the leading
    recommendations against the expected titles and distances. Each distance
    must be within 0.05 of the expected value at the same position. A
    pass/fail message is printed; nothing is returned.
    """
    title = "Where the Heart Is (Oprah's Book Club (Paperback))"
    recommends = get_recommends(title, books_df, model_knn, user_book_matrix)
    recommended_books = ["I'll Be Seeing You", 'The Weight of Water', 'The Surgeon', 'I Know This Much Is True']
    recommended_books_dist = [0.8, 0.77, 0.77, 0.77]

    # get_recommends returns a plain string when the lookup fails; treat that
    # (or a result with too few entries) as a failed check instead of letting
    # the indexing below raise IndexError.
    test_pass = (
        not isinstance(recommends, str)
        and recommends[0] == title
        and len(recommends[1]) >= len(recommended_books)
    )

    if test_pass:
        # Verify every expected recommendation, not only the first two
        for (rec_title, rec_dist), expected_dist in zip(recommends[1], recommended_books_dist):
            if rec_title not in recommended_books:
                test_pass = False
            if abs(rec_dist - expected_dist) >= 0.05:
                test_pass = False

    if test_pass:
        print("You passed the challenge! 🎉🎉🎉🎉🎉")
    else:
        print("You haven't passed yet. Keep trying!")

# Run the check; it prints a pass or fail message
test_book_recommendation()

["Where the Heart Is (Oprah's Book Club (Paperback))", [["I'll Be Seeing You", 0.8016211], ['The Weight of Water', 0.77085835], ['The Surgeon', 0.7699411], ['I Know This Much Is True', 0.7677075], ['The Lovely Bones: A Novel', 0.7230184]]]
You passed the challenge! 🎉🎉🎉🎉🎉
