In [13]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import requests
import zipfile
import os

In [40]:
# Download and unpack the Book-Crossing dataset using requests, zipfile and os.
url = 'https://cdn.freecodecamp.org/project-data/books/book-crossings.zip'
zip_path = 'book-crossings.zip'

books_filename = 'BX-Books.csv'
ratings_filename = 'BX-Book-Ratings.csv'

# Skip the network round-trip when both CSV files are already on disk, so that
# re-running the notebook does not download the archive again.
if not (os.path.exists(books_filename) and os.path.exists(ratings_filename)):
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(url, timeout=60)
    # Fail fast on an HTTP error instead of continuing to open a zip file
    # that was never written.
    response.raise_for_status()

    with open(zip_path, 'wb') as file:
        file.write(response.content)

    try:
        # Extract the archive into the current working directory.
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall()
    finally:
        # Remove the downloaded archive even if extraction fails.
        os.remove(zip_path)

In [51]:
# Build the books and ratings DataFrames from the two CSV files.

# Parser options shared by both files.
csv_options = dict(encoding="ISO-8859-1", sep=";", header=0)

# Column name -> dtype for each file; the keys double as the column names
# passed to `names` and `usecols`.
books_schema = {'isbn': 'str', 'title': 'str', 'author': 'str'}
ratings_schema = {'user': 'int32', 'isbn': 'str', 'rating': 'float32'}

df_books = pd.read_csv(
    books_filename,
    names=list(books_schema),
    usecols=list(books_schema),
    dtype=books_schema,
    **csv_options,
)

df_ratings = pd.read_csv(
    ratings_filename,
    names=list(ratings_schema),
    usecols=list(ratings_schema),
    dtype=ratings_schema,
    **csv_options,
)

In [42]:
# Display the ratings DataFrame (columns: user, isbn, rating) as the cell output.
df_ratings

Unnamed: 0,user,isbn,rating
0,276725,034545104X,0.0
1,276726,0155061224,5.0
2,276727,0446520802,0.0
3,276729,052165615X,3.0
4,276729,0521795028,6.0
...,...,...,...
1149775,276704,1563526298,9.0
1149776,276706,0679447156,0.0
1149777,276709,0515107662,10.0
1149778,276721,0590442449,10.0


In [43]:
# Display the books DataFrame (columns: isbn, title, author) as the cell output.
df_books

Unnamed: 0,isbn,title,author
0,0195153448,Classical Mythology,Mark P. O. Morford
1,0002005018,Clara Callan,Richard Bruce Wright
2,0060973129,Decision in Normandy,Carlo D'Este
3,0374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata
4,0393045218,The Mummies of Urumchi,E. J. W. Barber
...,...,...,...
271374,0440400988,There's a Bat in Bunk Five,Paula Danziger
271375,0525447644,From One to One Hundred,Teri Sloat
271376,006008667X,Lily Dale : The True Story of the Town that Ta...,Christine Wicker
271377,0192126040,Republic (World's Classics),Plato


In [52]:
# Keep only the ratings made by users who have rated at least 25 books.
user_counts = df_ratings['user'].value_counts()
active_users = user_counts.loc[user_counts >= 25].index
df_ratings = df_ratings.loc[df_ratings['user'].isin(active_users)]

# Among the remaining ratings, keep only books that have at least 10 ratings.
book_counts = df_ratings['isbn'].value_counts()
popular_isbns = book_counts.loc[book_counts >= 10].index
df_ratings = df_ratings.loc[df_ratings['isbn'].isin(popular_isbns)]

# Attach the book title and author to each rating by joining on the ISBN.
df_ratings = pd.merge(df_ratings, df_books, on='isbn')

# Build the title x user rating matrix; combinations without a rating become 0.
pivot_table = pd.pivot_table(
    df_ratings,
    index='title',
    columns='user',
    values='rating',
).fillna(0)

# Nearest-neighbour model using cosine distance with a brute-force search,
# fitted on one row per book title.
model = NearestNeighbors(metric='cosine', algorithm='brute')
model.fit(pivot_table.values)

In [53]:
# Display the title x user rating matrix (rows: titles, columns: user ids).
pivot_table

user,242,243,254,383,446,487,503,507,626,638,...,278188,278194,278202,278221,278356,278418,278535,278582,278633,278843
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Murder of a Sleeping Beauty (Scumble River Mysteries (Paperback)),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Q-Space (Star Trek The Next Generation, Book 47)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Q-Zone (Star Trek The Next Generation, Book 48)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Salem's Lot,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...AND THE HORSE HE RODE IN ON : THE PEOPLE V. KENNETH STARR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"\O\"" Is for Outlaw""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0
"\Surely You're Joking, Mr. Feynman!\"": Adventures of a Curious Character""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
one hundred years of solitude,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [57]:
def get_recommends(book="", n_recommendations=5):
    """Return the books whose rating vectors are closest to those of `book`.

    Relies on the module-level `pivot_table` (one row per title) and on
    `model`, the nearest-neighbour model fitted on `pivot_table.values`.

    Args:
        book: Title to look up; must match an entry of `pivot_table.index`.
        n_recommendations: Maximum number of recommendations to return.

    Returns:
        A two-element list `[book, recommendations]`, where `recommendations`
        is a list of `[title, distance]` pairs in the order the model returns
        them. The list is empty when `book` is not in the pivot table (a
        message is printed in that case) or when `n_recommendations` < 1.
    """
    # First check that the book is present in the pivot table.
    if book not in pivot_table.index:
        print(f"El libro '{book}' no se encuentra en la tabla pivote.")
        return [book, []]

    if n_recommendations < 1:
        return [book, []]

    # Row position of the queried title; the neighbour indices returned by the
    # model are row positions in the same matrix.
    book_position = pivot_table.index.get_loc(book)
    query = pivot_table.iloc[book_position].to_numpy().reshape(1, -1)

    # Ask for one extra neighbour because the query itself is normally among
    # the results; never ask for more rows than the table contains.
    n_neighbors = min(n_recommendations + 1, len(pivot_table))
    distances, indices = model.kneighbors(query, n_neighbors=n_neighbors)

    # Exclude the queried book by its position instead of assuming it is the
    # first hit: when another title has an identical rating vector, the tie
    # can place the query at a different position in the results.
    recommended_books = [
        [pivot_table.index[position], distance]
        for distance, position in zip(distances.ravel(), indices.ravel())
        if position != book_position
    ]
    return [book, recommended_books[:n_recommendations]]

# Ask the recommender for the neighbours of one specific title.
books = get_recommends("'Salem's Lot")

# Print the queried title as a header, then one line per recommended book
# together with the score returned for it.
print(f"Recomendaciones para '{books[0]}':")
for entry in books[1]:
    book, score = entry
    print(f"- {book} (Score: {score})")



Recomendaciones para ''Salem's Lot':
- The Matarese Circle (Score: 0.7040992379188538)
- Birds of Prey (Score: 0.7040992379188538)
- DARK ANGEL (Score: 0.7346041798591614)
- Extreme Denial (Score: 0.749720573425293)
- Legacy (Score: 0.758175253868103)


In [58]:
# NOTE(review): an earlier cell already defines get_recommends and this cell
# re-defines it; consider keeping a single definition and only calling it here.
def get_recommends(book="", n_recommendations=5):
    """Return the books whose rating vectors are closest to those of `book`.

    Relies on the module-level `pivot_table` (one row per title) and on
    `model`, the nearest-neighbour model fitted on `pivot_table.values`.

    Args:
        book: Title to look up; must match an entry of `pivot_table.index`.
        n_recommendations: Maximum number of recommendations to return.

    Returns:
        A two-element list `[book, recommendations]`, where `recommendations`
        is a list of `[title, distance]` pairs in the order the model returns
        them. The list is empty when `book` is not in the pivot table (a
        message is printed in that case) or when `n_recommendations` < 1.
    """
    # First check that the book is present in the pivot table.
    if book not in pivot_table.index:
        print(f"El libro '{book}' no se encuentra en la tabla pivote.")
        return [book, []]

    if n_recommendations < 1:
        return [book, []]

    # Row position of the queried title; the neighbour indices returned by the
    # model are row positions in the same matrix.
    book_position = pivot_table.index.get_loc(book)
    query = pivot_table.iloc[book_position].to_numpy().reshape(1, -1)

    # Ask for one extra neighbour because the query itself is normally among
    # the results; never ask for more rows than the table contains.
    n_neighbors = min(n_recommendations + 1, len(pivot_table))
    distances, indices = model.kneighbors(query, n_neighbors=n_neighbors)

    # Exclude the queried book by its position instead of assuming it is the
    # first hit: when another title has an identical rating vector, the tie
    # can place the query at a different position in the results.
    recommended_books = [
        [pivot_table.index[position], distance]
        for distance, position in zip(distances.ravel(), indices.ravel())
        if position != book_position
    ]
    return [book, recommended_books[:n_recommendations]]

# Ask the recommender for the neighbours of one specific title.
books = get_recommends("one hundred years of solitude")

# Print the queried title as a header, then one line per recommended book
# together with the score returned for it.
print(f"Recomendaciones para '{books[0]}':")
for entry in books[1]:
    book, score = entry
    print(f"- {book} (Score: {score})")

Recomendaciones para 'one hundred years of solitude':
- A Tale of Two Cities (Wordsworth Classics) (Score: 0.4236096143722534)
- Mister Sandman (Harvest Book) (Score: 0.4844607710838318)
- The Floating Girl (Score: 0.5332275629043579)
- Long Day's Journey into Night (Score: 0.5388876795768738)
- APOLOGIZING TO DOGS (Score: 0.5715724229812622)


In [59]:
# NOTE(review): earlier cells already define get_recommends and this cell
# re-defines it; consider keeping a single definition and only calling it here.
def get_recommends(book="", n_recommendations=5):
    """Return the books whose rating vectors are closest to those of `book`.

    Relies on the module-level `pivot_table` (one row per title) and on
    `model`, the nearest-neighbour model fitted on `pivot_table.values`.

    Args:
        book: Title to look up; must match an entry of `pivot_table.index`.
        n_recommendations: Maximum number of recommendations to return.

    Returns:
        A two-element list `[book, recommendations]`, where `recommendations`
        is a list of `[title, distance]` pairs in the order the model returns
        them. The list is empty when `book` is not in the pivot table (a
        message is printed in that case) or when `n_recommendations` < 1.
    """
    # First check that the book is present in the pivot table.
    if book not in pivot_table.index:
        print(f"El libro '{book}' no se encuentra en la tabla pivote.")
        return [book, []]

    if n_recommendations < 1:
        return [book, []]

    # Row position of the queried title; the neighbour indices returned by the
    # model are row positions in the same matrix.
    book_position = pivot_table.index.get_loc(book)
    query = pivot_table.iloc[book_position].to_numpy().reshape(1, -1)

    # Ask for one extra neighbour because the query itself is normally among
    # the results; never ask for more rows than the table contains.
    n_neighbors = min(n_recommendations + 1, len(pivot_table))
    distances, indices = model.kneighbors(query, n_neighbors=n_neighbors)

    # Exclude the queried book by its position instead of assuming it is the
    # first hit: when another title has an identical rating vector, the tie
    # can place the query at a different position in the results.
    recommended_books = [
        [pivot_table.index[position], distance]
        for distance, position in zip(distances.ravel(), indices.ravel())
        if position != book_position
    ]
    return [book, recommended_books[:n_recommendations]]

# Ask the recommender for the neighbours of one specific title.
books = get_recommends("stardust")

# Print the queried title as a header, then one line per recommended book
# together with the score returned for it.
print(f"Recomendaciones para '{books[0]}':")
for entry in books[1]:
    book, score = entry
    print(f"- {book} (Score: {score})")

Recomendaciones para 'stardust':
- Miss Smilla's Feeling for Snow (Score: 0.7389868497848511)
- The Floating Girl (Score: 0.7404463291168213)
- The Stargazey (Richard Jury Mystery Series/Martha Grimes) (Score: 0.7411978244781494)
- Smoke and Mirrors: Short Fictions and Illusions (Score: 0.7420641183853149)
- Sisterchicks on the Loose (Sisterchicks) (Score: 0.7487606406211853)
