## **Mecanismo de recomendação de livros usando KNN**

Importando Bibliotecas

In [None]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt

Pegando os dados

In [None]:
!wget https://cdn.freecodecamp.org/project-data/books/book-crossings.zip

!unzip book-crossings.zip

books_filename = 'BX-Books.csv'
ratings_filename = 'BX-Book-Ratings.csv'

--2023-12-27 14:56:08--  https://cdn.freecodecamp.org/project-data/books/book-crossings.zip
Resolving cdn.freecodecamp.org (cdn.freecodecamp.org)... 172.67.70.149, 104.26.2.33, 104.26.3.33, ...
Connecting to cdn.freecodecamp.org (cdn.freecodecamp.org)|172.67.70.149|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 26085508 (25M) [application/zip]
Saving to: ‘book-crossings.zip’


2023-12-27 14:56:09 (54.7 MB/s) - ‘book-crossings.zip’ saved [26085508/26085508]

Archive:  book-crossings.zip
  inflating: BX-Book-Ratings.csv     
  inflating: BX-Books.csv            
  inflating: BX-Users.csv            


Importando os dados csv em dataframes

In [None]:
# Books DataFrame: only ISBN, title and author are loaded, all as strings.
colunas_livros = ['isbn', 'titulo', 'autor']

df_books = pd.read_csv(
    books_filename,
    sep=";",
    encoding="ISO-8859-1",
    header=0,  # row 0 is the file's own header; `names` supplies the labels used here
    names=colunas_livros,
    usecols=colunas_livros,
    dtype={coluna: 'str' for coluna in colunas_livros},
)

# Preview the first rows.
df_books.head(5)

Unnamed: 0,isbn,titulo,autor
0,195153448,Classical Mythology,Mark P. O. Morford
1,2005018,Clara Callan,Richard Bruce Wright
2,60973129,Decision in Normandy,Carlo D'Este
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata
4,393045218,The Mummies of Urumchi,E. J. W. Barber


In [None]:
# Ratings DataFrame: one row per (user, ISBN) pair with the rating given.
colunas_notas = ['usuario', 'isbn', 'nota']
tipos_notas = {'usuario': 'int32', 'isbn': 'str', 'nota': 'float32'}

df_ratings = pd.read_csv(
    ratings_filename,
    sep=";",
    encoding="ISO-8859-1",
    header=0,  # row 0 is the file's own header; `names` supplies the labels used here
    names=colunas_notas,
    usecols=colunas_notas,
    dtype=tipos_notas,
)

# Preview the first rows.
df_ratings.head(5)

Unnamed: 0,usuario,isbn,nota
0,276725,034545104X,0.0
1,276726,0155061224,5.0
2,276727,0446520802,0.0
3,276729,052165615X,3.0
4,276729,0521795028,6.0


Excluindo usuários com menos de 200 avaliações e livros (ISBN) com menos de 100 avaliações

In [None]:
# Minimum number of ratings a user / a book (ISBN) needs in order to be kept.
MIN_AVALIACOES_USUARIO = 200
MIN_AVALIACOES_LIVRO = 100

# Both counts are computed on the ratings as they are before any row is dropped.
qtd_usuarios = df_ratings['usuario'].value_counts()
qtd_isbn = df_ratings['isbn'].value_counts()

usuarios_raros = qtd_usuarios[qtd_usuarios < MIN_AVALIACOES_USUARIO].index
isbn_raros = qtd_isbn[qtd_isbn < MIN_AVALIACOES_LIVRO].index

# Keep only rows whose user AND book are both frequent enough (single pass).
linhas_mantidas = (
    ~df_ratings['usuario'].isin(usuarios_raros)
    & ~df_ratings['isbn'].isin(isbn_raros)
)

# NOTE: this rebinds df_ratings, so re-running this cell on its own recomputes
# the counts on already-filtered data; re-run the loading cell first.
df_ratings = df_ratings[linhas_mantidas]

Junção entre os dois DataFrames

In [None]:
# Inner join on ISBN: books are the left side, the filtered ratings the right side.
df = df_books.merge(df_ratings, how="inner", on="isbn")

Removendo dados duplicados

In [None]:
# Keep the first row for each (title, user) pair so the pair is unique.
df = df.drop_duplicates(subset=['titulo', 'usuario'], keep='first')

Criação de uma tabela dinâmica com substituição de valores NaN por 0

In [None]:
# Title x user table of ratings: one row per title, one column per user.
piv = df.pivot(index='titulo', columns='usuario', values='nota')
# A missing (title, user) combination is stored as 0.
piv = piv.fillna(0)

Extração dos valores em uma matriz

In [None]:
# Plain NumPy view of the pivot table (rows follow piv.index order).
matrix = piv.to_numpy()

Criação do modelo

In [None]:
# Brute-force nearest-neighbour search using cosine distance between rows.
parametros_knn = {'metric': 'cosine', 'algorithm': 'brute'}
model = NearestNeighbors(**parametros_knn)

# Fit on the title x user matrix; each row is one book.
model.fit(matrix)

Criação da função para recomendação dos livros

In [None]:
def get_recommends(livro, n_recomendacoes=5):
  """Return the books whose rating rows are closest to the given title.

  Uses the notebook-level pivot table ``piv`` (one row per title) and the
  fitted ``model`` to look up the nearest neighbours of the title's row.

  Args:
    livro: Title to look up; must be a label of ``piv.index``.
    n_recomendacoes: Maximum number of recommendations to return (default 5).

  Returns:
    ``[livro, recomendacoes]`` where ``recomendacoes`` is a list of
    ``[titulo, distancia]`` pairs in the reverse of the order returned by
    ``model.kneighbors`` (so the most distant neighbour comes first).

  Raises:
    KeyError: If ``livro`` is not a label of ``piv.index``.
    ValueError: If ``n_recomendacoes`` is smaller than 1.
  """
  if n_recomendacoes < 1:
    raise ValueError("n_recomendacoes must be at least 1")
  if livro not in piv.index:
    raise KeyError(f"Book not found in the ratings table: {livro!r}")

  # Row with every rating of the book, shaped as a single 2-D sample.
  linha_livro = piv.loc[livro].to_numpy().reshape(1, -1)

  # Ask for one extra neighbour because the queried book is expected to be
  # among its own neighbours; never ask for more rows than the table has.
  n_vizinhos = min(n_recomendacoes + 1, len(piv.index))
  distancias, indices = model.kneighbors(linha_livro, n_neighbors=n_vizinhos)

  # Drop the queried book by title rather than by "distance != 0": the self
  # distance can be a tiny non-zero float, and a different book with identical
  # ratings legitimately has distance 0.
  lista_livros = [
    [piv.index[indice], distancia]
    for distancia, indice in zip(distancias[0], indices[0])
    if piv.index[indice] != livro
  ][:n_recomendacoes]

  return [livro, lista_livros[::-1]]

In [None]:
# Example query: show the recommendations for one known title.
livro_exemplo = "Where the Heart Is (Oprah's Book Club (Paperback))"
print(get_recommends(livro_exemplo))

["Where the Heart Is (Oprah's Book Club (Paperback))", [["I'll Be Seeing You", 0.8016211], ['The Weight of Water', 0.77085835], ['The Surgeon', 0.7699411], ['I Know This Much Is True', 0.7677075], ['The Lovely Bones: A Novel', 0.7234864]]]


Avaliação do modelo

In [None]:
# Compute the recommendations for the reference title and print the raw result.
livro_referencia = "Where the Heart Is (Oprah's Book Club (Paperback))"
books = get_recommends(livro_referencia)
print(books)

def test_book_recommendation():
  test_pass = True
  recommends = get_recommends("Where the Heart Is (Oprah's Book Club (Paperback))")
  if recommends[0] != "Where the Heart Is (Oprah's Book Club (Paperback))":
    test_pass = False
  recommended_books = ["I'll Be Seeing You", 'The Weight of Water', 'The Surgeon', 'I Know This Much Is True']
  recommended_books_dist = [0.8, 0.77, 0.77, 0.77]
  for i in range(2):
    if recommends[1][i][0] not in recommended_books:
      test_pass = False
    if abs(recommends[1][i][1] - recommended_books_dist[i]) >= 0.05:
      test_pass = False
  if test_pass:
    print("You passed the challenge! ðŸŽ‰ðŸŽ‰ðŸŽ‰ðŸŽ‰ðŸŽ‰")
  else:
    print("You haven't passed yet. Keep trying!")

# Run the check defined in this cell; it prints a pass/fail message.
test_book_recommendation()

["Where the Heart Is (Oprah's Book Club (Paperback))", [["I'll Be Seeing You", 0.8016211], ['The Weight of Water', 0.77085835], ['The Surgeon', 0.7699411], ['I Know This Much Is True', 0.7677075], ['The Lovely Bones: A Novel', 0.7234864]]]
You passed the challenge! 🎉🎉🎉🎉🎉
