<a href="https://colab.research.google.com/github/GeorgeSantos1/Projetos/blob/main/Sistema_de_Recomendacao.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Sistema de Recomendação de Livros

Carlos André, George Anderson

### Carregando Pacotes

"https://www.kaggle.com/datasets/saurabhbagchi/books-dataset/"

In [None]:
# Carregando pacotes
import os
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


### Importando Bases

In [None]:
# Mount Google Drive at /content/drive (Colab only); the CSV files loaded
# later in this notebook are read from under that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Load the books and ratings tables (semicolon-separated, latin-1 encoded).
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0, so it
# raises TypeError on current kernels; `on_bad_lines="skip"` is the replacement
# and likewise drops malformed rows instead of aborting the read.
DATA_DIR = "/content/drive/MyDrive/books_data"

livros = pd.read_csv(os.path.join(DATA_DIR, "books.csv"), sep=";",
                     on_bad_lines="skip", encoding="latin-1")
livros.columns = ['ISBN', 'bookTitle', 'bookAuthor', 'yearOfPublication', 'publisher',
                  'imageUrlS', 'imageUrlM', 'imageUrlL']

notas = pd.read_csv(os.path.join(DATA_DIR, "ratings.csv"), sep=';',
                    on_bad_lines="skip", encoding="latin-1")
notas.columns = ['userID', 'ISBN', 'bookRating']

### Visualizando Bases

In [None]:
# Preview the books table.
livros

Unnamed: 0,ISBN,bookTitle,bookAuthor,yearOfPublication,publisher
0,0195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press
1,0002005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada
2,0060973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial
3,0374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux
4,0393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company
...,...,...,...,...,...
271355,0440400988,There's a Bat in Bunk Five,Paula Danziger,1988,Random House Childrens Pub (Mm)
271356,0525447644,From One to One Hundred,Teri Sloat,1991,Dutton Books
271357,006008667X,Lily Dale : The True Story of the Town that Ta...,Christine Wicker,2004,HarperSanFrancisco
271358,0192126040,Republic (World's Classics),Plato,1996,Oxford University Press


In [None]:
# Preview the ratings table.
notas

Unnamed: 0,userID,ISBN,bookRating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6
...,...,...,...
1149775,276704,1563526298,9
1149776,276706,0679447156,0
1149777,276709,0515107662,10
1149778,276721,0590442449,10


In [None]:
# Report (rows, columns) for the books table and then the ratings table.
for tabela in (livros, notas):
    print(tabela.shape)

(271360, 8)
(1149780, 3)


### Modificando as Bases

In [None]:
# Remove the image-URL columns, which are not used anywhere else in the notebook.
# Reassigning with errors="ignore" (instead of inplace=True) keeps the cell
# idempotent: re-running it no longer raises KeyError once the columns are gone.
livros = livros.drop(columns=['imageUrlS', 'imageUrlM', 'imageUrlL'], errors="ignore")

In [None]:
# Keep only the ratings whose ISBN also appears in the books table.
isbn_conhecido = notas["ISBN"].isin(livros["ISBN"])
notas_novo = notas.loc[isbn_conhecido]
notas_novo.shape

(1031136, 3)

In [None]:
# Discard rows whose rating is 0, keeping only ratings greater than zero.
nota_positiva = notas_novo["bookRating"].gt(0)
notas_novo = notas_novo.loc[nota_positiva]
notas_novo.shape

(383842, 3)

In [None]:
# Keep only books with at least 30 ratings, in both the ratings and the books tables.
contagem = notas_novo["ISBN"].value_counts()
isbns_frequentes = contagem.loc[contagem.ge(30)].index
notas_novo = notas_novo.loc[notas_novo["ISBN"].isin(isbns_frequentes)]
livros_novo = livros.loc[livros["ISBN"].isin(isbns_frequentes)]

In [None]:
# Build the user-item utility matrix: one row per userID, one column per ISBN,
# cells hold bookRating (NaN where the user has no rating for that book).
notas_matrix = notas_novo.pivot(index='userID', columns='ISBN', values='bookRating')
userID = notas_matrix.index
ISBN = notas_matrix.columns

# Relabel the columns from ISBN to title. The relabelling is positional, so it is
# only valid when the ISBN-sorted books table lines up one-to-one with the pivot's
# columns; verify that explicitly instead of silently mislabelling books.
bookTitle = livros_novo.sort_values("ISBN")
assert bookTitle["ISBN"].tolist() == ISBN.tolist(), \
    "books table is not aligned with the utility-matrix columns"
notas_matrix.columns = bookTitle['bookTitle']

# `notas_matrix.head` without parentheses only displays the bound method;
# print the shape and call head() to actually preview the matrix.
print(notas_matrix.shape)
notas_matrix.head()

(26401, 1130)


bookTitle,Angelas Ashes,Politically Correct Bedtime Stories: Modern Tales for Our Life and Times,Once upon a More Enlightened Time: More Politically Correct Bedtime Stories,The Death of Vishnu: A Novel,Angels,The Boy Next Door,"Men Are from Mars, Women Are from Venus: A Practical Guide for Improving Communication and Getting What You Want in Your Relationships",Divine Secrets of the Ya-Ya Sisterhood : A Novel,The Poisonwood Bible,The Professor and the Madman,...,Free,Shipping News,The Four Agreements: A Practical Guide to Personal Freedom,The Time Traveler's Wife (Today Show Book Club #15),Das Parfum: Die Geschichte Eines Morders,Der Vorleser,Illuminati.,MÃ?Â¶rder ohne Gesicht.,Russendisko.,Stupid White Men. Eine Abrechnung mit dem Amerika unter George W. Bush
userID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
9,,,,,,,,,,,...,,,,,,,,,,
16,,,,,,,,,,,...,,,,,,,,,,
26,,,,,,,,,,,...,,,,,,,,,,
39,,,,,,,,,,,...,,,,,,,,,,
42,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
278843,,,,,,,,9.0,,,...,,,,,,,,,,
278844,,,,,,,,,,,...,,,,,,,,,,
278846,,,,,,,,,,,...,,,,,,,,,,
278851,,,,,,,,,,,...,,,,,,,,,,


In [None]:

# Item-item cosine similarity: transpose so that each book is a row and
# replace missing ratings with 0 before comparing the rating vectors.
matriz_itens = notas_matrix.T.fillna(0)
itembased_cos = cosine_similarity(matriz_itens)

In [None]:
# Wrap the similarity array in a DataFrame labelled by book title on both axes.
titulos = bookTitle["bookTitle"]
itembased_cos = pd.DataFrame(itembased_cos, index=titulos, columns=titulos)
itembased_cos

bookTitle,Angelas Ashes,Politically Correct Bedtime Stories: Modern Tales for Our Life and Times,Once upon a More Enlightened Time: More Politically Correct Bedtime Stories,The Death of Vishnu: A Novel,Angels,The Boy Next Door,"Men Are from Mars, Women Are from Venus: A Practical Guide for Improving Communication and Getting What You Want in Your Relationships",Divine Secrets of the Ya-Ya Sisterhood : A Novel,The Poisonwood Bible,The Professor and the Madman,...,Free,Shipping News,The Four Agreements: A Practical Guide to Personal Freedom,The Time Traveler's Wife (Today Show Book Club #15),Das Parfum: Die Geschichte Eines Morders,Der Vorleser,Illuminati.,MÃ?Â¶rder ohne Gesicht.,Russendisko.,Stupid White Men. Eine Abrechnung mit dem Amerika unter George W. Bush
bookTitle,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Angelas Ashes,1.000000,0.059837,0.000000,0.067350,0.028577,0.000000,0.043903,0.0,0.020793,0.000000,...,0.020425,0.056499,0.015513,0.029615,0.000000,0.000000,0.031760,0.026921,0.026132,0.031333
Politically Correct Bedtime Stories: Modern Tales for Our Life and Times,0.059837,1.000000,0.245434,0.011428,0.015558,0.000000,0.014222,0.0,0.024904,0.000000,...,0.011120,0.014474,0.000000,0.016122,0.000000,0.000000,0.017291,0.014656,0.014226,0.017058
Once upon a More Enlightened Time: More Politically Correct Bedtime Stories,0.000000,0.245434,1.000000,0.000000,0.000000,0.023053,0.000000,0.0,0.017989,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
The Death of Vishnu: A Novel,0.067350,0.011428,0.000000,1.000000,0.024334,0.000000,0.039546,0.0,0.017705,0.057815,...,0.017392,0.022639,0.015097,0.025217,0.000000,0.000000,0.027044,0.022924,0.022251,0.026680
Angels,0.028577,0.015558,0.000000,0.024334,1.000000,0.036488,0.063930,0.0,0.024103,0.000000,...,0.023676,0.050082,0.000000,0.034329,0.000000,0.000000,0.036816,0.031207,0.030291,0.036321
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Der Vorleser,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.066448,1.000000,0.030943,0.000000,0.028057,0.048455
Illuminati.,0.031760,0.017291,0.000000,0.027044,0.036816,0.000000,0.033655,0.0,0.026788,0.000000,...,0.026314,0.034253,0.000000,0.038152,0.076322,0.030943,1.000000,0.078036,0.050979,0.040366
MÃ?Â¶rder ohne Gesicht.,0.026921,0.014656,0.000000,0.022924,0.031207,0.000000,0.028528,0.0,0.022706,0.000000,...,0.022304,0.029034,0.000000,0.032339,0.000000,0.000000,0.078036,1.000000,0.028536,0.054175
Russendisko.,0.026132,0.014226,0.000000,0.022251,0.030291,0.000000,0.027691,0.0,0.022040,0.000000,...,0.021650,0.028182,0.000000,0.031391,0.000000,0.028057,0.050979,0.028536,1.000000,0.117034


In [None]:
# Cosine similarity - item based
def get_sim_itembased(book, n=5):
  """Return the `n` books most similar to `book`.

  Reads the `book` column of the module-level `itembased_cos` similarity
  table, removes the entry for `book` itself and returns the `n` highest
  scores in descending order.

  Args:
    book: Title to look up; must be a column label of `itembased_cos`.
    n: Number of similar titles to return (default 5).

  Returns:
    The `n` highest-scoring entries of `itembased_cos[book]`, excluding
    `book` itself.

  Raises:
    KeyError: if `book` is not a column of `itembased_cos`.
  """
  scores = itembased_cos[book]
  # Remove the queried title by label rather than slicing off the first sorted
  # row: when another book ties at the top score, the first row after sorting
  # is not guaranteed to be the queried title.
  scores = scores.drop(index=book, errors="ignore")
  return scores.sort_values(ascending=False).head(n)


### Gerando recomendações para alguns livros

In [None]:
# Most similar titles to "Animal Farm" according to the item-item similarity table.
get_sim_itembased("Animal Farm")

bookTitle
1984                                                         0.148712
The Green Mile: The Mouse on the Mile (Green Mile Series)    0.120728
Lord of the Flies                                            0.109087
The Green Mile: Coffey's Hands (Green Mile Series)           0.088075
The Two Dead Girls (Green Mile Series)                       0.078501
Name: Animal Farm, dtype: float64

In [None]:
# Most similar titles to "Fight Club" according to the item-item similarity table.
get_sim_itembased("Fight Club")

bookTitle
Invisible Monsters                           0.265240
Survivor : A Novel                           0.237804
Choke                                        0.135046
Lullaby: A Novel                             0.129517
Wizard and Glass (The Dark Tower, Book 4)    0.098695
Name: Fight Club, dtype: float64

In [None]:
# Most similar titles to "The Hobbit" according to the item-item similarity table.
get_sim_itembased("The Hobbit : The Enchanting Prelude to The Lord of the Rings")

bookTitle
The Return of the King (The Lord of the Rings, Part 3)               0.238425
The Two Towers (The Lord of the Rings, Part 2)                       0.226759
The Fellowship of the Ring (The Lord of the Rings, Part 1)           0.221640
The Silmarillion                                                     0.061428
Left Behind: A Novel of the Earth's Last Days (Left Behind No. 1)    0.060884
Name: The Hobbit : The Enchanting Prelude to The Lord of the Rings, dtype: float64

In [None]:
# Most similar titles to "The Green Mile" according to the item-item similarity table.
get_sim_itembased("The Green Mile")

bookTitle
Dreamcatcher                                  0.116914
It                                            0.113535
The Dark Half                                 0.093562
Firestarter                                   0.092849
Needful Things: The Last Castle Rock Story    0.090285
Name: The Green Mile, dtype: float64

### Gerando Recomendações para um indivíduo em específico

In [None]:
# User whose ratings are inspected in the cells below.
user_id = 11676

# Books this user has rated: drop the unrated (NaN) entries of the user's row,
# order from highest to lowest rating, and expose the result as a two-column
# table (bookTitle, ratings).
avaliacoes_usuario = notas_matrix.loc[user_id].dropna()
user_id_read = (
    avaliacoes_usuario.to_frame()
    .sort_values(by=[user_id], ascending=False)
    .reset_index()
    .rename(columns={user_id: "ratings"})
)

user_id_read


Unnamed: 0,bookTitle,ratings
0,The Outsiders,10.0
1,Bleachers,10.0
2,The Cat in the Hat,10.0
3,Roses Are Red,10.0
4,Cat and Mouse,10.0
...,...,...
636,The Horse Whisperer,1.0
637,She's Come Undone (Oprah's Book Club (Paperback)),1.0
638,The Firm,1.0
639,The Lovely Bones,1.0


In [None]:
# Title of the book whose rating will be predicted for the user.
book_picked = "Pride and Prejudice"

In [None]:
# Similarity between `book_picked` and every book in the similarity table,
# as a table whose score column is renamed to "similarity_score".
book_picked_simscore = (
    itembased_cos[book_picked]
    .reset_index()
    .rename(columns={book_picked: "similarity_score"})
)



In [None]:
# Similarity between `book_picked` and the books the user has rated: keep the
# five rated books most similar to it.
lidos_com_similaridade = pd.merge(left=user_id_read,
                                  right=book_picked_simscore,
                                  on="bookTitle",
                                  how="inner")

# Exclude `book_picked` by title instead of slicing off the first sorted row:
# the first row is only the book itself when the user has rated it, otherwise
# the slice would throw away the most similar neighbour.
eh_outro_livro = lidos_com_similaridade["bookTitle"] != book_picked
book_user_id_read_simscore = lidos_com_similaridade[eh_outro_livro]\
  .sort_values("similarity_score",ascending=False)\
  .head(5)

book_user_id_read_simscore

Unnamed: 0,bookTitle,ratings,similarity_score
339,Nicolae: The Rise of Antichrist (Left Behind N...,8.0,0.084448
245,Amsterdam : A Novel,9.0,0.082439
440,Killjoy,8.0,0.081008
26,The English Patient,10.0,0.079201
399,I'll Be Seeing You,8.0,0.077192


In [None]:
# Predicted rating: similarity-weighted average of the user's ratings for the
# selected neighbour books, rounded to 4 decimals.
# Uses `book_user_id_read_simscore`, the table built in the previous cell; the
# earlier name `book_user_id_watched_simscore` is never defined in this notebook
# and raised NameError on a fresh kernel.
predicted_rating = round(np.average(book_user_id_read_simscore["ratings"],
                                    weights=book_user_id_read_simscore["similarity_score"]),4)

In [None]:
# Print the predicted rating for the chosen book and user.
print(f'A nota prevista para {book_picked} pelo usuário {user_id} é {predicted_rating}')

A nota prevista para Pride and Prejudice pelo usuário 11676 é 8.5957


### Sistema de Recomendação para um usuário específico

In [None]:
def item_based_rec(user_id, number_of_similar_items = 5,recomendations=5):
  """Recommend unrated books to a user with item-based collaborative filtering.

  For every book the user has not rated, the rating is predicted as the
  similarity-weighted average of the user's ratings for the
  `number_of_similar_items` rated books most similar to it. Reads the
  module-level `notas_matrix` (user x title ratings) and `itembased_cos`
  (title x title similarity) tables.

  Args:
    user_id: Row label of the user in `notas_matrix`.
    number_of_similar_items: Number of rated neighbour books used per prediction.
    recomendations: Number of (title, predicted rating) pairs to return.

  Returns:
    A list of `(title, predicted_rating)` tuples sorted by predicted rating,
    highest first, with at most `recomendations` entries. Books for which no
    neighbour has a positive similarity are left out.

  Raises:
    KeyError: if `user_id` is not present in `notas_matrix`.
  """
  user_ratings = notas_matrix.T[user_id]

  # Books the user has not rated
  userid_unread = user_ratings[user_ratings.isna()].index.tolist()

  # Books the user has rated, highest rating first
  user_id_watched = user_ratings.dropna().to_frame()\
                    .sort_values(by=[user_id],ascending=False)\
                    .reset_index()\
                    .rename(columns={user_id:"ratings"})

  rating_prediction = {}

  # Loop over the unrated books
  for book_picked in userid_unread:
    book_picked_simscore = itembased_cos[book_picked].reset_index()\
                                                 .rename(columns={book_picked:"similarity_score"})

    # A title that labels several columns yields several score columns; keep the first.
    book_picked_simscore = book_picked_simscore.loc[:,~book_picked_simscore.columns.duplicated()].copy()

    # The candidate is unrated, so it is not matched against itself here:
    # take the top rows directly instead of skipping the first one, which
    # would discard the most similar rated book.
    book_user_id_read_simscore = pd.merge(left=user_id_watched,
         right=book_picked_simscore,
         on="bookTitle",
         how="inner")\
         .sort_values("similarity_score",ascending=False)\
         .head(number_of_similar_items)

    # np.average raises ZeroDivisionError when the weights sum to zero, so a
    # book with no positively-similar rated neighbour cannot be scored; skip it.
    total_weight = book_user_id_read_simscore["similarity_score"].sum()
    if not total_weight > 0:
      continue

    predicted_rating = round(np.average(book_user_id_read_simscore["ratings"],
                                    weights=book_user_id_read_simscore["similarity_score"]),4)

    rating_prediction[book_picked] = predicted_rating

  ranked = sorted(rating_prediction.items(),key=lambda item: item[1],reverse=True)
  return ranked[:recomendations]

In [None]:
# Recommendations for user 11676, using 10 neighbour books per prediction.
item_based_rec(user_id=11676,number_of_similar_items = 10)

[('Key of Light (Key Trilogy (Paperback))', 9.3979),
 ('The Princess Diaries', 9.2553),
 ('Desecration: Antichrist Takes the Throne (Left Behind No. 9)', 9.2427),
 ('Harry Potter and the Order of the Phoenix (Book 5)', 9.1836),
 ('Tears of the Moon (Irish Trilogy)', 9.1784)]