---
# IMPORTS
---

In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, concatenate, Dropout

---
# DADOS
---

In [11]:
books = pd.read_csv('data/Books.csv')
ratings = pd.read_csv('data/Ratings.csv')
users = pd.read_csv('data/Users.csv')

  books = pd.read_csv('data/Books.csv')


In [12]:
books.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [13]:
ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [14]:
users.head()

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


---
# PREPROCESSING
---

In [15]:
users['Age'].fillna(users['Age'].mode().iloc[0], inplace = True) # Preencher valores nulos com a moda
books.drop(['Image-URL-S', 'Image-URL-M', 'Image-URL-L'], axis = 1, inplace = True) # Excluindo imagens
users.drop('Location', axis = 1, inplace = True) # Excluindo Location
users.reset_index(drop = True, inplace = True) # Resetando index

df = pd.merge(pd.merge(ratings, users, on = 'User-ID'), books, on = 'ISBN') # Merge dos Dataframes
df.dropna(inplace = True) # Apagando nulos

label_encoder_isbn = LabelEncoder() # Instanciando LabelEncoder
df['ISBN'] = label_encoder_isbn.fit_transform(df['ISBN']) # Converter ISBN para números inteiros
df['Book-Title'].drop_duplicates()

print(df.head()) # Resultado df

train, test = train_test_split(df, test_size = 0.2, random_state = 0) # Dividir os dados em treino e teste


   User-ID   ISBN  Book-Rating   Age            Book-Title Book-Author  \
0   276725  45921            0  24.0  Flesh Tones: A Novel  M. J. Rose   
1     2313  45921            5  23.0  Flesh Tones: A Novel  M. J. Rose   
2     6543  45921            0  34.0  Flesh Tones: A Novel  M. J. Rose   
3     8680  45921            5   2.0  Flesh Tones: A Novel  M. J. Rose   
4    10314  45921            9  24.0  Flesh Tones: A Novel  M. J. Rose   

  Year-Of-Publication         Publisher  
0                2002  Ballantine Books  
1                2002  Ballantine Books  
2                2002  Ballantine Books  
3                2002  Ballantine Books  
4                2002  Ballantine Books  


---
# ARQUITETURA DO MODELO
---

In [16]:
# Número de usuários e livros
n_users = len(df['User-ID'].unique())  # Conta o número de usuários únicos nos dados
n_books = len(df['ISBN'].unique())      # Conta o número de livros únicos nos dados

# Arquitetura do modelo
user_input = Input(shape=(1,)) # Entrada ID do usuário
book_input = Input(shape=(1,)) # Entrada para o ISBN do livro
user_embed = Embedding(n_users, 50)(user_input)  # Mapeia o ID para um vetor tamanho 50
book_embed = Embedding(n_books, 50)(book_input)  # Mapeia o ISBN para um vetor tamanho 50
user_flat = Flatten()(user_embed)         # Achatamento de usuário
book_flat = Flatten()(book_embed)         # Achatamento de livro
concat = concatenate([user_flat, book_flat])    # Concatenação de usuário e livro
dense1 = Dense(128, activation='relu')(concat)  # Camada densa com 128 neurônios e função de ativação ReLU
dropout1 = Dropout(0.3)(dense1)           # Regularização
dense2 = Dense(64, activation='relu')(dropout1)  # 64 neurônios
dropout2 = Dropout(0.3)(dense2)           # Regularização
dense3 = Dense(32, activation='relu')(dropout2)  # 32 neurônios
output = Dense(1)(dense3)                 # Saída para previsão

# Compilação do modelo
model = Model(inputs = [user_input, book_input], outputs = output)
model.compile(loss = 'mean_squared_error', optimizer = 'adam')

---
# TREINAMENTO E AVALIAÇÃO DO MODELO
---

In [17]:
# Treinamento do modelo
print('Treinamento do Modelo: \n')
model.fit([train['User-ID'], train['ISBN']], train['Book-Rating'], epochs = 3, batch_size = 512, verbose = 1)

# Avaliação do modelo
print('Avaliação do Modelo: \n')
loss = model.evaluate([test['User-ID'], test['ISBN']], test['Book-Rating'])
print("Teste Loss:", loss)

Treinamento do Modelo: 

Epoch 1/3
[1m1612/1612[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m515s[0m 318ms/step - loss: 14.6428
Epoch 2/3
[1m1612/1612[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m484s[0m 300ms/step - loss: 11.6162
Epoch 3/3
[1m1612/1612[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m497s[0m 297ms/step - loss: 9.8647
Avaliação do Modelo: 

[1m6445/6445[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 804us/step - loss: 15.2449
Teste Loss: 15.206483840942383


---
# RECOMENDAÇÕES
---

In [22]:
def recomendar(user_id, model, df, n_recomendacoes=5):
    user_ids = np.array([user_id] * len(df['ISBN'])) # Matriz de entrada com ID do usuario para cada ISBN
    ratings = model.predict([user_ids, df['ISBN']]) # Prever as classificações
    # Dataframe com os resultados
    recomendacoes = pd.DataFrame({'Book-Title': [df[df['ISBN'] == isbn]
                                 ['Book-Title'].iloc[0] for isbn in df['ISBN']],
                                  'Predicted_Rating': ratings.flatten()})
    
    # Remover duplicatas antes de ordenar
    recomendacoes = recomendacoes.drop_duplicates(subset=['Book-Title'])
    
    recomendacoes = recomendacoes.sort_values(by='Predicted_Rating', ascending = False) # Ordenar os ISBNs
    top_recomendacoes = recomendacoes.head(n_recomendacoes) # Obter as top recomendações
    
    return top_recomendacoes # Resultados

In [25]:
# Exemplo com ID 1
user_id = 1
top_recomendacoes = recomendar(user_id, model, df)
print('Top 5 Recomendações para o Usuário', user_id)
top_recomendacoes.head()

Top 5 Recomendações para o Usuário 1


Unnamed: 0,Book-Title,Predicted_Rating
414515,Chobits (Chobits),9.456205
386491,Die Orks.,8.769119
546622,Die leichten Schritte des Wahnsinns.,8.767396
838828,The Cake Mix Doctor,8.752995
396173,Life Is So Good,8.74641
