Imports:

In [None]:
!pip install streamlit
!pip install gdown

In [44]:
import pandas as pd
import numpy as np
import gdown
import pickle
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

Get Database.
Database created via Google Books API.

In [20]:
FILE_ID = "16EItknbzHEvg1MHcbrEvKsQRpGHq91j4"
url = f"https://drive.google.com/uc?id={FILE_ID}"

gdown.download(url, 'books.csv', quiet=False)

Downloading...
From: https://drive.google.com/uc?id=16EItknbzHEvg1MHcbrEvKsQRpGHq91j4
To: /content/books.csv
100%|██████████| 1.54M/1.54M [00:00<00:00, 13.5MB/s]


'books.csv'

In [21]:
df = pd.read_csv('books.csv')

df.head()

Unnamed: 0,title,category,description
0,Shipwrecked A Donkey Ollie Adventure,Adventure,Donkey Ollie is a well loved character who has...
1,The Adventure to Financial Freedom,Adventure,In A Journey to Financial Freedom: A Guide for...
2,The Mystery of Adventure Island,Adventure,"In ""The Mystery of Adventure Island"" readers a..."
3,The King of Diamonds: A Tale of Mystery and Ad...,Adventure,A rags-to-riches story about a newly orphaned ...
4,The Adventure Bible for NIrV: Book of Devotion...,Adventure,Buckle up hold tight and get ready for adventu...


In [22]:
df['description'] = df['description'].str.strip().str.lower()

# transforma categoria em dados numericos
label_encoder = LabelEncoder()
df['category'] = label_encoder.fit_transform(df['category'])

X = df['description']
y = df['category']

In [23]:
tfidf_vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
X_tfidf = tfidf_vectorizer.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=42)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

In [36]:
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

                    precision    recall  f1-score   support

         Adventure       0.67      0.21      0.32        19
             Drama       0.54      0.75      0.62        20
           Fantasy       0.95      0.58      0.72        36
Historical Fiction       0.47      0.71      0.57        49
            Horror       0.76      0.76      0.76        42
           Mystery       0.62      0.74      0.67        54
           Romance       0.43      0.12      0.19        24
   Science Fiction       0.81      0.69      0.74        51
          Thriller       0.69      0.85      0.76        34

          accuracy                           0.65       329
         macro avg       0.66      0.60      0.60       329
      weighted avg       0.67      0.65      0.64       329



In [41]:
new_description = ["At the very edge of the front lines stands a young girl. She has golden hair, blue eyes, and pale, almost translucent skin. This girl soars through the skies, mercilessly cutting down her enemies. She barks crisp orders with the unmistakable voice of a child. Her name is Tanya Degruechaff. But her true identity is that of a 40 year old Japanese elite salaryman who was forced by god to be reborn in the vessel of a little girl who must live in a tumultuous world racked by war. Concerned with being ultra-efficient and desiring self-promotion above all else, Degurechaff will join the ranks of the Imperial Army's Military Mages and become one of the most feared existences in this new world..."]
new_description_tfidf = tfidf_vectorizer.transform(new_description)

predicted_category = model.predict(new_description_tfidf)
predicted_category_name = label_encoder.inverse_transform(predicted_category)
print(f"Predicted Category: {predicted_category_name[0]}")

Predicted Category: Historical Fiction


**Streamlit:**

In [45]:
with open('book_classifier_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

with open('tfidf_vectorizer.pkl', 'wb') as vectorizer_file:
    pickle.dump(tfidf_vectorizer, vectorizer_file)