<a href="https://colab.research.google.com/github/MatheusMataBIO/Classificacao_de_Sentimentos_com_Deep_Learning_e_FastAPI/blob/main/Analise_Sentimento_DeepLearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Load the reviews dataset and preview it; head() with no argument shows the first five rows.
REVIEWS_CSV_PATH = '/content/Reviews.csv'
df_reviews = pd.read_csv(REVIEWS_CSV_PATH)
df_reviews.head()

Unnamed: 0,Id,ProductId,UserId,ProfileName,HelpfulnessNumerator,HelpfulnessDenominator,Score,Time,Summary,Text
0,1,B001E4KFG0,A3SGXH7AUHU8GW,delmartian,1,1,5,1303862400,Good Quality Dog Food,I have bought several of the Vitality canned d...
1,2,B00813GRG4,A1D87F6ZCVE5NK,dll pa,0,0,1,1346976000,Not as Advertised,Product arrived labeled as Jumbo Salted Peanut...
2,3,B000LQOCH0,ABXLMWJIXXAIN,"Natalia Corres ""Natalia Corres""",1,1,4,1219017600,"""Delight"" says it all",This is a confection that has been around a fe...
3,4,B000UA0QIQ,A395BORC6FGVXV,Karl,3,3,2,1307923200,Cough Medicine,If you are looking for the secret ingredient i...
4,5,B006K2ZZ7K,A1UQRSCLF8GW1T,"Michael D. Bigham ""M. Wassir""",0,0,5,1350777600,Great taffy,Great taffy at a great price. There was a wid...


## **Análise exploratória e processamento de dados**

In [None]:
# Normalise every column label to lowercase, then preview the first ten rows.
df_reviews.columns = df_reviews.columns.map(str.lower)
df_reviews.head(10)


Unnamed: 0,id,productid,userid,profilename,helpfulnessnumerator,helpfulnessdenominator,score,time,summary,text
0,1,B001E4KFG0,A3SGXH7AUHU8GW,delmartian,1,1,5,1303862400,Good Quality Dog Food,I have bought several of the Vitality canned d...
1,2,B00813GRG4,A1D87F6ZCVE5NK,dll pa,0,0,1,1346976000,Not as Advertised,Product arrived labeled as Jumbo Salted Peanut...
2,3,B000LQOCH0,ABXLMWJIXXAIN,"Natalia Corres ""Natalia Corres""",1,1,4,1219017600,"""Delight"" says it all",This is a confection that has been around a fe...
3,4,B000UA0QIQ,A395BORC6FGVXV,Karl,3,3,2,1307923200,Cough Medicine,If you are looking for the secret ingredient i...
4,5,B006K2ZZ7K,A1UQRSCLF8GW1T,"Michael D. Bigham ""M. Wassir""",0,0,5,1350777600,Great taffy,Great taffy at a great price. There was a wid...
5,6,B006K2ZZ7K,ADT0SRK1MGOEU,Twoapennything,0,0,4,1342051200,Nice Taffy,I got a wild hair for taffy and ordered this f...
6,7,B006K2ZZ7K,A1SP2KVKFXXRU1,David C. Sullivan,0,0,5,1340150400,Great! Just as good as the expensive brands!,This saltwater taffy had great flavors and was...
7,8,B006K2ZZ7K,A3JRGQVEQN31IQ,Pamela G. Williams,0,0,5,1336003200,"Wonderful, tasty taffy",This taffy is so good. It is very soft and ch...
8,9,B000E7L2R4,A1MZYO9TZK0BBI,R. James,1,1,5,1322006400,Yay Barley,Right now I'm mostly just sprouting this so my...
9,10,B00171APVA,A21BT40VZCCYT4,Carol A. Reed,0,0,5,1351209600,Healthy Dog Food,This is a very healthy dog food. Good for thei...


In [None]:
# Build the modelling frame in one chain. Every step returns a new DataFrame, so
# nothing is written into a slice of the original frame (the source of the
# SettingWithCopyWarning) and no step relies on in-place mutation.
df_reviews = (
    df_reviews[['score', 'summary', 'text']]    # keep only the columns used in the analysis
    .dropna(subset=['summary', 'text'])         # drop rows with a missing summary or text
    .drop_duplicates(subset=['text'])           # keep the first occurrence of each review text
    .loc[lambda frame: frame['score'] != 3]     # discard neutral reviews (score == 3)
    .assign(
        # Text sentiment label: score 4 or 5 -> 'Positivo', score 1 or 2 -> 'Negativo'.
        # The numeric 0/1 encoding is produced later, just before the train/test split.
        sentimento=lambda frame: frame['score'].gt(3).map(
            {True: 'Positivo', False: 'Negativo'}
        )
    )
)
df_reviews.head(10)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_reviews['sentimento'] = df_reviews['score'].apply(lambda rating: 'Positivo' if rating > 3 else 'Negativo')


Unnamed: 0,score,summary,text,sentimento
0,5,Good Quality Dog Food,I have bought several of the Vitality canned d...,Positivo
1,1,Not as Advertised,Product arrived labeled as Jumbo Salted Peanut...,Negativo
2,4,"""Delight"" says it all",This is a confection that has been around a fe...,Positivo
3,2,Cough Medicine,If you are looking for the secret ingredient i...,Negativo
4,5,Great taffy,Great taffy at a great price. There was a wid...,Positivo
5,4,Nice Taffy,I got a wild hair for taffy and ordered this f...,Positivo
6,5,Great! Just as good as the expensive brands!,This saltwater taffy had great flavors and was...,Positivo
7,5,"Wonderful, tasty taffy",This taffy is so good. It is very soft and ch...,Positivo
8,5,Yay Barley,Right now I'm mostly just sprouting this so my...,Positivo
9,5,Healthy Dog Food,This is a very healthy dog food. Good for thei...,Positivo


## **Treinamento do modelo**

In [None]:
# Feature / target split: the review text is the model input and the
# 'sentimento' label is the value to predict.
X, y = df_reviews['text'], df_reviews['sentimento']

In [None]:
from sklearn.model_selection import train_test_split

# Encode the target as integers: 1 for 'Positivo', 0 for everything else.
# The encoding is derived from df_reviews['sentimento'] instead of from `y` itself,
# so re-running this cell gives the same result (mapping an already-encoded `y`
# a second time would turn every label into 0).
y = df_reviews['sentimento'].eq('Positivo').astype('int64')

# Hold out 20% of the rows for testing. `stratify=y` keeps the class proportions
# equal in both partitions and `random_state` makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF settings, collected in one place so they are easy to review and tune.
tfidf_settings = {
    'max_features': 1000,       # cap the vocabulary size for performance
    'stop_words': 'english',    # drop common English words
    'lowercase': True,          # lowercase the text before tokenising
    'ngram_range': (1, 2),      # use unigrams (single words) and bigrams (consecutive word pairs)
}
vectorizer = TfidfVectorizer(**tfidf_settings)

# Learn the vocabulary and IDF weights from the training texts only, then apply
# that same fitted transformation to the test texts.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)




In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import set_random_seed

# Seed the Python, NumPy and TensorFlow generators so weight initialisation and
# dropout are repeatable; 42 matches the seed used for the train/test split.
set_random_seed(42)

# Densify the sparse TF-IDF matrices for the Dense layers. Casting to float32
# first halves the memory of the dense arrays.
# NOTE(review): assumes Keras' default float32 compute dtype is in effect - confirm.
X_train_array = X_train_tfidf.astype('float32').toarray()
X_test_array = X_test_tfidf.astype('float32').toarray()

# Lightweight classifier: one hidden layer, dropout for regularisation, and a
# sigmoid unit for the binary output. The input width is declared with an explicit
# Input layer; passing `input_shape=` to Dense inside a Sequential model is what
# raises the Keras warning seen when this cell was first run.
model = Sequential([
    Input(shape=(X_train_array.shape[1],)),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])

# Binary cross-entropy pairs with the sigmoid output; accuracy is tracked for reporting.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train with 20% of the training rows held out for validation. EarlyStopping halts
# training after 2 epochs without improvement and restores the best weights seen.
history = model.fit(
    X_train_array, y_train,
    validation_split=0.2,
    epochs=30,
    batch_size=512,
    callbacks=[EarlyStopping(patience=2, restore_best_weights=True)],
    verbose=1
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30
[1m455/455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.8499 - loss: 0.4128 - val_accuracy: 0.9060 - val_loss: 0.2310
Epoch 2/30
[1m455/455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9057 - loss: 0.2298 - val_accuracy: 0.9074 - val_loss: 0.2257
Epoch 3/30
[1m455/455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9113 - loss: 0.2189 - val_accuracy: 0.9077 - val_loss: 0.2248
Epoch 4/30
[1m455/455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9094 - loss: 0.2198 - val_accuracy: 0.9085 - val_loss: 0.2238
Epoch 5/30
[1m455/455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.9121 - loss: 0.2157 - val_accuracy: 0.9089 - val_loss: 0.2232
Epoch 6/30
[1m455/455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9125 - loss: 0.2148 - val_accuracy: 0.9095 - val_loss: 0.2227
Epoch 7/30
[1m455/455[0m 

In [None]:
import joblib

# Persist both training artefacts to the working directory: the trained network
# (HDF5 file) and the fitted TF-IDF vectorizer (joblib file).
model.save('modelo_sentimento.h5')
joblib.dump(vectorizer, "tfidf_vectorizer.joblib")



['tfidf_vectorizer.joblib']

## **Deploy do modelo via FastAPI**

In [None]:
''' O Google colab não permite acesso externo diretamente à porta onde o FastAPI roda.
Por isso, usei o ngrok, que cria um túnel público para a API.'''

# Instalar as dependências para utilizar o FastAPI
!pip install fastapi uvicorn nest-asyncio pyngrok --quiet


In [None]:
from fastapi import FastAPI
from pydantic import BaseModel
import joblib
import tensorflow as tf

# Reload the artefacts written by the training section from the working directory.
# NOTE: joblib.load unpickles the file, so only load vectorizer files you created yourself.
model = tf.keras.models.load_model('modelo_sentimento.h5')
vectorizer = joblib.load("tfidf_vectorizer.joblib")

# FastAPI application that exposes the model over HTTP.
app = FastAPI()

class InputTexto(BaseModel):
  """Request body accepted by the /predict endpoint."""
  # Raw review text to classify.
  texto: str

@app.post("/predict")
def predict_sentimento(dado: InputTexto):
  """Classify the sentiment of one piece of text.

  Args:
    dado: Request body carrying the text in its `texto` field.

  Returns:
    A dict with `sentimento` ('Positivo' when the model output is >= 0.5,
    otherwise 'Negativo') and `probabilidade` (the raw model output as a float).
  """
  print(f"Recebido: {dado.texto}")
  texto = dado.texto
  # Vectorise the single text and densify it so inference receives the same dense
  # input representation the model was trained on.
  vetor = vectorizer.transform([texto]).toarray()
  # predict() returns one row per input with one sigmoid output each; take that scalar.
  pred = model.predict(vetor)[0][0]
  sentimento = 'Positivo' if pred >= 0.5 else 'Negativo'
  return {"sentimento": sentimento, 'probabilidade': float(pred)}




In [None]:
import os
from getpass import getpass

import nest_asyncio
from pyngrok import ngrok
import uvicorn
from IPython.display import display, HTML

# Patch asyncio so uvicorn can be started from inside the notebook.
nest_asyncio.apply()

# ngrok auth token: read it from the NGROK_AUTHTOKEN environment variable, or ask
# for it with a hidden prompt. The token is a secret and must never be written
# into the notebook itself.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
if not ngrok_token:
    raise ValueError("An ngrok authtoken is required to open the public tunnel.")
ngrok.set_auth_token(ngrok_token)

# Open a tunnel to local port 8000 and keep only its public URL.
public_url = ngrok.connect(8000).public_url

# Show the public API address and the Swagger UI address.
print(f"✅ URL pública da API: {public_url}")
print(f"✅ Swagger: {public_url}/docs")

# Clickable link for Colab.
display(HTML(f'<a href="{public_url}/docs" target="_blank"><strong>🔗 Clique aqui para abrir a Swagger UI</strong></a>'))

# Start the server on the tunnelled port; this call blocks until the server is stopped.
uvicorn.run(app, host="0.0.0.0", port=8000)




✅ URL pública da API: https://7ddc-104-196-240-240.ngrok-free.app
✅ Swagger: https://7ddc-104-196-240-240.ngrok-free.app/docs


INFO:     Started server process [1418]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     2804:56c:d2f2:6400:b7:651a:c7a1:c846:0 - "GET /docs HTTP/1.1" 200 OK
INFO:     2804:56c:d2f2:6400:b7:651a:c7a1:c846:0 - "GET /openapi.json HTTP/1.1" 200 OK
Recebido: I like this movie, it's amazing
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 243ms/step
INFO:     2804:56c:d2f2:6400:b7:651a:c7a1:c846:0 - "POST /predict HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [1418]
