In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
import joblib


In [4]:
# Lê CSV limpo
df = pd.read_csv('../data/noticias_limpo.csv')

# Separa features e target
X = df['texto_limpo']
y = df['rotulo']

# Divide treino/teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Tokenização
max_words = 5000
max_len = 200
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)

X_train_pad = pad_sequences(tokenizer.texts_to_sequences(X_train), maxlen=max_len)
X_test_pad = pad_sequences(tokenizer.texts_to_sequences(X_test), maxlen=max_len)

# Modelo simples
model = Sequential([
    Embedding(input_dim=max_words, output_dim=64, input_length=max_len),
    LSTM(64),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Treina o modelo
model.fit(X_train_pad, y_train, epochs=3, batch_size=64, validation_split=0.2)

# Salva tokenizer para a API
joblib.dump(tokenizer, '../models/tokenizer.pkl')
print("Treino concluído e tokenizer salvo!")

Epoch 1/3




[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 79ms/step - accuracy: 0.9591 - loss: 0.1248 - val_accuracy: 0.9865 - val_loss: 0.0435
Epoch 2/3
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 70ms/step - accuracy: 0.9793 - loss: 0.0696 - val_accuracy: 0.9836 - val_loss: 0.0623
Epoch 3/3
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 70ms/step - accuracy: 0.9924 - loss: 0.0270 - val_accuracy: 0.9883 - val_loss: 0.0471
Treino concluído e tokenizer salvo!


In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model

# Salva modelo treinado
# (Se já estiver treinado no 02_treina_modelo.py, apenas salva aqui)
model.save('../models/modelo_fake_news.h5')
print("Modelo salvo com sucesso!")




Modelo salvo com sucesso!
