In [3]:
import torch
import torch.nn as nn


In [5]:

# Sample text: the entire training corpus for the character-level model
texto = "oi tudo bem? espero que sim!"

# Sorted list of distinct characters; its length is the vocabulary size
chars = sorted(set(texto))
vocab_size = len(chars)

# Lookup tables in both directions (index -> character, character -> index)
idx2char = dict(enumerate(chars))
char2idx = {ch: idx for idx, ch in idx2char.items()}

# Converting text to tensors
def texto_para_tensor(texto):
    """Encode ``texto`` as a 1-D ``torch.long`` tensor of character indices.

    Each character is looked up in the module-level ``char2idx`` table, so a
    character that is not in the table raises ``KeyError``.
    """
    indices = []
    for caractere in texto:
        indices.append(char2idx[caractere])
    return torch.tensor(indices, dtype=torch.long)

seq = texto_para_tensor(texto)

# LSTM model
class LSTMTexto(nn.Module):
    """Character-level language model: embedding -> LSTM -> linear layer producing vocabulary logits.

    Args:
        vocab_size: number of distinct characters (embedding rows and output logits).
        hidden_dim: size of both the embedding vectors and the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, hidden_dim, num_layers):
        super().__init__()
        # Keep the dimensions on the instance so init_hidden() builds a state that
        # matches THIS model instead of reading a module-level global.
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.embed = nn.Embedding(vocab_size, hidden_dim)
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, num_layers)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, input, hidden):
        """Run one unbatched sequence through the network.

        Args:
            input: 1-D tensor of character indices.
            hidden: ``(h, c)`` state tuple, e.g. from ``init_hidden()`` or a previous call.

        Returns:
            ``(out, hidden)`` where ``out`` has one row of ``vocab_size`` logits per
            input position and ``hidden`` is the updated LSTM state.
        """
        steps = len(input)
        emb = self.embed(input)
        # The LSTM is not batch_first: reshape to (seq_len, batch=1, features).
        out, hidden = self.lstm(emb.view(steps, 1, -1), hidden)
        out = self.fc(out.view(steps, -1))
        return out, hidden

    def init_hidden(self):
        """Return a zeroed ``(h, c)`` state of shape ``(num_layers, 1, hidden_dim)``.

        The tensors are created on the same device and with the same dtype as the
        model's parameters.
        """
        shape = (self.num_layers, 1, self.hidden_dim)
        ref = self.fc.weight
        return ref.new_zeros(shape), ref.new_zeros(shape)

# Training
hidden_dim = 64
modelo = LSTMTexto(vocab_size, hidden_dim, 1)
criterio = nn.CrossEntropyLoss()
otimizador = torch.optim.Adam(modelo.parameters(), lr=0.01)

# Next-character prediction: the target is the input shifted left by one position.
# Both slices are the same every epoch, so they are built once.
input_seq, target_seq = seq[:-1], seq[1:]

for epoca in range(100):
    # Each epoch starts from a fresh zero state.
    hidden = modelo.init_hidden()
    otimizador.zero_grad()

    output, hidden = modelo(input_seq, hidden)
    perda = criterio(output, target_seq)
    perda.backward()
    otimizador.step()

    # Report the loss every tenth epoch.
    if not epoca % 10:
        print(f"época {epoca}, perda: {perda.item():.4f}")

# Text generation
def gerar_texto(inicial='o', tamanho=50):
    """Sample ``tamanho`` characters from the trained ``modelo``, starting from ``inicial``.

    Args:
        inicial: single seed character; it must be a key of ``char2idx``
            (otherwise ``KeyError`` is raised).
        tamanho: number of characters to sample after the seed.

    Returns:
        The seed followed by the sampled characters, joined into one string.

    Side effects:
        Puts ``modelo`` in eval mode (it is not switched back).
    """
    modelo.eval()
    entrada = torch.tensor([char2idx[inicial]], dtype=torch.long)
    hidden = modelo.init_hidden()
    resultado = [inicial]

    # Sampling needs no gradients; without no_grad the autograd graph would keep
    # growing through the hidden state carried from step to step.
    with torch.no_grad():
        for _ in range(tamanho):
            out, hidden = modelo(entrada, hidden)
            prob = torch.softmax(out[-1], dim=0)
            # multinomial returns a 1-element long tensor, which is fed back
            # unchanged as the next input.
            idx = torch.multinomial(prob, 1)
            resultado.append(idx2char[idx.item()])
            entrada = idx

    return ''.join(resultado)

print(gerar_texto("t"))


época 0, perda: 2.7279
época 10, perda: 0.4855
época 20, perda: 0.0280
época 30, perda: 0.0055
época 40, perda: 0.0025
época 50, perda: 0.0017
época 60, perda: 0.0014
época 70, perda: 0.0012
época 80, perda: 0.0011
época 90, perda: 0.0010
tque sim!? tudo bem? espero que sim!m!s!ero que sim


In [None]:
import torch
import torch.nn as nn
from torchtext.datasets import IMDB
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence

# Tokenization and vocabulary
tokenizer = get_tokenizer("basic_english")

def yield_tokens(data_iter):
    """Lazily yield the tokenized text of every ``(label, text)`` pair in ``data_iter``.

    The label of each pair is ignored.
    """
    yield from (tokenizer(text) for _label, text in data_iter)

train_iter = IMDB(split='train')
vocab = build_vocab_from_iterator(yield_tokens(train_iter), specials=["<pad>"])
# Lookups of tokens that are not in the vocabulary resolve to the "<pad>" index.
vocab.set_default_index(vocab["<pad>"])

# Preprocessing helper
def process(text):
    """Tokenize ``text`` with the module-level ``tokenizer`` and return ``vocab`` applied to the tokens."""
    tokens = tokenizer(text)
    return vocab(tokens)

# Batch collation
def collate_batch(batch):
    """Turn a list of ``(label, text)`` samples into padded tensors.

    Args:
        batch: iterable of ``(label, text)`` pairs. A label may be the string
            ``"neg"``/``"pos"`` or the integer ``1``/``2``.

    Returns:
        ``(padded, labels)``: ``padded`` holds the token indices of every text,
        right-padded with the ``"<pad>"`` index to the longest text in the batch
        (batch-first); ``labels`` is a tensor of 0 (negative) / 1 (positive).

    Raises:
        KeyError: if a label is none of the accepted values.
    """
    # Accept both label encodings. NOTE(review): newer torchtext releases are
    # documented to yield 1 (negative) / 2 (positive) instead of strings; confirm
    # against the installed version.
    label_map = {"neg": 0, "pos": 1, 1: 0, 2: 1}
    pad_idx = vocab["<pad>"]
    texts, labels = [], []
    for label, text in batch:
        texts.append(torch.tensor(process(text), dtype=torch.long))
        labels.append(label_map[label])
    padded = pad_sequence(texts, batch_first=True, padding_value=pad_idx)
    return padded, torch.tensor(labels)

# DataLoader
# A new iterator is created here; the earlier one was already passed to the vocabulary builder.
train_iter = IMDB(split='train')
train_dataloader = DataLoader(
    list(train_iter),  # materialize all samples into a list before handing them to the loader
    batch_size=32,
    shuffle=True,
    collate_fn=collate_batch,
)

# LSTM model
class SentimentLSTM(nn.Module):
    """Binary sentiment classifier: embedding -> LSTM -> linear -> sigmoid.

    Args:
        vocab_size: number of rows in the embedding table.
        embed_dim: embedding vector size.
        hidden_dim: LSTM hidden state size.
        padding_idx: index of the padding token. When omitted, it is looked up as
            ``vocab["<pad>"]`` in the module-level vocabulary.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, padding_idx=None):
        super().__init__()
        if padding_idx is None:
            padding_idx = vocab["<pad>"]
        self.padding_idx = padding_idx
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=padding_idx)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Return one probability per sequence.

        Args:
            x: ``(batch, seq_len)`` long tensor of token indices, right-padded with
                ``padding_idx``.

        Returns:
            Tensor of shape ``(batch,)`` with values in (0, 1). The batch dimension
            is kept even for a batch of one, so the result always lines up with a
            ``(batch,)`` label tensor.
        """
        # True length of each row = position of its last non-pad token, at least 1.
        # Pad indices inside a sequence therefore still count; only the trailing
        # padding is cut off.
        steps = torch.arange(1, x.size(1) + 1, device=x.device)
        lengths = (x.ne(self.padding_idx) * steps).max(dim=1).values.clamp(min=1)

        embedded = self.embedding(x)
        # Pack so the LSTM stops at each sequence's real end; otherwise the final
        # hidden state would depend on how much padding the batch happened to add.
        packed = nn.utils.rnn.pack_padded_sequence(
            embedded, lengths.cpu(), batch_first=True, enforce_sorted=False
        )
        _, (hn, _) = self.lstm(packed)
        out = self.fc(hn[-1])
        # Squeeze only the feature dimension, never the batch dimension.
        return torch.sigmoid(out).squeeze(-1)

# Training
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SentimentLSTM(len(vocab), embed_dim=64, hidden_dim=128).to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

for epoch in range(3):
    model.train()
    # Sum (not mean) of the per-batch losses over the epoch.
    total_loss = 0
    for texts, labels in train_dataloader:
        texts = texts.to(device)
        # BCELoss needs float targets.
        labels = labels.float().to(device)

        optimizer.zero_grad()
        preds = model(texts)
        loss = criterion(preds, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")
