In [10]:
#!pip install datasets --upgrade


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import pickle
import json
import re
import random
from collections import Counter
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tqdm import tqdm
import os
from datasets import load_dataset


# Configuração do dispositivo
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Usando dispositivo: {device}")

In [None]:
# ============================
# 1. PREPROCESSAMENTO DOS DADOS
# ============================

class Vocabulary:
    def __init__(self):
        self.word2index = {"<PAD>": 0, "<SOS>": 1, "<EOS>": 2, "<UNK>": 3}
        self.index2word = {0: "<PAD>", 1: "<SOS>", 2: "<EOS>", 3: "<UNK>"}
        self.word_count = {}
        self.n_words = 4
        
    def add_sentence(self, sentence):
        for word in sentence.split():
            self.add_word(word)
    
    def add_word(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.index2word[self.n_words] = word
            self.word_count[word] = 1
            self.n_words += 1
        else:
            self.word_count[word] += 1

# def preprocess_text(text):
#     """Preprocessa o texto removendo caracteres especiais e normalizando"""
#     text = text.lower()
#     text = re.sub(r'[^\w\s]', '', text)  # Remove pontuação
#     text = re.sub(r'\s+', ' ', text)     # Remove espaços extras
#     return text.strip()

def preprocess_text(text):
    """Preprocessa o texto removendo caracteres especiais e normalizando."""
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

In [None]:
def load_dailydialog_data():
    try:
        from datasets import load_dataset
    except ImportError:
        import os
        os.system("pip install datasets")
        from datasets import load_dataset

In [None]:
def load_dailydialog_data():
    """
    Carrega dados do DailyDialog, processa e extrai pares de pergunta-resposta.
    """
    ds = load_dataset("roskoN/dailydialog", revision="refs/convert/parquet")
    pairs = []
    
    # O nome correto da coluna, 'utterances', agora é usado aqui.
    nome_da_coluna = 'utterances'
    
    for split in ("train", "validation", "test"):
        for turns in ds[split][nome_da_coluna]:
            turns = [preprocess_text(t) for t in turns]
            for i in range(len(turns) - 1):
                question = turns[i]
                answer = turns[i + 1]
                if len(question.split()) <= 20 and len(answer.split()) <= 15:
                    pairs.append((question, answer))
    return pairs

    
def prepare_data(pairs, max_length=20):
    """Prepara os dados para treinamento"""
    vocabulary = Vocabulary()
    
    # Filtra pares por comprimento e adiciona ao vocabulário
    filtered_pairs = []
    for question, answer in pairs:
        if len(question.split()) <= max_length and len(answer.split()) <= max_length:
            vocabulary.add_sentence(question)
            vocabulary.add_sentence(answer)
            filtered_pairs.append((question, answer))
    
    return filtered_pairs, vocabulary

def sentence_to_indexes(sentence, vocabulary):
    """Converte uma sentença para índices"""
    indexes = []
    for word in sentence.split():
        if word in vocabulary.word2index:
            indexes.append(vocabulary.word2index[word])
        else:
            indexes.append(vocabulary.word2index["<UNK>"])
    return indexes

def pad_sequences(sequences, max_length, pad_token=0):
    """Adiciona padding às sequências"""
    padded = []
    for seq in sequences:
        if len(seq) < max_length:
            padded.append(seq + [pad_token] * (max_length - len(seq)))
        else:
            padded.append(seq[:max_length])
    return padded


In [None]:
#============================
# 2. DATASET 
# ============================

class ChatDataset(Dataset):
    def __init__(self, pairs, vocabulary, max_length=20):
        self.pairs = pairs
        self.vocabulary = vocabulary
        self.max_length = max_length
        
    def __len__(self):
        return len(self.pairs)
    
    def __getitem__(self, idx):
        question, answer = self.pairs[idx]
        
        # Converte para índices
        question_indexes = sentence_to_indexes(question, self.vocabulary)
        answer_indexes = sentence_to_indexes(answer, self.vocabulary)
        
        # Adiciona tokens especiais
        question_indexes = [self.vocabulary.word2index["<SOS>"]] + question_indexes + [self.vocabulary.word2index["<EOS>"]]
        answer_indexes   = [self.vocabulary.word2index["<SOS>"]] + answer_indexes   + [self.vocabulary.word2index["<EOS>"]]
        
        # Padding
        question_padded = question_indexes + [0] * (self.max_length + 2 - len(question_indexes))
        answer_padded = answer_indexes + [0] * (self.max_length + 2 - len(answer_indexes))
        
        # Trunca se necessário
        question_padded = question_padded[:self.max_length + 2]
        answer_padded = answer_padded[:self.max_length + 2]
        
        return {
            'question': torch.tensor(question_padded, dtype=torch.long),
            'answer': torch.tensor(answer_padded, dtype=torch.long),
            'question_length': torch.tensor(len(question_indexes), dtype=torch.long),
            'answer_length': torch.tensor(len(answer_indexes), dtype=torch.long)
        }


In [None]:
# ============================
# 3. MODELO SEQ2SEQ COM ATENÇÃO
# ============================

class Encoder(nn.Module):
    def __init__(self, vocab_size, hidden_size, embedding_dim, n_layers=2, dropout=0.1):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_size, n_layers, 
                           dropout=dropout, bidirectional=True, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, input_seq, input_lengths):
        embedded = self.dropout(self.embedding(input_seq))

        packed = nn.utils.rnn.pack_padded_sequence(
            embedded, input_lengths, batch_first=True, enforce_sorted=False
        )
        packed_outputs, (hidden, cell) = self.lstm(packed)
        outputs, _ = nn.utils.rnn.pad_packed_sequence(packed_outputs, batch_first=True)

        # Combina as duas direções na dimensão de features (já era feito):
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]

        # ===== NOVO: combina também hidden e cell =====
        # hidden / cell:  (n_layers*2, batch, hidden_size)
        hidden = hidden.view(self.n_layers, 2, -1, self.hidden_size).sum(1)
        cell   = cell.view(self.n_layers, 2, -1, self.hidden_size).sum(1)
        # Agora: (n_layers, batch, hidden_size)

        return outputs, hidden, cell

class Attention(nn.Module):
    def __init__(self, hidden_size):
        super(Attention, self).__init__()
        self.hidden_size = hidden_size
        self.attn = nn.Linear(hidden_size * 2, hidden_size)
        self.v = nn.Linear(hidden_size, 1, bias=False)
        
    def forward(self, hidden, encoder_outputs):
        # hidden: (batch_size, hidden_size)
        # encoder_outputs: (batch_size, seq_len, hidden_size)
        
        batch_size = encoder_outputs.size(0)
        seq_len = encoder_outputs.size(1)
        
        # Repeat decoder hidden state seq_len times
        hidden = hidden.unsqueeze(1).repeat(1, seq_len, 1)
        
        # Calculate attention weights
        energy = torch.tanh(self.attn(torch.cat([hidden, encoder_outputs], dim=2)))
        attention_weights = self.v(energy).squeeze(2)
        
        # Apply softmax to get attention weights
        attention_weights = F.softmax(attention_weights, dim=1)
        
        # Apply attention weights to encoder outputs
        context = torch.bmm(attention_weights.unsqueeze(1), encoder_outputs)
        
        return context.squeeze(1), attention_weights

class Decoder(nn.Module):
    def __init__(self, vocab_size, hidden_size, embedding_dim, n_layers=2, dropout=0.1):
        super(Decoder, self).__init__()
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim + hidden_size * 2, hidden_size, num_layers=n_layers, dropout=dropout, batch_first=True)
        self.attention = Attention(hidden_size)
        self.out = nn.Linear(hidden_size * 2, vocab_size)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, input_token, hidden, cell, encoder_outputs):
        # input_token: (batch_size, 1)
        # hidden: (n_layers, batch_size, hidden_size)
        # encoder_outputs: (batch_size, seq_len, hidden_size)
        
        embedded = self.dropout(self.embedding(input_token))
        
        # Get attention context
        last_hidden = hidden[-1]  # Use last layer hidden state
        context, attention_weights = self.attention(last_hidden, encoder_outputs)
        
        # Combine embedding with context
        lstm_input = torch.cat([embedded, context.unsqueeze(1)], dim=2)
        
        # Forward through LSTM
        lstm_output, (hidden, cell) = self.lstm(lstm_input, (hidden, cell))
        
        # Combine LSTM output with context for prediction
        output = torch.cat([lstm_output.squeeze(1), context], dim=1)
        output = self.out(output)
        
        return output, hidden, cell, attention_weights

class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        
    def forward(self, src, src_len, trg, teacher_forcing_ratio=0.5):
        batch_size = src.size(0)
        trg_len = trg.size(1)
        trg_vocab_size = self.decoder.vocab_size
        
        # Tensor to store decoder outputs
        outputs = torch.zeros(batch_size, trg_len, trg_vocab_size).to(self.device)
        
        # Encoder
        encoder_outputs, hidden, cell = self.encoder(src, src_len)
        
        # First input to decoder is SOS token
        input_token = trg[:, 0].unsqueeze(1)
        
        for t in range(1, trg_len):
            # Forward through decoder
            output, hidden, cell, attention = self.decoder(input_token, hidden, cell, encoder_outputs)
            
            # Store output
            outputs[:, t] = output
            
            # Decide if we use teacher forcing
            teacher_force = random.random() < teacher_forcing_ratio
            
            # Get the highest predicted token
            top1 = output.argmax(1)
            
            # Update input token
            input_token = trg[:, t].unsqueeze(1) if teacher_force else top1.unsqueeze(1)
        
        return outputs

In [None]:
# ============================
# 4. TREINAMENTO
# ============================

def train_epoch(model, dataloader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    
    for batch in tqdm(dataloader, desc="Treinando"):
        optimizer.zero_grad()
        
        src = batch['question'].to(device)
        src_len = batch['question_length'].to(device)
        trg = batch['answer'].to(device)
        
        # Forward pass
        output = model(src, src_len, trg)
        
        # Calculate loss
        output_dim = output.shape[-1]
        output = output[:, 1:].reshape(-1, output_dim)
        trg = trg[:, 1:].reshape(-1)
        
        loss = criterion(output, trg)
        
        # Backward pass
        loss.backward()
        
        # Clip gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
        
        optimizer.step()
        
        total_loss += loss.item()
    
    return total_loss / len(dataloader)

def evaluate(model, dataloader, criterion, device):
    model.eval()
    total_loss = 0
    
    with torch.no_grad():
        for batch in dataloader:
            src = batch['question'].to(device)
            src_len = batch['question_length'].to(device)
            trg = batch['answer'].to(device)
            
            # Forward pass with no teacher forcing
            output = model(src, src_len, trg, teacher_forcing_ratio=0)
            
            # Calculate loss
            output_dim = output.shape[-1]
            output = output[:, 1:].reshape(-1, output_dim)
            trg = trg[:, 1:].reshape(-1)
            
            loss = criterion(output, trg)
            total_loss += loss.item()
    
    return total_loss / len(dataloader)

def train_model(model, train_loader, val_loader, num_epochs, device):
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss(ignore_index=0)  # Ignore padding token
    
    train_losses = []
    val_losses = []
    
    best_val_loss = float('inf')
    
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        
        # Training
        train_loss = train_epoch(model, train_loader, optimizer, criterion, device)
        train_losses.append(train_loss)
        
        # Validation
        val_loss = evaluate(model, val_loader, criterion, device)
        val_losses.append(val_loss)
        
        print(f"Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
        
        # Save best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'epoch': epoch,
                'val_loss': val_loss,
            }, 'best_chatbot_model.pth')
            print("Modelo salvo!")
    
    return train_losses, val_losses

In [None]:
# ============================
# 5. INFERÊNCIA
# ============================

def respond(model, question, vocabulary, device, max_length=20):
    model.eval()
    
    with torch.no_grad():
        # Preprocess question
        question = preprocess_text(question)
        question_indexes = sentence_to_indexes(question, vocabulary)
        question_indexes = [vocabulary.word2index["<SOS>"]] + question_indexes + [vocabulary.word2index["<EOS>"]]
        
        # Convert to tensor
        src = torch.tensor(question_indexes, dtype=torch.long).unsqueeze(0).to(device)
        src_len = torch.tensor([len(question_indexes)], dtype=torch.long).to(device)
        
        # Encode
        encoder_outputs, hidden, cell = model.encoder(src, src_len)
        
        # Decode
        input_token = torch.tensor([vocabulary.word2index["<SOS>"]], dtype=torch.long).unsqueeze(0).to(device)
        response_indexes = []
        
        for _ in range(max_length):
            output, hidden, cell, attention = model.decoder(input_token, hidden, cell, encoder_outputs)
            predicted_token = output.argmax(1).item()
            
            if predicted_token == vocabulary.word2index["<EOS>"]:
                break
                
            response_indexes.append(predicted_token)
            input_token = torch.tensor([predicted_token], dtype=torch.long).unsqueeze(0).to(device)
        
        # Convert back to words
        response_words = [vocabulary.index2word[idx] for idx in response_indexes 
                         if idx in vocabulary.index2word and idx != vocabulary.word2index["<PAD>"]]
        
        return ' '.join(response_words)

In [None]:
# ============================
# 6. FUNÇÃO PRINCIPAL
# ============================

def main():
    # Hiperparâmetros
    EMBEDDING_DIM = 128
    HIDDEN_SIZE = 256
    N_LAYERS = 2
    DROPOUT = 0.05
    BATCH_SIZE = 8
    NUM_EPOCHS = 10
    MAX_LENGTH = 20
    
    print("Carregando dados...")
    # Carrega dados (usar dataset de exemplo por enquanto)
    pairs = load_dailydialog_data()
    # 🔽 Limita para 5.000 pares para acelerar treinamento
    pairs = pairs[:10000]
    print(f"Carregados {len(pairs)} pares de pergunta-resposta")
    
    # Prepara dados
    pairs, vocabulary = prepare_data(pairs, MAX_LENGTH)
    print(f"Vocabulário contém {vocabulary.n_words} palavras")
    
    # Divide dados
    train_pairs, val_pairs = train_test_split(pairs, test_size=0.1, random_state=42)
    
    # Cria datasets
    train_dataset = ChatDataset(train_pairs, vocabulary, MAX_LENGTH)
    val_dataset = ChatDataset(val_pairs, vocabulary, MAX_LENGTH)
    
    # Cria dataloaders
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
    
    # Cria modelo
    encoder = Encoder(vocabulary.n_words, HIDDEN_SIZE, EMBEDDING_DIM, N_LAYERS, DROPOUT)
    decoder = Decoder(vocabulary.n_words, HIDDEN_SIZE, EMBEDDING_DIM, N_LAYERS, DROPOUT)
    model = Seq2Seq(encoder, decoder, device).to(device)
    
    print(f"Modelo criado com {sum(p.numel() for p in model.parameters())} parâmetros")
    
    # Treina modelo
    print("Iniciando treinamento...")
    train_losses, val_losses = train_model(model, train_loader, val_loader, NUM_EPOCHS, device)
    
    # Salva vocabulário
    with open('vocabulary.pkl', 'wb') as f:
        pickle.dump(vocabulary, f)
    
    # Plota perdas
    plt.figure(figsize=(10, 5))
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss')
    plt.legend()
    plt.savefig('training_loss.png')
    plt.show()
    
    # Carrega melhor modelo
    checkpoint = torch.load('best_chatbot_model.pth')
    model.load_state_dict(checkpoint['model_state_dict'])
    
    # Testa modelo
    print("\n" + "="*50)
    print("TESTANDO O CHATBOT")
    print("="*50)
    
    test_questions = [
        "oi",
        "como vai",
        "qual seu nome",
        "voce gosta de musica",
        "conte uma piada",
        "voce e feliz"
    ]
    
    for question in test_questions:
        response = respond(model, question, vocabulary, device)
        print(f"Pergunta: {question}")
        print(f"Resposta: {response}")
        print("-" * 30)
    
    return model, vocabulary

In [None]:
class Vocabulary:
    def __init__(self):
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "<PAD>", 1: "<SOS>", 2: "<EOS>", 3: "<UNK>"}
        self.n_words = 4  # Contando os tokens especiais

    def add_sentence(self, sentence):
        for word in sentence.split():
            self.add_word(word)

    def add_word(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1


In [16]:
# ============================
# 7. INTERFACE INTERATIVA
# ============================

def interactive_chat():
    """Interface interativa para conversar com o chatbot"""
    print("Carregando modelo...")

    # Importa load_dataset se necessário
    try:
        from datasets import load_dataset
    except ImportError:
        %pip install datasets
        from datasets import load_dataset
    
    # Carrega modelo e vocabulário
    try:
        with open(r'C:\Users\rosej\OneDrive\Desktop\IA - 25\IA\Chatbot\chat\vocabulary.pkl', 'rb') as f:
            vocabulary = pickle.load(f)
        
        # Recria modelo usando os mesmos hiperparâmetros do treinamento
        # Use as classes já definidas na célula 7 (não redefina!)
        encoder = Encoder(vocabulary.n_words, HIDDEN_SIZE, EMBEDDING_DIM, N_LAYERS, DROPOUT)
        decoder = Decoder(vocabulary.n_words, HIDDEN_SIZE, EMBEDDING_DIM, N_LAYERS, DROPOUT)
        model = Seq2Seq(encoder, decoder, device).to(device)

        # Carrega pesos
        checkpoint = torch.load(r'C:\Users\rosej\OneDrive\Desktop\IA - 25\IA\Chatbot\chat\best_chatbot_model.pth')
        model.load_state_dict(checkpoint['model_state_dict'])
        
        print("Modelo carregado com sucesso!")
        print("Digite 'sair' para terminar a conversa")
        print("="*50)
        
        while True:
            user_input = input("Você: ")
            if user_input.lower() == 'sair':
                print("Chatbot: Até logo! Foi um prazer conversar com você!")
                break
            
            response = respond(model, user_input, vocabulary, device)
            print(f"Chatbot: {response}")
            
    except FileNotFoundError:
        print("Modelo não encontrado! Execute o treinamento primeiro.")

if __name__ == "__main__":
    # print("Iniciando o processo de treinamento...")
    # main()
    # print("Processo finalizado.")
    # Uncomment para treinar o modelo
    # main()
    
    #Uncomment para usar interface interativa
    
    interactive_chat()
    
    # Para demo, execute o treinamento
    print("Para executar este código:")
    print("1. Descomente 'main()' para treinar o modelo")
    print("2. Após o treinamento, descomente 'interactive_chat()' para usar o chatbot")
    print("3. Ou execute células individuais conforme necessário")
    
 

Carregando modelo...
Modelo carregado com sucesso!
Digite 'sair' para terminar a conversa
Chatbot: what you you
Chatbot: fine
Chatbot: yes i like a
Chatbot: i like a a
Chatbot: what
Chatbot: Até logo! Foi um prazer conversar com você!
Para executar este código:
1. Descomente 'main()' para treinar o modelo
2. Após o treinamento, descomente 'interactive_chat()' para usar o chatbot
3. Ou execute células individuais conforme necessário


## Teste 2

In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pickle
import re
import random

# ============================
# 0. CONFIGURAÇÃO
# ============================
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Usando dispositivo: {device}")

# Hiperparâmetros (devem ser os mesmos do treinamento)
EMBEDDING_DIM = 128
HIDDEN_SIZE = 256
N_LAYERS = 2
DROPOUT = 0.05
MAX_LENGTH = 20

# ============================
# 1. PRÉ-PROCESSAMENTO E VOCABULÁRIO
# ============================

class Vocabulary:
    def __init__(self):
        self.word2index = {"<PAD>": 0, "<SOS>": 1, "<EOS>": 2, "<UNK>": 3}
        self.index2word = {0: "<PAD>", 1: "<SOS>", 2: "<EOS>", 3: "<UNK>"}
        self.word_count = {}
        self.n_words = 4

def preprocess_text(text):
    """Preprocessa o texto removendo caracteres especiais e normalizando."""
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

def sentence_to_indexes(sentence, vocabulary):
    """Converte uma sentença para índices."""
    indexes = []
    for word in sentence.split():
        if word in vocabulary.word2index:
            indexes.append(vocabulary.word2index[word])
        else:
            indexes.append(vocabulary.word2index["<UNK>"])
    return indexes

# ============================
# 2. MODELO SEQ2SEQ COM ATENÇÃO (ARQUITETURA CORRIGIDA)
# ============================

class Encoder(nn.Module):
    def __init__(self, vocab_size, hidden_size, embedding_dim, n_layers=2, dropout=0.1):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_size, n_layers,
                           dropout=dropout, bidirectional=True, batch_first=True)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input_seq, input_lengths):
        embedded = self.dropout(self.embedding(input_seq))
        packed = nn.utils.rnn.pack_padded_sequence(
            embedded, input_lengths.cpu(), batch_first=True, enforce_sorted=False
        )
        packed_outputs, (hidden, cell) = self.lstm(packed)
        outputs, _ = nn.utils.rnn.pad_packed_sequence(packed_outputs, batch_first=True)

        # Soma as saídas das duas direções (forward e backward)
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]

        # Combina os hidden e cell states das duas direções
        hidden = hidden.view(self.n_layers, 2, -1, self.hidden_size).sum(1)
        cell = cell.view(self.n_layers, 2, -1, self.hidden_size).sum(1)

        return outputs, hidden, cell

class Attention(nn.Module):
    def __init__(self, hidden_size):
        super(Attention, self).__init__()
        self.hidden_size = hidden_size
        self.attn = nn.Linear(hidden_size * 2, hidden_size)
        self.v = nn.Linear(hidden_size, 1, bias=False)

    def forward(self, hidden, encoder_outputs):
        batch_size = encoder_outputs.size(0)
        seq_len = encoder_outputs.size(1)
        hidden = hidden.unsqueeze(1).repeat(1, seq_len, 1)
        energy = torch.tanh(self.attn(torch.cat([hidden, encoder_outputs], dim=2)))
        attention_weights = self.v(energy).squeeze(2)
        return F.softmax(attention_weights, dim=1)

class Decoder(nn.Module):
    def __init__(self, vocab_size, hidden_size, embedding_dim, n_layers=2, dropout=0.1):
        super(Decoder, self).__init__()
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.attention = Attention(hidden_size)

        # CORREÇÃO: A entrada da LSTM deve ser embedding_dim + hidden_size
        # Esta era a principal fonte de erro ao carregar o modelo.
        self.lstm = nn.LSTM(embedding_dim + hidden_size, hidden_size, n_layers, dropout=dropout, batch_first=True)

        self.out = nn.Linear(hidden_size * 2, vocab_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input_token, hidden, cell, encoder_outputs):
        embedded = self.dropout(self.embedding(input_token))
        
        # Usa o último hidden state do decoder para a atenção
        last_hidden = hidden[-1]
        attention_weights = self.attention(last_hidden, encoder_outputs).unsqueeze(1)
        
        context = torch.bmm(attention_weights, encoder_outputs)
        
        lstm_input = torch.cat([embedded, context], dim=2)
        
        lstm_output, (hidden, cell) = self.lstm(lstm_input, (hidden, cell))
        
        output = torch.cat([lstm_output.squeeze(1), context.squeeze(1)], dim=1)
        output = self.out(output)
        
        return output, hidden, cell, attention_weights

class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
    
    # O forward completo não é necessário para a inferência
    def forward(self, src, src_len, trg, teacher_forcing_ratio=0.5):
        pass


# ============================
# 3. FUNÇÃO DE INFERÊNCIA
# ============================

def respond(model, question, vocabulary, device, max_length=20):
    model.eval()
    with torch.no_grad():
        question = preprocess_text(question)
        question_indexes = sentence_to_indexes(question, vocabulary)
        question_indexes = [vocabulary.word2index["<SOS>"]] + question_indexes + [vocabulary.word2index["<EOS>"]]
        
        src = torch.tensor(question_indexes, dtype=torch.long).unsqueeze(0).to(device)
        src_len = torch.tensor([len(question_indexes)], dtype=torch.long).to(device)
        
        encoder_outputs, hidden, cell = model.encoder(src, src_len)
        
        input_token = torch.tensor([[vocabulary.word2index["<SOS>"]]], dtype=torch.long).to(device)
        response_indexes = []
        
        for _ in range(max_length):
            output, hidden, cell, attention = model.decoder(input_token, hidden, cell, encoder_outputs)
            
            predicted_token = output.argmax(1).item()
            
            if predicted_token == vocabulary.word2index["<EOS>"]:
                break
            
            response_indexes.append(predicted_token)
            input_token = torch.tensor([[predicted_token]], dtype=torch.long).to(device)
            
        response_words = [vocabulary.index2word.get(idx, "<UNK>") for idx in response_indexes]
        
        return ' '.join(response_words)

# ============================
# 4. EXECUÇÃO PRINCIPAL
# ============================

def interactive_chat():
    """Interface interativa para conversar com o chatbot."""
    try:
        print("Carregando modelo e vocabulário...")
        # Carregue o vocabulário
        with open('vocabulary.pkl', 'rb') as f:
            vocabulary = pickle.load(f)
        
        # Recrie o modelo com a arquitetura correta
        encoder = Encoder(vocabulary.n_words, HIDDEN_SIZE, EMBEDDING_DIM, N_LAYERS, DROPOUT)
        decoder = Decoder(vocabulary.n_words, HIDDEN_SIZE, EMBEDDING_DIM, N_LAYERS, DROPOUT)
        model = Seq2Seq(encoder, decoder, device).to(device)
        
        # Carregue os pesos do modelo treinado
        checkpoint = torch.load('best_chatbot_model.pth', map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        
        print("✅ Modelo carregado com sucesso!")
        print("💬 Digite 'sair' para terminar a conversa.")
        print("=" * 50)
        
        while True:
            user_input = input("Você: ")
            if user_input.lower() == 'sair':
                print("Chatbot: Até logo! Foi um prazer conversar com você!")
                break
            
            response = respond(model, user_input, vocabulary, device)
            print(f"Chatbot: {response}")
            
if __name__ == "__main__":
    interactive_chat()

SyntaxError: expected 'except' or 'finally' block (2333880912.py, line 209)