In [1]:
import torch

print("Number of GPU: ", torch.cuda.device_count())
print("GPU Name: ", torch.cuda.get_device_name())

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)

# Exemplo rápido para confirmar que está usando a GPU
x = torch.rand(5, 5).to(device)
y = torch.rand(5, 5).to(device)
z = x @ y
print("Tensor on GPU:", z.is_cuda)

Number of GPU:  1
GPU Name:  NVIDIA GeForce RTX 3050 Laptop GPU
Using device: cuda
Tensor on GPU: True


In [None]:
import torch
import os
import pandas as pd
import re
import numpy as np
import pickle
import random
from tqdm import tqdm

import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import spacy
import nltk
from collections import Counter

In [None]:
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    print("Downloading 'punkt' NLTK package...")
    nltk.download('punkt')
try:
    nltk.data.find('corpora/wordnet')
except LookupError:
    print("Downloading 'wordnet' NLTK package...")
    nltk.download('wordnet')

In [None]:
"""
Conta o número total de músicas a partir de arquivos CSV na pasta 'csv',
antes e depois da limpeza, filtrando letras vazias e uma frase de placeholder específica.
"""
CSV_FOLDER_PATH = 'csv'
PLACEHOLDER_PHRASE = "lyrics for this song have yet to be released please check back once the song has been released"

def clean_text(text):
    if isinstance(text, str):
        text = text.lower()
        text = re.sub(r'[^a-z0-9\s.,!?;:\'\-]', '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text
    return ""

total_songs_raw = 0
total_songs_after_cleaning = 0

print("Iniciando a contagem de músicas em todos os arquivos CSV...")

for filename in os.listdir(CSV_FOLDER_PATH):
    if filename.endswith(".csv"):
        csv_path = os.path.join(CSV_FOLDER_PATH, filename)
        print(f"Processando arquivo: {filename}")
        
        try:
            df = pd.read_csv(csv_path, usecols=['Lyric'])
            
            total_songs_raw += len(df)
            
            df['Cleaned_Lyric'] = df['Lyric'].apply(clean_text)
            
            df_filtered = df[df['Cleaned_Lyric'] != '']
            
            df_filtered = df_filtered[~df_filtered['Cleaned_Lyric'].str.contains(PLACEHOLDER_PHRASE, na=False)]
            
            total_songs_after_cleaning += len(df_filtered)
            
        except KeyError:
            print(f"Aviso: Coluna 'Lyric' não encontrada em {filename}. Pulando este arquivo.")
        except Exception as e:
            print(f"Erro ao processar {filename}: {e}. Pulando este arquivo.")

print("\n--- Resultados ---")
print(f"Número total de músicas (antes da limpeza): {total_songs_raw}")
print(f"Número de letras válidas (após remover placeholder e vazias): {total_songs_after_cleaning}")

Iniciando a contagem de músicas em todos os arquivos CSV...
Processando arquivo: ArianaGrande.csv
Processando arquivo: Beyonce.csv
Processando arquivo: BillieEilish.csv
Processando arquivo: BTS.csv
Processando arquivo: CardiB.csv
Processando arquivo: CharliePuth.csv
Processando arquivo: ColdPlay.csv
Processando arquivo: Drake.csv
Processando arquivo: DuaLipa.csv
Processando arquivo: EdSheeran.csv
Processando arquivo: Eminem.csv
Processando arquivo: JustinBieber.csv
Processando arquivo: KatyPerry.csv
Processando arquivo: Khalid.csv
Processando arquivo: LadyGaga.csv
Processando arquivo: Maroon5.csv
Processando arquivo: NickiMinaj.csv
Processando arquivo: PostMalone.csv
Processando arquivo: Rihanna.csv
Processando arquivo: SelenaGomez.csv
Processando arquivo: TaylorSwift.csv

--- Resultados ---
Número total de músicas (antes da limpeza): 6027
Número de letras válidas (após remover placeholder e vazias): 5752


## Separação dos dados

In [None]:
"""
Configura os parâmetros para o pré-processamento de um artista específico,
define as funções e a classe de tokenização necessárias, processa as letras
do artista em sequências de caracteres e salva os conjuntos de dados de treino,
validação e teste, bem como o tokenizador.
"""
# --- Configurações Específicas desta Célula ---
SEQUENCE_LENGTH = 100
BASE_OUTPUT_DIR = 'processed_data_by_artist_char_split'
TOKENIZER_SUBDIR = 'tokenizers'
DATA_SUBDIR = 'data'
TARGET_ARTIST = 'ArianaGrande'
NUM_SONGS_TO_PROCESS = -1
TRAIN_SPLIT = 0.8
VAL_SPLIT = 0.1
TEST_SPLIT = 0.1
RANDOM_SEED = 42

# --- Geração de Nomes e Estrutura de Pastas ---
CLEAN_TARGET_ARTIST_NAME = TARGET_ARTIST.replace(" ", "_").replace(".", "").lower()
CSV_FILE_PATH = os.path.join(CSV_FOLDER_PATH, f'{TARGET_ARTIST}.csv')
OUTPUT_DIR_ARTIST = os.path.join(BASE_OUTPUT_DIR, CLEAN_TARGET_ARTIST_NAME)
os.makedirs(os.path.join(OUTPUT_DIR_ARTIST, TOKENIZER_SUBDIR), exist_ok=True)
os.makedirs(os.path.join(OUTPUT_DIR_ARTIST, DATA_SUBDIR), exist_ok=True)
random.seed(RANDOM_SEED)

# --- Funções e Classes Auxiliares Novas ---
def save_object(obj, path):
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print(f"Objeto salvo em: {path}")

def load_object(path):
    if os.path.exists(path):
        with open(path, 'rb') as handle:
            obj = pickle.load(handle)
        print(f"Objeto carregado de: {path}")
        return obj
    return None

class CharacterTokenizer:
    def __init__(self):
        self.char_to_int = {}
        self.int_to_char = {}
        self.vocab_size = 0

    def fit_on_texts(self, texts):
        all_chars = "".join(texts) if isinstance(texts, list) else texts
        unique_chars = sorted(list(set(all_chars)))
        self.char_to_int = {char: i + 1 for i, char in enumerate(unique_chars)}
        self.int_to_char = {i + 1: char for i, char in enumerate(unique_chars)}
        self.vocab_size = len(self.char_to_int) + 1

    def texts_to_sequences(self, texts):
        if isinstance(texts, str):
            return [self.char_to_int.get(char, 0) for char in texts]
        elif isinstance(texts, list):
            return [[self.char_to_int.get(char, 0) for char in s] for s in texts]
        return []

    def sequences_to_texts(self, sequences):
        if sequences and isinstance(sequences[0], list):
            return ["".join([self.int_to_char.get(i, '') for i in seq]) for seq in sequences]
        else:
            return "".join([self.int_to_char.get(i, '') for i in sequences])

# --- Lógica Principal de Processamento ---
print(f"Coletando e limpando as letras para o artista '{TARGET_ARTIST}'...")
all_lyrics_for_split = []
try:
    df_artist = pd.read_csv(CSV_FILE_PATH, usecols=['Lyric'])
    
    lyrics_to_process_df = df_artist
    if NUM_SONGS_TO_PROCESS != -1 and NUM_SONGS_TO_PROCESS < len(df_artist):
        lyrics_to_process_df = df_artist.sample(n=NUM_SONGS_TO_PROCESS, random_state=RANDOM_SEED)
        print(f"Selecionando aleatoriamente {NUM_SONGS_TO_PROCESS} músicas de '{TARGET_ARTIST}'.")
    else:
        print(f"Processando todas as {len(df_artist)} músicas de '{TARGET_ARTIST}'.")

    for lyric_val in tqdm(lyrics_to_process_df['Lyric'], desc=f"Limpando letras de {TARGET_ARTIST}"):
        cleaned_lyric = clean_text(lyric_val)
        if cleaned_lyric and PLACEHOLDER_PHRASE not in cleaned_lyric:
            all_lyrics_for_split.append(cleaned_lyric)

except FileNotFoundError:
    print(f"Erro: Arquivo CSV não encontrado em '{CSV_FILE_PATH}'. Verifique o caminho e nome do arquivo.")
except KeyError:
    print("Erro: Coluna 'Lyric' não encontrada no CSV. Verifique o nome da coluna (case-sensitive).")

print(f"Total de letras válidas coletadas: {len(all_lyrics_for_split)}")

random.shuffle(all_lyrics_for_split)
num_train = int(len(all_lyrics_for_split) * TRAIN_SPLIT)
num_val = int(len(all_lyrics_for_split) * VAL_SPLIT)
train_lyrics = all_lyrics_for_split[:num_train]
val_lyrics = all_lyrics_for_split[num_train : num_train + num_val]
test_lyrics = all_lyrics_for_split[num_train + num_val :]

print(f"\nDivisão das músicas: Treino={len(train_lyrics)}, Validação={len(val_lyrics)}, Teste={len(test_lyrics)}")

print("\nConstruindo vocabulário de caracteres com as letras de TREINO...")
tokenizer = CharacterTokenizer()
tokenizer.fit_on_texts(" ".join(train_lyrics))
total_chars = tokenizer.vocab_size
print(f"Vocabulário construído. Tamanho do vocabulário: {total_chars}")

def create_sequences_from_lyrics_list(lyrics_list, tokenizer, sequence_length, description):
    all_sequences, all_next_chars = [], []
    print(f"\nCriando sequências para o {description}...")
    for lyric in tqdm(lyrics_list, desc=f"Processando músicas do {description}"):
        tokenized_lyric = tokenizer.texts_to_sequences(lyric)
        if len(tokenized_lyric) < sequence_length + 1:
            continue
        for i in range(len(tokenized_lyric) - sequence_length):
            seq = tokenized_lyric[i : i + sequence_length]
            next_c = tokenized_lyric[i + sequence_length]
            all_sequences.append(seq)
            all_next_chars.append(next_c)
    print(f"Total de sequências criadas para {description}: {len(all_sequences)}")
    if not all_sequences:
        return np.array([]), np.array([])
    return np.array(all_sequences, dtype=np.int32), np.array(all_next_chars, dtype=np.int32)

X_train, y_train = create_sequences_from_lyrics_list(train_lyrics, tokenizer, SEQUENCE_LENGTH, "Conjunto de Treino")
X_val, y_val = create_sequences_from_lyrics_list(val_lyrics, tokenizer, SEQUENCE_LENGTH, "Conjunto de Validação")
X_test, y_test = create_sequences_from_lyrics_list(test_lyrics, tokenizer, SEQUENCE_LENGTH, "Conjunto de Teste")

print(f"\nShapes finais dos dados tokenizados:")
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

print("\nSalvando dados processados e tokenizador...")
TOKENIZER_SAVE_PATH = os.path.join(OUTPUT_DIR_ARTIST, TOKENIZER_SUBDIR, f'char_tokenizer_{CLEAN_TARGET_ARTIST_NAME}.pkl')
save_object(tokenizer, TOKENIZER_SAVE_PATH)

output_file_x_train = os.path.join(OUTPUT_DIR_ARTIST, DATA_SUBDIR, f'X_train_{CLEAN_TARGET_ARTIST_NAME}_char.npy')
output_file_y_train = os.path.join(OUTPUT_DIR_ARTIST, DATA_SUBDIR, f'y_train_{CLEAN_TARGET_ARTIST_NAME}_char.npy')
np.save(output_file_x_train, X_train)
np.save(output_file_y_train, y_train)
print(f"Dados de TREINO salvos.")

output_file_x_val = os.path.join(OUTPUT_DIR_ARTIST, DATA_SUBDIR, f'X_val_{CLEAN_TARGET_ARTIST_NAME}_char.npy')
output_file_y_val = os.path.join(OUTPUT_DIR_ARTIST, DATA_SUBDIR, f'y_val_{CLEAN_TARGET_ARTIST_NAME}_char.npy')
np.save(output_file_x_val, X_val)
np.save(output_file_y_val, y_val)
print(f"Dados de VALIDAÇÃO salvos.")

output_file_x_test = os.path.join(OUTPUT_DIR_ARTIST, DATA_SUBDIR, f'X_test_{CLEAN_TARGET_ARTIST_NAME}_char.npy')
output_file_y_test = os.path.join(OUTPUT_DIR_ARTIST, DATA_SUBDIR, f'y_test_{CLEAN_TARGET_ARTIST_NAME}_char.npy')
np.save(output_file_x_test, X_test)
np.save(output_file_y_test, y_test)
print(f"Dados de TESTE salvos.")

output_file_vocab_size = os.path.join(OUTPUT_DIR_ARTIST, TOKENIZER_SUBDIR, f'vocab_size_{CLEAN_TARGET_ARTIST_NAME}_char.pkl')
save_object(total_chars, output_file_vocab_size)

print(f"\nPré-processamento para o artista '{TARGET_ARTIST}' concluído.")
print(f"Dados tokenizados salvos no diretório: {OUTPUT_DIR_ARTIST}")

Passo 1-3: Coletando e limpando as letras para o artista 'ArianaGrande' do arquivo 'csv\ArianaGrande.csv'...
Processando todas as 308 músicas de 'ArianaGrande'.


Limpando e filtrando letras de ArianaGrande: 100%|██████████| 308/308 [00:00<00:00, 5225.22it/s]


Total de letras iniciais: 308
Total de letras coletadas e limpas para 'ArianaGrande': 294

Divisão das músicas:
Treino: 235 músicas
Validação: 29 músicas
Teste: 30 músicas

Passo 4: Construindo vocabulário de caracteres com as letras de TREINO...
Vocabulário de caracteres construído. Tamanho do vocabulário: 36
Objeto salvo em: processed_data_by_artist_char_split\arianagrande\tokenizers\char_tokenizer_arianagrande.pkl

Conjunto de Treino: Criando sequências e tokenizando...


Processando Conjunto de Treino músicas: 100%|██████████| 235/235 [00:01<00:00, 148.13it/s]


Total de caracteres processados no Conjunto de Treino: 383150
Total de sequências criadas para Conjunto de Treino: 360363

Conjunto de Validação: Criando sequências e tokenizando...


Processando Conjunto de Validação músicas: 100%|██████████| 29/29 [00:00<00:00, 72.90it/s]


Total de caracteres processados no Conjunto de Validação: 42771
Total de sequências criadas para Conjunto de Validação: 39968

Conjunto de Teste: Criando sequências e tokenizando...


Processando Conjunto de Teste músicas: 100%|██████████| 30/30 [00:00<00:00, 281.56it/s]


Total de caracteres processados no Conjunto de Teste: 53235
Total de sequências criadas para Conjunto de Teste: 50300

Shapes finais dos dados tokenizados:
X_train shape: (360363, 100), y_train shape: (360363,)
X_val shape: (39968, 100), y_val shape: (39968,)
X_test shape: (50300, 100), y_test shape: (50300,)
Dados de TREINO salvos em processed_data_by_artist_char_split\arianagrande\data\X_train_arianagrande_char.npy e processed_data_by_artist_char_split\arianagrande\data\y_train_arianagrande_char.npy
Dados de VALIDAÇÃO salvos em processed_data_by_artist_char_split\arianagrande\data\X_val_arianagrande_char.npy e processed_data_by_artist_char_split\arianagrande\data\y_val_arianagrande_char.npy
Dados de TESTE salvos em processed_data_by_artist_char_split\arianagrande\data\X_test_arianagrande_char.npy e processed_data_by_artist_char_split\arianagrande\data\y_test_arianagrande_char.npy
Objeto salvo em: processed_data_by_artist_char_split\arianagrande\tokenizers\vocab_size_arianagrande_char

## Treino

In [None]:
"""
Configura os parâmetros de treinamento, define as classes de Dataset e do
modelo LSTM, carrega os dados pré-processados e inicializa os componentes
para o treinamento do modelo de geração de texto para o artista alvo.
"""
# --- Configurações do Treinamento ---
MODEL_DIR_BASE = 'models_by_artist_char_split'
BATCH_SIZE = 128
EPOCHS = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Usando dispositivo: {device}")

# --- Caminhos Específicos do Modelo ---
MODEL_DIR_ARTIST = os.path.join(MODEL_DIR_BASE, CLEAN_TARGET_ARTIST_NAME)
os.makedirs(MODEL_DIR_ARTIST, exist_ok=True)

# --- Definição do Dataset e Modelo PyTorch ---
class LyricsDataset(Dataset):
    def __init__(self, x_tokens_path, y_tokens_path):
        self.x_data = np.load(x_tokens_path, mmap_mode='r')
        self.y_data = np.load(y_tokens_path, mmap_mode='r')

    def __len__(self):
        return len(self.x_data)

    def __getitem__(self, idx):
        x_sample = torch.tensor(self.x_data[idx], dtype=torch.long)
        y_sample = torch.tensor(self.y_data[idx], dtype=torch.long)
        return x_sample, y_sample

class CharLSTM(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, n_layers, dropout_rate, dropout_strategy="none"):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.dropout_strategy = dropout_strategy
        self.dropout_rate = dropout_rate
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        
        lstm_dropout_param = dropout_rate if dropout_strategy == "between_lstm" and n_layers > 1 else 0.0
        self.lstm = nn.LSTM(embedding_dim,
                              hidden_dim,
                              n_layers,
                              dropout=lstm_dropout_param,
                              batch_first=True,
                              bidirectional=True)
        
        self.dropout_layer = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(hidden_dim * 2, vocab_size)

    def forward(self, text):
        embedded = self.embedding(text)
        output, (hidden, cell) = self.lstm(embedded)
        final_lstm_output = output[:, -1, :]

        if self.dropout_strategy == "before_fc":
            final_lstm_output = self.dropout_layer(final_lstm_output)
        
        prediction = self.fc(final_lstm_output)
        return prediction

# --- Preparação para o Treinamento do Artista Alvo ---
print(f"\nIniciando preparação para treinamento do artista: '{TARGET_ARTIST}'...")

PROCESSED_DATA_DIR_ARTIST = os.path.join(BASE_OUTPUT_DIR, CLEAN_TARGET_ARTIST_NAME)
TOKENIZER_PATH = os.path.join(PROCESSED_DATA_DIR_ARTIST, TOKENIZER_SUBDIR, f'char_tokenizer_{CLEAN_TARGET_ARTIST_NAME}.pkl')
X_train_file_path = os.path.join(PROCESSED_DATA_DIR_ARTIST, DATA_SUBDIR, f'X_train_{CLEAN_TARGET_ARTIST_NAME}_char.npy')
y_train_file_path = os.path.join(PROCESSED_DATA_DIR_ARTIST, DATA_SUBDIR, f'y_train_{CLEAN_TARGET_ARTIST_NAME}_char.npy')
X_val_file_path = os.path.join(PROCESSED_DATA_DIR_ARTIST, DATA_SUBDIR, f'X_val_{CLEAN_TARGET_ARTIST_NAME}_char.npy')
y_val_file_path = os.path.join(PROCESSED_DATA_DIR_ARTIST, DATA_SUBDIR, f'y_val_{CLEAN_TARGET_ARTIST_NAME}_char.npy')

if not all(os.path.exists(p) for p in [X_train_file_path, y_train_file_path, X_val_file_path, y_val_file_path]):
    print(f"Dados tokenizados para '{TARGET_ARTIST}' não encontrados. Execute o pré-processamento primeiro.")
else:
    try:
        tokenizer = load_object(TOKENIZER_PATH)
        total_chars = load_object(os.path.join(PROCESSED_DATA_DIR_ARTIST, TOKENIZER_SUBDIR, f'vocab_size_{CLEAN_TARGET_ARTIST_NAME}_char.pkl'))
        print(f"Vocabulário de caracteres carregado. Tamanho: {total_chars}")

        train_dataset = LyricsDataset(X_train_file_path, y_train_file_path)
        val_dataset = LyricsDataset(X_val_file_path, y_val_file_path)

        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

        print(f"Dataset de treino: {len(train_dataset)} amostras, {len(train_loader)} batches.")
        print(f"Dataset de validação: {len(val_dataset)} amostras, {len(val_loader)} batches.")

    except FileNotFoundError as e:
        print(e)

# --- Hiperparâmetros do Modelo ---
EMBEDDING_DIM = 256
HIDDEN_DIM = 256
N_LAYERS = 4
DROPOUT_RATE = 0.2

Usando dispositivo: cuda

Iniciando treinamento de nível de caractere para o artista: 'ArianaGrande'...
Objeto carregado de: processed_data_by_artist_char_split\arianagrande\tokenizers\char_tokenizer_arianagrande.pkl
Objeto carregado de: processed_data_by_artist_char_split\arianagrande\tokenizers\vocab_size_arianagrande_char.pkl
Vocabulário de caracteres carregado. Tamanho do vocabulário: 36
Dataset de treino com 360678 amostras, 2818 batches.
Dataset de validação com 44889 amostras, 351 batches.


### Sem Dropout

In [None]:
"""
Instancia o modelo LSTM com uma estratégia de dropout específica, executa
o loop de treinamento e validação, salva o melhor modelo com base na
perda de validação e armazena o histórico completo de treinamento em um
arquivo pickle para análise posterior.
"""
CURRENT_DROPOUT_STRATEGY = "none"

model = CharLSTM(
    vocab_size=total_chars,
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    n_layers=N_LAYERS,
    dropout_rate=DROPOUT_RATE,
    dropout_strategy=CURRENT_DROPOUT_STRATEGY
).to(device)

print(model)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

best_val_loss = float('inf')
model_save_filename = f"model_weights_{CLEAN_TARGET_ARTIST_NAME}_strategy_{CURRENT_DROPOUT_STRATEGY}_best.pt"
model_save_filepath = os.path.join(MODEL_DIR_ARTIST, model_save_filename)

train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

print(f"Iniciando treinamento para '{TARGET_ARTIST}' com estratégia de dropout: '{CURRENT_DROPOUT_STRATEGY}'...")
for epoch in range(EPOCHS):
    model.train()
    train_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [Train]"):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

    train_loss = train_loss / len(train_dataset)
    train_accuracy = correct_predictions / total_predictions

    model.eval()
    val_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    with torch.no_grad():
        for inputs, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [Validation]"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total_predictions += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

    val_loss = val_loss / len(val_dataset)
    val_accuracy = correct_predictions / total_predictions

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accuracies.append(train_accuracy)
    val_accuracies.append(val_accuracy)

    print(f"Epoch {epoch+1}/{EPOCHS}: Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), model_save_filepath)
        print(f"Modelo salvo: Melhor val_loss em {best_val_loss:.4f}")

history = {
    'train_loss': train_losses,
    'val_loss': val_losses,
    'train_accuracy': train_accuracies,
    'val_accuracy': val_accuracies,
    'dropout_strategy': CURRENT_DROPOUT_STRATEGY
}
history_filename = f"training_history_{CLEAN_TARGET_ARTIST_NAME}_strategy_{CURRENT_DROPOUT_STRATEGY}.pkl"
history_filepath = os.path.join(MODEL_DIR_ARTIST, history_filename)
with open(history_filepath, 'wb') as f:
    pickle.dump(history, f)
print(f"\nHistórico de treinamento salvo em: {history_filepath}")

print(f"\nTreinamento para '{TARGET_ARTIST}' com estratégia '{CURRENT_DROPOUT_STRATEGY}' concluído.")
print(f"O melhor modelo foi salvo em: {model_save_filepath}")

CharLSTM(
  (embedding): Embedding(36, 256)
  (lstm): LSTM(256, 256, num_layers=4, batch_first=True, bidirectional=True)
  (dropout_layer): Dropout(p=0.2, inplace=False)
  (fc): Linear(in_features=512, out_features=36, bias=True)
)
Iniciando treinamento para 'ArianaGrande' com estratégia de dropout: 'none'...


Epoch 1/10 [Train]: 100%|██████████| 2818/2818 [04:53<00:00,  9.60it/s]
Epoch 1/10 [Validation]: 100%|██████████| 351/351 [00:13<00:00, 26.81it/s]


Epoch 1/10: Train Loss: 1.5494, Train Acc: 0.5353, Val Loss: 1.3719, Val Acc: 0.5837
Modelo salvo: Melhor val_loss em 1.3719


Epoch 2/10 [Train]: 100%|██████████| 2818/2818 [04:55<00:00,  9.52it/s]
Epoch 2/10 [Validation]: 100%|██████████| 351/351 [00:16<00:00, 21.34it/s]


Epoch 2/10: Train Loss: 1.1019, Train Acc: 0.6655, Val Loss: 1.2388, Val Acc: 0.6236
Modelo salvo: Melhor val_loss em 1.2388


Epoch 3/10 [Train]: 100%|██████████| 2818/2818 [05:03<00:00,  9.30it/s]
Epoch 3/10 [Validation]: 100%|██████████| 351/351 [00:13<00:00, 26.97it/s]


Epoch 3/10: Train Loss: 0.8778, Train Acc: 0.7334, Val Loss: 1.2101, Val Acc: 0.6423
Modelo salvo: Melhor val_loss em 1.2101


Epoch 4/10 [Train]: 100%|██████████| 2818/2818 [04:52<00:00,  9.65it/s]
Epoch 4/10 [Validation]: 100%|██████████| 351/351 [00:12<00:00, 27.51it/s]


Epoch 4/10: Train Loss: 0.7146, Train Acc: 0.7834, Val Loss: 1.1789, Val Acc: 0.6602
Modelo salvo: Melhor val_loss em 1.1789


Epoch 5/10 [Train]: 100%|██████████| 2818/2818 [04:51<00:00,  9.66it/s]
Epoch 5/10 [Validation]: 100%|██████████| 351/351 [00:13<00:00, 26.89it/s]


Epoch 5/10: Train Loss: 0.5961, Train Acc: 0.8191, Val Loss: 1.1791, Val Acc: 0.6742


Epoch 6/10 [Train]: 100%|██████████| 2818/2818 [04:51<00:00,  9.66it/s]
Epoch 6/10 [Validation]: 100%|██████████| 351/351 [00:12<00:00, 27.10it/s]


Epoch 6/10: Train Loss: 0.5052, Train Acc: 0.8461, Val Loss: 1.2145, Val Acc: 0.6778


Epoch 7/10 [Train]: 100%|██████████| 2818/2818 [04:52<00:00,  9.62it/s]
Epoch 7/10 [Validation]: 100%|██████████| 351/351 [00:13<00:00, 26.89it/s]


Epoch 7/10: Train Loss: 0.4396, Train Acc: 0.8650, Val Loss: 1.2523, Val Acc: 0.6845


Epoch 8/10 [Train]: 100%|██████████| 2818/2818 [04:52<00:00,  9.63it/s]
Epoch 8/10 [Validation]: 100%|██████████| 351/351 [00:12<00:00, 27.19it/s]


Epoch 8/10: Train Loss: 0.3890, Train Acc: 0.8801, Val Loss: 1.2992, Val Acc: 0.6896


Epoch 9/10 [Train]: 100%|██████████| 2818/2818 [05:10<00:00,  9.07it/s]
Epoch 9/10 [Validation]: 100%|██████████| 351/351 [00:14<00:00, 24.59it/s]


Epoch 9/10: Train Loss: 0.3548, Train Acc: 0.8898, Val Loss: 1.3483, Val Acc: 0.6879


Epoch 10/10 [Train]: 100%|██████████| 2818/2818 [05:13<00:00,  8.99it/s]
Epoch 10/10 [Validation]: 100%|██████████| 351/351 [00:14<00:00, 24.18it/s]


Epoch 10/10: Train Loss: 0.3242, Train Acc: 0.8990, Val Loss: 1.3644, Val Acc: 0.6946

Histórico de treinamento salvo em: models_by_artist_char_split\arianagrande\training_history_arianagrande_strategy_none.pkl

Treinamento para 'ArianaGrande' com estratégia 'none' concluído.
O melhor modelo foi salvo em: models_by_artist_char_split\arianagrande\model_weights_arianagrande_strategy_none_best.pt


### Depois de cada camada densa

In [None]:
"""
Instancia o modelo LSTM com uma estratégia de dropout específica, executa
o loop de treinamento e validação, salva o melhor modelo com base na
perda de validação e armazena o histórico completo de treinamento em um
arquivo pickle para análise posterior.
"""
CURRENT_DROPOUT_STRATEGY = "between_lstm"

model = CharLSTM(
    vocab_size=total_chars,
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    n_layers=N_LAYERS,
    dropout_rate=DROPOUT_RATE,
    dropout_strategy=CURRENT_DROPOUT_STRATEGY
).to(device)

print(model)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

best_val_loss = float('inf')
model_save_filename = f"model_weights_{CLEAN_TARGET_ARTIST_NAME}_strategy_{CURRENT_DROPOUT_STRATEGY}_best.pt"
model_save_filepath = os.path.join(MODEL_DIR_ARTIST, model_save_filename)

train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

print(f"Iniciando treinamento para '{TARGET_ARTIST}' com estratégia de dropout: '{CURRENT_DROPOUT_STRATEGY}'...")
for epoch in range(EPOCHS):
    model.train()
    train_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [Train]"):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

    train_loss = train_loss / len(train_dataset)
    train_accuracy = correct_predictions / total_predictions

    model.eval()
    val_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    with torch.no_grad():
        for inputs, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [Validation]"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total_predictions += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

    val_loss = val_loss / len(val_dataset)
    val_accuracy = correct_predictions / total_predictions

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accuracies.append(train_accuracy)
    val_accuracies.append(val_accuracy)

    print(f"Epoch {epoch+1}/{EPOCHS}: Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), model_save_filepath)
        print(f"Modelo salvo: Melhor val_loss em {best_val_loss:.4f}")

history = {
    'train_loss': train_losses,
    'val_loss': val_losses,
    'train_accuracy': train_accuracies,
    'val_accuracy': val_accuracies,
    'dropout_strategy': CURRENT_DROPOUT_STRATEGY
}
history_filename = f"training_history_{CLEAN_TARGET_ARTIST_NAME}_strategy_{CURRENT_DROPOUT_STRATEGY}.pkl"
history_filepath = os.path.join(MODEL_DIR_ARTIST, history_filename)
with open(history_filepath, 'wb') as f:
    pickle.dump(history, f)
print(f"\nHistórico de treinamento salvo em: {history_filepath}")

print(f"\nTreinamento para '{TARGET_ARTIST}' com estratégia '{CURRENT_DROPOUT_STRATEGY}' concluído.")
print(f"O melhor modelo foi salvo em: {model_save_filepath}")

CharLSTM(
  (embedding): Embedding(36, 256)
  (lstm): LSTM(256, 256, num_layers=4, batch_first=True, dropout=0.2, bidirectional=True)
  (dropout_layer): Dropout(p=0.2, inplace=False)
  (fc): Linear(in_features=512, out_features=36, bias=True)
)
Iniciando treinamento para 'ArianaGrande' com estratégia de dropout: 'between_lstm'...


Epoch 1/10 [Train]: 100%|██████████| 2827/2827 [12:59<00:00,  3.63it/s]
Epoch 1/10 [Validation]: 100%|██████████| 364/364 [00:31<00:00, 11.40it/s]


Epoch 1/10: Train Loss: 1.5248, Train Acc: 0.5418, Val Loss: 1.3570, Val Acc: 0.5895
Modelo salvo: Melhor val_loss em 1.3570


Epoch 2/10 [Train]: 100%|██████████| 2827/2827 [12:56<00:00,  3.64it/s]
Epoch 2/10 [Validation]: 100%|██████████| 364/364 [00:32<00:00, 11.29it/s]


Epoch 2/10: Train Loss: 1.1012, Train Acc: 0.6639, Val Loss: 1.2299, Val Acc: 0.6276
Modelo salvo: Melhor val_loss em 1.2299


Epoch 3/10 [Train]: 100%|██████████| 2827/2827 [12:56<00:00,  3.64it/s]
Epoch 3/10 [Validation]: 100%|██████████| 364/364 [00:31<00:00, 11.44it/s]


Epoch 3/10: Train Loss: 0.9286, Train Acc: 0.7153, Val Loss: 1.1940, Val Acc: 0.6458
Modelo salvo: Melhor val_loss em 1.1940


Epoch 4/10 [Train]: 100%|██████████| 2827/2827 [13:04<00:00,  3.60it/s]
Epoch 4/10 [Validation]: 100%|██████████| 364/364 [00:32<00:00, 11.28it/s]


Epoch 4/10: Train Loss: 0.8194, Train Acc: 0.7479, Val Loss: 1.1849, Val Acc: 0.6566
Modelo salvo: Melhor val_loss em 1.1849


Epoch 5/10 [Train]: 100%|██████████| 2827/2827 [13:11<00:00,  3.57it/s]
Epoch 5/10 [Validation]: 100%|██████████| 364/364 [00:32<00:00, 11.05it/s]


Epoch 5/10: Train Loss: 0.7491, Train Acc: 0.7688, Val Loss: 1.1979, Val Acc: 0.6612


Epoch 6/10 [Train]: 100%|██████████| 2827/2827 [13:10<00:00,  3.58it/s]
Epoch 6/10 [Validation]: 100%|██████████| 364/364 [00:33<00:00, 10.94it/s]


Epoch 6/10: Train Loss: 0.6984, Train Acc: 0.7835, Val Loss: 1.1988, Val Acc: 0.6662


Epoch 7/10 [Train]: 100%|██████████| 2827/2827 [13:18<00:00,  3.54it/s]
Epoch 7/10 [Validation]: 100%|██████████| 364/364 [00:32<00:00, 11.12it/s]


Epoch 7/10: Train Loss: 0.6610, Train Acc: 0.7951, Val Loss: 1.2160, Val Acc: 0.6704


Epoch 8/10 [Train]: 100%|██████████| 2827/2827 [12:07<00:00,  3.88it/s]
Epoch 8/10 [Validation]: 100%|██████████| 364/364 [00:29<00:00, 12.19it/s]


Epoch 8/10: Train Loss: 0.6362, Train Acc: 0.8019, Val Loss: 1.1876, Val Acc: 0.6760


Epoch 9/10 [Train]: 100%|██████████| 2827/2827 [07:27<00:00,  6.31it/s]
Epoch 9/10 [Validation]: 100%|██████████| 364/364 [00:13<00:00, 26.32it/s]


Epoch 9/10: Train Loss: 0.6109, Train Acc: 0.8097, Val Loss: 1.2188, Val Acc: 0.6718


Epoch 10/10 [Train]: 100%|██████████| 2827/2827 [05:23<00:00,  8.73it/s]
Epoch 10/10 [Validation]: 100%|██████████| 364/364 [00:16<00:00, 22.54it/s]

Epoch 10/10: Train Loss: 0.5958, Train Acc: 0.8143, Val Loss: 1.2061, Val Acc: 0.6772

Histórico de treinamento salvo em: models_by_artist_char_split\arianagrande\training_history_arianagrande_strategy_between_lstm.pkl

Treinamento para 'ArianaGrande' com estratégia 'between_lstm' concluído.
O melhor modelo foi salvo em: models_by_artist_char_split\arianagrande\model_weights_arianagrande_strategy_between_lstm_best.pt





### Antes da camada de saída

In [None]:
"""
Instancia o modelo LSTM com uma estratégia de dropout específica, executa
o loop de treinamento e validação, salva o melhor modelo com base na
perda de validação e armazena o histórico completo de treinamento em um
arquivo pickle para análise posterior.
"""
CURRENT_DROPOUT_STRATEGY = "before_fc"

model = CharLSTM(
    vocab_size=total_chars,
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    n_layers=N_LAYERS,
    dropout_rate=DROPOUT_RATE,
    dropout_strategy=CURRENT_DROPOUT_STRATEGY
).to(device)

print(model)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

best_val_loss = float('inf')
model_save_filename = f"model_weights_{CLEAN_TARGET_ARTIST_NAME}_strategy_{CURRENT_DROPOUT_STRATEGY}_best.pt"
model_save_filepath = os.path.join(MODEL_DIR_ARTIST, model_save_filename)

train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

print(f"Iniciando treinamento para '{TARGET_ARTIST}' com estratégia de dropout: '{CURRENT_DROPOUT_STRATEGY}'...")
for epoch in range(EPOCHS):
    model.train()
    train_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [Train]"):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

    train_loss = train_loss / len(train_dataset)
    train_accuracy = correct_predictions / total_predictions

    model.eval()
    val_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    with torch.no_grad():
        for inputs, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [Validation]"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total_predictions += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

    val_loss = val_loss / len(val_dataset)
    val_accuracy = correct_predictions / total_predictions

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accuracies.append(train_accuracy)
    val_accuracies.append(val_accuracy)

    print(f"Epoch {epoch+1}/{EPOCHS}: Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), model_save_filepath)
        print(f"Modelo salvo: Melhor val_loss em {best_val_loss:.4f}")

history = {
    'train_loss': train_losses,
    'val_loss': val_losses,
    'train_accuracy': train_accuracies,
    'val_accuracy': val_accuracies,
    'dropout_strategy': CURRENT_DROPOUT_STRATEGY
}
history_filename = f"training_history_{CLEAN_TARGET_ARTIST_NAME}_strategy_{CURRENT_DROPOUT_STRATEGY}.pkl"
history_filepath = os.path.join(MODEL_DIR_ARTIST, history_filename)
with open(history_filepath, 'wb') as f:
    pickle.dump(history, f)
print(f"\nHistórico de treinamento salvo em: {history_filepath}")

print(f"\nTreinamento para '{TARGET_ARTIST}' com estratégia '{CURRENT_DROPOUT_STRATEGY}' concluído.")
print(f"O melhor modelo foi salvo em: {model_save_filepath}")

CharLSTM(
  (embedding): Embedding(36, 256)
  (lstm): LSTM(256, 256, num_layers=4, batch_first=True, bidirectional=True)
  (dropout_layer): Dropout(p=0.2, inplace=False)
  (fc): Linear(in_features=512, out_features=36, bias=True)
)
Iniciando treinamento para 'ArianaGrande' com estratégia de dropout: 'before_fc'...


Epoch 1/10 [Train]: 100%|██████████| 2827/2827 [05:23<00:00,  8.73it/s]
Epoch 1/10 [Validation]: 100%|██████████| 364/364 [00:13<00:00, 27.19it/s]


Epoch 1/10: Train Loss: 1.5346, Train Acc: 0.5407, Val Loss: 1.3476, Val Acc: 0.5914
Modelo salvo: Melhor val_loss em 1.3476


Epoch 2/10 [Train]: 100%|██████████| 2827/2827 [05:06<00:00,  9.22it/s]
Epoch 2/10 [Validation]: 100%|██████████| 364/364 [00:14<00:00, 25.90it/s]


Epoch 2/10: Train Loss: 1.1014, Train Acc: 0.6644, Val Loss: 1.2490, Val Acc: 0.6268
Modelo salvo: Melhor val_loss em 1.2490


Epoch 3/10 [Train]: 100%|██████████| 2827/2827 [05:03<00:00,  9.32it/s]
Epoch 3/10 [Validation]: 100%|██████████| 364/364 [00:15<00:00, 23.87it/s]


Epoch 3/10: Train Loss: 0.8845, Train Acc: 0.7315, Val Loss: 1.1938, Val Acc: 0.6511
Modelo salvo: Melhor val_loss em 1.1938


Epoch 4/10 [Train]: 100%|██████████| 2827/2827 [05:13<00:00,  9.01it/s]
Epoch 4/10 [Validation]: 100%|██████████| 364/364 [00:13<00:00, 26.56it/s]


Epoch 4/10: Train Loss: 0.7276, Train Acc: 0.7788, Val Loss: 1.1956, Val Acc: 0.6642


Epoch 5/10 [Train]: 100%|██████████| 2827/2827 [05:57<00:00,  7.91it/s]
Epoch 5/10 [Validation]: 100%|██████████| 364/364 [00:13<00:00, 26.59it/s]


Epoch 5/10: Train Loss: 0.6135, Train Acc: 0.8131, Val Loss: 1.2070, Val Acc: 0.6689


Epoch 6/10 [Train]: 100%|██████████| 2827/2827 [05:08<00:00,  9.17it/s]
Epoch 6/10 [Validation]: 100%|██████████| 364/364 [00:15<00:00, 24.24it/s]


Epoch 6/10: Train Loss: 0.5334, Train Acc: 0.8373, Val Loss: 1.2041, Val Acc: 0.6751


Epoch 7/10 [Train]: 100%|██████████| 2827/2827 [05:14<00:00,  9.00it/s]
Epoch 7/10 [Validation]: 100%|██████████| 364/364 [00:16<00:00, 22.62it/s]


Epoch 7/10: Train Loss: 0.4698, Train Acc: 0.8558, Val Loss: 1.2417, Val Acc: 0.6836


Epoch 8/10 [Train]: 100%|██████████| 2827/2827 [05:15<00:00,  8.97it/s]
Epoch 8/10 [Validation]: 100%|██████████| 364/364 [00:14<00:00, 25.81it/s]


Epoch 8/10: Train Loss: 0.4261, Train Acc: 0.8686, Val Loss: 1.2900, Val Acc: 0.6854


Epoch 9/10 [Train]: 100%|██████████| 2827/2827 [05:18<00:00,  8.87it/s]
Epoch 9/10 [Validation]: 100%|██████████| 364/364 [00:13<00:00, 26.93it/s]


Epoch 9/10: Train Loss: 0.3929, Train Acc: 0.8779, Val Loss: 1.3158, Val Acc: 0.6909


Epoch 10/10 [Train]: 100%|██████████| 2827/2827 [05:03<00:00,  9.32it/s]
Epoch 10/10 [Validation]: 100%|██████████| 364/364 [00:14<00:00, 25.59it/s]

Epoch 10/10: Train Loss: 0.3658, Train Acc: 0.8864, Val Loss: 1.3235, Val Acc: 0.6901

Histórico de treinamento salvo em: models_by_artist_char_split\arianagrande\training_history_arianagrande_strategy_before_fc.pkl

Treinamento para 'ArianaGrande' com estratégia 'before_fc' concluído.
O melhor modelo foi salvo em: models_by_artist_char_split\arianagrande\model_weights_arianagrande_strategy_before_fc_best.pt





## Teste

In [None]:
"""
Define funções para avaliação do modelo (perda, acurácia, geração de texto e
métricas de qualidade). Carrega os dados de teste, o tokenizador e os recursos
linguísticos (NLTK, spaCy). Prepara os prompts e as palavras-alvo para a
avaliação qualitativa e quantitativa do modelo treinado.
"""
# --- Funções Específicas para Avaliação ---
def evaluate_model_loss_acc(model, data_loader, criterion, device):
    model.eval()
    total_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    with torch.no_grad():
        for inputs, labels in tqdm(data_loader, desc="Avaliando Perda/Acurácia"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            total_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()
    avg_loss = total_loss / total_samples
    accuracy = correct_predictions / total_samples
    return avg_loss, accuracy

def generate_next_word_from_prompt_eval(model, tokenizer, prompt_text, sequence_length, device, max_chars_word=25):
    model.eval()
    current_sequence_str = prompt_text
    generated_word_chars = []
    word_delimiters = [' ', '.', ',', '!', '?', ';', ':']
    started_word = False

    for _ in range(max_chars_word):
        tokenized_input = tokenizer.texts_to_sequences(current_sequence_str)
        if not tokenized_input or all(t == 0 for t in tokenized_input):
            break
        input_tensor = torch.tensor(tokenized_input, dtype=torch.long).unsqueeze(0).to(device)
        with torch.no_grad():
            predictions = model(input_tensor)
        next_char_id = torch.argmax(predictions.squeeze(0), dim=-1).item()
        next_char = tokenizer.int_to_char.get(next_char_id, '')

        if not started_word:
            if next_char.isspace() or next_char in word_delimiters:
                current_sequence_str = current_sequence_str[1:] + next_char
                continue
            else:
                started_word = True
                generated_word_chars.append(next_char)
        elif next_char in word_delimiters:
            break
        else:
            generated_word_chars.append(next_char)
        current_sequence_str = current_sequence_str[1:] + next_char
    return "".join(generated_word_chars).strip()

def calculate_text_quality_metrics(text_to_analyze, reference_words_set):
    words = nltk.word_tokenize(text_to_analyze.lower())
    metrics = {
        'total_words': len(words), 'unique_words': 0, 'ttr': 0.0,
        'existing_words_count': 0, 'existence_rate': 0.0, 'repetition_rate_words': 0.0,
        'repetition_rate_3grams_char': 0.0
    }
    if not words:
        return metrics

    unique_words = set(words)
    metrics['unique_words'] = len(unique_words)
    metrics['ttr'] = len(unique_words) / len(words)
    
    existing_words_in_generated = sum(1 for word in unique_words if word in reference_words_set)
    metrics['existing_words_count'] = existing_words_in_generated
    metrics['existence_rate'] = existing_words_in_generated / len(unique_words) if unique_words else 0.0
    metrics['repetition_rate_words'] = (len(words) - len(unique_words)) / len(words) if words else 0.0

    char_3grams = [text_to_analyze[i:i+3] for i in range(len(text_to_analyze) - 2)]
    if char_3grams:
        metrics['repetition_rate_3grams_char'] = (len(char_3grams) - len(set(char_3grams))) / len(char_3grams)
    return metrics

# --- Carregamento e Preparação dos Dados para Avaliação ---
try:
    tokenizer = load_object(TOKENIZER_PATH)
    total_chars = load_object(os.path.join(PROCESSED_DATA_DIR_ARTIST, TOKENIZER_SUBDIR, f'vocab_size_{CLEAN_TARGET_ARTIST_NAME}_char.pkl'))
    print(f"Tokenizador carregado. Tamanho do vocabulário: {total_chars}")

    X_test_file_path = os.path.join(PROCESSED_DATA_DIR_ARTIST, DATA_SUBDIR, f'X_test_{CLEAN_TARGET_ARTIST_NAME}_char.npy')
    y_test_file_path = os.path.join(PROCESSED_DATA_DIR_ARTIST, DATA_SUBDIR, f'y_test_{CLEAN_TARGET_ARTIST_NAME}_char.npy')
    
    test_dataset = LyricsDataset(X_test_file_path, y_test_file_path)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
    criterion = nn.CrossEntropyLoss()
    print(f"Dataset de teste com {len(test_dataset)} amostras, {len(test_loader)} batches.")

except FileNotFoundError as e:
    print(e)

# --- Preparação dos Prompts e Alvos para Avaliação de Nível de Palavra ---
prompts_for_eval = []
true_next_words = []
true_next_pos_tags = []
existing_words_in_corpus = set()
ARTIST_LYRICS_CLEAN_ALL_FOR_VOCAB = []

try:
    df_artist_full = pd.read_csv(os.path.join(CSV_FOLDER_PATH, f'{TARGET_ARTIST}.csv'), usecols=['Lyric'])
    for lyric_val in df_artist_full['Lyric']:
        cleaned_lyric = clean_text(lyric_val)
        if cleaned_lyric and PLACEHOLDER_PHRASE not in cleaned_lyric:
            ARTIST_LYRICS_CLEAN_ALL_FOR_VOCAB.append(cleaned_lyric)
            existing_words_in_corpus.update(nltk.word_tokenize(cleaned_lyric))
    print(f"Tamanho do vocabulário de palavras no corpus do artista: {len(existing_words_in_corpus)}")

    random.seed(RANDOM_SEED)
    random.shuffle(ARTIST_LYRICS_CLEAN_ALL_FOR_VOCAB)
    num_train = int(len(ARTIST_LYRICS_CLEAN_ALL_FOR_VOCAB) * TRAIN_SPLIT)
    num_val = int(len(ARTIST_LYRICS_CLEAN_ALL_FOR_VOCAB) * VAL_SPLIT)
    ARTIST_LYRICS_FOR_TEST_PROMPTS = ARTIST_LYRICS_CLEAN_ALL_FOR_VOCAB[num_train + num_val:]
    
    print(f"\nGerando prompts de {len(ARTIST_LYRICS_FOR_TEST_PROMPTS)} músicas de teste do artista...")
    for lyric_text in tqdm(ARTIST_LYRICS_FOR_TEST_PROMPTS, desc="Gerando prompts e alvos"):
        tokenized_full_lyric = nltk.word_tokenize(lyric_text)
        for i in range(1, len(tokenized_full_lyric)):
            target_word = tokenized_full_lyric[i]
            context_string = " ".join(tokenized_full_lyric[:i]) + " "
            final_prompt = (' ' * (SEQUENCE_LENGTH - len(context_string)) + context_string)[-SEQUENCE_LENGTH:]
            if target_word:
                prompts_for_eval.append(final_prompt)
                true_next_words.append(target_word)
    
    print(f"Total de prompts para avaliação de nível de palavra gerados: {len(prompts_for_eval)}")

except Exception as e:
    print(f"Erro ao preparar dados de nível de palavra para avaliação: {e}")

# --- Carregamento do spaCy e Geração de POS Tags ---
nlp = None
try:
    nlp = spacy.load("en_core_web_sm")
    if true_next_words and nlp:
        print("\nExecutando POS Tagging nas palavras alvo...")
        docs = nlp.pipe(true_next_words, batch_size=1000)
        true_next_pos_tags = [doc[0].pos_ if doc and doc[0] else "UNKNOWN" for doc in tqdm(docs, total=len(true_next_words), desc="POS Tagging")]
    else:
        print("Nenhuma palavra alvo para POS tagging ou modelo spaCy não carregado.")
except OSError:
    print("Modelo spaCy 'en_core_web_sm' não encontrado. Baixando...")
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")
    if true_next_words and nlp:
        print("\nExecutando POS Tagging após download...")
        docs = nlp.pipe(true_next_words, batch_size=1000)
        true_next_pos_tags = [doc[0].pos_ if doc and doc[0] else "UNKNOWN" for doc in tqdm(docs, total=len(true_next_words), desc="POS Tagging")]

Downloading 'wordnet' NLTK package...
Usando dispositivo: cuda
Object loaded from: processed_data_by_artist_char_split\arianagrande\tokenizers\char_tokenizer_arianagrande.pkl
Object loaded from: processed_data_by_artist_char_split\arianagrande\tokenizers\vocab_size_arianagrande_char.pkl
Vocabulary loaded. Vocabulary size: 36
Test dataset with 45029 samples, 352 batches.


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\gabri\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Size of existing word vocabulary in artist's corpus (for metrics): 4174

Generating prompts and targets for evaluation from 29 artist test songs...


Generating prompts and targets: 100%|██████████| 29/29 [00:00<00:00, 510.49it/s]


Total prompts and targets generated for word-level evaluation: 9916

Performing POS Tagging of true words...


POS Tagging true words: 100%|██████████| 10/10 [00:03<00:00,  2.91it/s]


In [None]:
"""
Define funções avançadas para avaliação de qualidade de texto, incluindo
métricas de diversidade (Distinct-N) e uma função de geração de texto
controlada por contagem de palavras e temperatura. Em seguida, carrega os
componentes e prepara o conjunto de teste do artista para a avaliação.
"""
# --- Novas Funções para Métricas de Qualidade de Texto ---
def get_word_ngrams(text, n):
    words = nltk.word_tokenize(text.lower())
    if len(words) < n:
        return []
    return list(nltk.ngrams(words, n))

def calculate_distinct_n_ratio(text, n):
    ngrams = get_word_ngrams(text, n)
    if not ngrams:
        return 0.0
    unique_ngrams = set(ngrams)
    return len(unique_ngrams) / len(ngrams)

def calculate_text_quality_metrics_updated(text_to_analyze, reference_words_set):
    words = nltk.word_tokenize(text_to_analyze.lower())
    metrics = {
        'total_words': len(words), 'unique_words': 0, 'ttr': 0.0,
        'distinct_1_ratio': 0.0, 'distinct_2_ratio': 0.0, 'distinct_3_ratio': 0.0,
        'repetition_rate_words': 0.0, 'repetition_rate_bigrams': 0.0, 'repetition_rate_trigrams': 0.0,
        'existing_words_count': 0, 'existence_rate': 0.0,
    }
    if not words:
        return metrics

    unique_words = set(words)
    metrics['unique_words'] = len(unique_words)
    
    metrics['ttr'] = calculate_distinct_n_ratio(text_to_analyze, 1)
    metrics['distinct_1_ratio'] = metrics['ttr']
    metrics['repetition_rate_words'] = 1 - metrics['distinct_1_ratio']

    metrics['distinct_2_ratio'] = calculate_distinct_n_ratio(text_to_analyze, 2)
    metrics['repetition_rate_bigrams'] = 1 - metrics['distinct_2_ratio']

    metrics['distinct_3_ratio'] = calculate_distinct_n_ratio(text_to_analyze, 3)
    metrics['repetition_rate_trigrams'] = 1 - metrics['distinct_3_ratio']

    existing_words_in_generated = sum(1 for word in unique_words if word in reference_words_set)
    metrics['existing_words_count'] = existing_words_in_generated
    metrics['existence_rate'] = existing_words_in_generated / len(unique_words) if unique_words else 0.0
    
    return metrics

def generate_text_by_word_count(model, tokenizer, seed_text, target_word_count, temperature=0.8, sequence_length=SEQUENCE_LENGTH, device=device, max_chars_per_word=25):
    model.eval()
    processed_seed_text = clean_text(seed_text)
    
    if len(processed_seed_text) < sequence_length:
        processed_seed_text = ' ' * (sequence_length - len(processed_seed_text)) + processed_seed_text
    elif len(processed_seed_text) > sequence_length:
        processed_seed_text = processed_seed_text[-sequence_length:]
    
    generated_text = processed_seed_text
    current_word_count = len(nltk.word_tokenize(processed_seed_text))
    pbar = tqdm(total=target_word_count, initial=current_word_count, desc="Gerando palavras")
    
    max_chars_to_generate_total = target_word_count * max_chars_per_word * 2
    chars_generated = 0
    
    while current_word_count < target_word_count and chars_generated < max_chars_to_generate_total:
        current_sequence_str = generated_text[-sequence_length:]
        tokenized_input = tokenizer.texts_to_sequences(current_sequence_str)

        if not tokenized_input or all(t == 0 for t in tokenized_input):
            break

        input_tensor = torch.tensor(tokenized_input, dtype=torch.long).unsqueeze(0).to(device)

        with torch.no_grad():
            predictions = model(input_tensor).squeeze(0)
        
        if temperature <= 0:
            next_char_id = torch.argmax(predictions, dim=-1).item()
        else:
            probabilities = torch.softmax(predictions / temperature, dim=-1)
            next_char_id = torch.multinomial(probabilities, num_samples=1).item()
        
        next_char = tokenizer.int_to_char.get(next_char_id, '')
        generated_text += next_char
        chars_generated += 1

        if chars_generated % 50 == 0 or (next_char.isspace() or next_char in '!?,.'):
            new_word_count = len(nltk.word_tokenize(generated_text))
            if new_word_count > current_word_count:
                pbar.update(new_word_count - current_word_count)
                current_word_count = new_word_count
    
    pbar.close()
    if len(nltk.word_tokenize(generated_text)) < target_word_count:
        print(f"\nAtenção: Geração interrompida antes de atingir {target_word_count} palavras.")

    return generated_text

# --- Preparação dos Dados e Recursos para Avaliação ---
CSV_FILE_PATH_TARGET_ARTIST = os.path.join(CSV_FOLDER_PATH, f'{TARGET_ARTIST}.csv')
try:
    tokenizer = load_object(TOKENIZER_PATH)
    total_chars = load_object(os.path.join(PROCESSED_DATA_DIR_ARTIST, TOKENIZER_SUBDIR, f'vocab_size_{CLEAN_TARGET_ARTIST_NAME}_char.pkl'))
    print(f"Tokenizador carregado. Tamanho do vocabulário: {total_chars}")

    X_test_file_path = os.path.join(PROCESSED_DATA_DIR_ARTIST, DATA_SUBDIR, f'X_test_{CLEAN_TARGET_ARTIST_NAME}_char.npy')
    y_test_file_path = os.path.join(PROCESSED_DATA_DIR_ARTIST, DATA_SUBDIR, f'y_test_{CLEAN_TARGET_ARTIST_NAME}_char.npy')
    test_dataset = LyricsDataset(X_test_file_path, y_test_file_path)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
    criterion = nn.CrossEntropyLoss()
    print(f"Dataset de teste com {len(test_dataset)} amostras, {len(test_loader)} batches.")

    ARTIST_LYRICS_CLEAN_ALL_FOR_PROMPTS = []
    existing_words_in_corpus = set()
    df_target_artist_full = pd.read_csv(CSV_FILE_PATH_TARGET_ARTIST, usecols=['Lyric'])
    for lyric_val in df_target_artist_full['Lyric']:
        cleaned_lyric = clean_text(lyric_val)
        if cleaned_lyric and PLACEHOLDER_PHRASE not in cleaned_lyric:
            ARTIST_LYRICS_CLEAN_ALL_FOR_PROMPTS.append(cleaned_lyric)
            existing_words_in_corpus.update(nltk.word_tokenize(cleaned_lyric))
    
    print(f"Tamanho do vocabulário de palavras do artista (para métricas): {len(existing_words_in_corpus)}")
    
    random.seed(RANDOM_SEED)
    random.shuffle(ARTIST_LYRICS_CLEAN_ALL_FOR_PROMPTS)
    num_total_artist_lyrics = len(ARTIST_LYRICS_CLEAN_ALL_FOR_PROMPTS)
    num_train = int(num_total_artist_lyrics * TRAIN_SPLIT)
    num_val = int(num_total_artist_lyrics * VAL_SPLIT)
    TEST_LYRICS_ARTIST_FINAL_SONGS = ARTIST_LYRICS_CLEAN_ALL_FOR_PROMPTS[num_train + num_val:]
    print(f"Total de {len(TEST_LYRICS_ARTIST_FINAL_SONGS)} músicas no conjunto de teste do artista para geração.")

except Exception as e:
    print(f"Erro ao carregar dados para avaliação: {e}")

nlp = None
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    print("Modelo spaCy 'en_core_web_sm' não encontrado. Baixando...")
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

Downloading 'wordnet' NLTK package...
Using device: cuda
Object loaded from: processed_data_by_artist_char_split\arianagrande\tokenizers\char_tokenizer_arianagrande.pkl
Object loaded from: processed_data_by_artist_char_split\arianagrande\tokenizers\vocab_size_arianagrande_char.pkl
Vocabulary loaded. Vocabulary size: 36
Test dataset with 45029 samples, 352 batches.


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\gabri\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Size of existing word vocabulary in artist's corpus (for metrics): 4174
Total 29 songs in the artist's test set for generation.


In [None]:
"""
Executa a avaliação completa para um modelo pré-treinado com uma estratégia
de dropout específica. Carrega os pesos do modelo, calcula a perda e a
acurácia no conjunto de teste e, em seguida, itera sobre cada música de
teste para gerar uma nova versão. Por fim, calcula e exibe as médias das
métricas de qualidade de texto para as letras geradas e as originais,
permitindo uma comparação direta.
"""

print("\n--- AVALIAÇÃO: Modelo com Dropout 'none' ---")

CURRENT_DROPOUT_STRATEGY = "none" 
MODEL_SAVE_FILENAME = f"model_weights_{CLEAN_TARGET_ARTIST_NAME}_strategy_{CURRENT_DROPOUT_STRATEGY}_best.pt"
model_save_filepath = os.path.join(MODEL_DIR_ARTIST, MODEL_SAVE_FILENAME)

if not os.path.exists(model_save_filepath):
    print(f"Error: Trained model '{model_save_filepath}' for strategy '{CURRENT_DROPOUT_STRATEGY}' not found. Please train this model first.")
    exit() 

model = CharLSTM(
    vocab_size=total_chars,
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    n_layers=N_LAYERS,
    dropout_rate=DROPOUT_RATE,
    dropout_strategy=CURRENT_DROPOUT_STRATEGY 
).to(device)

model.load_state_dict(torch.load(model_save_filepath, map_location=device))
print(f"Model '{MODEL_SAVE_FILENAME}' loaded from: {model_save_filepath}")

# --- Calcular Loss e Acurácia de Caractere (no conjunto de teste tokenizado) ---
test_loss, test_accuracy = evaluate_model_loss_acc(model, test_loader, criterion, device)
perplexity = np.exp(test_loss) if not np.isnan(test_loss) else float('nan')

print(f"\n--- Avaliação de Geração de Músicas (Música a Música) para '{TARGET_ARTIST}' (Dropout: {CURRENT_DROPOUT_STRATEGY}) ---")

all_generated_lyrics_metrics = []
all_actual_lyrics_metrics = []
if not TEST_LYRICS_ARTIST_FINAL_SONGS:
    print("WARNING: No artist test songs available for generation. Skipping song-by-song evaluation.")
else:
    for i, original_lyric_text in enumerate(tqdm(TEST_LYRICS_ARTIST_FINAL_SONGS, desc=f"Generating & Evaluating {CURRENT_DROPOUT_STRATEGY}")):
        original_lyric_words = nltk.word_tokenize(original_lyric_text)
        target_word_count = len(original_lyric_words)

        if target_word_count < 20: 
            target_word_count = 20
        
        if len(original_lyric_text) < SEQUENCE_LENGTH:
            seed_text = ' ' * (SEQUENCE_LENGTH - len(original_lyric_text)) + original_lyric_text
        else:
            seed_text = original_lyric_text[:SEQUENCE_LENGTH]

        # Gerar a Música
        generated_lyric_text = generate_text_by_word_count(model, tokenizer, seed_text, 
                                                           target_word_count, 
                                                           temperature=0.8, 
                                                           sequence_length=SEQUENCE_LENGTH, 
                                                           device=device)
        
        # Calcular Métricas para a Letra Gerada
        metrics_generated = calculate_text_quality_metrics_updated(generated_lyric_text, existing_words_in_corpus)
        all_generated_lyrics_metrics.append(metrics_generated)

        # Calcular Métricas para a Letra Original (para comparação)
        metrics_actual = calculate_text_quality_metrics_updated(original_lyric_text, existing_words_in_corpus)
        all_actual_lyrics_metrics.append(metrics_actual)

    # Calcular a Média das Métricas
    if all_generated_lyrics_metrics:
        avg_generated_metrics = {k: np.mean([d[k] for d in all_generated_lyrics_metrics if d[k] is not None]) for k in all_generated_lyrics_metrics[0]}
        avg_actual_metrics = {k: np.mean([d[k] for d in all_actual_lyrics_metrics if d[k] is not None]) for k in all_actual_lyrics_metrics[0]}
        
        print(f"\n--- MÉDIAS DAS MÉTRICAS DE GERAÇÃO (Dropout: {CURRENT_DROPOUT_STRATEGY}) ---")
        print("\n--- Letras Geradas (Média) ---")
        print(f"Total Words (Avg): {avg_generated_metrics['total_words']:.2f}")
        print(f"Unique Words (Avg): {avg_generated_metrics['unique_words']:.2f}")
        print(f"TTR (Distinct-1 Ratio) (Avg): {avg_generated_metrics['distinct_1_ratio']:.4f}")
        print(f"Repetition Rate (1-Distinct-1) (Avg): {avg_generated_metrics['repetition_rate_words']:.4f}")
        print(f"Distinct-2 Ratio (Bigrams) (Avg): {avg_generated_metrics['distinct_2_ratio']:.4f}")
        print(f"Repetition Rate (1-Distinct-2) (Bigrams) (Avg): {avg_generated_metrics['repetition_rate_bigrams']:.4f}")
        print(f"Distinct-3 Ratio (Trigrams) (Avg): {avg_generated_metrics['distinct_3_ratio']:.4f}")
        print(f"Repetition Rate (1-Distinct-3) (Trigrams) (Avg): {avg_generated_metrics['repetition_rate_trigrams']:.4f}")
        print(f"Generated Unique Words Existing in Corpus (Avg): {avg_generated_metrics['existing_words_count']:.2f}")
        print(f"Existence Rate (Generated Unique Words) (Avg): {avg_generated_metrics['existence_rate']:.2f}%")
        print("-----------------------------------------------------")

        print("\n--- Letras Originais do Teste (Média) ---")
        print(f"Total Words (Avg): {avg_actual_metrics['total_words']:.2f}")
        print(f"Unique Words (Avg): {avg_actual_metrics['unique_words']:.2f}")
        print(f"TTR (Distinct-1 Ratio) (Avg): {avg_actual_metrics['distinct_1_ratio']:.4f}")
        print(f"Repetition Rate (1-Distinct-1) (Avg): {avg_actual_metrics['repetition_rate_words']:.4f}")
        print(f"Distinct-2 Ratio (Bigrams) (Avg): {avg_actual_metrics['distinct_2_ratio']:.4f}")
        print(f"Repetition Rate (1-Distinct-2) (Bigrams) (Avg): {avg_actual_metrics['repetition_rate_bigrams']:.4f}")
        print(f"Distinct-3 Ratio (Trigrams) (Avg): {avg_actual_metrics['distinct_3_ratio']:.4f}")
        print(f"Repetition Rate (1-Distinct-3) (Trigrams) (Avg): {avg_actual_metrics['repetition_rate_trigrams']:.4f}")
        print(f"Actual Unique Words Existing in Corpus (Avg): {avg_actual_metrics['existing_words_count']:.2f}")
        print(f"Existence Rate (Actual Unique Words) (Avg): {avg_actual_metrics['existence_rate']:.2f}%")
        print("-----------------------------------------------------")
    else:
        print("No metrics generated for this strategy due to data issues.")


--- AVALIAÇÃO: Modelo com Dropout 'none' ---
Model 'model_weights_arianagrande_strategy_none_best.pt' loaded from: models_by_artist_char_split\arianagrande\model_weights_arianagrande_strategy_none_best.pt


Evaluating Loss/Acc: 100%|██████████| 352/352 [00:13<00:00, 25.90it/s]



--- Resultados de Avaliação no Conjunto de TESTE (Caracter): 'ArianaGrande' (Dropout: none) ---
Test Loss (Cross-Entropy): 0.7584
Test Accuracy (Next Character): 0.7753
Perplexity: 2.1349
-----------------------------------------------------

--- Avaliação de Geração de Músicas (Música a Música) para 'ArianaGrande' (Dropout: none) ---


Generating words: 4it [00:00,  9.30it/s]     | 0/29 [00:00<?, ?it/s]
Generating words: 100%|██████████| 673/673 [00:46<00:00, 14.41it/s]2.33it/s]
Generating words: 100%|██████████| 775/775 [01:00<00:00, 12.84it/s]7.67s/it]
Generating words: 520it [00:56,  9.21it/s]   | 3/29 [01:47<18:27, 42.60s/it]
Generating words: 100%|██████████| 510/510 [00:45<00:00, 11.28it/s]8.08s/it]
Generating words: 100%|██████████| 402/402 [00:36<00:00, 11.06it/s]7.05s/it]
Generating words: 100%|██████████| 372/372 [00:36<00:00, 10.14it/s]3.41s/it]
Generating words: 100%|██████████| 381/381 [00:33<00:00, 11.35it/s]1.22s/it]
Generating words: 100%|██████████| 14/14 [00:02<00:00,  5.46it/s] 38.79s/it]
Generating words: 100%|██████████| 316/316 [00:30<00:00, 10.22it/s]7.47s/it]
Generating words: 100%|██████████| 526/526 [00:51<00:00, 10.20it/s]28.54s/it]
Generating words: 100%|██████████| 13/13 [00:02<00:00,  6.21it/s], 35.60s/it]
Generating words: 100%|██████████| 337/337 [00:33<00:00, 10.04it/s]25.41s/it]
Gene


--- MÉDIAS DAS MÉTRICAS DE GERAÇÃO (Dropout: none) ---

--- Letras Geradas (Média) ---
Total Words (Avg): 344.24
Unique Words (Avg): 138.28
TTR (Distinct-1 Ratio) (Avg): 0.4961
Repetition Rate (1-Distinct-1) (Avg): 0.5039
Distinct-2 Ratio (Bigrams) (Avg): 0.8432
Repetition Rate (1-Distinct-2) (Bigrams) (Avg): 0.1568
Distinct-3 Ratio (Trigrams) (Avg): 0.9225
Repetition Rate (1-Distinct-3) (Trigrams) (Avg): 0.0775
Generated Unique Words Existing in Corpus (Avg): 125.97
Existence Rate (Generated Unique Words) (Avg): 0.92%
-----------------------------------------------------

--- Letras Originais do Teste (Média) ---
Total Words (Avg): 342.93
Unique Words (Avg): 91.79
TTR (Distinct-1 Ratio) (Avg): 0.3644
Repetition Rate (1-Distinct-1) (Avg): 0.6356
Distinct-2 Ratio (Bigrams) (Avg): 0.5176
Repetition Rate (1-Distinct-2) (Bigrams) (Avg): 0.4824
Distinct-3 Ratio (Trigrams) (Avg): 0.5837
Repetition Rate (1-Distinct-3) (Trigrams) (Avg): 0.4163
Actual Unique Words Existing in Corpus (Avg): 91.




In [None]:
"""
Executa a avaliação completa para um modelo pré-treinado com uma estratégia
de dropout específica. Carrega os pesos do modelo, calcula a perda e a
acurácia no conjunto de teste e, em seguida, itera sobre cada música de
teste para gerar uma nova versão. Por fim, calcula e exibe as médias das
métricas de qualidade de texto para as letras geradas e as originais,
permitindo uma comparação direta.
"""

print("\n--- AVALIAÇÃO: Modelo com Dropout 'between_lstm' ---")

CURRENT_DROPOUT_STRATEGY = "between_lstm" 
MODEL_SAVE_FILENAME = f"model_weights_{CLEAN_TARGET_ARTIST_NAME}_strategy_{CURRENT_DROPOUT_STRATEGY}_best.pt"
model_save_filepath = os.path.join(MODEL_DIR_ARTIST, MODEL_SAVE_FILENAME)

if not os.path.exists(model_save_filepath):
    print(f"Error: Trained model '{model_save_filepath}' for strategy '{CURRENT_DROPOUT_STRATEGY}' not found. Please train this model first.")
    exit() 

model = CharLSTM(
    vocab_size=total_chars,
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    n_layers=N_LAYERS,
    dropout_rate=DROPOUT_RATE,
    dropout_strategy=CURRENT_DROPOUT_STRATEGY 
).to(device)

model.load_state_dict(torch.load(model_save_filepath, map_location=device))
print(f"Model '{MODEL_SAVE_FILENAME}' loaded from: {model_save_filepath}")

# --- Calcular Loss e Acurácia de Caractere (no conjunto de teste tokenizado) ---
test_loss, test_accuracy = evaluate_model_loss_acc(model, test_loader, criterion, device)
perplexity = np.exp(test_loss) if not np.isnan(test_loss) else float('nan')

print(f"\n--- Avaliação de Geração de Músicas (Música a Música) para '{TARGET_ARTIST}' (Dropout: {CURRENT_DROPOUT_STRATEGY}) ---")

all_generated_lyrics_metrics = []
all_actual_lyrics_metrics = []
if not TEST_LYRICS_ARTIST_FINAL_SONGS:
    print("WARNING: No artist test songs available for generation. Skipping song-by-song evaluation.")
else:
    for i, original_lyric_text in enumerate(tqdm(TEST_LYRICS_ARTIST_FINAL_SONGS, desc=f"Generating & Evaluating {CURRENT_DROPOUT_STRATEGY}")):
        original_lyric_words = nltk.word_tokenize(original_lyric_text)
        target_word_count = len(original_lyric_words)

        if target_word_count < 20: 
            target_word_count = 20
        
        if len(original_lyric_text) < SEQUENCE_LENGTH:
            seed_text = ' ' * (SEQUENCE_LENGTH - len(original_lyric_text)) + original_lyric_text
        else:
            seed_text = original_lyric_text[:SEQUENCE_LENGTH]

        # Gerar a Música
        generated_lyric_text = generate_text_by_word_count(model, tokenizer, seed_text, 
                                                           target_word_count, 
                                                           temperature=0.8, 
                                                           sequence_length=SEQUENCE_LENGTH, 
                                                           device=device)
        
        # Calcular Métricas para a Letra Gerada
        metrics_generated = calculate_text_quality_metrics_updated(generated_lyric_text, existing_words_in_corpus)
        all_generated_lyrics_metrics.append(metrics_generated)

        # Calcular Métricas para a Letra Original (para comparação)
        metrics_actual = calculate_text_quality_metrics_updated(original_lyric_text, existing_words_in_corpus)
        all_actual_lyrics_metrics.append(metrics_actual)

    # Calcular a Média das Métricas
    if all_generated_lyrics_metrics:
        avg_generated_metrics = {k: np.mean([d[k] for d in all_generated_lyrics_metrics if d[k] is not None]) for k in all_generated_lyrics_metrics[0]}
        avg_actual_metrics = {k: np.mean([d[k] for d in all_actual_lyrics_metrics if d[k] is not None]) for k in all_actual_lyrics_metrics[0]}
        
        print(f"\n--- MÉDIAS DAS MÉTRICAS DE GERAÇÃO (Dropout: {CURRENT_DROPOUT_STRATEGY}) ---")
        print("\n--- Letras Geradas (Média) ---")
        print(f"Total Words (Avg): {avg_generated_metrics['total_words']:.2f}")
        print(f"Unique Words (Avg): {avg_generated_metrics['unique_words']:.2f}")
        print(f"TTR (Distinct-1 Ratio) (Avg): {avg_generated_metrics['distinct_1_ratio']:.4f}")
        print(f"Repetition Rate (1-Distinct-1) (Avg): {avg_generated_metrics['repetition_rate_words']:.4f}")
        print(f"Distinct-2 Ratio (Bigrams) (Avg): {avg_generated_metrics['distinct_2_ratio']:.4f}")
        print(f"Repetition Rate (1-Distinct-2) (Bigrams) (Avg): {avg_generated_metrics['repetition_rate_bigrams']:.4f}")
        print(f"Distinct-3 Ratio (Trigrams) (Avg): {avg_generated_metrics['distinct_3_ratio']:.4f}")
        print(f"Repetition Rate (1-Distinct-3) (Trigrams) (Avg): {avg_generated_metrics['repetition_rate_trigrams']:.4f}")
        print(f"Generated Unique Words Existing in Corpus (Avg): {avg_generated_metrics['existing_words_count']:.2f}")
        print(f"Existence Rate (Generated Unique Words) (Avg): {avg_generated_metrics['existence_rate']:.2f}%")
        print("-----------------------------------------------------")

        print("\n--- Letras Originais do Teste (Média) ---")
        print(f"Total Words (Avg): {avg_actual_metrics['total_words']:.2f}")
        print(f"Unique Words (Avg): {avg_actual_metrics['unique_words']:.2f}")
        print(f"TTR (Distinct-1 Ratio) (Avg): {avg_actual_metrics['distinct_1_ratio']:.4f}")
        print(f"Repetition Rate (1-Distinct-1) (Avg): {avg_actual_metrics['repetition_rate_words']:.4f}")
        print(f"Distinct-2 Ratio (Bigrams) (Avg): {avg_actual_metrics['distinct_2_ratio']:.4f}")
        print(f"Repetition Rate (1-Distinct-2) (Bigrams) (Avg): {avg_actual_metrics['repetition_rate_bigrams']:.4f}")
        print(f"Distinct-3 Ratio (Trigrams) (Avg): {avg_actual_metrics['distinct_3_ratio']:.4f}")
        print(f"Repetition Rate (1-Distinct-3) (Trigrams) (Avg): {avg_actual_metrics['repetition_rate_trigrams']:.4f}")
        print(f"Actual Unique Words Existing in Corpus (Avg): {avg_actual_metrics['existing_words_count']:.2f}")
        print(f"Existence Rate (Actual Unique Words) (Avg): {avg_actual_metrics['existence_rate']:.2f}%")
        print("-----------------------------------------------------")
    else:
        print("No metrics generated for this strategy due to data issues.")


--- AVALIAÇÃO: Modelo com Dropout 'between_lstm' ---
Model 'model_weights_arianagrande_strategy_between_lstm_best.pt' loaded from: models_by_artist_char_split\arianagrande\model_weights_arianagrande_strategy_between_lstm_best.pt


Evaluating Loss/Acc: 100%|██████████| 352/352 [00:14<00:00, 23.54it/s]



--- Avaliação de Geração de Músicas (Música a Música) para 'ArianaGrande' (Dropout: between_lstm) ---


Generating words: 100%|██████████| 3/3 [00:00<00:00, 13.74it/s]:00<?, ?it/s]
Generating words: 674it [01:12,  9.27it/s]|▎         | 1/29 [00:00<00:06,  4.43it/s]
Generating words: 100%|██████████| 775/775 [01:30<00:00,  8.61it/s]19:17, 42.86s/it]
Generating words: 100%|██████████| 519/519 [00:54<00:00,  9.47it/s]27:54, 64.42s/it]
Generating words: 100%|██████████| 510/510 [00:52<00:00,  9.80it/s]25:15, 60.64s/it]
Generating words: 100%|██████████| 402/402 [00:36<00:00, 10.96it/s]23:00, 57.54s/it]
Generating words: 100%|██████████| 372/372 [00:36<00:00, 10.30it/s]19:20, 50.45s/it]
Generating words: 100%|██████████| 381/381 [00:45<00:00,  8.36it/s]16:46, 45.77s/it]
Generating words: 100%|██████████| 14/14 [00:01<00:00,  8.69it/s]8<15:59, 45.71s/it]
Generating words: 100%|██████████| 316/316 [00:36<00:00,  8.76it/s]10:38, 31.93s/it]
Generating words: 100%|██████████| 526/526 [00:56<00:00,  9.33it/s]<10:31, 33.21s/it]
Generating words: 14it [00:01,  9.41it/s]%|███▊      | 11/29 [08:02<12:0


--- MÉDIAS DAS MÉTRICAS DE GERAÇÃO (Dropout: between_lstm) ---

--- Letras Geradas (Média) ---
Total Words (Avg): 344.24
Unique Words (Avg): 120.48
TTR (Distinct-1 Ratio) (Avg): 0.4314
Repetition Rate (1-Distinct-1) (Avg): 0.5686
Distinct-2 Ratio (Bigrams) (Avg): 0.7454
Repetition Rate (1-Distinct-2) (Bigrams) (Avg): 0.2546
Distinct-3 Ratio (Trigrams) (Avg): 0.8379
Repetition Rate (1-Distinct-3) (Trigrams) (Avg): 0.1621
Generated Unique Words Existing in Corpus (Avg): 115.41
Existence Rate (Generated Unique Words) (Avg): 0.96%
-----------------------------------------------------

--- Letras Originais do Teste (Média) ---
Total Words (Avg): 342.93
Unique Words (Avg): 91.79
TTR (Distinct-1 Ratio) (Avg): 0.3644
Repetition Rate (1-Distinct-1) (Avg): 0.6356
Distinct-2 Ratio (Bigrams) (Avg): 0.5176
Repetition Rate (1-Distinct-2) (Bigrams) (Avg): 0.4824
Distinct-3 Ratio (Trigrams) (Avg): 0.5837
Repetition Rate (1-Distinct-3) (Trigrams) (Avg): 0.4163
Actual Unique Words Existing in Corpus (A




In [None]:
"""
Executa a avaliação completa para um modelo pré-treinado com uma estratégia
de dropout específica. Carrega os pesos do modelo, calcula a perda e a
acurácia no conjunto de teste e, em seguida, itera sobre cada música de
teste para gerar uma nova versão. Por fim, calcula e exibe as médias das
métricas de qualidade de texto para as letras geradas e as originais,
permitindo uma comparação direta.
"""

print("\n--- AVALIAÇÃO: Modelo com Dropout 'before_fc' ---")

CURRENT_DROPOUT_STRATEGY = "before_fc" 
MODEL_SAVE_FILENAME = f"model_weights_{CLEAN_TARGET_ARTIST_NAME}_strategy_{CURRENT_DROPOUT_STRATEGY}_best.pt"
model_save_filepath = os.path.join(MODEL_DIR_ARTIST, MODEL_SAVE_FILENAME)

if not os.path.exists(model_save_filepath):
    print(f"Error: Trained model '{model_save_filepath}' for strategy '{CURRENT_DROPOUT_STRATEGY}' not found. Please train this model first.")
    exit() 

model = CharLSTM(
    vocab_size=total_chars,
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    n_layers=N_LAYERS,
    dropout_rate=DROPOUT_RATE,
    dropout_strategy=CURRENT_DROPOUT_STRATEGY 
).to(device)

model.load_state_dict(torch.load(model_save_filepath, map_location=device))
print(f"Model '{MODEL_SAVE_FILENAME}' loaded from: {model_save_filepath}")

# --- Calcular Loss e Acurácia de Caractere (no conjunto de teste tokenizado) ---
test_loss, test_accuracy = evaluate_model_loss_acc(model, test_loader, criterion, device)
perplexity = np.exp(test_loss) if not np.isnan(test_loss) else float('nan')

print(f"\n--- Avaliação de Geração de Músicas (Música a Música) para '{TARGET_ARTIST}' (Dropout: {CURRENT_DROPOUT_STRATEGY}) ---")

all_generated_lyrics_metrics = []
all_actual_lyrics_metrics = []
if not TEST_LYRICS_ARTIST_FINAL_SONGS:
    print("WARNING: No artist test songs available for generation. Skipping song-by-song evaluation.")
else:
    for i, original_lyric_text in enumerate(tqdm(TEST_LYRICS_ARTIST_FINAL_SONGS, desc=f"Generating & Evaluating {CURRENT_DROPOUT_STRATEGY}")):
        original_lyric_words = nltk.word_tokenize(original_lyric_text)
        target_word_count = len(original_lyric_words)

        if target_word_count < 20: 
            target_word_count = 20
        
        if len(original_lyric_text) < SEQUENCE_LENGTH:
            seed_text = ' ' * (SEQUENCE_LENGTH - len(original_lyric_text)) + original_lyric_text
        else:
            seed_text = original_lyric_text[:SEQUENCE_LENGTH]

        # Gerar a Música
        generated_lyric_text = generate_text_by_word_count(model, tokenizer, seed_text, 
                                                           target_word_count, 
                                                           temperature=0.8, 
                                                           sequence_length=SEQUENCE_LENGTH, 
                                                           device=device)
        
        # Calcular Métricas para a Letra Gerada
        metrics_generated = calculate_text_quality_metrics_updated(generated_lyric_text, existing_words_in_corpus)
        all_generated_lyrics_metrics.append(metrics_generated)

        # Calcular Métricas para a Letra Original (para comparação)
        metrics_actual = calculate_text_quality_metrics_updated(original_lyric_text, existing_words_in_corpus)
        all_actual_lyrics_metrics.append(metrics_actual)

    # Calcular a Média das Métricas
    if all_generated_lyrics_metrics:
        avg_generated_metrics = {k: np.mean([d[k] for d in all_generated_lyrics_metrics if d[k] is not None]) for k in all_generated_lyrics_metrics[0]}
        avg_actual_metrics = {k: np.mean([d[k] for d in all_actual_lyrics_metrics if d[k] is not None]) for k in all_actual_lyrics_metrics[0]}
        
        print(f"\n--- MÉDIAS DAS MÉTRICAS DE GERAÇÃO (Dropout: {CURRENT_DROPOUT_STRATEGY}) ---")
        print("\n--- Letras Geradas (Média) ---")
        print(f"Total Words (Avg): {avg_generated_metrics['total_words']:.2f}")
        print(f"Unique Words (Avg): {avg_generated_metrics['unique_words']:.2f}")
        print(f"TTR (Distinct-1 Ratio) (Avg): {avg_generated_metrics['distinct_1_ratio']:.4f}")
        print(f"Repetition Rate (1-Distinct-1) (Avg): {avg_generated_metrics['repetition_rate_words']:.4f}")
        print(f"Distinct-2 Ratio (Bigrams) (Avg): {avg_generated_metrics['distinct_2_ratio']:.4f}")
        print(f"Repetition Rate (1-Distinct-2) (Bigrams) (Avg): {avg_generated_metrics['repetition_rate_bigrams']:.4f}")
        print(f"Distinct-3 Ratio (Trigrams) (Avg): {avg_generated_metrics['distinct_3_ratio']:.4f}")
        print(f"Repetition Rate (1-Distinct-3) (Trigrams) (Avg): {avg_generated_metrics['repetition_rate_trigrams']:.4f}")
        print(f"Generated Unique Words Existing in Corpus (Avg): {avg_generated_metrics['existing_words_count']:.2f}")
        print(f"Existence Rate (Generated Unique Words) (Avg): {avg_generated_metrics['existence_rate']:.2f}%")
        print("-----------------------------------------------------")

        print("\n--- Letras Originais do Teste (Média) ---")
        print(f"Total Words (Avg): {avg_actual_metrics['total_words']:.2f}")
        print(f"Unique Words (Avg): {avg_actual_metrics['unique_words']:.2f}")
        print(f"TTR (Distinct-1 Ratio) (Avg): {avg_actual_metrics['distinct_1_ratio']:.4f}")
        print(f"Repetition Rate (1-Distinct-1) (Avg): {avg_actual_metrics['repetition_rate_words']:.4f}")
        print(f"Distinct-2 Ratio (Bigrams) (Avg): {avg_actual_metrics['distinct_2_ratio']:.4f}")
        print(f"Repetition Rate (1-Distinct-2) (Bigrams) (Avg): {avg_actual_metrics['repetition_rate_bigrams']:.4f}")
        print(f"Distinct-3 Ratio (Trigrams) (Avg): {avg_actual_metrics['distinct_3_ratio']:.4f}")
        print(f"Repetition Rate (1-Distinct-3) (Trigrams) (Avg): {avg_actual_metrics['repetition_rate_trigrams']:.4f}")
        print(f"Actual Unique Words Existing in Corpus (Avg): {avg_actual_metrics['existing_words_count']:.2f}")
        print(f"Existence Rate (Actual Unique Words) (Avg): {avg_actual_metrics['existence_rate']:.2f}%")
        print("-----------------------------------------------------")
    else:
        print("No metrics generated for this strategy due to data issues.")


--- AVALIAÇÃO: Modelo com Dropout 'before_fc' ---
Model 'model_weights_arianagrande_strategy_before_fc_best.pt' loaded from: models_by_artist_char_split\arianagrande\model_weights_arianagrande_strategy_before_fc_best.pt


Evaluating Loss/Acc: 100%|██████████| 352/352 [00:12<00:00, 27.36it/s]



--- Avaliação de Geração de Músicas (Música a Música) para 'ArianaGrande' (Dropout: before_fc) ---


Generating words: 100%|██████████| 3/3 [00:00<00:00, 12.46it/s]<?, ?it/s]
Generating words: 100%|██████████| 673/673 [00:48<00:00, 13.75it/s]06,  4.10it/s]
Generating words: 100%|██████████| 775/775 [01:06<00:00, 11.57it/s]00, 28.92s/it]
Generating words: 100%|██████████| 519/519 [00:36<00:00, 14.38it/s]04, 46.31s/it]
Generating words: 100%|██████████| 510/510 [00:38<00:00, 13.11it/s]37, 42.29s/it]
Generating words: 100%|██████████| 402/402 [00:32<00:00, 12.35it/s]25, 41.07s/it]
Generating words: 100%|██████████| 372/372 [00:31<00:00, 11.77it/s]38, 38.18s/it]
Generating words: 100%|██████████| 381/381 [00:31<00:00, 12.08it/s]12, 36.04s/it]
Generating words: 15it [00:01,  8.48it/s]█▊       | 8/29 [04:47<12:06, 34.61s/it]
Generating words: 100%|██████████| 316/316 [00:21<00:00, 14.56it/s]06, 24.34s/it]
Generating words: 100%|██████████| 526/526 [00:33<00:00, 15.87it/s]:27, 23.53s/it]
Generating words: 100%|██████████| 13/13 [00:00<00:00, 16.88it/s]07:56, 26.48s/it]
Generating words: 100%


--- MÉDIAS DAS MÉTRICAS DE GERAÇÃO (Dropout: before_fc) ---

--- Letras Geradas (Média) ---
Total Words (Avg): 344.21
Unique Words (Avg): 124.00
TTR (Distinct-1 Ratio) (Avg): 0.4449
Repetition Rate (1-Distinct-1) (Avg): 0.5551
Distinct-2 Ratio (Bigrams) (Avg): 0.7552
Repetition Rate (1-Distinct-2) (Bigrams) (Avg): 0.2448
Distinct-3 Ratio (Trigrams) (Avg): 0.8441
Repetition Rate (1-Distinct-3) (Trigrams) (Avg): 0.1559
Generated Unique Words Existing in Corpus (Avg): 115.00
Existence Rate (Generated Unique Words) (Avg): 0.93%
-----------------------------------------------------

--- Letras Originais do Teste (Média) ---
Total Words (Avg): 342.93
Unique Words (Avg): 91.79
TTR (Distinct-1 Ratio) (Avg): 0.3644
Repetition Rate (1-Distinct-1) (Avg): 0.6356
Distinct-2 Ratio (Bigrams) (Avg): 0.5176
Repetition Rate (1-Distinct-2) (Bigrams) (Avg): 0.4824
Distinct-3 Ratio (Trigrams) (Avg): 0.5837
Repetition Rate (1-Distinct-3) (Trigrams) (Avg): 0.4163
Actual Unique Words Existing in Corpus (Avg)




### Sem Dropout

In [None]:
"""
Executa uma avaliação focada na predição da próxima palavra para um modelo
específico. Carrega os pesos do modelo, calcula a perplexidade no conjunto
de teste e, em seguida, avalia a precisão da próxima palavra e da próxima
etiqueta POS. Também analisa e compara as métricas de qualidade (TTR, taxa
de repetição) do conjunto de palavras geradas versus o conjunto de palavras
reais do teste.
"""
print("\nAvaliação do Modelo com Dropout 'none': Foco em Predição da Próxima Palavra")
CURRENT_DROPOUT_STRATEGY = "none"
MODEL_SAVE_FILENAME = f"model_weights_{CLEAN_TARGET_ARTIST_NAME}_strategy_{CURRENT_DROPOUT_STRATEGY}_best.pt"
model_save_filepath = os.path.join(MODEL_DIR_ARTIST, MODEL_SAVE_FILENAME)

if not os.path.exists(model_save_filepath):
    print(f"Erro: Modelo treinado '{model_save_filepath}' para a estratégia '{CURRENT_DROPOUT_STRATEGY}' não encontrado.")
else:
    model = CharLSTM(
        vocab_size=total_chars,
        embedding_dim=EMBEDDING_DIM,
        hidden_dim=HIDDEN_DIM,
        n_layers=N_LAYERS,
        dropout_rate=DROPOUT_RATE,
        dropout_strategy=CURRENT_DROPOUT_STRATEGY
    ).to(device)

    model.load_state_dict(torch.load(model_save_filepath, map_location=device))
    print(f"Modelo '{MODEL_SAVE_FILENAME}' carregado de: {model_save_filepath}")

    test_loss, test_accuracy = evaluate_model_loss_acc(model, test_loader, criterion, device)
    perplexity = np.exp(test_loss) if not np.isnan(test_loss) else float('nan')
    print(f"\nPerplexidade no conjunto de teste: {perplexity:.4f}")

    correct_pos_predictions = 0
    correct_word_predictions = 0
    total_predictions_attempted = 0
    pos_confusion_matrix = {}
    generated_words_list = []

    if not prompts_for_eval or not true_next_words or not nlp:
        print("\nAviso: Dados insuficientes para a avaliação de nível de palavra. Pulando.")
    else:
        print(f"\nIniciando avaliação de nível de palavra para {len(prompts_for_eval)} prompts...")
        for i in tqdm(range(len(prompts_for_eval)), desc="Avaliando Próxima Palavra & POS Tag"):
            prompt = prompts_for_eval[i]
            true_word = true_next_words[i]
            true_pos = true_next_pos_tags[i]

            generated_word = generate_next_word_from_prompt_eval(model, tokenizer, prompt, SEQUENCE_LENGTH, device)
            generated_words_list.append(generated_word)

            generated_pos = "NONE"
            if generated_word:
                doc_generated = nlp(generated_word)
                generated_pos = doc_generated[0].pos_ if doc_generated and doc_generated[0] else "UNKNOWN"
            
            pos_confusion_matrix.setdefault(true_pos, {})
            pos_confusion_matrix[true_pos].setdefault(generated_pos, 0)
            pos_confusion_matrix[true_pos][generated_pos] += 1

            if generated_pos == true_pos:
                correct_pos_predictions += 1
            
            cleaned_true_word = re.sub(r'[^a-z0-9]', '', true_word).lower()
            cleaned_generated_word = re.sub(r'[^a-z0-9]', '', generated_word).lower()

            if cleaned_generated_word == cleaned_true_word and cleaned_true_word != "":
                correct_word_predictions += 1
            total_predictions_attempted += 1

        overall_pos_accuracy = (correct_pos_predictions / total_predictions_attempted) * 100 if total_predictions_attempted > 0 else 0.0
        overall_word_accuracy = (correct_word_predictions / total_predictions_attempted) * 100 if total_predictions_attempted > 0 else 0.0

        print(f"\nResultados de Coesão Gramatical e Predição da Próxima Palavra (Dropout: {CURRENT_DROPOUT_STRATEGY})")
        print(f"Total de prompts avaliados: {total_predictions_attempted}")
        print(f"Acurácia da Próxima Etiqueta POS: {overall_pos_accuracy:.2f}%")
        print(f"Acurácia da Próxima Palavra: {overall_word_accuracy:.2f}%")

        print("\nMétricas para o conjunto de PALAVRAS GERADAS (na tarefa de predição)")
        generated_word_metrics = calculate_text_quality_metrics(" ".join(generated_words_list), existing_words_in_corpus)
        for key, value in generated_word_metrics.items():
            print(f"{key.replace('_', ' ').title()}: {value:.4f}")

        print("\nMétricas para o conjunto de PALAVRAS REAIS (do conjunto de teste)")
        actual_test_word_metrics = calculate_text_quality_metrics(" ".join(true_next_words), existing_words_in_corpus)
        for key, value in actual_test_word_metrics.items():
            print(f"{key.replace('_', ' ').title()}: {value:.4f}")


--- AVALIAÇÃO: Modelo com Dropout 'none' ---
Model 'model_weights_arianagrande_strategy_none_best.pt' loaded from: models_by_artist_char_split\arianagrande\model_weights_arianagrande_strategy_none_best.pt


Evaluating Loss/Acc: 100%|██████████| 357/357 [00:14<00:00, 24.97it/s]



--- Resultados de Avaliação no Conjunto de TESTE (Caracter): 'ArianaGrande' (Dropout: none) ---
Perplexity: 2.1358
-----------------------------------------------------

Starting word-level evaluation for 9916 test prompts...


Evaluating Next Word & POS Tag: 100%|██████████| 9916/9916 [12:41<00:00, 13.02it/s]


--- Results: Grammatical Cohesion (POS) & Next Word Accuracy (Dropout: none) ---
Total prompts evaluated: 9916
Correct POS Tag Predictions: 5105
Overall Next POS Tag Accuracy: 51.48%
Correct Next Word Predictions: 3389
Overall Next Word Prediction Accuracy: 34.18%
-----------------------------------------------------

--- Metrics for GENERATED WORDS (from next-word prediction task) ---
Total Generated Words: 10670
Unique Generated Words: 510
TTR (Generated Words): 0.0478
Repetition Rate (Generated Words): 0.9522
Generated Unique Words Existing in Corpus: 466
Existence Rate (Generated Unique Words): 0.91%
-----------------------------------------------------

--- Metrics for ACTUAL TEST WORDS (from artist's test set) ---
Total Actual Test Words: 9916
Unique Actual Test Words: 998
TTR (Actual Test Words): 0.1006
Repetition Rate (Actual Test Words): 0.8994
Actual Unique Words Existing in Corpus: 998
Existence Rate (Actual Unique Words): 1.00%
---------------------------------------------




In [None]:
print("\n--- AVALIAÇÃO: Modelo Sem Dropout (Estratégia 'none') ---")

CURRENT_DROPOUT_STRATEGY = "none" 
MODEL_SAVE_FILENAME = f"model_weights_{CLEAN_TARGET_ARTIST_NAME}_strategy_{CURRENT_DROPOUT_STRATEGY}_best.pt"
model_save_filepath = os.path.join(MODEL_DIR_ARTIST, MODEL_SAVE_FILENAME)

if not os.path.exists(model_save_filepath):
    print(f"Erro: Modelo treinado '{model_save_filepath}' para estratégia '{CURRENT_DROPOUT_STRATEGY}' não encontrado. Por favor, treine este modelo primeiro.")
    exit() 

model = CharLSTM(
    vocab_size=total_chars,
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    n_layers=N_LAYERS,
    dropout_rate=DROPOUT_RATE,
    dropout_strategy=CURRENT_DROPOUT_STRATEGY 
).to(device)

model.load_state_dict(torch.load(model_save_filepath, map_location=device))
print(f"Modelo '{MODEL_SAVE_FILENAME}' carregado de: {model_save_filepath}")

test_loss, test_accuracy = evaluate_model_loss_acc(model, test_loader, criterion, device)

print(f"\n--- Resultados da Avaliação no Conjunto de TESTE para '{TARGET_ARTIST}' (Dropout: {CURRENT_DROPOUT_STRATEGY}) ---")
print(f"Perda de Teste (Test Loss): {test_loss:.4f}")
print(f"Acurácia de Teste (Test Accuracy): {test_accuracy:.4f}")
print("-----------------------------------------------------")


--- AVALIAÇÃO: Modelo Sem Dropout (Estratégia 'none') ---
Modelo 'model_weights_arianagrande_strategy_none_best.pt' carregado de: models_by_artist_char_split\arianagrande\model_weights_arianagrande_strategy_none_best.pt


Avaliando: 100%|██████████| 353/353 [00:13<00:00, 27.01it/s]


--- Resultados da Avaliação no Conjunto de TESTE para 'ArianaGrande' (Dropout: none) ---
Perda de Teste (Test Loss): 0.9172
Acurácia de Teste (Test Accuracy): 0.7288
-----------------------------------------------------





### between_lstm

In [None]:
CURRENT_DROPOUT_STRATEGY = "between_lstm" 
MODEL_SAVE_FILENAME = f"model_weights_{CLEAN_TARGET_ARTIST_NAME}_strategy_{CURRENT_DROPOUT_STRATEGY}_best.pt"
model_save_filepath = os.path.join(MODEL_DIR_ARTIST, MODEL_SAVE_FILENAME)

if not os.path.exists(model_save_filepath):
    print(f"Erro: Modelo treinado '{model_save_filepath}' para estratégia '{CURRENT_DROPOUT_STRATEGY}' não encontrado. Por favor, treine este modelo primeiro.")
    exit()

model = CharLSTM(
    vocab_size=total_chars,
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    n_layers=N_LAYERS,
    dropout_rate=DROPOUT_RATE,
    dropout_strategy=CURRENT_DROPOUT_STRATEGY 
).to(device)

model.load_state_dict(torch.load(model_save_filepath, map_location=device))
print(f"Modelo '{MODEL_SAVE_FILENAME}' carregado de: {model_save_filepath}")

test_loss, test_accuracy = evaluate_model_loss_acc(model, test_loader, criterion, device)

print(f"\n--- Resultados da Avaliação no Conjunto de TESTE para '{TARGET_ARTIST}' (Dropout: {CURRENT_DROPOUT_STRATEGY}) ---")
print(f"Perda de Teste (Test Loss): {test_loss:.4f}")
print(f"Acurácia de Teste (Test Accuracy): {test_accuracy:.4f}")
print("-----------------------------------------------------")

Modelo 'model_weights_arianagrande_strategy_between_lstm_best.pt' carregado de: models_by_artist_char_split\arianagrande\model_weights_arianagrande_strategy_between_lstm_best.pt


Avaliando: 100%|██████████| 332/332 [00:13<00:00, 25.15it/s]


--- Resultados da Avaliação no Conjunto de TESTE para 'ArianaGrande' (Dropout: between_lstm) ---
Perda de Teste (Test Loss): 1.0660
Acurácia de Teste (Test Accuracy): 0.6917
-----------------------------------------------------





In [None]:
"""
Executa uma avaliação focada na predição da próxima palavra para um modelo
específico. Carrega os pesos do modelo, calcula a perplexidade no conjunto
de teste e, em seguida, avalia a precisão da próxima palavra e da próxima
etiqueta POS. Também analisa e compara as métricas de qualidade (TTR, taxa
de repetição) do conjunto de palavras geradas versus o conjunto de palavras
reais do teste.
"""
print("\nAvaliação do Modelo com Dropout 'between_lstm': Foco em Predição da Próxima Palavra")
CURRENT_DROPOUT_STRATEGY = "between_lstm"
MODEL_SAVE_FILENAME = f"model_weights_{CLEAN_TARGET_ARTIST_NAME}_strategy_{CURRENT_DROPOUT_STRATEGY}_best.pt"
model_save_filepath = os.path.join(MODEL_DIR_ARTIST, MODEL_SAVE_FILENAME)

if not os.path.exists(model_save_filepath):
    print(f"Erro: Modelo treinado '{model_save_filepath}' para a estratégia '{CURRENT_DROPOUT_STRATEGY}' não encontrado.")
else:
    model = CharLSTM(
        vocab_size=total_chars,
        embedding_dim=EMBEDDING_DIM,
        hidden_dim=HIDDEN_DIM,
        n_layers=N_LAYERS,
        dropout_rate=DROPOUT_RATE,
        dropout_strategy=CURRENT_DROPOUT_STRATEGY
    ).to(device)

    model.load_state_dict(torch.load(model_save_filepath, map_location=device))
    print(f"Modelo '{MODEL_SAVE_FILENAME}' carregado de: {model_save_filepath}")

    test_loss, test_accuracy = evaluate_model_loss_acc(model, test_loader, criterion, device)
    perplexity = np.exp(test_loss) if not np.isnan(test_loss) else float('nan')
    print(f"\nPerplexidade no conjunto de teste: {perplexity:.4f}")

    correct_pos_predictions = 0
    correct_word_predictions = 0
    total_predictions_attempted = 0
    pos_confusion_matrix = {}
    generated_words_list = []

    if not prompts_for_eval or not true_next_words or not nlp:
        print("\nAviso: Dados insuficientes para a avaliação de nível de palavra. Pulando.")
    else:
        print(f"\nIniciando avaliação de nível de palavra para {len(prompts_for_eval)} prompts...")
        for i in tqdm(range(len(prompts_for_eval)), desc="Avaliando Próxima Palavra & POS Tag"):
            prompt = prompts_for_eval[i]
            true_word = true_next_words[i]
            true_pos = true_next_pos_tags[i]

            generated_word = generate_next_word_from_prompt_eval(model, tokenizer, prompt, SEQUENCE_LENGTH, device)
            generated_words_list.append(generated_word)

            generated_pos = "NONE"
            if generated_word:
                doc_generated = nlp(generated_word)
                generated_pos = doc_generated[0].pos_ if doc_generated and doc_generated[0] else "UNKNOWN"
            
            pos_confusion_matrix.setdefault(true_pos, {})
            pos_confusion_matrix[true_pos].setdefault(generated_pos, 0)
            pos_confusion_matrix[true_pos][generated_pos] += 1

            if generated_pos == true_pos:
                correct_pos_predictions += 1
            
            cleaned_true_word = re.sub(r'[^a-z0-9]', '', true_word).lower()
            cleaned_generated_word = re.sub(r'[^a-z0-9]', '', generated_word).lower()

            if cleaned_generated_word == cleaned_true_word and cleaned_true_word != "":
                correct_word_predictions += 1
            total_predictions_attempted += 1

        overall_pos_accuracy = (correct_pos_predictions / total_predictions_attempted) * 100 if total_predictions_attempted > 0 else 0.0
        overall_word_accuracy = (correct_word_predictions / total_predictions_attempted) * 100 if total_predictions_attempted > 0 else 0.0

        print(f"\nResultados de Coesão Gramatical e Predição da Próxima Palavra (Dropout: {CURRENT_DROPOUT_STRATEGY})")
        print(f"Total de prompts avaliados: {total_predictions_attempted}")
        print(f"Acurácia da Próxima Etiqueta POS: {overall_pos_accuracy:.2f}%")
        print(f"Acurácia da Próxima Palavra: {overall_word_accuracy:.2f}%")

        print("\nMétricas para o conjunto de PALAVRAS GERADAS (na tarefa de predição)")
        generated_word_metrics = calculate_text_quality_metrics(" ".join(generated_words_list), existing_words_in_corpus)
        for key, value in generated_word_metrics.items():
            print(f"{key.replace('_', ' ').title()}: {value:.4f}")

        print("\nMétricas para o conjunto de PALAVRAS REAIS (do conjunto de teste)")
        actual_test_word_metrics = calculate_text_quality_metrics(" ".join(true_next_words), existing_words_in_corpus)
        for key, value in actual_test_word_metrics.items():
            print(f"{key.replace('_', ' ').title()}: {value:.4f}")


--- AVALIAÇÃO: Modelo com Dropout 'none' ---
Model 'model_weights_arianagrande_strategy_between_lstm_best.pt' loaded from: models_by_artist_char_split\arianagrande\model_weights_arianagrande_strategy_between_lstm_best.pt


Evaluating Loss/Acc: 100%|██████████| 352/352 [00:14<00:00, 25.01it/s]



--- Resultados de Avaliação no Conjunto de TESTE (Caracter): 'ArianaGrande' (Dropout: between_lstm) ---
Perplexity: 2.1687
-----------------------------------------------------

Starting word-level evaluation for 9916 test prompts...


Evaluating Next Word & POS Tag: 100%|██████████| 9916/9916 [09:30<00:00, 17.39it/s]


--- Results: Grammatical Cohesion (POS) & Next Word Accuracy (Dropout: between_lstm) ---
Total prompts evaluated: 9916
Correct POS Tag Predictions: 4939
Overall Next POS Tag Accuracy: 49.81%
Correct Next Word Predictions: 2945
Overall Next Word Prediction Accuracy: 29.70%
-----------------------------------------------------

--- Metrics for GENERATED WORDS (from next-word prediction task) ---
Total Generated Words: 10765
Unique Generated Words: 372
TTR (Generated Words): 0.0346
Repetition Rate (Generated Words): 0.9654
Generated Unique Words Existing in Corpus: 358
Existence Rate (Generated Unique Words): 0.96%
-----------------------------------------------------

--- Metrics for ACTUAL TEST WORDS (from artist's test set) ---
Total Actual Test Words: 9916
Unique Actual Test Words: 998
TTR (Actual Test Words): 0.1006
Repetition Rate (Actual Test Words): 0.8994
Actual Unique Words Existing in Corpus: 998
Existence Rate (Actual Unique Words): 1.00%
-------------------------------------




### before_fc

In [None]:
CURRENT_DROPOUT_STRATEGY = "before_fc" 
MODEL_SAVE_FILENAME = f"model_weights_{CLEAN_TARGET_ARTIST_NAME}_strategy_{CURRENT_DROPOUT_STRATEGY}_best.pt"
model_save_filepath = os.path.join(MODEL_DIR_ARTIST, MODEL_SAVE_FILENAME)

if not os.path.exists(model_save_filepath):
    print(f"Erro: Modelo treinado '{model_save_filepath}' para estratégia '{CURRENT_DROPOUT_STRATEGY}' não encontrado. Por favor, treine este modelo primeiro.")
    exit()

model = CharLSTM(
    vocab_size=total_chars,
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    n_layers=N_LAYERS,
    dropout_rate=DROPOUT_RATE,
    dropout_strategy=CURRENT_DROPOUT_STRATEGY 
).to(device)

model.load_state_dict(torch.load(model_save_filepath, map_location=device))
print(f"Modelo '{MODEL_SAVE_FILENAME}' carregado de: {model_save_filepath}")

test_loss, test_accuracy = evaluate_model_loss_acc(model, test_loader, criterion, device)

print(f"\n--- Resultados da Avaliação no Conjunto de TESTE para '{TARGET_ARTIST}' (Dropout: {CURRENT_DROPOUT_STRATEGY}) ---")
print(f"Perda de Teste (Test Loss): {test_loss:.4f}")
print(f"Acurácia de Teste (Test Accuracy): {test_accuracy:.4f}")
print("-----------------------------------------------------")

Modelo 'model_weights_arianagrande_strategy_before_fc_best.pt' carregado de: models_by_artist_char_split\arianagrande\model_weights_arianagrande_strategy_before_fc_best.pt


Avaliando: 100%|██████████| 332/332 [00:13<00:00, 24.63it/s]


--- Resultados da Avaliação no Conjunto de TESTE para 'ArianaGrande' (Dropout: before_fc) ---
Perda de Teste (Test Loss): 1.0780
Acurácia de Teste (Test Accuracy): 0.6858
-----------------------------------------------------





In [None]:
"""
Executa uma avaliação focada na predição da próxima palavra para um modelo
específico. Carrega os pesos do modelo, calcula a perplexidade no conjunto
de teste e, em seguida, avalia a precisão da próxima palavra e da próxima
etiqueta POS. Também analisa e compara as métricas de qualidade (TTR, taxa
de repetição) do conjunto de palavras geradas versus o conjunto de palavras
reais do teste.
"""
print("\nAvaliação do Modelo com Dropout 'before_fc': Foco em Predição da Próxima Palavra")
CURRENT_DROPOUT_STRATEGY = "before_fc"
MODEL_SAVE_FILENAME = f"model_weights_{CLEAN_TARGET_ARTIST_NAME}_strategy_{CURRENT_DROPOUT_STRATEGY}_best.pt"
model_save_filepath = os.path.join(MODEL_DIR_ARTIST, MODEL_SAVE_FILENAME)

if not os.path.exists(model_save_filepath):
    print(f"Erro: Modelo treinado '{model_save_filepath}' para a estratégia '{CURRENT_DROPOUT_STRATEGY}' não encontrado.")
else:
    model = CharLSTM(
        vocab_size=total_chars,
        embedding_dim=EMBEDDING_DIM,
        hidden_dim=HIDDEN_DIM,
        n_layers=N_LAYERS,
        dropout_rate=DROPOUT_RATE,
        dropout_strategy=CURRENT_DROPOUT_STRATEGY
    ).to(device)

    model.load_state_dict(torch.load(model_save_filepath, map_location=device))
    print(f"Modelo '{MODEL_SAVE_FILENAME}' carregado de: {model_save_filepath}")

    test_loss, test_accuracy = evaluate_model_loss_acc(model, test_loader, criterion, device)
    perplexity = np.exp(test_loss) if not np.isnan(test_loss) else float('nan')
    print(f"\nPerplexidade no conjunto de teste: {perplexity:.4f}")

    correct_pos_predictions = 0
    correct_word_predictions = 0
    total_predictions_attempted = 0
    pos_confusion_matrix = {}
    generated_words_list = []

    if not prompts_for_eval or not true_next_words or not nlp:
        print("\nAviso: Dados insuficientes para a avaliação de nível de palavra. Pulando.")
    else:
        print(f"\nIniciando avaliação de nível de palavra para {len(prompts_for_eval)} prompts...")
        for i in tqdm(range(len(prompts_for_eval)), desc="Avaliando Próxima Palavra & POS Tag"):
            prompt = prompts_for_eval[i]
            true_word = true_next_words[i]
            true_pos = true_next_pos_tags[i]

            generated_word = generate_next_word_from_prompt_eval(model, tokenizer, prompt, SEQUENCE_LENGTH, device)
            generated_words_list.append(generated_word)

            generated_pos = "NONE"
            if generated_word:
                doc_generated = nlp(generated_word)
                generated_pos = doc_generated[0].pos_ if doc_generated and doc_generated[0] else "UNKNOWN"
            
            pos_confusion_matrix.setdefault(true_pos, {})
            pos_confusion_matrix[true_pos].setdefault(generated_pos, 0)
            pos_confusion_matrix[true_pos][generated_pos] += 1

            if generated_pos == true_pos:
                correct_pos_predictions += 1
            
            cleaned_true_word = re.sub(r'[^a-z0-9]', '', true_word).lower()
            cleaned_generated_word = re.sub(r'[^a-z0-9]', '', generated_word).lower()

            if cleaned_generated_word == cleaned_true_word and cleaned_true_word != "":
                correct_word_predictions += 1
            total_predictions_attempted += 1

        overall_pos_accuracy = (correct_pos_predictions / total_predictions_attempted) * 100 if total_predictions_attempted > 0 else 0.0
        overall_word_accuracy = (correct_word_predictions / total_predictions_attempted) * 100 if total_predictions_attempted > 0 else 0.0

        print(f"\nResultados de Coesão Gramatical e Predição da Próxima Palavra (Dropout: {CURRENT_DROPOUT_STRATEGY})")
        print(f"Total de prompts avaliados: {total_predictions_attempted}")
        print(f"Acurácia da Próxima Etiqueta POS: {overall_pos_accuracy:.2f}%")
        print(f"Acurácia da Próxima Palavra: {overall_word_accuracy:.2f}%")

        print("\nMétricas para o conjunto de PALAVRAS GERADAS (na tarefa de predição)")
        generated_word_metrics = calculate_text_quality_metrics(" ".join(generated_words_list), existing_words_in_corpus)
        for key, value in generated_word_metrics.items():
            print(f"{key.replace('_', ' ').title()}: {value:.4f}")

        print("\nMétricas para o conjunto de PALAVRAS REAIS (do conjunto de teste)")
        actual_test_word_metrics = calculate_text_quality_metrics(" ".join(true_next_words), existing_words_in_corpus)
        for key, value in actual_test_word_metrics.items():
            print(f"{key.replace('_', ' ').title()}: {value:.4f}")


--- AVALIAÇÃO: Modelo com Dropout 'none' ---
Model 'model_weights_arianagrande_strategy_before_fc_best.pt' loaded from: models_by_artist_char_split\arianagrande\model_weights_arianagrande_strategy_before_fc_best.pt


Evaluating Loss/Acc: 100%|██████████| 352/352 [00:12<00:00, 28.48it/s]



--- Resultados de Avaliação no Conjunto de TESTE (Caracter): 'ArianaGrande' (Dropout: before_fc) ---
Perplexity: 2.3198
-----------------------------------------------------

Starting word-level evaluation for 9916 test prompts...


Evaluating Next Word & POS Tag: 100%|██████████| 9916/9916 [09:25<00:00, 17.54it/s]


--- Results: Grammatical Cohesion (POS) & Next Word Accuracy (Dropout: before_fc) ---
Total prompts evaluated: 9916
Correct POS Tag Predictions: 4852
Overall Next POS Tag Accuracy: 48.93%
Correct Next Word Predictions: 3056
Overall Next Word Prediction Accuracy: 30.82%
-----------------------------------------------------

--- Metrics for GENERATED WORDS (from next-word prediction task) ---
Total Generated Words: 10219
Unique Generated Words: 357
TTR (Generated Words): 0.0349
Repetition Rate (Generated Words): 0.9651
Generated Unique Words Existing in Corpus: 334
Existence Rate (Generated Unique Words): 0.94%
-----------------------------------------------------

--- Metrics for ACTUAL TEST WORDS (from artist's test set) ---
Total Actual Test Words: 9916
Unique Actual Test Words: 998
TTR (Actual Test Words): 0.1006
Repetition Rate (Actual Test Words): 0.8994
Actual Unique Words Existing in Corpus: 998
Existence Rate (Actual Unique Words): 1.00%
----------------------------------------




## Geração do próximo caractere

In [None]:
"""
Seleciona 10 exemplos aleatórios do conjunto de teste, utiliza o modelo
carregado para prever o próximo caractere de cada sequência de entrada e
exibe uma comparação lado a lado da sequência, do caractere real e do
caractere previsto pelo modelo.
"""
print("\nGerando 10 exemplos aleatórios de predição do conjunto de teste")

model.eval()

num_examples_to_show = 10
random_indices = random.sample(range(len(test_dataset)), min(num_examples_to_show, len(test_dataset)))

with torch.no_grad():
    for i, idx in enumerate(random_indices):
        input_sequence, actual_next_char_id = test_dataset[idx]
        input_sequence_batch = input_sequence.unsqueeze(0).to(device)

        output = model(input_sequence_batch)
        _, predicted_next_char_id = torch.max(output, 1)

        input_text = tokenizer.sequences_to_texts(input_sequence.tolist())
        actual_char = tokenizer.sequences_to_texts([actual_next_char_id.item()])
        predicted_char = tokenizer.sequences_to_texts([predicted_next_char_id.item()])

        print(f"\nExemplo {i+1}:")
        print(f"  Entrada (tamanho={SEQUENCE_LENGTH}): '{input_text}'")
        print(f"  Caracter Real:                  '{actual_char}'")
        print(f"  Caracter Previsto:              '{predicted_char}'")
        print(f"  Correto:                        {'Sim' if actual_char == predicted_char else 'Não'}")

print("\nGeração de exemplos concluída.")


--- Gerando 10 exemplos aleatórios de predição do conjunto de TESTE ---

Exemplo 1:
  Input (sequence_length=100): 'in' 'bout you ooh oh yeah i'm just thinkin' 'bout you 'bout you 'bout you yeah i'm thinkin' 'bout yo'
  Caracter Real Seguinte: 'u'
  Caracter Previsto:      'u'
  Acurado: Sim

Exemplo 2:
  Input (sequence_length=100): 'eart only girl in the world like i'm the only one that's in command cause i'm the only one who under'
  Caracter Real Seguinte: 's'
  Caracter Previsto:      'n'
  Acurado: Não

Exemplo 3:
  Input (sequence_length=100): 'me with me we're on another mentality ain't got no tears left to cry to cry so i'm pickin' it up pic'
  Caracter Real Seguinte: 'k'
  Caracter Previsto:      'k'
  Acurado: Sim

Exemplo 4:
  Input (sequence_length=100): 'ur love to me baby let me know are you wit it are you wit it are you wit it this christmas baby let '
  Caracter Real Seguinte: 'm'
  Caracter Previsto:      'm'
  Acurado: Sim

Exemplo 5:
  Input (sequence_length=100): 

## Geração de letras

In [None]:
"""
Define uma função para geração de texto caractere a caractere, que utiliza
uma 'seed' e um parâmetro de 'temperatura' para controlar a criatividade.
Em seguida, carrega do disco o tokenizador e o tamanho do vocabulário
necessários para a geração e para a instanciação do modelo.
"""
def generate_text(model, tokenizer, seed_text, num_generate=500, temperature=1.0, sequence_length=SEQUENCE_LENGTH, device=device):
    """Gera texto a partir de uma seed usando o modelo treinado."""
    model.eval()
    
    processed_seed_text = clean_text(seed_text) 

    if len(processed_seed_text) < sequence_length:
        processed_seed_text = ' ' * (sequence_length - len(processed_seed_text)) + processed_seed_text
    elif len(processed_seed_text) > sequence_length:
        processed_seed_text = processed_seed_text[-sequence_length:]

    generated_text = processed_seed_text

    print(f"Iniciando geração com a seed (ajustada para {sequence_length} caracteres): '{generated_text}'")
    print("-" * 50)

    for _ in tqdm(range(num_generate), desc="Gerando caracteres"):
        current_sequence_str = generated_text[-sequence_length:]
        tokenized_input = tokenizer.texts_to_sequences(current_sequence_str)
        
        input_tensor = torch.tensor(tokenized_input, dtype=torch.long).unsqueeze(0).to(device)

        with torch.no_grad():
            predictions = model(input_tensor).squeeze(0)
        
        if temperature > 0:
            predictions = predictions / temperature
        
        probabilities = torch.softmax(predictions, dim=-1)
        next_char_id = torch.multinomial(probabilities, num_samples=1).item()
        next_char = tokenizer.int_to_char.get(next_char_id, '')

        generated_text += next_char

    return generated_text

try:
    tokenizer = load_object(TOKENIZER_PATH)
    total_chars = load_object(os.path.join(PROCESSED_DATA_DIR_ARTIST, TOKENIZER_SUBDIR, f'vocab_size_{CLEAN_TARGET_ARTIST_NAME}_char.pkl'))
    print(f"Vocabulário de caracteres carregado. Tamanho do vocabulário: {total_chars}")
except FileNotFoundError as e:
    print(e)

Usando dispositivo: cuda
Objeto carregado de: processed_data_by_artist_char_split\arianagrande\tokenizers\char_tokenizer_arianagrande.pkl
Objeto carregado de: processed_data_by_artist_char_split\arianagrande\tokenizers\vocab_size_arianagrande_char.pkl
Vocabulário de caracteres carregado. Tamanho do vocabulário: 36


In [None]:
# Seed de exemplo para geração. 
seed_text = "don't want nobody else around me just need you right here you're like the only thing that i see it's"
NUM_CHARS_TO_GENERATE = 120
TEMPERATURE = 0

In [20]:
print("\n--- GERAÇÃO: Modelo Sem Dropout (Estratégia 'none') ---")

CURRENT_DROPOUT_STRATEGY = "none" 
MODEL_SAVE_FILENAME = f"model_weights_{CLEAN_TARGET_ARTIST_NAME}_strategy_{CURRENT_DROPOUT_STRATEGY}_best.pt"
model_save_filepath = os.path.join(MODEL_DIR_ARTIST, MODEL_SAVE_FILENAME)

if not os.path.exists(model_save_filepath):
    print(f"Erro: Modelo treinado '{model_save_filepath}' para estratégia '{CURRENT_DROPOUT_STRATEGY}' não encontrado. Por favor, treine este modelo primeiro.")
else:
    model = CharLSTM(
        vocab_size=total_chars,
        embedding_dim=EMBEDDING_DIM,
        hidden_dim=HIDDEN_DIM,
        n_layers=N_LAYERS,
        dropout_rate=DROPOUT_RATE, 
        dropout_strategy=CURRENT_DROPOUT_STRATEGY 
    ).to(device)

    model.load_state_dict(torch.load(model_save_filepath, map_location=device))
    print(f"Modelo '{MODEL_SAVE_FILENAME}' carregado de: {model_save_filepath}")

    generated_lyric = generate_text(model, tokenizer, seed_text, NUM_CHARS_TO_GENERATE, TEMPERATURE)

    print("\n--- Letra Gerada ---")
    print(generated_lyric)
    print("--------------------")


--- GERAÇÃO: Modelo Sem Dropout (Estratégia 'none') ---
Modelo 'model_weights_arianagrande_strategy_none_best.pt' carregado de: models_by_artist_char_split\arianagrande\model_weights_arianagrande_strategy_none_best.pt
Iniciando geração com seed (ajustada para 100 caracteres): 'don't want nobody else around me just need you right here you're like the only thing that i see it's'
--------------------------------------------------


Gerando caracteres: 100%|██████████| 120/120 [00:02<00:00, 56.18it/s]


--- Letra Gerada ---
don't want nobody else around me just need you right here you're like the only thing that i see it's to you ou does it just like it when we were the one you to know that we got that's a liw can my soul is christmas is yo
--------------------





In [21]:
print("\n--- GERAÇÃO: Modelo com Dropout 'between_lstm' ---")

CURRENT_DROPOUT_STRATEGY = "between_lstm" 
MODEL_SAVE_FILENAME = f"model_weights_{CLEAN_TARGET_ARTIST_NAME}_strategy_{CURRENT_DROPOUT_STRATEGY}_best.pt"
model_save_filepath = os.path.join(MODEL_DIR_ARTIST, MODEL_SAVE_FILENAME)

if not os.path.exists(model_save_filepath):
    print(f"Erro: Modelo treinado '{model_save_filepath}' para estratégia '{CURRENT_DROPOUT_STRATEGY}' não encontrado. Por favor, treine este modelo primeiro.")
else:
    model = CharLSTM(
        vocab_size=total_chars,
        embedding_dim=EMBEDDING_DIM,
        hidden_dim=HIDDEN_DIM,
        n_layers=N_LAYERS,
        dropout_rate=DROPOUT_RATE, 
        dropout_strategy=CURRENT_DROPOUT_STRATEGY 
    ).to(device)

    model.load_state_dict(torch.load(model_save_filepath, map_location=device))
    print(f"Modelo '{MODEL_SAVE_FILENAME}' carregado de: {model_save_filepath}")

    generated_lyric = generate_text(model, tokenizer, seed_text, NUM_CHARS_TO_GENERATE, TEMPERATURE)

    print("\n--- Letra Gerada ---")
    print(generated_lyric)
    print("--------------------")


--- GERAÇÃO: Modelo com Dropout 'between_lstm' ---
Modelo 'model_weights_arianagrande_strategy_between_lstm_best.pt' carregado de: models_by_artist_char_split\arianagrande\model_weights_arianagrande_strategy_between_lstm_best.pt
Iniciando geração com seed (ajustada para 100 caracteres): 'don't want nobody else around me just need you right here you're like the only thing that i see it's'
--------------------------------------------------


Gerando caracteres: 100%|██████████| 120/120 [00:01<00:00, 65.24it/s]


--- Letra Gerada ---
don't want nobody else around me just need you right here you're like the only thing that i see it's a leal is all for for people to love and i'm the hide you heard pre i'm but almost there grows you girls we can feel it
--------------------





In [23]:
print("\n--- GERAÇÃO: Modelo com Dropout 'before_fc' ---")

CURRENT_DROPOUT_STRATEGY = "before_fc" 
MODEL_SAVE_FILENAME = f"model_weights_{CLEAN_TARGET_ARTIST_NAME}_strategy_{CURRENT_DROPOUT_STRATEGY}_best.pt"
model_save_filepath = os.path.join(MODEL_DIR_ARTIST, MODEL_SAVE_FILENAME)

if not os.path.exists(model_save_filepath):
    print(f"Erro: Modelo treinado '{model_save_filepath}' para estratégia '{CURRENT_DROPOUT_STRATEGY}' não encontrado. Por favor, treine este modelo primeiro.")
else:
    model = CharLSTM(
        vocab_size=total_chars,
        embedding_dim=EMBEDDING_DIM,
        hidden_dim=HIDDEN_DIM,
        n_layers=N_LAYERS,
        dropout_rate=DROPOUT_RATE, 
        dropout_strategy=CURRENT_DROPOUT_STRATEGY 
    ).to(device)

    model.load_state_dict(torch.load(model_save_filepath, map_location=device))
    print(f"Modelo '{MODEL_SAVE_FILENAME}' carregado de: {model_save_filepath}")

    generated_lyric = generate_text(model, tokenizer, seed_text, NUM_CHARS_TO_GENERATE, TEMPERATURE)

    print("\n--- Letra Gerada ---")
    print(generated_lyric)
    print("--------------------")


--- GERAÇÃO: Modelo com Dropout 'before_fc' ---
Modelo 'model_weights_arianagrande_strategy_before_fc_best.pt' carregado de: models_by_artist_char_split\arianagrande\model_weights_arianagrande_strategy_before_fc_best.pt
Iniciando geração com seed (ajustada para 100 caracteres): 'don't want nobody else around me just need you right here you're like the only thing that i see it's'
--------------------------------------------------


Gerando caracteres: 100%|██████████| 120/120 [00:01<00:00, 62.13it/s]


--- Letra Gerada ---
don't want nobody else around me just need you right here you're like the only thing that i see it's turnin' i ain't feelin' you but you shoulds know that you can have and for free mmmoons of cause you get it oh feel it 
--------------------





## Avaliação de acerto gramatical

In [None]:
"""
Prepara um conjunto de dados detalhado para avaliação de previsão da próxima
palavra, extraindo prompts e as classes gramaticais (POS tags) das
palavras-alvo do conjunto de teste. Define e executa uma função de avaliação
que carrega cada modelo pré-treinado (por estratégia de dropout), calcula a
acurácia da previsão de POS tags e exibe os resultados, incluindo uma matriz
de confusão.
"""
# --- Carregamento de Recursos ---
try:
    tokenizer = load_object(TOKENIZER_PATH)
    total_chars = load_object(os.path.join(PROCESSED_DATA_DIR_ARTIST, TOKENIZER_SUBDIR, f'vocab_size_{CLEAN_TARGET_ARTIST_NAME}_char.pkl'))
    print(f"Vocabulário de caracteres carregado. Tamanho: {total_chars}")
    nlp = spacy.load("en_core_web_sm")
    print("Modelo spaCy 'en_core_web_sm' carregado.")
except (FileNotFoundError, OSError) as e:
    print(f"Erro ao carregar recursos essenciais: {e}")
    print("Tentando baixar o modelo spaCy, se necessário...")
    try:
        spacy.cli.download("en_core_web_sm")
        nlp = spacy.load("en_core_web_sm")
        print("Modelo spaCy baixado e carregado.")
    except Exception as download_e:
        print(f"Falha ao baixar e carregar o modelo spaCy: {download_e}")
        nlp = None

# --- Preparação dos Dados de Avaliação ---
prompts_for_eval = []
true_next_words = []
true_next_pos_tags = []
try:
    df_artist_full = pd.read_csv(CSV_FILE_PATH, usecols=['Lyric'])
    all_lyrics_for_split = [clean_text(lyric) for lyric in df_artist_full['Lyric'] if clean_text(lyric)]
    
    random.seed(RANDOM_SEED)
    random.shuffle(all_lyrics_for_split)
    
    num_train = int(len(all_lyrics_for_split) * TRAIN_SPLIT)
    num_val = int(len(all_lyrics_for_split) * VAL_SPLIT)
    test_lyrics_clean = all_lyrics_for_split[num_train + num_val:]

    print(f"Total de letras de teste para avaliação: {len(test_lyrics_clean)}")
    for lyric_text in tqdm(test_lyrics_clean, desc="Gerando prompts e alvos"):
        words_in_lyric = lyric_text.split()
        for i in range(len(words_in_lyric) - 1):
            context_string = " ".join(words_in_lyric[:i+1])
            target_word = words_in_lyric[i+1]
            
            prompt = (' ' * (SEQUENCE_LENGTH - len(context_string)) + context_string)[-SEQUENCE_LENGTH:]
            
            if target_word:
                prompts_for_eval.append(prompt)
                true_next_words.append(target_word)
    
    print(f"Total de prompts e alvos gerados: {len(prompts_for_eval)}")
    if nlp and true_next_words:
        docs = nlp.pipe(true_next_words, batch_size=500)
        true_next_pos_tags = [doc[0].pos_ if doc and doc[0] else "UNKNOWN" for doc in tqdm(docs, total=len(true_next_words), desc="POS Tagging")]
except Exception as e:
    print(f"Erro ao preparar dados de teste: {e}")

# --- Função Principal de Avaliação ---
def run_pos_evaluation_char_level(strategy, prompts, words, pos_tags, tokenizer_obj, device_obj, nlp_model):
    print(f"\n--- AVALIAÇÃO DE CLASSE GRAMATICAL (Dropout: '{strategy}') ---")
    model_path = os.path.join(MODEL_DIR_ARTIST, f"model_weights_{CLEAN_TARGET_ARTIST_NAME}_strategy_{strategy}_best.pt")
    if not os.path.exists(model_path):
        print(f"Modelo para a estratégia '{strategy}' não encontrado em {model_path}. Pulando.")
        return

    model = CharLSTM(total_chars, EMBEDDING_DIM, HIDDEN_DIM, N_LAYERS, DROPOUT_RATE, strategy).to(device_obj)
    model.load_state_dict(torch.load(model_path, map_location=device_obj))
    model.eval()
    print(f"Modelo '{os.path.basename(model_path)}' carregado.")

    correct_pos = 0
    confusion_matrix = {}
    for i in tqdm(range(len(prompts)), desc=f"Avaliando POS Tag (Dropout: {strategy})"):
        generated_word = generate_next_word_from_prompt_eval(model, tokenizer_obj, prompts[i], SEQUENCE_LENGTH, device_obj)
        generated_pos = "NONE"
        if generated_word:
            doc = nlp_model(generated_word)
            generated_pos = doc[0].pos_ if doc and doc[0] else "UNKNOWN"
        
        true_pos = pos_tags[i]
        confusion_matrix.setdefault(true_pos, {})
        confusion_matrix[true_pos].setdefault(generated_pos, 0)
        confusion_matrix[true_pos][generated_pos] += 1
        if generated_pos == true_pos:
            correct_pos += 1
            
    accuracy = (correct_pos / len(prompts)) * 100 if prompts else 0
    print(f"Precisão Geral da Classe Gramatical da Próxima Palavra: {accuracy:.2f}%")

# --- Execução da Avaliação ---
if prompts_for_eval:
    run_pos_evaluation_char_level("none", prompts_for_eval, true_next_words, true_next_pos_tags, tokenizer, device, nlp)
    run_pos_evaluation_char_level("between_lstm", prompts_for_eval, true_next_words, true_next_pos_tags, tokenizer, device, nlp)
    run_pos_evaluation_char_level("before_fc", prompts_for_eval, true_next_words, true_next_pos_tags, tokenizer, device, nlp)
else:
    print("Avaliação não executada devido à falta de prompts gerados.")

Usando dispositivo: cuda
Objeto carregado de: processed_data_by_artist_char_split\arianagrande\tokenizers\char_tokenizer_arianagrande.pkl
Objeto carregado de: processed_data_by_artist_char_split\arianagrande\tokenizers\vocab_size_arianagrande_char.pkl
Vocabulário de caracteres carregado. Tamanho do vocabulário: 36
Modelo spaCy 'en_core_web_sm' carregado.
Total de letras limpas de teste carregadas para avaliação: 32


Gerando prompts e targets para avaliação: 100%|██████████| 32/32 [00:00<00:00, 4497.76it/s]


Total de prompts e targets gerados para avaliação: 11279

Fazendo POS Tagging das palavras verdadeiras...


POS Tagging de palavras verdadeiras: 100%|██████████| 11279/11279 [00:43<00:00, 259.96it/s]



--- AVALIAÇÃO DE CLASSE GRAMATICAL: Modelo Caractere-a-Caractere com Dropout 'none' ---
Modelo 'model_weights_arianagrande_strategy_none_best.pt' carregado.

Iniciando avaliação de classe gramatical para 11279 prompts de teste...


Avaliando POS Tag da próxima palavra: 100%|██████████| 11279/11279 [28:40<00:00,  6.55it/s]



--- Resultados da Avaliação no Conjunto de TESTE (Dropout: none) ---
Total de prompts avaliados: 11279
Acertos de Classe Gramatical: 5883
Precisão Geral da Classe Gramatical da Próxima Palavra: 52.16%

--- Precisão por Classe Gramatical (quando deveria ser a classe X) ---
  Quando deveria ser 'PRON': 76.01% (Acertos: 2376/3126)
  Quando deveria ser 'SCONJ': 36.69% (Acertos: 102/278)
  Quando deveria ser 'VERB': 45.84% (Acertos: 941/2053)
  Quando deveria ser 'ADP': 47.22% (Acertos: 306/648)
  Quando deveria ser 'NOUN': 47.20% (Acertos: 717/1519)
  Quando deveria ser 'PART': 43.28% (Acertos: 87/201)
  Quando deveria ser 'ADV': 36.73% (Acertos: 314/855)
  Quando deveria ser 'PROPN': 28.24% (Acertos: 148/524)
  Quando deveria ser 'AUX': 44.89% (Acertos: 211/470)
  Quando deveria ser 'ADJ': 46.39% (Acertos: 289/623)
  Quando deveria ser 'INTJ': 41.83% (Acertos: 215/514)
  Quando deveria ser 'CCONJ': 36.63% (Acertos: 111/303)
  Quando deveria ser 'X': 55.32% (Acertos: 26/47)
  Quando dever

Avaliando POS Tag da próxima palavra: 100%|██████████| 11279/11279 [19:36<00:00,  9.59it/s]



--- Resultados da Avaliação no Conjunto de TESTE (Dropout: between_lstm) ---
Total de prompts avaliados: 11279
Acertos de Classe Gramatical: 6033
Precisão Geral da Classe Gramatical da Próxima Palavra: 53.49%

--- Precisão por Classe Gramatical (quando deveria ser a classe X) ---
  Quando deveria ser 'PRON': 77.06% (Acertos: 2409/3126)
  Quando deveria ser 'SCONJ': 25.54% (Acertos: 71/278)
  Quando deveria ser 'VERB': 51.92% (Acertos: 1066/2053)
  Quando deveria ser 'ADP': 44.29% (Acertos: 287/648)
  Quando deveria ser 'NOUN': 47.73% (Acertos: 725/1519)
  Quando deveria ser 'PART': 46.77% (Acertos: 94/201)
  Quando deveria ser 'ADV': 33.45% (Acertos: 286/855)
  Quando deveria ser 'PROPN': 38.74% (Acertos: 203/524)
  Quando deveria ser 'AUX': 44.04% (Acertos: 207/470)
  Quando deveria ser 'ADJ': 49.44% (Acertos: 308/623)
  Quando deveria ser 'INTJ': 37.35% (Acertos: 192/514)
  Quando deveria ser 'CCONJ': 38.28% (Acertos: 116/303)
  Quando deveria ser 'X': 40.43% (Acertos: 19/47)
  Quan

Avaliando POS Tag da próxima palavra: 100%|██████████| 11279/11279 [17:43<00:00, 10.60it/s]



--- Resultados da Avaliação no Conjunto de TESTE (Dropout: before_fc) ---
Total de prompts avaliados: 11279
Acertos de Classe Gramatical: 5693
Precisão Geral da Classe Gramatical da Próxima Palavra: 50.47%

--- Precisão por Classe Gramatical (quando deveria ser a classe X) ---
  Quando deveria ser 'PRON': 74.86% (Acertos: 2340/3126)
  Quando deveria ser 'SCONJ': 35.25% (Acertos: 98/278)
  Quando deveria ser 'VERB': 46.08% (Acertos: 946/2053)
  Quando deveria ser 'ADP': 42.28% (Acertos: 274/648)
  Quando deveria ser 'NOUN': 45.56% (Acertos: 692/1519)
  Quando deveria ser 'PART': 47.76% (Acertos: 96/201)
  Quando deveria ser 'ADV': 29.82% (Acertos: 255/855)
  Quando deveria ser 'PROPN': 25.00% (Acertos: 131/524)
  Quando deveria ser 'AUX': 45.53% (Acertos: 214/470)
  Quando deveria ser 'ADJ': 44.94% (Acertos: 280/623)
  Quando deveria ser 'INTJ': 33.46% (Acertos: 172/514)
  Quando deveria ser 'CCONJ': 42.24% (Acertos: 128/303)
  Quando deveria ser 'X': 51.06% (Acertos: 24/47)
  Quando d