In [2]:
import pandas as pd

In [3]:
# Load dataset
df = pd.read_json("./data/processed/te/train.json", orient="records", lines=True)

df.head()

Unnamed: 0,docid,tokens,tags,labels
0,file01,"[జమ్మూవిల్లే, భారీ వర్షపాతం ద్వారా, చాలా, స్థా...","[B-PLACE-ARG, B-REASON-ARG, O, O, B-NATURAL_EV...",{'NATURAL_EVENT.LAND_SLIDE': 5}
1,file02,"[భూమధ్యరేఖ, బస్సు, ఓవర్‌షాడో, ప్రమాదం:, 11 మంద...","[B-PLACE-ARG, B-MAN_MADE_EVENT.TRANSPORT_HAZAR...",{'MAN_MADE_EVENT.TRANSPORT_HAZARDS': 8}
2,file03,"[ఇరాన్లో, శక్తి, కుడి, భూకంపం:, 2, పాత్ర, త్యా...","[B-PLACE-ARG, B-INTENSITY-ARG, I-INTENSITY-ARG...",{'NATURAL_EVENT.EARTHQUAKE': 7}
3,file04,"[10, చనిపోతారు, ఇన్, ఈజిప్ట్, చర్చి, దాడి., పద...","[B-CASUALTIES-ARG, I-CASUALTIES-ARG, O, B-PLAC...",{'MAN_MADE_EVENT.TERRORIST_ATTACK': 7}
4,file05,"[50, చనిపోయిన, ఇన్, ఆత్మహత్య, బాంబు, దాడి, వద్...","[B-CASUALTIES-ARG, I-CASUALTIES-ARG, O, B-MAN_...",{'MAN_MADE_EVENT.SUICIDE_ATTACK': 7}


In [26]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import optuna
import os

# 1. Load Data
df = pd.read_json("./data/processed/te/train.json", orient="records", lines=True)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 2. Token and Label Index Mapping (renamed)
token2idx_a_multi = {token: idx for idx, token in enumerate(set(token for row in df.tokens for token in row), start=1)}
token2idx_a_multi["<PAD>"] = 0
label2idx_a_multi = {label: idx for idx, label in enumerate(set(label for row in df.tags for label in row), start=1)}
label2idx_a_multi["<PAD>"] = 0
idx2label_a_multi = {v: k for k, v in label2idx_a_multi.items()}

# 3. Encoding Function
def encode(row, max_len=150):
    token_ids = [token2idx_a_multi[token] for token in row['tokens']]
    tag_ids = [label2idx_a_multi[tag] for tag in row['tags']]
    pad_len = max_len - len(token_ids)
    token_ids += [token2idx_a_multi["<PAD>"]] * pad_len
    tag_ids += [label2idx_a_multi["<PAD>"]] * pad_len
    return token_ids[:max_len], tag_ids[:max_len]

df['encoded'] = df.apply(encode, axis=1)

# 4. Split
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)
val_data, test_data = train_test_split(test_data, test_size=0.5, random_state=42)

# 5. Dataset
class TokenDataset(Dataset):
    def __init__(self, data):
        self.tokens = [x[0] for x in data.encoded]
        self.labels = [x[1] for x in data.encoded]

    def __len__(self):
        return len(self.tokens)

    def __getitem__(self, idx):
        return torch.tensor(self.tokens[idx]), torch.tensor(self.labels[idx])

# 6. BiLSTM + Attention Model with improvements
class BiLSTMAttention(nn.Module):
    def __init__(self, vocab_size, tagset_size, embedding_dim, hidden_dim, dropout):
        super(BiLSTMAttention, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.bilstm = nn.LSTM(embedding_dim, hidden_dim, bidirectional=True, batch_first=True)
        self.layer_norm = nn.LayerNorm(hidden_dim * 2)
        self.attn_w = nn.Linear(hidden_dim * 2, hidden_dim)
        self.attn_context = nn.Linear(hidden_dim, 1, bias=False)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim * 2, tagset_size)
        self.init_weights()

    def init_weights(self):
        nn.init.xavier_uniform_(self.attn_w.weight)
        nn.init.xavier_uniform_(self.attn_context.weight)
        nn.init.xavier_uniform_(self.fc.weight)

    def forward(self, x):
        emb = self.embedding(x)
        lstm_out, _ = self.bilstm(emb)
        lstm_out = self.layer_norm(lstm_out)
        u = torch.tanh(self.attn_w(lstm_out))
        attn_weights = torch.softmax(self.attn_context(u), dim=1)
        attn_applied = lstm_out * attn_weights
        out = self.fc(self.dropout(attn_applied))
        return out

# 7. Train Function
def train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs=30):
    model.to(device)
    best_val_acc = 0
    final_loss = None

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        train_correct, train_total = 0, 0

        for tokens, labels in train_loader:
            tokens, labels = tokens.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(tokens)
            outputs = outputs.view(-1, outputs.shape[-1])
            labels = labels.view(-1)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()

            preds = torch.argmax(outputs, dim=-1)
            mask = labels != 0
            train_correct += (preds[mask] == labels[mask]).sum().item()
            train_total += mask.sum().item()

        train_acc = train_correct / train_total
        avg_loss = total_loss / len(train_loader)
        final_loss = avg_loss

        # Validation accuracy
        model.eval()
        all_preds, all_labels = [], []
        with torch.no_grad():
            for tokens, labels in val_loader:
                tokens, labels = tokens.to(device), labels.to(device)
                outputs = model(tokens)
                preds = torch.argmax(outputs, dim=-1)
                mask = labels != 0
                all_preds.extend(preds[mask].cpu().numpy())
                all_labels.extend(labels[mask].cpu().numpy())

        val_acc = accuracy_score(all_labels, all_preds)
        print(f"  Epoch {epoch+1}/{epochs} - Train Acc: {train_acc:.4f}, Loss: {avg_loss:.4f} - Val Acc: {val_acc:.4f}")

        if val_acc > best_val_acc:
            best_val_acc = val_acc

    return best_val_acc, final_loss

# 8. Optuna Objective
def objective(trial):
    embedding_dim = trial.suggest_int('embedding_dim', 64, 128)
    hidden_dim = trial.suggest_int('hidden_dim', 128, 256)
    dropout = trial.suggest_float('dropout', 0.2, 0.5)
    lr = trial.suggest_float('lr', 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])

    train_loader = DataLoader(TokenDataset(train_data), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TokenDataset(val_data), batch_size=batch_size)

    model = BiLSTMAttention(len(token2idx_a_multi), len(label2idx_a_multi), embedding_dim, hidden_dim, dropout)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss(ignore_index=0)

    print(f"\nTrial {trial.number + 1}: emb={embedding_dim}, hid={hidden_dim}, drop={dropout:.2f}, lr={lr:.5f}, batch={batch_size}")
    val_acc, final_loss = train_model(model, train_loader, val_loader, optimizer, criterion, device)
    print(f"Trial {trial.number + 1} Done: Val Accuracy = {val_acc:.4f}, Final Loss = {final_loss:.4f}")

    return val_acc

# 9. Run Optuna
study = optuna.create_study(direction="maximize", sampler=optuna.samplers.RandomSampler())
study.optimize(objective, n_trials=20)

# 10. Best Hyperparameters
print("\nBest Trial Hyperparameters:")
for key, val in study.best_params.items():
    print(f"  {key}: {val}")

# 11. Final Model Training
params = study.best_params
train_loader = DataLoader(TokenDataset(train_data), batch_size=params['batch_size'], shuffle=True)
val_loader = DataLoader(TokenDataset(val_data), batch_size=params['batch_size'])
test_loader = DataLoader(TokenDataset(test_data), batch_size=params['batch_size'])

model = BiLSTMAttention(len(token2idx_a_multi), len(label2idx_a_multi), params['embedding_dim'], params['hidden_dim'], params['dropout'])
optimizer = torch.optim.Adam(model.parameters(), lr=params['lr'])
criterion = nn.CrossEntropyLoss(ignore_index=0)

print("\nFinal Training with Best Hyperparameters...")
train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs=30)

# 12. Test Accuracy
model.eval()
all_preds, all_labels = [], []
with torch.no_grad():
    for tokens, labels in test_loader:
        tokens, labels = tokens.to(device), labels.to(device)
        outputs = model(tokens)
        preds = torch.argmax(outputs, dim=-1)
        mask = labels != 0
        all_preds.extend(preds[mask].cpu().numpy())
        all_labels.extend(labels[mask].cpu().numpy())

test_acc = accuracy_score(all_labels, all_preds)
print(f"\nFinal Test Accuracy: {test_acc:.4f}")

# 13. Save Model
os.makedirs('./model', exist_ok=True)
torch.save(model.state_dict(), './model/bilstm_attention.pt')
print("Model saved to './model/bilstm_attention.pt'")


[I 2025-05-04 10:33:32,746] A new study created in memory with name: no-name-fcec8e9e-a99c-4b09-bcb0-9360d834f0f5



Trial 1: emb=105, hid=170, drop=0.41, lr=0.00089, batch=32
  Epoch 1/30 - Train Acc: 0.2848, Loss: 4.4565 - Val Acc: 0.4734
  Epoch 2/30 - Train Acc: 0.5479, Loss: 4.0616 - Val Acc: 0.6283
  Epoch 3/30 - Train Acc: 0.6444, Loss: 3.6265 - Val Acc: 0.6360
  Epoch 4/30 - Train Acc: 0.6716, Loss: 3.1770 - Val Acc: 0.6659
  Epoch 5/30 - Train Acc: 0.6854, Loss: 2.7492 - Val Acc: 0.6706
  Epoch 6/30 - Train Acc: 0.6926, Loss: 2.3784 - Val Acc: 0.6749
  Epoch 7/30 - Train Acc: 0.6950, Loss: 2.0801 - Val Acc: 0.6774
  Epoch 8/30 - Train Acc: 0.6957, Loss: 1.8504 - Val Acc: 0.6780
  Epoch 9/30 - Train Acc: 0.6959, Loss: 1.6924 - Val Acc: 0.6780
  Epoch 10/30 - Train Acc: 0.6960, Loss: 1.5779 - Val Acc: 0.6778
  Epoch 11/30 - Train Acc: 0.6960, Loss: 1.5014 - Val Acc: 0.6781
  Epoch 12/30 - Train Acc: 0.6961, Loss: 1.4432 - Val Acc: 0.6780
  Epoch 13/30 - Train Acc: 0.6961, Loss: 1.4023 - Val Acc: 0.6778
  Epoch 14/30 - Train Acc: 0.6961, Loss: 1.3699 - Val Acc: 0.6781
  Epoch 15/30 - Train Acc

[I 2025-05-04 10:35:07,016] Trial 0 finished with value: 0.6781277807439046 and parameters: {'embedding_dim': 105, 'hidden_dim': 170, 'dropout': 0.4106635325123241, 'lr': 0.00089426356785894, 'batch_size': 32}. Best is trial 0 with value: 0.6781277807439046.


  Epoch 30/30 - Train Acc: 0.7005, Loss: 1.0873 - Val Acc: 0.6744
Trial 1 Done: Val Accuracy = 0.6781, Final Loss = 1.0873

Trial 2: emb=93, hid=187, drop=0.46, lr=0.00106, batch=16
  Epoch 1/30 - Train Acc: 0.6824, Loss: 4.1346 - Val Acc: 0.6784
  Epoch 2/30 - Train Acc: 0.6962, Loss: 3.1026 - Val Acc: 0.6784
  Epoch 3/30 - Train Acc: 0.6962, Loss: 2.1981 - Val Acc: 0.6784
  Epoch 4/30 - Train Acc: 0.6962, Loss: 1.7016 - Val Acc: 0.6784
  Epoch 5/30 - Train Acc: 0.6962, Loss: 1.5064 - Val Acc: 0.6784
  Epoch 6/30 - Train Acc: 0.6962, Loss: 1.4237 - Val Acc: 0.6784
  Epoch 7/30 - Train Acc: 0.6962, Loss: 1.3767 - Val Acc: 0.6784
  Epoch 8/30 - Train Acc: 0.6962, Loss: 1.3439 - Val Acc: 0.6784
  Epoch 9/30 - Train Acc: 0.6962, Loss: 1.3122 - Val Acc: 0.6784
  Epoch 10/30 - Train Acc: 0.6962, Loss: 1.2885 - Val Acc: 0.6784
  Epoch 11/30 - Train Acc: 0.6962, Loss: 1.2658 - Val Acc: 0.6784
  Epoch 12/30 - Train Acc: 0.6962, Loss: 1.2403 - Val Acc: 0.6784
  Epoch 13/30 - Train Acc: 0.6962, 

[I 2025-05-04 10:37:05,960] Trial 1 finished with value: 0.6838583377825236 and parameters: {'embedding_dim': 93, 'hidden_dim': 187, 'dropout': 0.46242385404643693, 'lr': 0.0010568415679659353, 'batch_size': 16}. Best is trial 1 with value: 0.6838583377825236.


  Epoch 30/30 - Train Acc: 0.7843, Loss: 0.8004 - Val Acc: 0.6839
Trial 2 Done: Val Accuracy = 0.6839, Final Loss = 0.8004

Trial 3: emb=104, hid=205, drop=0.48, lr=0.00075, batch=64
  Epoch 1/30 - Train Acc: 0.4868, Loss: 4.5297 - Val Acc: 0.6784
  Epoch 2/30 - Train Acc: 0.6962, Loss: 4.3193 - Val Acc: 0.6784
  Epoch 3/30 - Train Acc: 0.6962, Loss: 4.1303 - Val Acc: 0.6784
  Epoch 4/30 - Train Acc: 0.6962, Loss: 3.9331 - Val Acc: 0.6784
  Epoch 5/30 - Train Acc: 0.6962, Loss: 3.7241 - Val Acc: 0.6784
  Epoch 6/30 - Train Acc: 0.6962, Loss: 3.5080 - Val Acc: 0.6784
  Epoch 7/30 - Train Acc: 0.6962, Loss: 3.3021 - Val Acc: 0.6784
  Epoch 8/30 - Train Acc: 0.6962, Loss: 3.0808 - Val Acc: 0.6784
  Epoch 9/30 - Train Acc: 0.6962, Loss: 2.8741 - Val Acc: 0.6784
  Epoch 10/30 - Train Acc: 0.6962, Loss: 2.6805 - Val Acc: 0.6784
  Epoch 11/30 - Train Acc: 0.6962, Loss: 2.4996 - Val Acc: 0.6784
  Epoch 12/30 - Train Acc: 0.6962, Loss: 2.3355 - Val Acc: 0.6784
  Epoch 13/30 - Train Acc: 0.6962,

[I 2025-05-04 10:37:50,122] Trial 2 finished with value: 0.6784481224417156 and parameters: {'embedding_dim': 104, 'hidden_dim': 205, 'dropout': 0.4829299933786664, 'lr': 0.0007488276419263299, 'batch_size': 64}. Best is trial 1 with value: 0.6838583377825236.


  Epoch 30/30 - Train Acc: 0.6961, Loss: 1.3015 - Val Acc: 0.6779
Trial 3 Done: Val Accuracy = 0.6784, Final Loss = 1.3015

Trial 4: emb=126, hid=210, drop=0.41, lr=0.00048, batch=32
  Epoch 1/30 - Train Acc: 0.6185, Loss: 4.4849 - Val Acc: 0.6784
  Epoch 2/30 - Train Acc: 0.6962, Loss: 4.2198 - Val Acc: 0.6784
  Epoch 3/30 - Train Acc: 0.6962, Loss: 3.9665 - Val Acc: 0.6784
  Epoch 4/30 - Train Acc: 0.6962, Loss: 3.6994 - Val Acc: 0.6784
  Epoch 5/30 - Train Acc: 0.6962, Loss: 3.4261 - Val Acc: 0.6784
  Epoch 6/30 - Train Acc: 0.6962, Loss: 3.1553 - Val Acc: 0.6784
  Epoch 7/30 - Train Acc: 0.6962, Loss: 2.8970 - Val Acc: 0.6784
  Epoch 8/30 - Train Acc: 0.6962, Loss: 2.6579 - Val Acc: 0.6784
  Epoch 9/30 - Train Acc: 0.6962, Loss: 2.4395 - Val Acc: 0.6784
  Epoch 10/30 - Train Acc: 0.6962, Loss: 2.2429 - Val Acc: 0.6784
  Epoch 11/30 - Train Acc: 0.6962, Loss: 2.0808 - Val Acc: 0.6784
  Epoch 12/30 - Train Acc: 0.6962, Loss: 1.9398 - Val Acc: 0.6784
  Epoch 13/30 - Train Acc: 0.6962,

[I 2025-05-04 10:38:45,253] Trial 3 finished with value: 0.6784481224417156 and parameters: {'embedding_dim': 126, 'hidden_dim': 210, 'dropout': 0.4134055223891382, 'lr': 0.0004787198758857231, 'batch_size': 32}. Best is trial 1 with value: 0.6838583377825236.


  Epoch 30/30 - Train Acc: 0.6962, Loss: 1.2331 - Val Acc: 0.6766
Trial 4 Done: Val Accuracy = 0.6784, Final Loss = 1.2331

Trial 5: emb=85, hid=142, drop=0.25, lr=0.00045, batch=16
  Epoch 1/30 - Train Acc: 0.2468, Loss: 4.4585 - Val Acc: 0.4203
  Epoch 2/30 - Train Acc: 0.5286, Loss: 4.0907 - Val Acc: 0.5922
  Epoch 3/30 - Train Acc: 0.6530, Loss: 3.6908 - Val Acc: 0.6518
  Epoch 4/30 - Train Acc: 0.6802, Loss: 3.2788 - Val Acc: 0.6702
  Epoch 5/30 - Train Acc: 0.6935, Loss: 2.8807 - Val Acc: 0.6779
  Epoch 6/30 - Train Acc: 0.6962, Loss: 2.5232 - Val Acc: 0.6784
  Epoch 7/30 - Train Acc: 0.6962, Loss: 2.2242 - Val Acc: 0.6784
  Epoch 8/30 - Train Acc: 0.6962, Loss: 1.9874 - Val Acc: 0.6784
  Epoch 9/30 - Train Acc: 0.6962, Loss: 1.8011 - Val Acc: 0.6784
  Epoch 10/30 - Train Acc: 0.6962, Loss: 1.6699 - Val Acc: 0.6784
  Epoch 11/30 - Train Acc: 0.6962, Loss: 1.5741 - Val Acc: 0.6784
  Epoch 12/30 - Train Acc: 0.6962, Loss: 1.5085 - Val Acc: 0.6784
  Epoch 13/30 - Train Acc: 0.6962, 

[I 2025-05-04 10:40:04,574] Trial 4 finished with value: 0.6784481224417156 and parameters: {'embedding_dim': 85, 'hidden_dim': 142, 'dropout': 0.24654572010311984, 'lr': 0.0004547147374385302, 'batch_size': 16}. Best is trial 1 with value: 0.6838583377825236.


  Epoch 30/30 - Train Acc: 0.6962, Loss: 1.1534 - Val Acc: 0.6766
Trial 5 Done: Val Accuracy = 0.6784, Final Loss = 1.1534

Trial 6: emb=114, hid=191, drop=0.27, lr=0.00066, batch=64
  Epoch 1/30 - Train Acc: 0.1229, Loss: 4.5866 - Val Acc: 0.2235
  Epoch 2/30 - Train Acc: 0.2864, Loss: 4.3974 - Val Acc: 0.3416
  Epoch 3/30 - Train Acc: 0.4246, Loss: 4.2357 - Val Acc: 0.4924
  Epoch 4/30 - Train Acc: 0.5488, Loss: 4.0642 - Val Acc: 0.6005
  Epoch 5/30 - Train Acc: 0.6113, Loss: 3.8838 - Val Acc: 0.6325
  Epoch 6/30 - Train Acc: 0.6433, Loss: 3.7023 - Val Acc: 0.6250
  Epoch 7/30 - Train Acc: 0.6540, Loss: 3.5121 - Val Acc: 0.6367
  Epoch 8/30 - Train Acc: 0.6620, Loss: 3.3288 - Val Acc: 0.6488
  Epoch 9/30 - Train Acc: 0.6714, Loss: 3.1457 - Val Acc: 0.6512
  Epoch 10/30 - Train Acc: 0.6776, Loss: 2.9638 - Val Acc: 0.6635
  Epoch 11/30 - Train Acc: 0.6834, Loss: 2.7951 - Val Acc: 0.6695
  Epoch 12/30 - Train Acc: 0.6874, Loss: 2.6311 - Val Acc: 0.6680
  Epoch 13/30 - Train Acc: 0.6902,

[I 2025-05-04 10:41:39,619] Trial 5 finished with value: 0.6781277807439046 and parameters: {'embedding_dim': 114, 'hidden_dim': 191, 'dropout': 0.2732478099757905, 'lr': 0.0006625175180949738, 'batch_size': 64}. Best is trial 1 with value: 0.6838583377825236.


  Epoch 30/30 - Train Acc: 0.6964, Loss: 1.3808 - Val Acc: 0.6778
Trial 6 Done: Val Accuracy = 0.6781, Final Loss = 1.3808

Trial 7: emb=90, hid=250, drop=0.43, lr=0.00017, batch=32
  Epoch 1/30 - Train Acc: 0.2119, Loss: 4.5786 - Val Acc: 0.5130
  Epoch 2/30 - Train Acc: 0.6710, Loss: 4.4165 - Val Acc: 0.6784
  Epoch 3/30 - Train Acc: 0.6962, Loss: 4.2953 - Val Acc: 0.6784
  Epoch 4/30 - Train Acc: 0.6962, Loss: 4.1946 - Val Acc: 0.6784
  Epoch 5/30 - Train Acc: 0.6962, Loss: 4.0937 - Val Acc: 0.6784
  Epoch 6/30 - Train Acc: 0.6962, Loss: 3.9895 - Val Acc: 0.6784
  Epoch 7/30 - Train Acc: 0.6962, Loss: 3.8840 - Val Acc: 0.6784
  Epoch 8/30 - Train Acc: 0.6962, Loss: 3.7770 - Val Acc: 0.6784
  Epoch 9/30 - Train Acc: 0.6962, Loss: 3.6689 - Val Acc: 0.6784
  Epoch 10/30 - Train Acc: 0.6962, Loss: 3.5612 - Val Acc: 0.6784
  Epoch 11/30 - Train Acc: 0.6962, Loss: 3.4536 - Val Acc: 0.6784
  Epoch 12/30 - Train Acc: 0.6962, Loss: 3.3469 - Val Acc: 0.6784
  Epoch 13/30 - Train Acc: 0.6962, 

[I 2025-05-04 10:42:59,939] Trial 6 finished with value: 0.6784481224417156 and parameters: {'embedding_dim': 90, 'hidden_dim': 250, 'dropout': 0.4262985751361407, 'lr': 0.00017437187993963647, 'batch_size': 32}. Best is trial 1 with value: 0.6838583377825236.


  Epoch 30/30 - Train Acc: 0.6962, Loss: 1.8939 - Val Acc: 0.6784
Trial 7 Done: Val Accuracy = 0.6784, Final Loss = 1.8939

Trial 8: emb=101, hid=156, drop=0.27, lr=0.00023, batch=64
  Epoch 1/30 - Train Acc: 0.5709, Loss: 4.5916 - Val Acc: 0.6781
  Epoch 2/30 - Train Acc: 0.6961, Loss: 4.5362 - Val Acc: 0.6784
  Epoch 3/30 - Train Acc: 0.6962, Loss: 4.4768 - Val Acc: 0.6784
  Epoch 4/30 - Train Acc: 0.6962, Loss: 4.4036 - Val Acc: 0.6784
  Epoch 5/30 - Train Acc: 0.6962, Loss: 4.3435 - Val Acc: 0.6784
  Epoch 6/30 - Train Acc: 0.6962, Loss: 4.2900 - Val Acc: 0.6784
  Epoch 7/30 - Train Acc: 0.6962, Loss: 4.2374 - Val Acc: 0.6784
  Epoch 8/30 - Train Acc: 0.6962, Loss: 4.1842 - Val Acc: 0.6784
  Epoch 9/30 - Train Acc: 0.6962, Loss: 4.1298 - Val Acc: 0.6784
  Epoch 10/30 - Train Acc: 0.6962, Loss: 4.0759 - Val Acc: 0.6784
  Epoch 11/30 - Train Acc: 0.6962, Loss: 4.0214 - Val Acc: 0.6784
  Epoch 12/30 - Train Acc: 0.6962, Loss: 3.9664 - Val Acc: 0.6784
  Epoch 13/30 - Train Acc: 0.6962,

[I 2025-05-04 10:44:13,288] Trial 7 finished with value: 0.6784481224417156 and parameters: {'embedding_dim': 101, 'hidden_dim': 156, 'dropout': 0.26650323770560774, 'lr': 0.0002287890430242527, 'batch_size': 64}. Best is trial 1 with value: 0.6838583377825236.


  Epoch 30/30 - Train Acc: 0.6962, Loss: 2.9599 - Val Acc: 0.6784
Trial 8 Done: Val Accuracy = 0.6784, Final Loss = 2.9599

Trial 9: emb=84, hid=156, drop=0.41, lr=0.00608, batch=64
  Epoch 1/30 - Train Acc: 0.5140, Loss: 4.1646 - Val Acc: 0.6502
  Epoch 2/30 - Train Acc: 0.6884, Loss: 2.9218 - Val Acc: 0.6784
  Epoch 3/30 - Train Acc: 0.6962, Loss: 1.8811 - Val Acc: 0.6784
  Epoch 4/30 - Train Acc: 0.6962, Loss: 1.4803 - Val Acc: 0.6784
  Epoch 5/30 - Train Acc: 0.6962, Loss: 1.3764 - Val Acc: 0.6784
  Epoch 6/30 - Train Acc: 0.6962, Loss: 1.3301 - Val Acc: 0.6784
  Epoch 7/30 - Train Acc: 0.6962, Loss: 1.2890 - Val Acc: 0.6781
  Epoch 8/30 - Train Acc: 0.6958, Loss: 1.2569 - Val Acc: 0.6776
  Epoch 9/30 - Train Acc: 0.6955, Loss: 1.2205 - Val Acc: 0.6761
  Epoch 10/30 - Train Acc: 0.6952, Loss: 1.1838 - Val Acc: 0.6763
  Epoch 11/30 - Train Acc: 0.6951, Loss: 1.1491 - Val Acc: 0.6746
  Epoch 12/30 - Train Acc: 0.6957, Loss: 1.1174 - Val Acc: 0.6740
  Epoch 13/30 - Train Acc: 0.6994, 

[I 2025-05-04 10:45:27,263] Trial 8 finished with value: 0.7123687488877024 and parameters: {'embedding_dim': 84, 'hidden_dim': 156, 'dropout': 0.41188938009939935, 'lr': 0.0060797505820611025, 'batch_size': 64}. Best is trial 8 with value: 0.7123687488877024.


  Epoch 30/30 - Train Acc: 0.8889, Loss: 0.4811 - Val Acc: 0.7090
Trial 9 Done: Val Accuracy = 0.7124, Final Loss = 0.4811

Trial 10: emb=121, hid=202, drop=0.24, lr=0.00855, batch=64
  Epoch 1/30 - Train Acc: 0.5729, Loss: 3.8627 - Val Acc: 0.6784
  Epoch 2/30 - Train Acc: 0.6962, Loss: 2.0948 - Val Acc: 0.6784
  Epoch 3/30 - Train Acc: 0.6962, Loss: 1.4452 - Val Acc: 0.6784
  Epoch 4/30 - Train Acc: 0.6962, Loss: 1.3440 - Val Acc: 0.6784
  Epoch 5/30 - Train Acc: 0.6962, Loss: 1.2861 - Val Acc: 0.6783
  Epoch 6/30 - Train Acc: 0.6959, Loss: 1.2309 - Val Acc: 0.6775
  Epoch 7/30 - Train Acc: 0.6956, Loss: 1.1718 - Val Acc: 0.6763
  Epoch 8/30 - Train Acc: 0.7155, Loss: 1.0886 - Val Acc: 0.6825
  Epoch 9/30 - Train Acc: 0.7449, Loss: 0.9925 - Val Acc: 0.6987
  Epoch 10/30 - Train Acc: 0.7829, Loss: 0.8833 - Val Acc: 0.7084
  Epoch 11/30 - Train Acc: 0.8110, Loss: 0.7845 - Val Acc: 0.7135
  Epoch 12/30 - Train Acc: 0.8335, Loss: 0.6974 - Val Acc: 0.7191
  Epoch 13/30 - Train Acc: 0.8527

[I 2025-05-04 10:46:28,983] Trial 9 finished with value: 0.7393842320697633 and parameters: {'embedding_dim': 121, 'hidden_dim': 202, 'dropout': 0.23970141173967827, 'lr': 0.00855370882946973, 'batch_size': 64}. Best is trial 9 with value: 0.7393842320697633.


  Epoch 30/30 - Train Acc: 0.9607, Loss: 0.1584 - Val Acc: 0.7390
Trial 10 Done: Val Accuracy = 0.7394, Final Loss = 0.1584

Trial 11: emb=85, hid=160, drop=0.22, lr=0.00015, batch=64
  Epoch 1/30 - Train Acc: 0.0330, Loss: 4.6613 - Val Acc: 0.0771
  Epoch 2/30 - Train Acc: 0.0938, Loss: 4.6023 - Val Acc: 0.1183
  Epoch 3/30 - Train Acc: 0.1409, Loss: 4.5499 - Val Acc: 0.1906
  Epoch 4/30 - Train Acc: 0.2459, Loss: 4.5017 - Val Acc: 0.2894
  Epoch 5/30 - Train Acc: 0.3393, Loss: 4.4609 - Val Acc: 0.3735
  Epoch 6/30 - Train Acc: 0.4379, Loss: 4.4261 - Val Acc: 0.4612
  Epoch 7/30 - Train Acc: 0.5542, Loss: 4.3937 - Val Acc: 0.6212
  Epoch 8/30 - Train Acc: 0.6845, Loss: 4.3624 - Val Acc: 0.6784
  Epoch 9/30 - Train Acc: 0.6962, Loss: 4.3297 - Val Acc: 0.6784
  Epoch 10/30 - Train Acc: 0.6962, Loss: 4.2973 - Val Acc: 0.6784
  Epoch 11/30 - Train Acc: 0.6962, Loss: 4.2645 - Val Acc: 0.6784
  Epoch 12/30 - Train Acc: 0.6962, Loss: 4.2316 - Val Acc: 0.6784
  Epoch 13/30 - Train Acc: 0.6962

[I 2025-05-04 10:47:42,974] Trial 10 finished with value: 0.6784481224417156 and parameters: {'embedding_dim': 85, 'hidden_dim': 160, 'dropout': 0.21553270760196677, 'lr': 0.00014519770367721425, 'batch_size': 64}. Best is trial 9 with value: 0.7393842320697633.


  Epoch 30/30 - Train Acc: 0.6962, Loss: 3.6011 - Val Acc: 0.6784
Trial 11 Done: Val Accuracy = 0.6784, Final Loss = 3.6011

Trial 12: emb=99, hid=212, drop=0.50, lr=0.00474, batch=16
  Epoch 1/30 - Train Acc: 0.6346, Loss: 2.7668 - Val Acc: 0.6784
  Epoch 2/30 - Train Acc: 0.6962, Loss: 1.4336 - Val Acc: 0.6784
  Epoch 3/30 - Train Acc: 0.6962, Loss: 1.3603 - Val Acc: 0.6784
  Epoch 4/30 - Train Acc: 0.6962, Loss: 1.2737 - Val Acc: 0.6782
  Epoch 5/30 - Train Acc: 0.7071, Loss: 1.1744 - Val Acc: 0.6932
  Epoch 6/30 - Train Acc: 0.7404, Loss: 1.0493 - Val Acc: 0.7015
  Epoch 7/30 - Train Acc: 0.7750, Loss: 0.9114 - Val Acc: 0.7180
  Epoch 8/30 - Train Acc: 0.8072, Loss: 0.7945 - Val Acc: 0.7257
  Epoch 9/30 - Train Acc: 0.8291, Loss: 0.7052 - Val Acc: 0.7218
  Epoch 10/30 - Train Acc: 0.8490, Loss: 0.6185 - Val Acc: 0.7342
  Epoch 11/30 - Train Acc: 0.8647, Loss: 0.5516 - Val Acc: 0.7379
  Epoch 12/30 - Train Acc: 0.8776, Loss: 0.4940 - Val Acc: 0.7368
  Epoch 13/30 - Train Acc: 0.8900

[I 2025-05-04 10:49:51,684] Trial 11 finished with value: 0.7493860117458623 and parameters: {'embedding_dim': 99, 'hidden_dim': 212, 'dropout': 0.49925576681424444, 'lr': 0.004737349647790637, 'batch_size': 16}. Best is trial 11 with value: 0.7493860117458623.


  Epoch 30/30 - Train Acc: 0.9729, Loss: 0.0965 - Val Acc: 0.7468
Trial 12 Done: Val Accuracy = 0.7494, Final Loss = 0.0965

Trial 13: emb=83, hid=160, drop=0.21, lr=0.00397, batch=32
  Epoch 1/30 - Train Acc: 0.4917, Loss: 3.9442 - Val Acc: 0.6695
  Epoch 2/30 - Train Acc: 0.6943, Loss: 2.3359 - Val Acc: 0.6784
  Epoch 3/30 - Train Acc: 0.6962, Loss: 1.5515 - Val Acc: 0.6784
  Epoch 4/30 - Train Acc: 0.6962, Loss: 1.3934 - Val Acc: 0.6784
  Epoch 5/30 - Train Acc: 0.6963, Loss: 1.3288 - Val Acc: 0.6784
  Epoch 6/30 - Train Acc: 0.6962, Loss: 1.2909 - Val Acc: 0.6784
  Epoch 7/30 - Train Acc: 0.6958, Loss: 1.2329 - Val Acc: 0.6770
  Epoch 8/30 - Train Acc: 0.6958, Loss: 1.1702 - Val Acc: 0.6777
  Epoch 9/30 - Train Acc: 0.7153, Loss: 1.1010 - Val Acc: 0.6853
  Epoch 10/30 - Train Acc: 0.7363, Loss: 1.0257 - Val Acc: 0.6944
  Epoch 11/30 - Train Acc: 0.7610, Loss: 0.9485 - Val Acc: 0.7004
  Epoch 12/30 - Train Acc: 0.7886, Loss: 0.8627 - Val Acc: 0.7049
  Epoch 13/30 - Train Acc: 0.8119

[I 2025-05-04 10:51:23,030] Trial 12 finished with value: 0.7219789998220324 and parameters: {'embedding_dim': 83, 'hidden_dim': 160, 'dropout': 0.20894821299144145, 'lr': 0.003973397629439353, 'batch_size': 32}. Best is trial 11 with value: 0.7493860117458623.


  Epoch 30/30 - Train Acc: 0.9330, Loss: 0.2924 - Val Acc: 0.7186
Trial 13 Done: Val Accuracy = 0.7220, Final Loss = 0.2924

Trial 14: emb=81, hid=215, drop=0.46, lr=0.00182, batch=16
  Epoch 1/30 - Train Acc: 0.5883, Loss: 3.7965 - Val Acc: 0.6784
  Epoch 2/30 - Train Acc: 0.6962, Loss: 2.1342 - Val Acc: 0.6784
  Epoch 3/30 - Train Acc: 0.6962, Loss: 1.5336 - Val Acc: 0.6784
  Epoch 4/30 - Train Acc: 0.6962, Loss: 1.4206 - Val Acc: 0.6784
  Epoch 5/30 - Train Acc: 0.6962, Loss: 1.3704 - Val Acc: 0.6784
  Epoch 6/30 - Train Acc: 0.6962, Loss: 1.3300 - Val Acc: 0.6784
  Epoch 7/30 - Train Acc: 0.6962, Loss: 1.3009 - Val Acc: 0.6784
  Epoch 8/30 - Train Acc: 0.6962, Loss: 1.2705 - Val Acc: 0.6784
  Epoch 9/30 - Train Acc: 0.6962, Loss: 1.2488 - Val Acc: 0.6784
  Epoch 10/30 - Train Acc: 0.6963, Loss: 1.2135 - Val Acc: 0.6784
  Epoch 11/30 - Train Acc: 0.6961, Loss: 1.1810 - Val Acc: 0.6776
  Epoch 12/30 - Train Acc: 0.6953, Loss: 1.1568 - Val Acc: 0.6761
  Epoch 13/30 - Train Acc: 0.6951

[I 2025-05-04 10:53:31,363] Trial 13 finished with value: 0.6910126357003026 and parameters: {'embedding_dim': 81, 'hidden_dim': 215, 'dropout': 0.4574868100882938, 'lr': 0.0018158450285380551, 'batch_size': 16}. Best is trial 11 with value: 0.7493860117458623.


  Epoch 30/30 - Train Acc: 0.8174, Loss: 0.7331 - Val Acc: 0.6910
Trial 14 Done: Val Accuracy = 0.6910, Final Loss = 0.7331

Trial 15: emb=93, hid=167, drop=0.22, lr=0.00235, batch=64
  Epoch 1/30 - Train Acc: 0.2436, Loss: 4.4465 - Val Acc: 0.4211
  Epoch 2/30 - Train Acc: 0.4898, Loss: 3.9509 - Val Acc: 0.5769
  Epoch 3/30 - Train Acc: 0.6169, Loss: 3.3827 - Val Acc: 0.5924
  Epoch 4/30 - Train Acc: 0.6697, Loss: 2.8108 - Val Acc: 0.6551
  Epoch 5/30 - Train Acc: 0.6885, Loss: 2.3023 - Val Acc: 0.6760
  Epoch 6/30 - Train Acc: 0.6941, Loss: 1.9279 - Val Acc: 0.6770
  Epoch 7/30 - Train Acc: 0.6956, Loss: 1.6899 - Val Acc: 0.6784
  Epoch 8/30 - Train Acc: 0.6962, Loss: 1.5393 - Val Acc: 0.6784
  Epoch 9/30 - Train Acc: 0.6962, Loss: 1.4510 - Val Acc: 0.6784
  Epoch 10/30 - Train Acc: 0.6962, Loss: 1.3897 - Val Acc: 0.6783
  Epoch 11/30 - Train Acc: 0.6962, Loss: 1.3490 - Val Acc: 0.6784
  Epoch 12/30 - Train Acc: 0.6961, Loss: 1.3166 - Val Acc: 0.6782
  Epoch 13/30 - Train Acc: 0.6959

[I 2025-05-04 10:55:07,092] Trial 14 finished with value: 0.6863142907990746 and parameters: {'embedding_dim': 93, 'hidden_dim': 167, 'dropout': 0.21518333194611458, 'lr': 0.002354646328328524, 'batch_size': 64}. Best is trial 11 with value: 0.7493860117458623.


  Epoch 30/30 - Train Acc: 0.7536, Loss: 0.8839 - Val Acc: 0.6863
Trial 15 Done: Val Accuracy = 0.6863, Final Loss = 0.8839

Trial 16: emb=90, hid=195, drop=0.43, lr=0.00195, batch=32
  Epoch 1/30 - Train Acc: 0.3983, Loss: 4.2370 - Val Acc: 0.6152
  Epoch 2/30 - Train Acc: 0.6559, Loss: 3.2885 - Val Acc: 0.6745
  Epoch 3/30 - Train Acc: 0.6942, Loss: 2.3636 - Val Acc: 0.6784
  Epoch 4/30 - Train Acc: 0.6962, Loss: 1.7781 - Val Acc: 0.6784
  Epoch 5/30 - Train Acc: 0.6962, Loss: 1.5238 - Val Acc: 0.6784
  Epoch 6/30 - Train Acc: 0.6962, Loss: 1.4307 - Val Acc: 0.6785
  Epoch 7/30 - Train Acc: 0.6963, Loss: 1.3734 - Val Acc: 0.6784
  Epoch 8/30 - Train Acc: 0.6962, Loss: 1.3353 - Val Acc: 0.6785
  Epoch 9/30 - Train Acc: 0.6963, Loss: 1.3030 - Val Acc: 0.6784
  Epoch 10/30 - Train Acc: 0.6962, Loss: 1.2747 - Val Acc: 0.6783
  Epoch 11/30 - Train Acc: 0.6962, Loss: 1.2470 - Val Acc: 0.6781
  Epoch 12/30 - Train Acc: 0.6961, Loss: 1.2191 - Val Acc: 0.6771
  Epoch 13/30 - Train Acc: 0.6959

[I 2025-05-04 10:56:22,375] Trial 15 finished with value: 0.7062822566292934 and parameters: {'embedding_dim': 90, 'hidden_dim': 195, 'dropout': 0.4285579688745075, 'lr': 0.001953805143343164, 'batch_size': 32}. Best is trial 11 with value: 0.7493860117458623.


  Epoch 30/30 - Train Acc: 0.8569, Loss: 0.5953 - Val Acc: 0.6997
Trial 16 Done: Val Accuracy = 0.7063, Final Loss = 0.5953

Trial 17: emb=86, hid=226, drop=0.50, lr=0.00048, batch=64
  Epoch 1/30 - Train Acc: 0.1151, Loss: 4.6003 - Val Acc: 0.2494
  Epoch 2/30 - Train Acc: 0.2913, Loss: 4.4255 - Val Acc: 0.3178
  Epoch 3/30 - Train Acc: 0.3709, Loss: 4.2936 - Val Acc: 0.4039
  Epoch 4/30 - Train Acc: 0.4650, Loss: 4.1620 - Val Acc: 0.5301
  Epoch 5/30 - Train Acc: 0.5367, Loss: 4.0257 - Val Acc: 0.5613
  Epoch 6/30 - Train Acc: 0.5833, Loss: 3.8853 - Val Acc: 0.6008
  Epoch 7/30 - Train Acc: 0.6115, Loss: 3.7437 - Val Acc: 0.6240
  Epoch 8/30 - Train Acc: 0.6355, Loss: 3.5992 - Val Acc: 0.6238
  Epoch 9/30 - Train Acc: 0.6478, Loss: 3.4506 - Val Acc: 0.6393
  Epoch 10/30 - Train Acc: 0.6576, Loss: 3.3105 - Val Acc: 0.6397
  Epoch 11/30 - Train Acc: 0.6675, Loss: 3.1628 - Val Acc: 0.6573
  Epoch 12/30 - Train Acc: 0.6730, Loss: 3.0240 - Val Acc: 0.6580
  Epoch 13/30 - Train Acc: 0.6781

[I 2025-05-04 10:57:27,895] Trial 16 finished with value: 0.6784125289197366 and parameters: {'embedding_dim': 86, 'hidden_dim': 226, 'dropout': 0.4988813209001345, 'lr': 0.0004790178925101501, 'batch_size': 64}. Best is trial 11 with value: 0.7493860117458623.


  Epoch 30/30 - Train Acc: 0.6960, Loss: 1.5695 - Val Acc: 0.6784
Trial 17 Done: Val Accuracy = 0.6784, Final Loss = 1.5695

Trial 18: emb=98, hid=221, drop=0.48, lr=0.00045, batch=32
  Epoch 1/30 - Train Acc: 0.1870, Loss: 4.5408 - Val Acc: 0.3144
  Epoch 2/30 - Train Acc: 0.4243, Loss: 4.2775 - Val Acc: 0.4867
  Epoch 3/30 - Train Acc: 0.5621, Loss: 4.0296 - Val Acc: 0.5534
  Epoch 4/30 - Train Acc: 0.6270, Loss: 3.7733 - Val Acc: 0.6356
  Epoch 5/30 - Train Acc: 0.6508, Loss: 3.5120 - Val Acc: 0.6390
  Epoch 6/30 - Train Acc: 0.6647, Loss: 3.2524 - Val Acc: 0.6507
  Epoch 7/30 - Train Acc: 0.6770, Loss: 3.0003 - Val Acc: 0.6590
  Epoch 8/30 - Train Acc: 0.6859, Loss: 2.7602 - Val Acc: 0.6723
  Epoch 9/30 - Train Acc: 0.6917, Loss: 2.5402 - Val Acc: 0.6744
  Epoch 10/30 - Train Acc: 0.6945, Loss: 2.3408 - Val Acc: 0.6768
  Epoch 11/30 - Train Acc: 0.6956, Loss: 2.1689 - Val Acc: 0.6784
  Epoch 12/30 - Train Acc: 0.6962, Loss: 2.0202 - Val Acc: 0.6784
  Epoch 13/30 - Train Acc: 0.6962

[I 2025-05-04 10:58:43,787] Trial 17 finished with value: 0.6784481224417156 and parameters: {'embedding_dim': 98, 'hidden_dim': 221, 'dropout': 0.47614192308589415, 'lr': 0.0004461463011943076, 'batch_size': 32}. Best is trial 11 with value: 0.7493860117458623.


  Epoch 30/30 - Train Acc: 0.6962, Loss: 1.2709 - Val Acc: 0.6784
Trial 18 Done: Val Accuracy = 0.6784, Final Loss = 1.2709

Trial 19: emb=100, hid=241, drop=0.46, lr=0.00369, batch=64
  Epoch 1/30 - Train Acc: 0.5592, Loss: 4.2047 - Val Acc: 0.5674
  Epoch 2/30 - Train Acc: 0.6781, Loss: 3.2399 - Val Acc: 0.6784
  Epoch 3/30 - Train Acc: 0.6962, Loss: 2.2926 - Val Acc: 0.6784
  Epoch 4/30 - Train Acc: 0.6962, Loss: 1.6983 - Val Acc: 0.6784
  Epoch 5/30 - Train Acc: 0.6962, Loss: 1.4696 - Val Acc: 0.6784
  Epoch 6/30 - Train Acc: 0.6962, Loss: 1.3829 - Val Acc: 0.6784
  Epoch 7/30 - Train Acc: 0.6962, Loss: 1.3421 - Val Acc: 0.6784
  Epoch 8/30 - Train Acc: 0.6962, Loss: 1.3044 - Val Acc: 0.6784
  Epoch 9/30 - Train Acc: 0.6962, Loss: 1.2667 - Val Acc: 0.6783
  Epoch 10/30 - Train Acc: 0.6962, Loss: 1.2362 - Val Acc: 0.6778
  Epoch 11/30 - Train Acc: 0.6962, Loss: 1.1994 - Val Acc: 0.6776
  Epoch 12/30 - Train Acc: 0.6962, Loss: 1.1594 - Val Acc: 0.6775
  Epoch 13/30 - Train Acc: 0.696

[I 2025-05-04 10:59:55,295] Trial 18 finished with value: 0.7113009432283325 and parameters: {'embedding_dim': 100, 'hidden_dim': 241, 'dropout': 0.45788722886423083, 'lr': 0.0036912383390022407, 'batch_size': 64}. Best is trial 11 with value: 0.7493860117458623.


  Epoch 30/30 - Train Acc: 0.8868, Loss: 0.4846 - Val Acc: 0.7113
Trial 19 Done: Val Accuracy = 0.7113, Final Loss = 0.4846

Trial 20: emb=79, hid=239, drop=0.37, lr=0.00013, batch=32
  Epoch 1/30 - Train Acc: 0.1411, Loss: 4.5881 - Val Acc: 0.2463
  Epoch 2/30 - Train Acc: 0.4914, Loss: 4.4722 - Val Acc: 0.6011
  Epoch 3/30 - Train Acc: 0.6258, Loss: 4.3719 - Val Acc: 0.6226
  Epoch 4/30 - Train Acc: 0.6524, Loss: 4.2938 - Val Acc: 0.6478
  Epoch 5/30 - Train Acc: 0.6741, Loss: 4.2178 - Val Acc: 0.6629
  Epoch 6/30 - Train Acc: 0.6898, Loss: 4.1415 - Val Acc: 0.6784
  Epoch 7/30 - Train Acc: 0.6962, Loss: 4.0632 - Val Acc: 0.6784
  Epoch 8/30 - Train Acc: 0.6962, Loss: 3.9838 - Val Acc: 0.6784
  Epoch 9/30 - Train Acc: 0.6962, Loss: 3.9049 - Val Acc: 0.6784
  Epoch 10/30 - Train Acc: 0.6962, Loss: 3.8251 - Val Acc: 0.6784
  Epoch 11/30 - Train Acc: 0.6962, Loss: 3.7455 - Val Acc: 0.6784
  Epoch 12/30 - Train Acc: 0.6962, Loss: 3.6666 - Val Acc: 0.6784
  Epoch 13/30 - Train Acc: 0.6962

[I 2025-05-04 11:01:15,797] Trial 19 finished with value: 0.6784481224417156 and parameters: {'embedding_dim': 79, 'hidden_dim': 239, 'dropout': 0.36982765507320664, 'lr': 0.00013294594104599347, 'batch_size': 32}. Best is trial 11 with value: 0.7493860117458623.


  Epoch 30/30 - Train Acc: 0.6962, Loss: 2.3777 - Val Acc: 0.6784
Trial 20 Done: Val Accuracy = 0.6784, Final Loss = 2.3777

Best Trial Hyperparameters:
  embedding_dim: 99
  hidden_dim: 212
  dropout: 0.49925576681424444
  lr: 0.004737349647790637
  batch_size: 16

Final Training with Best Hyperparameters...
  Epoch 1/30 - Train Acc: 0.6529, Loss: 2.7485 - Val Acc: 0.6784
  Epoch 2/30 - Train Acc: 0.6962, Loss: 1.4366 - Val Acc: 0.6784
  Epoch 3/30 - Train Acc: 0.6962, Loss: 1.3562 - Val Acc: 0.6784
  Epoch 4/30 - Train Acc: 0.6962, Loss: 1.2936 - Val Acc: 0.6783
  Epoch 5/30 - Train Acc: 0.6956, Loss: 1.2223 - Val Acc: 0.6760
  Epoch 6/30 - Train Acc: 0.7057, Loss: 1.1405 - Val Acc: 0.6912
  Epoch 7/30 - Train Acc: 0.7346, Loss: 1.0428 - Val Acc: 0.6962
  Epoch 8/30 - Train Acc: 0.7694, Loss: 0.9185 - Val Acc: 0.7053
  Epoch 9/30 - Train Acc: 0.8031, Loss: 0.7945 - Val Acc: 0.7117
  Epoch 10/30 - Train Acc: 0.8304, Loss: 0.6917 - Val Acc: 0.7269
  Epoch 11/30 - Train Acc: 0.8488, Los

In [27]:
import pickle
import os

# Create a directory if it doesn't exist
os.makedirs('./model', exist_ok=True)

# Save token2idx_a_multi
with open('./model/token2idx_a_multi.pkl', 'wb') as f:
    pickle.dump(token2idx_a_multi, f)

# Save label2idx_a_multi
with open('./model/label2idx_a_multi.pkl', 'wb') as f:
    pickle.dump(label2idx_a_multi, f)

print("Saved token2idx_a_multi and label2idx_a_multi to './model/'")


Saved token2idx_a_multi and label2idx_a_multi to './model/'


In [28]:
df.encoded.head()

0    ([33882, 35887, 24163, 830, 10594, 32539, 3554...
1    ([36588, 11186, 19350, 10737, 18230, 2163, 439...
2    ([34551, 5132, 22017, 16906, 13196, 19054, 216...
3    ([22498, 19692, 22660, 29501, 33075, 2192, 294...
4    ([20079, 23821, 22660, 16903, 2756, 18104, 542...
Name: encoded, dtype: object

In [29]:
print(model)

BiLSTMAttention(
  (embedding): Embedding(38338, 99, padding_idx=0)
  (bilstm): LSTM(99, 212, batch_first=True, bidirectional=True)
  (layer_norm): LayerNorm((424,), eps=1e-05, elementwise_affine=True)
  (attn_w): Linear(in_features=424, out_features=212, bias=True)
  (attn_context): Linear(in_features=212, out_features=1, bias=False)
  (dropout): Dropout(p=0.49925576681424444, inplace=False)
  (fc): Linear(in_features=424, out_features=107, bias=True)
)


In [30]:
from sklearn.metrics import classification_report

# Get only labels present in your predictions (excluding 0 for <PAD>)
used_label_ids = sorted(set(all_labels))  # this includes only labels in the test set
used_label_ids = [x for x in used_label_ids if x != 0]  # remove padding

# Now get corresponding label names
target_names = [idx2label[i] for i in used_label_ids]

# Print Report
print("Test Classification Report:")
print(classification_report(all_labels, all_preds, labels=used_label_ids, target_names=target_names))


Test Classification Report:
                                      precision    recall  f1-score   support

             I-NATURAL_EVENT.CYCLONE       0.50      0.38      0.43         8
     B-MAN_MADE_EVENT.NORMAL_BOMBING       0.22      0.13      0.17        15
                          B-TIME-ARG       0.62      0.59      0.60       255
              B-MAN_MADE_EVENT.RIOTS       0.00      0.00      0.00         4
                     B-EPICENTRE-ARG       0.25      0.12      0.17         8
         I-NATURAL_EVENT.HAIL_STORMS       1.00      0.17      0.29         6
   I-MAN_MADE_EVENT.SURGICAL_STRIKES       1.00      0.50      0.67         2
         B-NATURAL_EVENT.FOREST_FIRE       0.38      0.14      0.21        21
   B-MAN_MADE_EVENT.TERRORIST_ATTACK       0.82      0.53      0.64        17
           I-NATURAL_EVENT.COLD_WAVE       1.00      0.50      0.67         2
                         B-SPEED-ARG       0.67      0.50      0.57         8
                     I-MAGNITUDE-AR

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [31]:
# Flatten all tags into one big list
all_labels = [label for row in df['tags'] for label in row]

# Get the unique labels
unique_labels = set(all_labels)

# Print count and the unique labels
print("Number of unique labels:", len(unique_labels))
print("Unique labels:", unique_labels)


Number of unique labels: 106
Unique labels: {'I-NATURAL_EVENT.CYCLONE', 'B-MAN_MADE_EVENT.NORMAL_BOMBING', 'B-TIME-ARG', 'B-MAN_MADE_EVENT.RIOTS', 'B-EPICENTRE-ARG', 'B-NATURAL_EVENT.SEISMIC_RISK', 'I-NATURAL_EVENT.HAIL_STORMS', 'I-MAN_MADE_EVENT.ARMED_CONFLICTS', 'I-MAN_MADE_EVENT.SURGICAL_STRIKES', 'B-NATURAL_EVENT.FOREST_FIRE', 'B-MAN_MADE_EVENT.TERRORIST_ATTACK', 'I-NATURAL_EVENT.COLD_WAVE', 'B-SPEED-ARG', 'I-NATURAL_EVENT.AVALANCHES', 'I-MAGNITUDE-ARG', 'B-NATURAL_EVENT.HEAVY_RAINFALL', 'B-MAN_MADE_EVENT.SHOOT_OUT', 'I-SPEED-ARG', 'I-MAN_MADE_EVENT.TRAIN_COLLISION', 'I-NATURAL_EVENT.HEAT_WAVE', 'I-MAN_MADE_EVENT.AVIATION_HAZARD', 'B-MAN_MADE_EVENT.ARMED_CONFLICTS', 'B-CASUALTIES-ARG', 'B-MAN_MADE_EVENT.CRIME', 'I-MAN_MADE_EVENT.MISCELLANEOUS', 'I-AFTER_EFFECTS-ARG', 'B-NATURAL_EVENT.FAMINE', 'B-REASON-ARG', 'I-MAN_MADE_EVENT.SUICIDE_ATTACK', 'I-NATURAL_EVENT.BLIZZARD', 'B-NATURAL_EVENT.LAND_SLIDE', 'I-NATURAL_EVENT.STORM', 'B-NATURAL_EVENT.CYCLONE', 'I-INTENSITY-ARG', 'I-EPICENTRE

In [32]:
def predict_tags(model, sentence_tokens, token2idx, idx2label, max_len=150):
    model.eval()
    model.to(device)

    # Encode input
    token_ids = [token2idx.get(token, token2idx["<PAD>"]) for token in sentence_tokens]
    pad_len = max_len - len(token_ids)
    token_ids += [token2idx["<PAD>"]] * pad_len
    token_ids = token_ids[:max_len]

    input_tensor = torch.tensor([token_ids]).to(device)

    with torch.no_grad():
        outputs = model(input_tensor)
        preds = torch.argmax(outputs, dim=-1).squeeze(0).cpu().numpy()

    # Convert predicted indices to labels and remove padding
    predicted_tags = [idx2label[idx] for idx in preds[:len(sentence_tokens)]]
    return list(zip(sentence_tokens, predicted_tags))


In [33]:
def predict_from_sentence(model, sentence, token2idx, idx2label, max_len=50):
    model.eval()
    model.to(device)

    # Tokenize by space (like in your train.json)
    tokens = sentence.strip().split()

    # Encode
    token_ids = [token2idx.get(token, token2idx["<PAD>"]) for token in tokens]
    pad_len = max_len - len(token_ids)
    token_ids += [token2idx["<PAD>"]] * pad_len
    token_ids = token_ids[:max_len]

    input_tensor = torch.tensor([token_ids]).to(device)

    with torch.no_grad():
        outputs = model(input_tensor)
        preds = torch.argmax(outputs, dim=-1).squeeze(0).cpu().numpy()

    predicted_tags = [idx2label[idx] for idx in preds[:len(tokens)]]
    return list(zip(tokens, predicted_tags))


In [34]:
sample_sentence =  """అగ్ని ప్రమాదం భవనాన్ని భస్మం చేసింది. చనిపోయిన వారి సంఖ్య పెరుగుతోంది."""

predictions = predict_from_sentence(model, sample_sentence, token2idx, idx2label)

for token, tag in predictions:
    print(f"{token:10} -> {tag}")


అగ్ని      -> B-MAN_MADE_EVENT.FIRE
ప్రమాదం    -> I-MAN_MADE_EVENT.FIRE
భవనాన్ని   -> O
భస్మం      -> O
చేసింది.   -> O
చనిపోయిన   -> B-CASUALTIES-ARG
వారి       -> O
సంఖ్య      -> I-CASUALTIES-ARG
పెరుగుతోంది. -> I-CASUALTIES-ARG


## without autotune

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd
import numpy as np
import os
import pickle

# Step 1: Check GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Step 2: Load dataset
df = pd.read_json("./data/processed/te/train.json", orient="records", lines=True)

# Step 3: Token to Index mapping
token2idx_multi = {token: idx for idx, token in enumerate(set(token for row in df.tokens for token in row), start=1)}
token2idx_multi["<PAD>"] = 0

# Step 4: Label to Index mapping
label2idx_multi = {label: idx for idx, label in enumerate(set(label for row in df.tags for label in row), start=1)}
label2idx_multi["<PAD>"] = 0

# Save the mappings
os.makedirs('./model', exist_ok=True)
with open('./model/token2idx_multi.pkl', 'wb') as f:
    pickle.dump(token2idx_multi, f)
with open('./model/label2idx_multi.pkl', 'wb') as f:
    pickle.dump(label2idx_multi, f)

# Step 5: Convert tokens and labels to indices
def encode_data(tokens, tags):
    token_ids = [token2idx_multi[token] for token in tokens]
    label_ids = [label2idx_multi[label] for label in tags]
    return token_ids, label_ids

df["encoded"] = df.apply(lambda row: encode_data(row.tokens, row.tags), axis=1)

# Step 6: Dataset class
class NewsDataset(Dataset):
    def __init__(self, df, max_len=150):
        self.data = df
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        tokens, labels = self.data.iloc[idx]["encoded"]
        tokens = tokens[:self.max_len] + [0] * (self.max_len - len(tokens))
        labels = labels[:self.max_len] + [0] * (self.max_len - len(labels))
        return torch.tensor(tokens, dtype=torch.long), torch.tensor(labels, dtype=torch.long)

# Step 7: Split into Train, Validation, Test
dataset = NewsDataset(df)
train_size = int(0.8 * len(dataset))
valid_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - valid_size

train_dataset, valid_dataset, test_dataset = random_split(dataset, [train_size, valid_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=16, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Step 8: Define BiLSTM model
class BiLSTM(nn.Module):
    def __init__(self, vocab_size, tagset_size, embedding_dim=128, hidden_dim=256):
        super(BiLSTM, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.dropout = nn.Dropout(0.3)
        self.norm = nn.LayerNorm(hidden_dim * 2)
        self.fc = nn.Linear(hidden_dim * 2, tagset_size)

    def forward(self, x):
        x = self.embedding(x)
        lstm_out, _ = self.lstm(x)
        lstm_out = self.dropout(lstm_out)
        lstm_out = self.norm(lstm_out)
        logits = self.fc(lstm_out)
        return logits

# Step 9: Loss, Optimizer, Scheduler
vocab_size = len(token2idx_multi)
tagset_size = len(label2idx_multi)

model = BiLSTM(vocab_size, tagset_size).to(device)
criterion = nn.CrossEntropyLoss(ignore_index=0).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0005)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=2, factor=0.5)

# Step 10: Evaluation function
def evaluate_model(model, data_loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for tokens, labels in data_loader:
            tokens, labels = tokens.to(device), labels.to(device)
            outputs = model(tokens)
            predictions = torch.argmax(outputs, dim=2)
            mask = labels != 0
            correct += (predictions[mask] == labels[mask]).sum().item()
            total += mask.sum().item()
    return correct / total if total > 0 else 0

# Step 11: Training loop with Early Stopping
def train_model(model, train_loader, valid_loader, criterion, optimizer, scheduler, epochs=50, patience=5):
    best_val_accuracy = 0
    patience_counter = 0
    best_model_state = None

    for epoch in range(epochs):
        model.train()
        total_loss, correct, total = 0, 0, 0

        for tokens, labels in train_loader:
            tokens, labels = tokens.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(tokens)
            outputs = outputs.permute(0, 2, 1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

            predictions = torch.argmax(outputs, dim=1)
            mask = labels != 0
            correct += (predictions[mask] == labels[mask]).sum().item()
            total += mask.sum().item()

        train_accuracy = correct / total if total > 0 else 0
        val_accuracy = evaluate_model(model, valid_loader)
        scheduler.step(val_accuracy)

        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}, Train Acc: {train_accuracy:.4f}, Val Acc: {val_accuracy:.4f}")

        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            best_model_state = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            print(f"EarlyStopping patience: {patience_counter}/{patience}")
            if patience_counter >= patience:
                print("Early stopping triggered.")
                break

    if best_model_state:
        os.makedirs("./model", exist_ok=True)
        torch.save(best_model_state, "./model/bilstm_multi.pt")
        print("Best model saved to ./model/bilstm_multi.pt")

# Step 12: Train the model
train_model(model, train_loader, valid_loader, criterion, optimizer, scheduler, epochs=50, patience=7)

# Step 13: Final Test Accuracy
test_accuracy = evaluate_model(model, test_loader)
print(f"Test Accuracy: {test_accuracy:.4f}")


Using device: cuda
Epoch 1/50, Loss: 179.6452, Train Acc: 0.6736, Val Acc: 0.7126
Epoch 2/50, Loss: 124.5848, Train Acc: 0.7264, Val Acc: 0.7273
Epoch 3/50, Loss: 108.8451, Train Acc: 0.7435, Val Acc: 0.7398
Epoch 4/50, Loss: 96.6145, Train Acc: 0.7617, Val Acc: 0.7436
Epoch 5/50, Loss: 86.8606, Train Acc: 0.7779, Val Acc: 0.7506
Epoch 6/50, Loss: 78.1002, Train Acc: 0.7958, Val Acc: 0.7573
Epoch 7/50, Loss: 70.6709, Train Acc: 0.8109, Val Acc: 0.7610
Epoch 8/50, Loss: 62.5519, Train Acc: 0.8297, Val Acc: 0.7620
Epoch 9/50, Loss: 56.7361, Train Acc: 0.8437, Val Acc: 0.7625
Epoch 10/50, Loss: 50.4117, Train Acc: 0.8604, Val Acc: 0.7649
Epoch 11/50, Loss: 45.2039, Train Acc: 0.8724, Val Acc: 0.7661
Epoch 12/50, Loss: 40.8167, Train Acc: 0.8850, Val Acc: 0.7625
EarlyStopping patience: 1/7
Epoch 13/50, Loss: 36.8348, Train Acc: 0.8953, Val Acc: 0.7670
Epoch 14/50, Loss: 32.8762, Train Acc: 0.9067, Val Acc: 0.7675
Epoch 15/50, Loss: 30.2474, Train Acc: 0.9130, Val Acc: 0.7711
Epoch 16/50, L

In [36]:
def predict_paragraph(model, paragraph, max_len=100):
    model.eval()
    
    # Tokenize paragraph into words (assuming space-based for now)
    sentences = paragraph.strip().split(" ")  # Replace with better tokenizer if needed

    # Encode each token
    token_ids = [token2idx.get(token, 0) for token in sentences]
    
    # If longer than max_len, split into chunks of max_len
    chunks = [token_ids[i:i+max_len] for i in range(0, len(token_ids), max_len)]
    
    predicted_labels = []
    
    # Process in batches
    for chunk in chunks:
        actual_len = len(chunk)  # Store actual length for filtering
        chunk = chunk + [0] * (max_len - len(chunk))  # Pad to max_len

        input_tensor = torch.tensor([chunk], dtype=torch.long).to(device)

        with torch.no_grad():
            output = model(input_tensor)  # Expected shape: (1, seq_len, num_classes)
            predictions = torch.argmax(output, dim=-1).cpu().numpy()[0]

        # Reverse mapping index to labels, keeping only valid tokens
        idx2label = {v: k for k, v in label2idx.items()}
        predicted_labels.extend([idx2label[idx] for idx in predictions[:actual_len]])

    return predicted_labels


In [37]:
example_paragraph = """అగ్ని ప్రమాదం భవనాన్ని భస్మం చేసింది. చనిపోయిన వారి సంఖ్య పెరుగుతోంది."""
predicted_labels = predict_paragraph(model, example_paragraph)
print("Predicted Labels:", predicted_labels)


Predicted Labels: ['B-MAN_MADE_EVENT.FIRE', 'I-MAN_MADE_EVENT.FIRE', 'O', 'O', 'O', 'B-CASUALTIES-ARG', 'I-CASUALTIES-ARG', 'I-CASUALTIES-ARG', 'I-CASUALTIES-ARG']


In [38]:
example_paragraph = """ఢిల్లీ లో ప్రమాదం జరిగింది మృతుల సంఖ్య పెరిగింది"""
predicted_labels = predict_paragraph(model, example_paragraph)
print("Predicted Labels:", predicted_labels)


Predicted Labels: ['O', 'O', 'B-MAN_MADE_EVENT.ACCIDENTS', 'O', 'O', 'I-CASUALTIES-ARG', 'I-CASUALTIES-ARG']
