In [9]:
! pip install optuna

Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.5.0-py3-none-any.whl (400 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/400.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m33.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.5.0


Dynamic FFnn

In [10]:
import argparse
import json
import math
import os
import sys
import random
from pathlib import Path
from typing import List, Tuple, Dict

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import optuna
from optuna.pruners import MedianPruner

In [68]:
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

NUM_CLASSES = 5
UNK = "<UNK>"

DEFAULT_TRAIN = "/content/training.json"
DEFAULT_VALID = "/content/validation.json"
DEFAULT_TEST  = "/content/test.json"
DEFAULT_OUT = "/content/drive/MyDrive/vvkr_fnn"




**Running epochs in GPU**

In [55]:
def get_device():
    return torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [56]:
def read_json_any(path: str):
    p = Path(path)
    text = p.read_text(encoding="utf-8").strip()
    if not text:
        return []
    if text[0] == "{":
        return [json.loads(line) for line in text.splitlines() if line.strip()]
    return json.loads(text)

In [57]:
def tokenize(s: str) -> List[str]:
    return s.split()

In [58]:
def make_pairs(raw):
    pairs = []
    for e in raw:
        toks = tokenize(e["text"])
        y = int(e["stars"]) - 1
        pairs.append((toks, y))
    return pairs

In [59]:
def build_vocab(train_pairs: List[Tuple[List[str], int]], min_freq: int = 1) -> Dict[str, int]:
    from collections import Counter
    c = Counter()
    for toks, _ in train_pairs:
        c.update(toks)
    itos = [w for w, f in c.items() if f >= min_freq]
    stoi = {w: i for i, w in enumerate(itos)}
    stoi.setdefault(UNK, len(stoi))
    return stoi

In [60]:
def bow_vectorize(toks: List[str], vocab: Dict[str, int]) -> np.ndarray:
    vec = np.zeros(len(vocab), dtype=np.float32)
    unk_idx = vocab[UNK]
    for t in toks:
        idx = vocab.get(t, unk_idx)
        vec[idx] += 1.0
    return vec

using Pytorch dataset n dataloader

In [61]:
class BOWDataset(Dataset):
    def __init__(self, pairs: List[Tuple[List[str], int]], vocab: Dict[str, int]):
        self.pairs = pairs
        self.vocab = vocab

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx: int):
        toks, y = self.pairs[idx]
        x = bow_vectorize(toks, self.vocab)
        return torch.from_numpy(x), torch.tensor(y, dtype=torch.long)

**using sequential containers**

In [62]:
class FFNN(nn.Module):
    def __init__(self, input_dim: int, layer_dims: List[int], dropout: float):
        super().__init__()
        layers = []
        in_dim = input_dim
        for h in layer_dims:
            layers += [nn.Linear(in_dim, h), nn.ReLU(), nn.Dropout(dropout)]
            in_dim = h
        layers.append(nn.Linear(in_dim, NUM_CLASSES))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)

In [63]:
def run_epoch(model, loader, opt, device):
    model.train()
    total_loss, total, correct = 0.0, 0, 0
    crit = nn.CrossEntropyLoss()
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        opt.zero_grad()
        logits = model(xb)
        loss = crit(logits, yb)
        loss.backward()
        opt.step()
        total_loss += loss.item() * yb.size(0)
        correct += (logits.argmax(-1) == yb).sum().item()
        total += yb.size(0)
    return total_loss / total, correct / total

In [64]:
@torch.no_grad()
def evaluate(model, loader, device):
    model.eval()
    total_loss, total, correct = 0.0, 0, 0
    crit = nn.CrossEntropyLoss()
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        logits = model(xb)
        loss = crit(logits, yb)
        total_loss += loss.item() * yb.size(0)
        correct += (logits.argmax(-1) == yb).sum().item()
        total += yb.size(0)
    return total_loss / total, correct / total

In [65]:
@torch.no_grad()
def predict(model, loader, device, out_path):
    model.eval()
    preds = []
    for xb, _ in loader:
        xb = xb.to(device)
        preds.extend((model(xb).argmax(-1).cpu() + 1).tolist())
    Path(out_path).write_text("\n".join(map(str, preds)))
    print(f"[done] Predictions written to {out_path}")


#Optuna is exploring hyperparameters like:

learning rate,
hidden layer sizes
dropout
optimizer
batch size
patience

In [66]:
def objective(trial, train_pairs, valid_pairs, vocab, args, device):

    depth = trial.suggest_int("depth", 1, 4)
    hidden_choices = [64, 128, 256, 512, 768]
    layer_dims = [trial.suggest_categorical(f"h{i+1}", hidden_choices) for i in range(depth)]
    dropout = trial.suggest_float("dropout", 0.1, 0.6)


    lr = trial.suggest_float("lr", 1e-4, 5e-3, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-2, log=True)
    opt_name = trial.suggest_categorical("optimizer", ["adamw", "adam", "sgd", "rmsprop"])
    batch_size = trial.suggest_categorical("batch", [32, 64, 128, 256])

    patience = trial.suggest_int("patience", 2, 5)

    use_pin = (device.type == "cuda")
    train_ds = BOWDataset(train_pairs, vocab)
    valid_ds = BOWDataset(valid_pairs, vocab)
    train_ld = DataLoader(train_ds, batch_size=batch_size, shuffle=True,  pin_memory=use_pin)
    valid_ld = DataLoader(valid_ds, batch_size=batch_size, shuffle=False, pin_memory=use_pin)


    model = FFNN(input_dim=len(vocab), layer_dims=layer_dims, dropout=dropout).to(device)

    if opt_name == "adamw":
        optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    elif opt_name == "adam":
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    elif opt_name == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)
    else:  # rmsprop
        optimizer = optim.RMSprop(model.parameters(), lr=lr, weight_decay=weight_decay)

    best_val = 0.0
    wait = 0
    for ep in range(args.epochs):
        run_epoch(model, train_ld, optimizer, device)
        _, val_acc = evaluate(model, valid_ld, device)

        # report to Optuna includs pruning
        trial.report(val_acc, ep)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

        if val_acc > best_val:
            best_val = val_acc
            wait = 0
        else:
            wait += 1
            if wait >= patience:
                break

    return best_val


In [69]:
def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("--train", default=DEFAULT_TRAIN)
    ap.add_argument("--valid", default=DEFAULT_VALID)
    ap.add_argument("--test",  default=DEFAULT_TEST)
    ap.add_argument("--epochs", type=int, default=5)
    ap.add_argument("--n_trials", type=int, default=20)
    ap.add_argument("--out_dir", default=DEFAULT_OUT)

    argv = [] if ("ipykernel" in sys.modules or "COLAB_RELEASE_TAG" in os.environ) else None
    args = ap.parse_args(argv)

    device = get_device()
    print(f"[device] {device} (cuda={torch.cuda.is_available()})")

    Path(args.out_dir).mkdir(parents=True, exist_ok=True)
    train_raw = read_json_any(args.train)
    valid_raw = read_json_any(args.valid)
    test_raw  = read_json_any(args.test)
    train_pairs, valid_pairs, test_pairs = make_pairs(train_raw), make_pairs(valid_raw), make_pairs(test_raw)

    vocab = build_vocab(train_pairs)
    print(f"[vocab] size: {len(vocab)} (includes UNK)")

    study = optuna.create_study(direction="maximize", pruner=MedianPruner(n_warmup_steps=2))
    study.optimize(lambda tr: objective(tr, train_pairs, valid_pairs, vocab, args, device), n_trials=args.n_trials)

    best = study.best_params
    print("\n[optuna] best val acc:", study.best_value)
    print("[optuna] best params:", best)

    # Save Optuna results
    out_dir = Path(args.out_dir)
    (out_dir / "optuna_best_params.json").write_text(json.dumps(best, indent=2))
    study.trials_dataframe().to_csv(out_dir / "optuna_trials.csv", index=False)

    # Final model with best config
    depth = best["depth"]
    layer_dims = [best[f"h{i+1}"] for i in range(depth)]
    dropout = best["dropout"]
    lr = best["lr"]
    batch_size = best["batch"]

    print(f"[final config] depth={depth}, layers={layer_dims}, dropout={dropout}, lr={lr}, batch={batch_size}")

    all_train = train_pairs + valid_pairs
    train_ds = BOWDataset(all_train, vocab)
    test_ds  = BOWDataset(test_pairs, vocab)
    train_ld = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    test_ld  = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

    model = FFNN(len(vocab), layer_dims, dropout).to(device)
    opt = optim.AdamW(model.parameters(), lr=lr)

    best_state, best_acc = None, -math.inf
    for ep in range(args.epochs):
        tr_loss, tr_acc = run_epoch(model, train_ld, opt, device)
        print(f"[final][epoch {ep+1}/{args.epochs}] loss={tr_loss:.4f} acc={tr_acc:.4f}")
        if tr_acc > best_acc:
            best_acc = tr_acc
            best_state = {k: v.detach().cpu() for k, v in model.state_dict().items()}

    if best_state: model.load_state_dict(best_state)

    model_path = out_dir / "best_ffnn_bow.pt"
    torch.save(model.state_dict(), model_path)
    preds_path = out_dir / "test.out"
    predict(model, test_ld, device, preds_path)
    print(f"[done] Model saved to {model_path}")
    print(f"[done] Predictions saved to {preds_path}")
    print(f"[done] Optuna logs: {out_dir/'optuna_best_params.json'} and {out_dir/'optuna_trials.csv'}")





here the optuna stops the entire trail if that underperform compared to other trials

if the model hit the plateaued then the patience will skip the epochs

here the patience 2,3,5 ..etc are like skipping epochs after 2,3..etc

In [70]:
if __name__ == "__main__":
  main()

[device] cuda (cuda=True)


[I 2025-10-26 18:00:10,377] A new study created in memory with name: no-name-c720b0da-1248-48fe-a2f9-719c00993f92


[vocab] size: 65667 (includes UNK)


[I 2025-10-26 18:00:40,243] Trial 0 finished with value: 0.6325 and parameters: {'depth': 4, 'h1': 768, 'h2': 512, 'h3': 128, 'h4': 768, 'dropout': 0.4506802020741487, 'lr': 0.004002204900748412, 'weight_decay': 8.074185411738528e-06, 'optimizer': 'adam', 'batch': 32, 'patience': 3}. Best is trial 0 with value: 0.6325.
[I 2025-10-26 18:00:54,903] Trial 1 finished with value: 0.61375 and parameters: {'depth': 4, 'h1': 128, 'h2': 512, 'h3': 512, 'h4': 512, 'dropout': 0.14983006931148932, 'lr': 0.00023896830822005952, 'weight_decay': 0.0003420367218940379, 'optimizer': 'rmsprop', 'batch': 64, 'patience': 3}. Best is trial 0 with value: 0.6325.
[I 2025-10-26 18:01:09,239] Trial 2 finished with value: 0.535 and parameters: {'depth': 2, 'h1': 128, 'h2': 64, 'dropout': 0.4536060881879488, 'lr': 0.0028366557972284503, 'weight_decay': 1.8910492875298816e-05, 'optimizer': 'sgd', 'batch': 64, 'patience': 4}. Best is trial 0 with value: 0.6325.
[I 2025-10-26 18:01:20,095] Trial 3 finished with val


[optuna] best val acc: 0.6475
[optuna] best params: {'depth': 1, 'h1': 512, 'dropout': 0.1171865130386747, 'lr': 0.0023006004363684803, 'weight_decay': 0.0004835154688741754, 'optimizer': 'adamw', 'batch': 256, 'patience': 5}
[final config] depth=1, layers=[512], dropout=0.1171865130386747, lr=0.0023006004363684803, batch=256
[final][epoch 1/5] loss=0.9462 acc=0.5749
[final][epoch 2/5] loss=0.3949 acc=0.8744
[final][epoch 3/5] loss=0.1473 acc=0.9652
[final][epoch 4/5] loss=0.0466 acc=0.9933
[final][epoch 5/5] loss=0.0164 acc=0.9994
[done] Predictions written to /content/drive/MyDrive/vvkr_fnn/test.out
[done] Model saved to /content/drive/MyDrive/vvkr_fnn/best_ffnn_bow.pt
[done] Predictions saved to /content/drive/MyDrive/vvkr_fnn/test.out
[done] Optuna logs: /content/drive/MyDrive/vvkr_fnn/optuna_best_params.json and /content/drive/MyDrive/vvkr_fnn/optuna_trials.csv


In [72]:
import pandas as pd
files=pd.read_csv("/content/drive/MyDrive/vvkr_fnn/optuna_trials.csv")
print(files)

    number    value              datetime_start           datetime_complete  \
0        0  0.63250  2025-10-26 18:00:10.377981  2025-10-26 18:00:40.243777   
1        1  0.61375  2025-10-26 18:00:40.244856  2025-10-26 18:00:54.903338   
2        2  0.53500  2025-10-26 18:00:54.904194  2025-10-26 18:01:09.239576   
3        3  0.62500  2025-10-26 18:01:09.240470  2025-10-26 18:01:20.095308   
4        4  0.64750  2025-10-26 18:01:20.096629  2025-10-26 18:01:41.705849   
5        5  0.62750  2025-10-26 18:01:41.706795  2025-10-26 18:02:03.525387   
6        6  0.40000  2025-10-26 18:02:03.526242  2025-10-26 18:02:13.760085   
7        7  0.60625  2025-10-26 18:02:13.760881  2025-10-26 18:02:21.784248   
8        8  0.51875  2025-10-26 18:02:21.784991  2025-10-26 18:02:35.132997   
9        9  0.64125  2025-10-26 18:02:35.133771  2025-10-26 18:02:58.951913   
10      10  0.63125  2025-10-26 18:02:58.952739  2025-10-26 18:03:18.531834   
11      11  0.62125  2025-10-26 18:03:18.532737  202

In [73]:
file_path = "/content/drive/MyDrive/vvkr_fnn/test.out"

with open(file_path, "r") as f:
    lines = f.read().strip().splitlines()

print(f" Loaded {len(lines)} predictions.")
print("First 10 predictions:", lines[:50])


 Loaded 800 predictions.
First 10 predictions: ['2', '3', '3', '3', '3', '2', '3', '3', '2', '1', '2', '1', '1', '2', '2', '3', '2', '3', '3', '3', '1', '2', '3', '2', '1', '2', '2', '2', '3', '3', '3', '2', '1', '2', '3', '3', '3', '2', '2', '3', '3', '3', '2', '3', '2', '2', '3', '2', '2', '3']
