### **Import Required Libraries**


In [None]:
import os
import datetime
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
from tqdm.notebook import tqdm
import logging
import pandas as pd
from pathlib import Path
import torchvision.transforms as transforms
from torchvision.models import resnet50, ResNet50_Weights
from sklearn.metrics import classification_report
from PIL import Image
from torch.amp import GradScaler, autocast
import optuna
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights
from sklearn.metrics import classification_report

# 1) Logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.FileHandler('training_classification.log')
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.addHandler(logging.StreamHandler())

print("Torch version:", torch.__version__)

Torch version: 2.6.0+cu126


In [13]:
# Parse old file & build 'tested_params' set
import ast

def load_old_trials(file_path):
    """
    Reads lines in format:
      Trial 0 | Val Loss: 0.977... | Params: {...}
    Returns a list of (trial_number, val_loss, param_dict).
    Skips lines with val_loss=None or invalid format.
    """
    import ast

    old_trials = []  # We'll store (trial_num, val_loss, params_dict)

    with open(file_path, "r") as f:
        for line in f:
            line = line.strip()
            # Must start with "Trial "
            if not line.startswith("Trial "):
                continue

            # We expect exactly 3 parts => "Trial X", "Val Loss: Y", "Params: Z"
            parts = line.split(" | ", maxsplit=2)
            if len(parts) != 3:
                print(f"[SKIP] Unexpected format: {line}")
                continue

            # 1) parse "Trial X"
            trial_str = parts[0].strip()  # e.g. "Trial 0"
            # e.g. trial_str.split(" ")[1] => "0"
            try:
                trial_num = int(trial_str.split(" ")[1])
            except:
                print(f"[SKIP] Could not parse trial number: {trial_str}")
                continue

            # 2) parse "Val Loss: Y"
            val_part = parts[1].strip()
            if not val_part.startswith("Val Loss: "):
                print(f"[SKIP] val_part doesn't start with 'Val Loss: ': {val_part}")
                continue
            val_str = val_part.split("Val Loss: ", 1)[1]  # e.g. "0.977..." or "None"
            if val_str == "None":
                # Means pruned or failed => skip
                continue
            else:
                try:
                    val_loss = float(val_str)
                except:
                    print(f"[SKIP] could not parse val_loss as float: {val_str}")
                    continue

            # 3) parse "Params: {...}"
            params_part = parts[2].strip()
            if not params_part.startswith("Params: "):
                print(f"[SKIP] params_part doesn't start with 'Params: ': {params_part}")
                continue
            param_str = params_part.split("Params: ", 1)[1]
            try:
                param_dict = ast.literal_eval(param_str)
                if not isinstance(param_dict, dict):
                    print(f"[SKIP] Param part not a dict: {param_dict}")
                    continue
            except Exception as e:
                print(f"[SKIP] Could not parse param dict: {param_str} => {e}")
                continue

            # Append the 3-element tuple
            old_trials.append((trial_num, val_loss, param_dict))

    return old_trials


TESTED_PARAMS = set()
try:
    TESTED_PARAMS = load_old_trials("optuna_trials_backup.txt")
    print(f"[INFO] Found {len(TESTED_PARAMS)} tested combos from the old file.")
except FileNotFoundError:
    print("[WARN] No 'optuna_trials_backup.txt' found. Proceeding with empty tested set.")
except Exception as e:
    print(f"[WARN] Could not parse old backup file: {e}")

[WARN] No 'optuna_trials_backup.txt' found. Proceeding with empty tested set.


In [2]:
# paths 
DATASET_ROOT = Path("C:/Users/abhis/Downloads/Documents/Learner Engagement Project/data/DAiSEE/DataSet").resolve()
FRAMES_ROOT  = Path("C:/Users/abhis/Downloads/Documents/Learner Engagement Project/data/DAiSEE/ExtractedFrames").resolve()

#Helper function
def get_csv_clip_id(video_stem: str) -> str:
    """
    Maps old filenames to new ones if needed (like 110001 -> 202614).
    """
    base = video_stem.strip()
    if base.startswith("110001"):
        base = base.replace("110001", "202614", 1)
    return base

#numercial sort key for frames
import re
def numeric_sort_key(path):
    match = re.search(r'frame_(\d+)\.jpg', path.name)
    if match:
        return int(match.group(1))
    return 999999

### Define the DAiSEEDataset Class

This class loads video sequences and pairs them with engagement metrics.

- **Features**: Uses precomputed features for faster training.
- **Error Handling**: Skips missing video directories and logs errors.


In [3]:
class DAiSEEDataset(Dataset):
    def __init__(self, root, csv_path, transform=None, seq_len=15):
        self.root = Path(root)
        self.transform = transform
        self.seq_len = seq_len
        self.video_paths = []
        self.labels = []
        self.missing_videos = 0
        self.total_videos = 0

        df = pd.read_csv(csv_path, dtype=str)
        df.columns = df.columns.str.strip()
        split = Path(csv_path).stem.replace("Labels", "").strip()

        for idx, row in df.iterrows():
            self.total_videos += 1
            try:
                clip_id = row['ClipID'].strip()
                filename = clip_id.split('/')[-1] if '/' in clip_id else clip_id
                video_stem = filename.rsplit('.', 1)[0]
                mapped_id = get_csv_clip_id(video_stem)

                video_dir = self.root / split / mapped_id
                if not video_dir.exists():
                    self.missing_videos += 1
                    continue
                
                frames = list(video_dir.glob('frame_*.jpg'))
                if len(frames) < self.seq_len:
                    self.missing_videos += 1
                    continue

                boredom    = int(row['Boredom'])
                engagement = int(row['Engagement'])
                confusion  = int(row['Confusion'])
                frustrate  = int(row['Frustration'])

                self.video_paths.append(video_dir)
                self.labels.append([boredom, engagement, confusion, frustrate])
            except Exception as e:
                print(f"Error processing row {idx}: {e}")

        if not self.video_paths:
            raise ValueError("No valid videos found for classification dataset.")

    def __len__(self):
        return len(self.video_paths)

    def __getitem__(self, idx):
        video_dir = self.video_paths[idx]
        label_list = self.labels[idx]
        frames_list = sorted(video_dir.glob('frame_*.jpg'), key=numeric_sort_key)[:self.seq_len]

        frame_tensors = []
        for path in frames_list:
            img = Image.open(path).convert("RGB")
            if self.transform:
                img = self.transform(img)
            else:
                from torchvision import transforms as T
                img = T.ToTensor()(img)
            frame_tensors.append(img)

        sequence = torch.stack(frame_tensors)            # [seq_len, 3, H, W]
        label_tensor = torch.tensor(label_list, dtype=torch.long)  # [4]
        return sequence, label_tensor


### **Define the CNN-LSTM Model**

**CNN-LSTM Model**:

1.  ResNet50 extracts features from each frame.
2.  LSTM processes temporal dependencies in sequences.
3.  Outputs four-dimensional regression values (engagement metrics).


In [4]:
class CNN_LSTM_Classification(nn.Module):
    def __init__(self, freeze_until='layer3'):
        """
        We'll produce 16 logits total => 4 states × 4 classes each = 16.
        Then we do a custom cross entropy for each dimension.
        """
        super(CNN_LSTM_Classification, self).__init__()

        # Use official weights to avoid deprecation warnings
        self.resnet = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)

        # Optionally freeze everything at first
        for param in self.resnet.parameters():
            param.requires_grad = False

        # Unfreeze from 'layer3' forward (this is partial unfreezing)
        unfreeze = False
        for name, child in self.resnet.named_children():
            if name == freeze_until:
                unfreeze = True
            if unfreeze:
                for param in child.parameters():
                    param.requires_grad = True

        # We'll skip the final fc layer from resnet, we do LSTM + custom FC
        # The output of resnet.avgpool is 2048-d
        self.lstm_hidden = 512
        self.lstm = nn.LSTM(2048, self.lstm_hidden, batch_first=True)
        # 16 logits: 4 states × 4 classes each
        self.fc = nn.Linear(self.lstm_hidden, 16)

    def forward(self, x):
        """
        x: [batch_size, seq_len, 3, H, W]
        Returns: [batch_size, 16 logits]
        We'll shape them to [batch_size, 4, 4] for multi cross-entropy.
        """
        bsz, seq_len, c, h, w = x.shape
        x = x.view(-1, c, h, w)  # flatten for ResNet forward

        # forward pass up to avgpool
        x = self.resnet.conv1(x)
        x = self.resnet.bn1(x)
        x = self.resnet.relu(x)
        x = self.resnet.maxpool(x)

        x = self.resnet.layer1(x)
        x = self.resnet.layer2(x)
        x = self.resnet.layer3(x)
        x = self.resnet.layer4(x)

        x = self.resnet.avgpool(x)  # shape: [batch_size*seq_len, 2048, 1, 1]
        x = x.view(x.size(0), -1)   # [batch_size*seq_len, 2048]

        # reshape for LSTM
        x = x.view(bsz, seq_len, -1)  # [bsz, seq_len, 2048]
        lstm_out, _ = self.lstm(x)
        # take the last time step
        last_step = lstm_out[:, -1, :]  # [bsz, hidden]
        logits = self.fc(last_step)     # [bsz, 16]
        return logits

#### ConvLSMT Implementation


In [4]:
class ConvLSTMCell(nn.Module):
    def __init__(self, input_channels, hidden_channels, kernel_size=3):
        super().__init__()
        padding = (kernel_size - 1) // 2
        self.input_channels = input_channels
        self.hidden_channels = hidden_channels
        self.kernel_size = kernel_size
        self.conv = nn.Conv2d(
            in_channels=input_channels + hidden_channels,
            out_channels=4 * hidden_channels,
            kernel_size=kernel_size,
            padding=padding
        )

    def forward(self, x, h, c):
        combined = torch.cat([x, h], dim=1)
        gates = self.conv(combined)
        i, f, o, g = torch.chunk(gates, 4, dim=1)
        i = torch.sigmoid(i)
        f = torch.sigmoid(f)
        o = torch.sigmoid(o)
        g = torch.tanh(g)
        c_next = f * c + i * g
        h_next = o * torch.tanh(c_next)
        return h_next, c_next

class ConvLSTM(nn.Module):
    def __init__(self, input_channels, hidden_channels, kernel_size=3):
        super().__init__()
        self.cell = ConvLSTMCell(input_channels, hidden_channels, kernel_size)

    def forward(self, x):
        """
        x => [bsz, seq_len, in_ch, H, W]
        """
        bsz, seq_len, in_ch, H, W = x.shape
        h = torch.zeros((bsz, self.cell.hidden_channels, H, W), device=x.device)
        c = torch.zeros((bsz, self.cell.hidden_channels, H, W), device=x.device)
        for t in range(seq_len):
            frame = x[:, t, :, :, :]
            h, c = self.cell(frame, h, c)
        return h, c


#### EfficientNetB0 + ConvLSTM + Final FC => 16 Logits


In [5]:
class EfficientNetB0ConvLSTM(nn.Module):
    def __init__(self, freeze_until_block=2, convlstm_hidden=128, out_dim=16):
        super().__init__()
        effnet = efficientnet_b0(weights=EfficientNet_B0_Weights.IMAGENET1K_V1)
        self.features = effnet.features  # [bsz, 1280, h', w']

        # freeze blocks 0..(freeze_until_block-1)
        for i, child in enumerate(self.features.children()):
            if i < freeze_until_block:
                for param in child.parameters():
                    param.requires_grad = False

        # reduce conv from 1280 => 256
        self.reduce_conv = nn.Conv2d(in_channels=1280, out_channels=256, kernel_size=1, stride=1, padding=0)

        # convlstm
        self.convlstm = ConvLSTM(input_channels=256, hidden_channels=convlstm_hidden, kernel_size=3)

        # final classification
        self.class_conv = nn.Conv2d(convlstm_hidden, 128, kernel_size=3, padding=1)
        self.fc = nn.Linear(128, out_dim)

    def forward(self, x):
        # x => [bsz, seq_len, 3, 224, 224]
        bsz, seq_len, c, H, W = x.shape
        x = x.view(bsz * seq_len, c, H, W)
        feats = self.features(x)         # => [bsz*seq_len, 1280, h', w']
        feats = self.reduce_conv(feats)  # => [bsz*seq_len, 256, h', w']

        _, c2, h2, w2 = feats.shape
        feats = feats.view(bsz, seq_len, c2, h2, w2)

        # convlstm => returns last hidden => [bsz, hidden, h2, w2]
        h, c_ = self.convlstm(feats)

        # classification
        out = self.class_conv(h)   # [bsz, 128, h2, w2]
        out = out.mean(dim=[2,3])  # global avg => [bsz, 128]
        logits = self.fc(out)      # => [bsz, 16]
        return logits


In [6]:
# Multi-CrossEntropy for 4 states
def multi_ce_loss(logits, labels):
    """
    logits: [batch_size, 16]
    labels: [batch_size, 4], each label in [0..3]
    We'll reshape logits => [batch_size, 4, 4], then do CrossEntropy for each dimension.
    Final loss is the average or sum of the 4 cross entropies.
    """
    batch_size = logits.size(0)
    # reshape => [bsz, 4 states, 4 classes]
    logits_reshaped = logits.view(batch_size, 4, 4)  # e.g. [bsz, 4, 4]

    # separate each dimension’s logits & label
    ce = nn.CrossEntropyLoss()
    # We'll compute CE for each dimension (boredom, engagement, etc.)
    total_loss = 0.0
    for d in range(4):
        # logits for dimension d: [bsz, 4]
        dim_logits = logits_reshaped[:, d, :]
        # labels for dimension d: [bsz]
        dim_labels = labels[:, d]
        loss_d = ce(dim_logits, dim_labels)
        total_loss += loss_d

    # average or sum
    return total_loss / 4.0  # average across the 4 states

### **Data Loaders**

**Configures train, validation, and test loaders with optimal settings.**


In [7]:
def get_classification_dataloaders(batch_size=16, seq_len=15):
    """
    We'll load up to 'seq_len' frames from each folder. 
    """
    train_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])
    val_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

    labels_path = DATASET_ROOT / "Labels"

    train_ds = DAiSEEDataset(FRAMES_ROOT, labels_path / "TrainLabels.csv", transform=train_transform, seq_len=seq_len)
    val_ds   = DAiSEEDataset(FRAMES_ROOT, labels_path / "ValidationLabels.csv", transform=val_transform, seq_len=seq_len)
    test_ds  = DAiSEEDataset(FRAMES_ROOT, labels_path / "TestLabels.csv", transform=val_transform, seq_len=seq_len)

    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,  num_workers=0, pin_memory=True)
    val_loader   = DataLoader(val_ds,   batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)
    test_loader  = DataLoader(test_ds,  batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

    return train_loader, val_loader, test_loader


### Training Loop with Optimizations

- **Mixed Precision**: Uses FP16 for faster training.
- **Checkpointing**: Saves the best model based on validation loss


In [74]:
#checkpointing
def save_checkpoint(model, optimizer, epoch, best_val_loss, directory="models_class"):
    from pathlib import Path
    import datetime
    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    save_dir = Path(directory) / timestamp
    save_dir.mkdir(parents=True, exist_ok=True)

    checkpoint_path = save_dir / "ResNet50_CNNLSTM_classification.pth"
    torch.save({
        'epoch': epoch,
        'best_val_loss': best_val_loss,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, checkpoint_path)
    print(f"Checkpoint saved to {checkpoint_path}")

def load_latest_checkpoint(model, optimizer, model_dir="models_class", filename="ResNet50_CNNLSTM_classification.pth",
                           device=None):
    from pathlib import Path
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_dir = Path(model_dir)
    checkpoints = list(model_dir.rglob(filename))
    if not checkpoints:
        print("No checkpoint found. Starting from scratch.")
        return 0, float('inf')

    latest_cpt = max(checkpoints, key=lambda p: p.stat().st_mtime)
    print(f"Loading checkpoint from {latest_cpt}")
    cpoint = torch.load(latest_cpt, map_location=device)

    model.load_state_dict(cpoint['model_state_dict'], strict=False)
    optimizer.load_state_dict(cpoint['optimizer_state_dict'])
    start_epoch = cpoint.get('epoch', 0) + 1
    best_val_loss = cpoint.get('best_val_loss', float('inf'))

    print(f"Resuming training from epoch {start_epoch}")
    return start_epoch, best_val_loss


# Training Loop (Classification)
def train_classification_model(model, train_loader, val_loader,
                               epochs=10, lr=1e-4, early_stopping_patience=2):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scaler = GradScaler()

    start_epoch = 0
    best_val_loss = float('inf')
    patience_counter = 0

    model_save_dir = "models_class"
    from torch.optim.lr_scheduler import ReduceLROnPlateau
    scheduler = ReduceLROnPlateau(optimizer, patience=1)

    for epoch in range(start_epoch, epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1} Train"):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            with autocast(enabled=True, device_type='cuda'):
                logits = model(inputs)          # [batch, 16]
                loss   = multi_ce_loss(logits, labels)  # custom multi-dim cross entropy

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            running_loss += loss.item() * inputs.size(0)

        train_loss = running_loss / len(train_loader.dataset)

        # validation
        model.eval()
        val_running_loss = 0.0
        with torch.no_grad():
            for inputs, labels in tqdm(val_loader, desc=f"Epoch {epoch+1} Val"):
                inputs, labels = inputs.to(device), labels.to(device)
                with autocast(enabled=True, device_type='cuda'):
                    logits = model(inputs)
                    loss = multi_ce_loss(logits, labels)
                val_running_loss += loss.item() * inputs.size(0)

        val_loss = val_running_loss / len(val_loader.dataset)
        logger.info(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")
        scheduler.step(val_loss)

        # checkpoint / early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            save_checkpoint(model, optimizer, epoch, best_val_loss, model_save_dir)
        else:
            patience_counter += 1
            if patience_counter >= early_stopping_patience:
                logger.info("Early stopping triggered.")
                print("Early stopping triggered; training stopped.")
                break

    print("Training finished.")

#### EfficientNetB0 + ConvLSTM Training Loop


In [8]:
def save_checkpoint(model, optimizer, epoch, best_val_loss, directory="models_effb0_convlstm_final"):
    from pathlib import Path
    ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    save_dir = Path(directory)/ts
    save_dir.mkdir(parents=True, exist_ok=True)
    ckpt_path = save_dir/"EffB0_ConvLSTM_class.pth"
    torch.save({
        "epoch": epoch,
        "best_val_loss": best_val_loss,
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict()
    }, ckpt_path)
    print(f"[Checkpoint Saved] {ckpt_path}")

def load_latest_checkpoint(model, optimizer, model_dir="models_effb0_convlstm_final", filename="EffB0_ConvLSTM_class.pth",
                           device=None):
    from pathlib import Path
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_dir=Path(model_dir)
    cpts = list(model_dir.rglob(filename))
    if not cpts:
        print("[Resume] No checkpoint found => starting from scratch.")
        return 0, float("inf")
    latest_ckpt = max(cpts, key=lambda p: p.stat().st_mtime)
    print(f"[Resume] Loading from {latest_ckpt}")
    checkpoint = torch.load(latest_ckpt, map_location=device)
    model.load_state_dict(checkpoint["model_state_dict"], strict=False)
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    start_epoch = checkpoint.get("epoch",0)+1
    best_val = checkpoint.get("best_val_loss", float("inf"))
    print(f"[Resume] Starting from epoch={start_epoch}")
    return start_epoch, best_val

# Evaluate
def evaluate_classification(model, loader):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    all_preds, all_labels=[], []
    with torch.no_grad():
        for (inputs, labels) in tqdm(loader, desc="[Evaluate Final]"):
            inputs, labels = inputs.to(device), labels.to(device)
            logits = model(inputs)
            bsz = logits.size(0)
            logits_reshaped = logits.view(bsz,4,4)
            preds = torch.argmax(logits_reshaped, dim=2)
            all_preds.append(preds.cpu())
            all_labels.append(labels.cpu())
    all_preds = torch.cat(all_preds, dim=0).numpy()
    all_labels= torch.cat(all_labels, dim=0).numpy()
    dims = ["Boredom","Engagement","Confusion","Frustration"]
    from sklearn.metrics import classification_report
    for d in range(4):
        print(f"\nDimension: {dims[d]}")
        print(classification_report(all_labels[:,d], all_preds[:,d], labels=[0,1,2,3], digits=3))

**Model Evaluation: CNN_LSTM** After training, use the evaluation function to compute additional metrics:


In [11]:
def evaluate_classification(model, test_loader):
    """
    We'll compute predicted classes for each dimension,
    then compare them with ground truth to get classification_report
    or custom accuracy for each dimension.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval()

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc="Evaluating"):
            inputs, labels = inputs.to(device), labels.to(device)
            logits = model(inputs)  # [batch, 16]
            # shape => [batch, 4, 4]
            logits_reshaped = logits.view(-1, 4, 4)  # 4 dims × 4 classes

            # For each dimension, pick argmax
            # shape => [batch, 4]
            dimension_preds = torch.argmax(logits_reshaped, dim=2)
            all_preds.append(dimension_preds.cpu())
            all_labels.append(labels.cpu())

    all_preds = torch.cat(all_preds, dim=0).numpy()   # shape [N, 4]
    all_labels= torch.cat(all_labels, dim=0).numpy()  # shape [N, 4]

    # We can do classification_report for each dimension
    dimension_names = ["Boredom", "Engagement", "Confusion", "Frustration"]

    for d in range(4):
        print(f"\nDimension: {dimension_names[d]}")
        # We'll do classification report
        print(classification_report(all_labels[:, d], all_preds[:, d],
              labels=[0,1,2,3],
              digits=3))

    return all_preds, all_labels


#### Main Execution for CNN_LSTM Model


In [9]:
# 1. Get Dataloaders
train_loader, val_loader, test_loader = get_classification_dataloaders(
    batch_size=16,  # or 16 if your GPU can handle it
    seq_len=15     # bigger sequence length for more temporal data
)

# 2. Instantiate Model
# e.g. freeze_until='layer3' or 'layer2' or 'layer1' depending on how much you want to fine-tune
model = CNN_LSTM_Classification(freeze_until='layer2')

# 3. Train
train_classification_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=15,
    lr=5e-5,              # adjust as needed
    early_stopping_patience=2
)

# 4. Evaluate Best Checkpoint
# Re-instantiate the same architecture
model_eval = CNN_LSTM_Classification(freeze_until='layer3')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_eval.to(device)

# load the best checkpoint
from pathlib import Path
model_dir = Path("models_class")
cpts = list(model_dir.rglob("ResNet50_CNNLSTM_classification.pth"))
if cpts:
    best_ckpt = max(cpts, key=lambda p: p.stat().st_mtime)
    print(f"Loading best checkpoint for evaluation: {best_ckpt}")
    cpoint = torch.load(best_ckpt, map_location=device)
    model_eval.load_state_dict(cpoint['model_state_dict'], strict=False)

# classification evaluation
all_preds, all_labels = evaluate_classification(model_eval, test_loader)
# you get a classification_report per dimension


Epoch 1 Train: 100%|██████████| 304/304 [16:37<00:00,  3.28s/it]
Epoch 1 Val: 100%|██████████| 90/90 [01:07<00:00,  1.33it/s]
Epoch 1/15 | Train Loss: 0.8531 | Val Loss: 0.9780


Checkpoint saved to models_class\20250213-140249\ResNet50_CNNLSTM_classification.pth


Epoch 2 Train: 100%|██████████| 304/304 [13:54<00:00,  2.74s/it]
Epoch 2 Val: 100%|██████████| 90/90 [00:49<00:00,  1.83it/s]
Epoch 2/15 | Train Loss: 0.8068 | Val Loss: 0.9567


Checkpoint saved to models_class\20250213-141733\ResNet50_CNNLSTM_classification.pth


Epoch 3 Train: 100%|██████████| 304/304 [13:16<00:00,  2.62s/it]
Epoch 3 Val: 100%|██████████| 90/90 [00:57<00:00,  1.56it/s]
Epoch 3/15 | Train Loss: 0.7904 | Val Loss: 0.9583
Epoch 4 Train: 100%|██████████| 304/304 [15:53<00:00,  3.14s/it]
Epoch 4 Val: 100%|██████████| 90/90 [00:57<00:00,  1.55it/s]
Epoch 4/15 | Train Loss: 0.7716 | Val Loss: 0.9696
Early stopping triggered.


Early stopping triggered; training stopped.
Training finished.
Loading best checkpoint for evaluation: models_class\20250213-141733\ResNet50_CNNLSTM_classification.pth


Evaluating: 100%|██████████| 103/103 [10:16<00:00,  5.98s/it]


Dimension: Boredom
              precision    recall  f1-score   support

           0      0.570     0.560     0.565       747
           1      0.395     0.495     0.440       519
           2      0.263     0.200     0.227       335
           3      0.000     0.000     0.000        37

    accuracy                          0.453      1638
   macro avg      0.307     0.314     0.308      1638
weighted avg      0.439     0.453     0.443      1638


Dimension: Engagement
              precision    recall  f1-score   support

           0      0.000     0.000     0.000         4
           1      0.000     0.000     0.000        81
           2      0.548     0.678     0.606       849
           3      0.500     0.416     0.454       704

    accuracy                          0.531      1638
   macro avg      0.262     0.274     0.265      1638
weighted avg      0.499     0.531     0.509      1638


Dimension: Confusion
              precision    recall  f1-score   support

          


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Optuna Hyperparameter Tuning


In [9]:
def add_previous_trials_to_study(study, old_trials):
    import optuna
    from optuna.distributions import CategoricalDistribution, IntDistribution, FloatDistribution
    from optuna.trial import TrialState

    # --- Step 1: Define a key mapping from backup keys to objective keys ---
    key_mapping = {
        'batch_sz': 'batch_size',
        'freeze_block': 'freeze_until_block',
        'hidden_ch': 'convlstm_hidden'
    }

    # --- Step 2: Define the distributions exactly as in your objective ---
    param_dists = {
        "seq_len": CategoricalDistribution([15]),
        "batch_size": CategoricalDistribution([8, 16]),
        "freeze_until_block": IntDistribution(0, 4),
        "convlstm_hidden": CategoricalDistribution([64, 128, 256]),
        "lr": FloatDistribution(1e-5, 5e-4, log=True)
    }

    # --- Step 3: Loop over each backup trial and convert keys/values ---
    for trial_number, val_loss, param_dict in old_trials:
        if val_loss is None:
            continue  # Skip trials with no valid objective value

        # Remap the old parameters into a new dict with the correct keys and types.
        new_params = {}
        for key, value in param_dict.items():
            new_key = key_mapping.get(key, key)  # Use the mapped key if available; else keep as is.
            # Convert to proper type: for integer keys and float for lr.
            if new_key in ["seq_len", "batch_size", "freeze_until_block", "convlstm_hidden"]:
                try:
                    value = int(value)
                except Exception as e:
                    print(f"[SKIP] Conversion error for key {new_key}: {value} -> {e}")
                    continue
            elif new_key == "lr":
                try:
                    value = float(value)
                except Exception as e:
                    print(f"[SKIP] Conversion error for key {new_key}: {value} -> {e}")
                    continue
            new_params[new_key] = value

        # --- Step 4: Validate that the converted parameters fall into the expected distributions ---
        skip_trial = False
        for p_name, dist in param_dists.items():
            if p_name in new_params:
                if not _check_if_value_in_distribution(new_params[p_name], dist):
                    print(f"[SKIP param mismatch] param={p_name}, value={new_params[p_name]} not in {dist}")
                    skip_trial = True
                    break
        if skip_trial:
            continue

        # --- Step 5: Create a new trial with the fixed distributions and force-set the parameters ---
        new_trial = study.ask(fixed_distributions=param_dists)
        for p_name, p_val in new_params.items():
            if p_name in param_dists:
                new_trial._suggest(p_name, p_val)
        study.tell(new_trial, val_loss, state=TrialState.COMPLETE)

    print(f"[INFO] Imported old trials with parameter conversion.")

# Add old trials to the study with parameter conversion
def _check_if_value_in_distribution(value, dist):
    from optuna.distributions import CategoricalDistribution, IntDistribution, FloatDistribution
    if isinstance(dist, CategoricalDistribution):
        return value in dist.choices
    elif isinstance(dist, IntDistribution):
        return isinstance(value, int) and dist.low <= value <= dist.high
    elif isinstance(dist, FloatDistribution):
        return (isinstance(value, (float, int))
                and dist.low <= float(value) <= dist.high)
    else:
        return False
    
    
def objective(trial):
    seq_len = trial.suggest_categorical('seq_len',[15])
    batch_sz= trial.suggest_categorical('batch_sz',[8,16])
    freeze_block = trial.suggest_int('freeze_block',0,4)
    hidden_ch = trial.suggest_categorical('hidden_ch',[64,128,256])
    lr = trial.suggest_float('lr',1e-5,5e-4, log=True)
    # Deprecation fix => switch to suggest_float with log=True
      

    # === Skip if param combo was tested successfully before ===
    current_params = {
        'seq_len': seq_len,
        'batch_sz': batch_sz,
        'freeze_block': freeze_block,
        'hidden_ch': hidden_ch,
        'lr': lr
    }
    param_fro = frozenset(current_params.items())
    if param_fro in TESTED_PARAMS:
        print(f"[SKIP] This param combo was tested with success before => pruning trial.")
        raise optuna.TrialPruned()

    # If we get here => not in the tested set or it was previously failed => proceed
    train_loader, val_loader, _ = get_classification_dataloaders(batch_size=batch_sz, seq_len=seq_len)

    model = EfficientNetB0ConvLSTM(
        freeze_until_block=freeze_block,
        convlstm_hidden=hidden_ch,
        out_dim=16
    )
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    optimizer=optim.Adam(model.parameters(), lr=lr)
    scaler=GradScaler()

    best_val_loss= float("inf")
    patience=2
    patience_ctr=0
    max_epochs=5

    for ep in range(max_epochs):
        model.train()
        run_train=0.
        for (inputs,labels) in tqdm(train_loader, desc=f"[Trial Ep{ep+1}]"):
            inputs,labels= inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            with autocast(enabled=True, device_type='cuda'):
                logits= model(inputs)
                loss = multi_ce_loss(logits, labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            run_train+= loss.item() * inputs.size(0)
        train_loss= run_train/ len(train_loader.dataset)

        # validate
        model.eval()
        val_run=0.
        with torch.no_grad():
            for (inputs, labels) in val_loader:
                inputs, labels= inputs.to(device), labels.to(device)
                logits= model(inputs)
                loss= multi_ce_loss(logits, labels)
                val_run+= loss.item() * inputs.size(0)
        val_loss= val_run/ len(val_loader.dataset)
        print(f"[Trial] Ep{ep+1}, train_loss={train_loss:.4f}, val_loss={val_loss:.4f}")

        if val_loss<best_val_loss:
            best_val_loss= val_loss
            patience_ctr=0
        else:
            patience_ctr+=1
            if patience_ctr>= patience:
                print("[Trial] Early stopping triggered.")
                break

        trial.report(val_loss, ep)
        if trial.should_prune():
            raise optuna.TrialPruned()

    return best_val_loss

### Final Training EfficientNetB0 + ConvLSTM Model & Evaluate


In [10]:
def final_training_with_resume(model, train_loader, val_loader,
                               max_epochs=15, lr=1e-4, early_stop_patience=3,
                               model_save_dir="models_effb0_convlstm_final"):
    device= torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    optimizer= optim.Adam(model.parameters(), lr=lr)
    scaler=GradScaler()

    # attempt resume
    start_ep, best_val_loss= load_latest_checkpoint(
        model, optimizer, model_dir=model_save_dir, 
        filename="EffB0_ConvLSTM_class.pth", device=device
    )
    patience_ctr=0

    for ep in range(start_ep, max_epochs):
        model.train()
        run_train=0.
        for (inputs,labels) in tqdm(train_loader, desc=f"[Final Ep{ep+1}]"):
            inputs, labels= inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            with autocast(enabled=True, device_type='cuda'):
                logits= model(inputs)
                loss= multi_ce_loss(logits, labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            run_train += loss.item()* inputs.size(0)
        train_loss= run_train/ len(train_loader.dataset)

        # validation
        model.eval()
        val_run=0.
        with torch.no_grad():
            for (inputs,labels) in val_loader:
                inputs, labels= inputs.to(device), labels.to(device)
                logits= model(inputs)
                loss= multi_ce_loss(logits, labels)
                val_run+= loss.item()* inputs.size(0)
        val_loss= val_run/ len(val_loader.dataset)
        logger.info(f"[Final Resume] Ep{ep+1}/{max_epochs}, train_loss={train_loss:.4f}, val_loss={val_loss:.4f}")

        if val_loss< best_val_loss:
            best_val_loss= val_loss
            patience_ctr=0
            save_checkpoint(model, optimizer, ep, best_val_loss, directory=model_save_dir)
        else:
            patience_ctr+=1
            if patience_ctr>= early_stop_patience:
                print("[Final Resume] Early stopping triggered.")
                break
    print("[Final Resume] Done or early-stopped.")



### Main Excuxecution for EfficientNetB0 + ConvLSTM Model


In [None]:
# create or load the SQLite study
study_name = "my_effb0_convlstm_study"
storage_url = "sqlite:///optuna_study.db"

study = optuna.create_study(
    direction='minimize',
    study_name=study_name,
    storage=storage_url,
    load_if_exists=True
)
print(f"[INFO] Using study '{study_name}' with storage '{storage_url}'.")

# (Optional) load old trials from file only if file has ahead of database
try:
    old_saved_trials = load_old_trials("optuna_trials_backup.txt")
    if old_saved_trials:
        print("\n[BACKUP TRIALS] Loaded trials from file:")
        for trial_number, val_loss, params in old_saved_trials:
            print(f"Trial {trial_number} | Val Loss: {val_loss} | Params: {params}")
        
        # Always import backup trials when starting from a fresh DB.
        add_previous_trials_to_study(study, old_saved_trials)
        print(f"[INFO] Imported {len(old_saved_trials)} old trials into the study.")
except FileNotFoundError:
    print("[WARN] No 'optuna_trials_backup.txt' found. Skipping old trials import.")
except Exception as e:
    print(f"[WARN] Could not import old trials: {e}")

    print(f"[WARN] Could not import old trials: {e}")
    
# Print out all completed trial results from the study for easy reference.
print("\n[OPTUNA] Completed Trials:")
for trial in study.trials:
    print(f"Trial {trial.number} | Val Loss: {trial.value} | Params: {trial.params}")

# Now do the new search
study.optimize(objective, n_trials=10)

# Print out all completed trial results from the study for easy reference.
print("\n[OPTUNA] Completed Trials:")
for trial in study.trials:
    print(f"Trial {trial.number} | Val Loss: {trial.value} | Params: {trial.params}")

best_trial = study.best_trial
print("\n[OPTUNA] Best Trial Hyperparams:")
for k, v in best_trial.params.items():
    print(f"  {k}: {v}")
print(" best_val_loss:", best_trial.value)

# 2) Grab best hyperparams
best_seq_len  = best_trial.params['seq_len']
best_batch_sz = best_trial.params['batch_sz']
best_frz_block = best_trial.params['freeze_block']
best_hidden   = best_trial.params['hidden_ch']
best_lr       = best_trial.params['lr']

# 3) Build Data w/ best hyperparams
train_loader, val_loader, test_loader = get_classification_dataloaders(
    batch_size=best_batch_sz, seq_len=best_seq_len
)

# 4) Build Model w/ best hyperparams
final_model = EfficientNetB0ConvLSTM(
    freeze_until_block=best_frz_block,
    convlstm_hidden=best_hidden,
    out_dim=16
)

# 5) Final Training w/ Resume
final_training_with_resume(
    model=final_model,
    train_loader=train_loader,
    val_loader=val_loader,
    max_epochs=15,
    lr=best_lr,
    early_stop_patience=3,
    model_save_dir="models_effb0_convlstm_final"
)

# 6) Evaluate on test set
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
final_model.to(device)

# load best final checkpoint for eval
from pathlib import Path
final_ckpt_dir = Path("models_effb0_convlstm_final")
cpts = list(final_ckpt_dir.rglob("EffB0_ConvLSTM_class.pth"))
if cpts:
    best_ckpt = max(cpts, key=lambda p: p.stat().st_mtime)
    print(f"[Final Resume] Loading best checkpoint for evaluation: {best_ckpt}")
    cpoint = torch.load(best_ckpt, map_location=device)
    final_model.load_state_dict(cpoint['model_state_dict'], strict=False)

# Evaluate
final_model.eval()
all_preds, all_labels = [], []
with torch.no_grad():
    for (inputs, labels) in tqdm(test_loader, desc="[Evaluate Final]"):
        inputs, labels = inputs.to(device), labels.to(device)
        logits = final_model(inputs)
        bsz = logits.size(0)
        logits_reshaped = logits.view(bsz, 4, 4)
        preds = torch.argmax(logits_reshaped, dim=2)
        all_preds.append(preds.cpu())
        all_labels.append(labels.cpu())

all_preds = torch.cat(all_preds, dim=0).numpy()
all_labels = torch.cat(all_labels, dim=0).numpy()
dims = ["Boredom", "Engagement", "Confusion", "Frustration"]
from sklearn.metrics import classification_report
for d in range(4):
    print(f"\nDimension: {dims[d]}")
    print(classification_report(all_labels[:, d], all_preds[:, d], labels=[0, 1, 2, 3], digits=3))

print("[Done] EfficientNetB0 + ConvLSTM + Optuna + Resume complete!")

[I 2025-02-14 22:38:21,392] Using an existing study with name 'my_effb0_convlstm_study' instead of creating a new one.


[INFO] Using study 'my_effb0_convlstm_study' with storage 'sqlite:///optuna_study.db'.
[WARN] No 'optuna_trials_backup.txt' found. Skipping old trials import.

[OPTUNA] Completed Trials:
Trial 0 | Val Loss: 0.9770944646974807 | Params: {'seq_len': 15, 'batch_size': 8, 'freeze_until_block': 1, 'convlstm_hidden': 64, 'lr': 3.428815705452256e-05}
Trial 1 | Val Loss: 0.9723774410361923 | Params: {'seq_len': 15, 'batch_size': 8, 'freeze_until_block': 3, 'convlstm_hidden': 64, 'lr': 2.9768857623565067e-05}
Trial 2 | Val Loss: 0.977101352516799 | Params: {'seq_len': 15, 'batch_size': 16, 'freeze_until_block': 4, 'convlstm_hidden': 64, 'lr': 2.3579914063343766e-05}
Trial 3 | Val Loss: None | Params: {'seq_len': 15, 'batch_sz': 16, 'freeze_block': 4, 'hidden_ch': 64, 'lr': 0.0002524669209895806}


[Trial Ep1]: 100%|██████████| 607/607 [04:10<00:00,  2.42it/s]


[Trial] Ep1, train_loss=0.8707, val_loss=0.9924


[Trial Ep2]: 100%|██████████| 607/607 [04:19<00:00,  2.34it/s]


[Trial] Ep2, train_loss=0.8322, val_loss=0.9711


[Trial Ep3]: 100%|██████████| 607/607 [04:32<00:00,  2.23it/s]


[Trial] Ep3, train_loss=0.8201, val_loss=1.0133


[Trial Ep4]: 100%|██████████| 607/607 [10:56<00:00,  1.08s/it]
[I 2025-02-14 23:08:01,320] Trial 4 finished with value: 0.971092811324865 and parameters: {'seq_len': 15, 'batch_sz': 8, 'freeze_block': 1, 'hidden_ch': 128, 'lr': 0.00022544027989915264}. Best is trial 4 with value: 0.971092811324865.


[Trial] Ep4, train_loss=0.8157, val_loss=1.0073
[Trial] Early stopping triggered.


[Trial Ep1]: 100%|██████████| 607/607 [17:28<00:00,  1.73s/it]


[Trial] Ep1, train_loss=0.8917, val_loss=0.9993


[Trial Ep2]: 100%|██████████| 607/607 [05:25<00:00,  1.86it/s]


[Trial] Ep2, train_loss=0.8275, val_loss=0.9872


[Trial Ep3]: 100%|██████████| 607/607 [03:54<00:00,  2.59it/s]


[Trial] Ep3, train_loss=0.8092, val_loss=0.9672


[Trial Ep4]: 100%|██████████| 607/607 [03:56<00:00,  2.57it/s]


[Trial] Ep4, train_loss=0.8000, val_loss=1.0165


[Trial Ep5]: 100%|██████████| 607/607 [03:58<00:00,  2.54it/s]
[I 2025-02-14 23:48:20,368] Trial 5 finished with value: 0.9671566304499825 and parameters: {'seq_len': 15, 'batch_sz': 8, 'freeze_block': 0, 'hidden_ch': 128, 'lr': 3.850605492222891e-05}. Best is trial 5 with value: 0.9671566304499825.


[Trial] Ep5, train_loss=0.7903, val_loss=0.9847
[Trial] Early stopping triggered.


[Trial Ep1]: 100%|██████████| 607/607 [03:42<00:00,  2.73it/s]
[I 2025-02-14 23:52:47,057] Trial 6 pruned. 


[Trial] Ep1, train_loss=0.9631, val_loss=1.0131


[Trial Ep1]: 100%|██████████| 607/607 [02:54<00:00,  3.47it/s]


[Trial] Ep1, train_loss=0.8713, val_loss=0.9722


[Trial Ep2]: 100%|██████████| 607/607 [02:53<00:00,  3.49it/s]


[Trial] Ep2, train_loss=0.8291, val_loss=0.9871


[Trial Ep3]: 100%|██████████| 607/607 [02:55<00:00,  3.46it/s]
[I 2025-02-15 00:03:39,026] Trial 7 finished with value: 0.9721880690712792 and parameters: {'seq_len': 15, 'batch_sz': 8, 'freeze_block': 4, 'hidden_ch': 128, 'lr': 0.00019389369423148}. Best is trial 5 with value: 0.9671566304499825.


[Trial] Ep3, train_loss=0.8177, val_loss=0.9904
[Trial] Early stopping triggered.


[Trial Ep1]: 100%|██████████| 607/607 [03:44<00:00,  2.70it/s]
[I 2025-02-15 00:08:08,291] Trial 8 pruned. 


[Trial] Ep1, train_loss=0.8690, val_loss=1.0004


[Trial Ep1]: 100%|██████████| 607/607 [03:53<00:00,  2.60it/s]


[Trial] Ep1, train_loss=0.8702, val_loss=0.9813


[Trial Ep2]: 100%|██████████| 607/607 [03:52<00:00,  2.61it/s]


[Trial] Ep2, train_loss=0.8279, val_loss=0.9714


[Trial Ep3]: 100%|██████████| 607/607 [03:52<00:00,  2.61it/s]


[Trial] Ep3, train_loss=0.8119, val_loss=0.9763


[Trial Ep4]: 100%|██████████| 607/607 [03:51<00:00,  2.62it/s]
[I 2025-02-15 00:26:26,987] Trial 9 finished with value: 0.9714210024240386 and parameters: {'seq_len': 15, 'batch_sz': 8, 'freeze_block': 1, 'hidden_ch': 128, 'lr': 0.00010680591874223483}. Best is trial 5 with value: 0.9671566304499825.


[Trial] Ep4, train_loss=0.8003, val_loss=1.0081
[Trial] Early stopping triggered.


[Trial Ep1]: 100%|██████████| 304/304 [40:30<00:00,  8.00s/it]
[I 2025-02-15 01:07:53,316] Trial 10 pruned. 


[Trial] Ep1, train_loss=0.9533, val_loss=1.0149


[Trial Ep1]: 100%|██████████| 304/304 [03:07<00:00,  1.62it/s]
[I 2025-02-15 01:11:44,896] Trial 11 pruned. 


[Trial] Ep1, train_loss=0.9015, val_loss=0.9996


[Trial Ep1]: 100%|██████████| 607/607 [03:57<00:00,  2.56it/s]
[I 2025-02-15 01:16:26,279] Trial 12 pruned. 


[Trial] Ep1, train_loss=0.8782, val_loss=1.1256


[Trial Ep1]: 100%|██████████| 607/607 [03:52<00:00,  2.62it/s]
[I 2025-02-15 01:21:02,529] Trial 13 pruned. 


[Trial] Ep1, train_loss=0.8792, val_loss=1.0049

[OPTUNA] Completed Trials:
Trial 0 | Val Loss: 0.9770944646974807 | Params: {'seq_len': 15, 'batch_size': 8, 'freeze_until_block': 1, 'convlstm_hidden': 64, 'lr': 3.428815705452256e-05}
Trial 1 | Val Loss: 0.9723774410361923 | Params: {'seq_len': 15, 'batch_size': 8, 'freeze_until_block': 3, 'convlstm_hidden': 64, 'lr': 2.9768857623565067e-05}
Trial 2 | Val Loss: 0.977101352516799 | Params: {'seq_len': 15, 'batch_size': 16, 'freeze_until_block': 4, 'convlstm_hidden': 64, 'lr': 2.3579914063343766e-05}
Trial 3 | Val Loss: None | Params: {'seq_len': 15, 'batch_sz': 16, 'freeze_block': 4, 'hidden_ch': 64, 'lr': 0.0002524669209895806}
Trial 4 | Val Loss: 0.971092811324865 | Params: {'seq_len': 15, 'batch_sz': 8, 'freeze_block': 1, 'hidden_ch': 128, 'lr': 0.00022544027989915264}
Trial 5 | Val Loss: 0.9671566304499825 | Params: {'seq_len': 15, 'batch_sz': 8, 'freeze_block': 0, 'hidden_ch': 128, 'lr': 3.850605492222891e-05}
Trial 6 | Val Loss: 1

KeyError: 'convlstm_hidden'