In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
from torch.nn.utils.rnn import pad_sequence
import numpy as np
import json, os, cv2, time
from tqdm import tqdm

# ===================================================
# CONFIG
# ===================================================
DATA_PATH = "D:/Semester/Semester5/DPL302/Project/sentence_dataset/"
METADATA_PATH = 'D:/Semester/Semester5/DPL302/Project/sentence_dataset/metadata.json'
BATCH_SIZE = 8
EPOCHS = 10
LR = 1e-4
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MAX_SEQ_LEN = 60  # chu·∫©n h√≥a t·∫°m th·ªùi
TRAIN_RATIO, VAL_RATIO = 0.8, 0.1



In [11]:
# ===================================================
# DATASET
# ===================================================
class GestureCTCDataset(Dataset):
    def __init__(self, data_dir, metadata_path):
        self.data_dir = data_dir
        with open(metadata_path, 'r', encoding='utf-8') as f:
            self.metadata = json.load(f)
        all_labels = sorted({lbl for item in self.metadata for lbl in item["labels"]})
        self.label_to_idx = {lbl: i + 1 for i, lbl in enumerate(all_labels)}  # 0 l√† blank
        self.idx_to_label = {i + 1: lbl for i, lbl in enumerate(all_labels)}

    def __len__(self):
        return len(self.metadata)

    def __getitem__(self, idx):
        item = self.metadata[idx]
        keypoints = np.load(os.path.join(self.data_dir, item["file"]))  # (T,2,21,2)
        keypoints = keypoints.reshape(keypoints.shape[0], -1)  # (T,84)
        label_seq = [self.label_to_idx[lbl] for lbl in item["labels"]]
        return torch.tensor(keypoints, dtype=torch.float32), torch.tensor(label_seq, dtype=torch.long)

def collate_fn(batch):
    inputs, labels = zip(*batch)
    input_lengths = torch.tensor([len(x) for x in inputs])
    label_lengths = torch.tensor([len(l) for l in labels])
    inputs_padded = pad_sequence(inputs, batch_first=True)
    labels_concat = torch.cat(labels)
    return inputs_padded, labels_concat, input_lengths, label_lengths



In [12]:
# ===================================================
# MODEL: Transformer Encoder + CTC
# ===================================================
class TransformerCTC(nn.Module):
    def __init__(self, input_dim, model_dim, num_classes, num_layers=4, num_heads=8, ff_dim=512):
        super().__init__()
        self.input_fc = nn.Linear(input_dim, model_dim)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=model_dim,
            nhead=num_heads,
            dim_feedforward=ff_dim,
            dropout=0.2,
            batch_first=True
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.output_fc = nn.Linear(model_dim, num_classes)

    def forward(self, x):
        x = self.input_fc(x)
        x = self.encoder(x)
        return self.output_fc(x)



In [13]:
# ===================================================
# LOAD DATA
# ===================================================
dataset = GestureCTCDataset(DATA_PATH, METADATA_PATH)
train_size = int(TRAIN_RATIO * len(dataset))
val_size = int(VAL_RATIO * len(dataset))
test_size = len(dataset) - train_size - val_size
train_set, val_set, test_set = random_split(dataset, [train_size, val_size, test_size])

train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_set, batch_size=1, shuffle=False, collate_fn=collate_fn)

num_classes = len(dataset.label_to_idx) + 1
input_dim = 84



In [27]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import cv2
import numpy as np
import mediapipe as mp
from collections import deque, Counter
import json

# ===================================================
# MODEL (GI·ªêNG TRAINING)
# ===================================================
class TransformerCTC(nn.Module):
    def __init__(self, input_dim, model_dim, num_classes, num_layers=2, num_heads=4, ff_dim=256):
        super().__init__()
        self.input_fc = nn.Linear(input_dim, model_dim)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=model_dim,
            nhead=num_heads,
            dim_feedforward=ff_dim,
            dropout=0.2,
            batch_first=True
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.output_fc = nn.Linear(model_dim, num_classes)

    def forward(self, x):
        x = self.input_fc(x)
        x = self.encoder(x)
        return self.output_fc(x)

# ===================================================
# CONFIG
# ===================================================
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
INPUT_DIM = 84
MODEL_DIM = 128
MAX_SEQ_LEN = 60
DEBUG_MODE = True

MODEL_PATH = "D:/Semester/Semester5/DPL302/Project/src/transformer_ctc_gesture_best.pth"
METADATA_PATH = "D:\Semester\Semester5\DPL302\Project\sentence_dataset\metadata.json"

# ===================================================
# LOAD LABEL MAPPING
# ===================================================
def load_label_mapping(metadata_path):
    try:
        with open(metadata_path, 'r', encoding='utf-8') as f:
            metadata = json.load(f)
        all_labels = sorted({lbl for item in metadata for lbl in item["labels"]})
        label_to_idx = {lbl: i + 1 for i, lbl in enumerate(all_labels)}
        idx_to_label = {i + 1: lbl for i, lbl in enumerate(all_labels)}
        idx_to_label[0] = "BLANK"
        print(f"‚úÖ Loaded {len(all_labels)} gesture classes")
        return idx_to_label, len(all_labels) + 1
    except FileNotFoundError:
        print("‚ö†Ô∏è Metadata not found, using default labels")
        idx_to_label = {0:"BLANK", 1:"Hello",2:"Thank_you",3:"I_love_you",4:"Yes",
                        5:"No",6:"Please",7:"Sorry",8:"Help",9:"Eat",
                        10:"Drink",11:"Good",12:"Bad",13:"More",14:"Less",
                        15:"Stop",16:"Go",17:"Come",18:"Wait",19:"Understand",
                        20:"Not_understand",21:"What",22:"Where",23:"When",24:"Who",
                        25:"How",26:"Why",27:"Can",28:"Cannot",29:"Want",30:"Not_want"}
        return idx_to_label, 31

GESTURE_LABELS, NUM_CLASSES = load_label_mapping(METADATA_PATH)

# ===================================================
# LOAD MODEL
# ===================================================
model = TransformerCTC(INPUT_DIM, MODEL_DIM, NUM_CLASSES).to(DEVICE)
model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE), strict=False)
model.eval()
print(f"‚úÖ Model loaded on {DEVICE}")

# ===================================================
# MEDIAPIPE SETUP
# ===================================================
mp_pose = mp.solutions.pose
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

pose_detector = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)
hands_detector = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5, min_tracking_confidence=0.5)

# ===================================================
# KEYPOINT EXTRACTION & NORMALIZATION
# ===================================================
def get_neck_and_shoulder(frame):
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = pose_detector.process(rgb)
    if results.pose_landmarks:
        landmarks = results.pose_landmarks.landmark
        shoulder_right = np.array([landmarks[11].x, landmarks[11].y])
        shoulder_left = np.array([landmarks[12].x, landmarks[12].y])
        neck = (shoulder_left + shoulder_right) / 2
        shoulder_dist = np.linalg.norm(shoulder_left - shoulder_right)
        return neck, shoulder_dist
    return None, None

def extract_hand_keypoints(res_hands):
    keypoints = np.zeros((2,21,2))
    if res_hands.multi_hand_landmarks and res_hands.multi_handedness:
        for hand_landmarks, handedness in zip(res_hands.multi_hand_landmarks, res_hands.multi_handedness):
            hand_kp = np.array([[lm.x, lm.y] for lm in hand_landmarks.landmark])
            idx = 0 if handedness.classification[0].label=="Left" else 1
            keypoints[idx] = hand_kp
    return keypoints

def normalize_keypoints(keypoints, neck_point, shoulder_dist):
    normalized = np.zeros_like(keypoints)
    if neck_point is None or shoulder_dist is None or shoulder_dist < 0.01:
        return keypoints
    for i in range(2):
        hand = keypoints[i]
        if np.all(hand==0):
            continue
        normalized[i] = (hand - neck_point)/shoulder_dist
    return normalized

def prepare_input_like_training(keypoints):
    return keypoints

# ===================================================
# CTC DECODING
# ===================================================
def ctc_decode_detailed(logits):
    log_probs = F.log_softmax(logits, dim=-1)
    probs = torch.exp(log_probs)
    pred_classes = torch.argmax(log_probs, dim=-1)[0].cpu().numpy()
    max_probs = torch.max(probs, dim=-1)[0][0].cpu().numpy()
    class_counts = Counter(pred_classes)
    collapsed = []
    prev=-1
    for cls in pred_classes:
        if cls!=0 and cls!=prev:
            collapsed.append(cls)
        prev = cls
    return {
        'raw_predictions': pred_classes,
        'max_probs': max_probs,
        'class_counts': class_counts,
        'collapsed': collapsed,
        'avg_confidence': np.mean(max_probs),
        'non_blank_ratio': 1-(class_counts.get(0,0)/len(pred_classes))
    }

# ===================================================
# DEBUG PANEL
# ===================================================
def draw_debug_panel(frame, info_dict, y_start=100):
    h,w=frame.shape[:2]
    panel_height = len(info_dict)*25+20
    cv2.rectangle(frame,(10,y_start),(w-10,y_start+panel_height),(0,0,0),-1)
    cv2.rectangle(frame,(10,y_start),(w-10,y_start+panel_height),(255,255,0),2)
    y_offset=y_start+25
    for key,value in info_dict.items():
        text=f"{key}: {value}"
        cv2.putText(frame,text,(20,y_offset),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,255,255),1)
        y_offset+=25

# ===================================================
# MAIN LOOP
# ===================================================
def main():
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("‚ùå Cannot open camera"); return

    # Calibration
    print("üîß Calibrating pose... Stand still")
    neck_samples=[]
    shoulder_samples=[]
    for i in range(100):
        ret, frame = cap.read()
        if not ret: continue
        neck, dist = get_neck_and_shoulder(frame)
        if neck is not None:
            neck_samples.append(neck)
            shoulder_samples.append(dist)
        if (i+1)%20==0: print(f"Progress: {i+1}/100")
    avg_neck=np.mean(neck_samples, axis=0) if neck_samples else np.array([0.5,0.5])
    avg_shoulder=np.mean(shoulder_samples) if shoulder_samples else 0.2
    print(f"‚úÖ Calibration done: neck={avg_neck}, shoulder_dist={avg_shoulder}")

    seq_buffer = deque(maxlen=MAX_SEQ_LEN)
    gesture_history = deque(maxlen=10)
    current_gesture="Waiting..."
    current_confidence=0.0
    frame_count=0

    while True:
        ret, frame = cap.read()
        if not ret: break
        frame=cv2.flip(frame,1)
        h,w=frame.shape[:2]
        rgb=cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        res_hands=hands_detector.process(rgb)

        keypoints_raw=extract_hand_keypoints(res_hands)
        keypoints_norm=normalize_keypoints(keypoints_raw, avg_neck, avg_shoulder)
        input_data=prepare_input_like_training(keypoints_norm)
        seq_buffer.append(input_data)

        input_flat=input_data.reshape(-1)
        has_left=not np.all(keypoints_raw[0]==0)
        has_right=not np.all(keypoints_raw[1]==0)
        debug_info={
            'Buffer': f"{len(seq_buffer)}/{MAX_SEQ_LEN}",
            'Hands': f"L:{has_left} R:{has_right}",
            'Input Stats': f"mean={input_flat.mean():.3f} std={input_flat.std():.3f}",
            'Input Range': f"[{input_flat.min():.3f}, {input_flat.max():.3f}]"
        }

        # Predict khi buffer ƒë·∫ßy
        if len(seq_buffer) == MAX_SEQ_LEN and frame_count % 3 == 0:
            seq_array = np.array(seq_buffer)             # (T, 2, 21, 2)
            seq_flat = seq_array.reshape(seq_array.shape[0], -1)  # (T, 84)
            x = torch.tensor(seq_flat, dtype=torch.float32).unsqueeze(0).to(DEVICE)

            with torch.no_grad():
                logits = model(x)  # (1, T, NUM_CLASSES)
                decode_result = ctc_decode_detailed(logits)

            # Debug console
            if DEBUG_MODE:
                print(f"\n{'='*40}")
                print(f"Frame {frame_count}")
                print(f"Top 5 classes: {decode_result['class_counts'].most_common(5)}")
                print(f"Collapsed: {decode_result['collapsed']}")
                print(f"Avg confidence: {decode_result['avg_confidence']:.3f}")
                print(f"Non-blank ratio: {decode_result['non_blank_ratio']:.3f}")

            debug_info['Top Classes'] = str(decode_result['class_counts'].most_common(3))
            debug_info['Non-blank%'] = f"{decode_result['non_blank_ratio']*100:.1f}%"
            debug_info['Avg Conf'] = f"{decode_result['avg_confidence']:.3f}"

            collapsed = decode_result['collapsed']
            if collapsed:
                most_common_cls = Counter(collapsed).most_common(1)[0][0]
                gesture_history.append(most_common_cls)
                if len(gesture_history) >= 3:
                    smoothed_cls = Counter(gesture_history).most_common(1)[0][0]
                    current_gesture = GESTURE_LABELS.get(smoothed_cls, f"Unknown_{smoothed_cls}")
                    current_confidence = decode_result['avg_confidence']
            else:
                if DEBUG_MODE:
                    print("‚ö†Ô∏è All predictions are BLANK")

        # V·∫Ω UI
        cv2.rectangle(frame, (10,10), (w-10,90), (0,0,0), -1)
        cv2.rectangle(frame, (10,10), (w-10,90), (0,255,0), 3)
        cv2.putText(frame, current_gesture, (20,60), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0,255,0), 2)

        if current_confidence > 0:
            bar_width = int((w-40) * current_confidence)
            cv2.rectangle(frame, (20,70), (20+bar_width,80), (0,255,255), -1)

        if DEBUG_MODE:
            draw_debug_panel(frame, debug_info, y_start=110)

        cv2.imshow("Gesture Recognition - Transformer CTC", frame)
        frame_count += 1

        if cv2.waitKey(1) & 0xFF == 27:  # ESC
            break

    cap.release()
    pose_detector.close()
    hands_detector.close()
    cv2.destroyAllWindows()
    print("\nüëã Program ended")
    print("="*60)

if __name__ == "__main__":
    main()


‚úÖ Loaded 30 gesture classes
‚úÖ Model loaded on cpu
üîß Calibrating pose... Stand still
Progress: 20/100
Progress: 40/100
Progress: 60/100
Progress: 80/100
Progress: 100/100
‚úÖ Calibration done: neck=[0.50945608 0.8123345 ], shoulder_dist=0.3893596390409072

Frame 60
Top 5 classes: [(0, 60)]
Collapsed: []
Avg confidence: 0.999
Non-blank ratio: 0.000
‚ö†Ô∏è All predictions are BLANK

Frame 63
Top 5 classes: [(0, 60)]
Collapsed: []
Avg confidence: 0.999
Non-blank ratio: 0.000
‚ö†Ô∏è All predictions are BLANK

Frame 66
Top 5 classes: [(0, 60)]
Collapsed: []
Avg confidence: 0.999
Non-blank ratio: 0.000
‚ö†Ô∏è All predictions are BLANK

Frame 69
Top 5 classes: [(0, 60)]
Collapsed: []
Avg confidence: 0.999
Non-blank ratio: 0.000
‚ö†Ô∏è All predictions are BLANK

Frame 72
Top 5 classes: [(0, 60)]
Collapsed: []
Avg confidence: 0.999
Non-blank ratio: 0.000
‚ö†Ô∏è All predictions are BLANK

Frame 75
Top 5 classes: [(0, 60)]
Collapsed: []
Avg confidence: 0.999
Non-blank ratio: 0.000
‚ö†Ô∏è Al

In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import json
import os
from torch.utils.data import Dataset, DataLoader, random_split
from torch.nn.utils.rnn import pad_sequence
from tqdm import tqdm

# ===================================================
# CONFIG
# ===================================================
DATA_PATH = "D:/Semester/Semester5/DPL302/Project/sentence_dataset/"
METADATA_PATH = "D:/Semester/Semester5/DPL302/Project/sentence_dataset/metadata.json"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Reduced model size
MODEL_DIM = 128      # Gi·∫£m t·ª´ 256
NUM_LAYERS = 2       # Gi·∫£m t·ª´ 4
NUM_HEADS = 4        # Gi·∫£m t·ª´ 8
FF_DIM = 256         # Gi·∫£m t·ª´ 512

BATCH_SIZE = 16       # Gi·∫£m batch size
EPOCHS = 50          # TƒÉng epochs
LR = 1e-4            # Gi·∫£m learning rate
TRAIN_RATIO = 0.7
VAL_RATIO = 0.15

# ===================================================
# DATASET
# ===================================================
class GestureCTCDataset(Dataset):
    def __init__(self, data_dir, metadata_path):
        self.data_dir = data_dir
        with open(metadata_path, 'r', encoding='utf-8') as f:
            self.metadata = json.load(f)
        
        all_labels = sorted({lbl for item in self.metadata for lbl in item["labels"]})
        self.label_to_idx = {lbl: i + 1 for i, lbl in enumerate(all_labels)}
        self.idx_to_label = {i + 1: lbl for i, lbl in enumerate(all_labels)}
        
        print(f"‚úÖ Loaded {len(self.metadata)} samples")
        print(f"‚úÖ Found {len(all_labels)} gesture classes")
        
        # Check input/label length ratio
        self._check_data_validity()
    
    def _check_data_validity(self):
        """Check if data is valid for CTC"""
        print("\nüîç Checking data validity...")
        problematic = []
        
        for item in self.metadata:
            keypoints = np.load(os.path.join(self.data_dir, item["file"]))
            input_len = keypoints.shape[0]
            label_len = len(item["labels"])
            ratio = input_len / label_len if label_len > 0 else 0
            
            if ratio < 2:
                problematic.append((item["file"], input_len, label_len, ratio))
        
        if problematic:
            print(f"‚ö†Ô∏è WARNING: {len(problematic)} samples have input_length < 2 * label_length")
            print("This may cause CTC training to fail!")
            for fname, inp, lbl, r in problematic[:5]:
                print(f"  {fname}: input={inp}, label={lbl}, ratio={r:.2f}")
        else:
            print("‚úÖ All samples have valid length ratios")
    
    def __len__(self):
        return len(self.metadata)
    
    def __getitem__(self, idx):
        item = self.metadata[idx]
        keypoints = np.load(os.path.join(self.data_dir, item["file"]))
        keypoints = keypoints.reshape(keypoints.shape[0], -1)  # (T, 84)
        label_seq = [self.label_to_idx[lbl] for lbl in item["labels"]]
        return torch.tensor(keypoints, dtype=torch.float32), torch.tensor(label_seq, dtype=torch.long)

def collate_fn(batch):
    inputs, labels = zip(*batch)
    input_lengths = torch.tensor([len(x) for x in inputs])
    label_lengths = torch.tensor([len(l) for l in labels])
    inputs_padded = pad_sequence(inputs, batch_first=True)
    labels_concat = torch.cat(labels)
    return inputs_padded, labels_concat, input_lengths, label_lengths

# ===================================================
# MODEL (SMALLER)
# ===================================================
class TransformerCTC(nn.Module):
    def __init__(self, input_dim, model_dim, num_classes, num_layers=2, num_heads=4, ff_dim=256):
        super().__init__()
        self.input_fc = nn.Linear(input_dim, model_dim)
        self.pos_encoder = PositionalEncoding(model_dim)
        
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=model_dim,
            nhead=num_heads,
            dim_feedforward=ff_dim,
            dropout=0.3,  # TƒÉng dropout
            batch_first=True
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.output_fc = nn.Linear(model_dim, num_classes)
    
    def forward(self, x):
        x = self.input_fc(x)
        x = self.pos_encoder(x)
        x = self.encoder(x)
        return self.output_fc(x)

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)
    
    def forward(self, x):
        return x + self.pe[:x.size(1), :].unsqueeze(0)

# ===================================================
# CTC LOSS WITH BLANK PENALTY
# ===================================================
class CTCLossWithPenalty(nn.Module):
    def __init__(self, blank=0, zero_infinity=True, blank_penalty=0.4):
        super().__init__()
        self.ctc_loss = nn.CTCLoss(blank=blank, zero_infinity=zero_infinity, reduction='mean')
        self.blank_penalty = blank_penalty
    
    def forward(self, log_probs, targets, input_lengths, target_lengths):
        ctc_loss = self.ctc_loss(log_probs, targets, input_lengths, target_lengths)
        
        # Penalty for predicting too many blanks
        blank_prob = torch.exp(log_probs[:, :, 0])
        blank_ratio = blank_prob.mean()
        penalty = blank_ratio * self.blank_penalty
        
        return ctc_loss + penalty, ctc_loss.item(), blank_ratio.item()

# ===================================================
# TRAINING
# ===================================================
def train_epoch(model, loader, criterion, optimizer, device):
    model.train()
    total_loss = 0
    total_ctc_loss = 0
    total_blank_ratio = 0
    
    for x, y, x_len, y_len in tqdm(loader, desc="Training"):
        x, y = x.to(device), y.to(device)
        
        logits = model(x)
        log_probs = logits.log_softmax(2).transpose(0, 1)  # (T, B, C)
        
        loss, ctc_loss, blank_ratio = criterion(log_probs, y, x_len, y_len)
        
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # Gradient clipping
        optimizer.step()
        
        total_loss += loss.item()
        total_ctc_loss += ctc_loss
        total_blank_ratio += blank_ratio
    
    n = len(loader)
    return total_loss/n, total_ctc_loss/n, total_blank_ratio/n

def validate(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    total_ctc_loss = 0
    
    with torch.no_grad():
        for x, y, x_len, y_len in loader:
            x, y = x.to(device), y.to(device)
            logits = model(x)
            log_probs = logits.log_softmax(2).transpose(0, 1)
            loss, ctc_loss, _ = criterion(log_probs, y, x_len, y_len)
            total_loss += loss.item()
            total_ctc_loss += ctc_loss
    
    n = len(loader)
    return total_loss/n, total_ctc_loss/n

# ===================================================
# MAIN
# ===================================================
def main():
    print("="*60)
    print("üöÄ TRAINING TRANSFORMER CTC MODEL (IMPROVED)")
    print("="*60)
    
    # Load dataset
    dataset = GestureCTCDataset(DATA_PATH, METADATA_PATH)
    num_classes = len(dataset.label_to_idx) + 1
    
    # Split
    train_size = int(TRAIN_RATIO * len(dataset))
    val_size = int(VAL_RATIO * len(dataset))
    test_size = len(dataset) - train_size - val_size
    train_set, val_set, test_set = random_split(dataset, [train_size, val_size, test_size])
    
    train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
    val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)
    
    print(f"\nüìä Data split:")
    print(f"   Train: {train_size} samples")
    print(f"   Val: {val_size} samples")
    print(f"   Test: {test_size} samples")
    
    # Model
    model = TransformerCTC(84, MODEL_DIM, num_classes, NUM_LAYERS, NUM_HEADS, FF_DIM).to(DEVICE)
    criterion = CTCLossWithPenalty(blank=0, zero_infinity=True, blank_penalty=0.4)
    optimizer = optim.Adam(model.parameters(), lr=LR)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)
    
    print(f"\nüîß Model config:")
    print(f"   Model dim: {MODEL_DIM}")
    print(f"   Layers: {NUM_LAYERS}")
    print(f"   Heads: {NUM_HEADS}")
    print(f"   FF dim: {FF_DIM}")
    print(f"   Classes: {num_classes}")
    print(f"   Device: {DEVICE}")
    print(f"   Learning rate: {LR}")
    
    # Training loop
    best_val_loss = float('inf')
    patience_counter = 0
    max_patience = 10
    
    print(f"\n{'='*60}")
    print("üî• STARTING TRAINING")
    print(f"{'='*60}\n")
    
    for epoch in range(EPOCHS):
        train_loss, train_ctc, train_blank = train_epoch(model, train_loader, criterion, optimizer, DEVICE)
        val_loss, val_ctc = validate(model, val_loader, criterion, DEVICE)
        
        print(f"\nüìä Epoch {epoch+1}/{EPOCHS}")
        print(f"   Train Loss: {train_loss:.4f} | CTC: {train_ctc:.4f} | Blank: {train_blank:.2%}")
        print(f"   Val Loss: {val_loss:.4f} | CTC: {val_ctc:.4f}")
        
        # Learning rate scheduling
        old_lr = optimizer.param_groups[0]['lr']
        scheduler.step(val_loss)
        new_lr = optimizer.param_groups[0]['lr']
        if new_lr != old_lr:
            print(f"   üìâ Learning rate reduced: {old_lr:.6f} ‚Üí {new_lr:.6f}")
        
        # Save best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), "transformer_ctc_gesture_best.pth")
            print(f"   ‚úÖ Best model saved! (Val Loss: {val_loss:.4f})")
            patience_counter = 0
        else:
            patience_counter += 1
        
        # Early stopping
        if patience_counter >= max_patience:
            print(f"\n‚ö†Ô∏è Early stopping triggered (no improvement for {max_patience} epochs)")
            break
    
    print(f"\n{'='*60}")
    print("‚úÖ TRAINING COMPLETED!")
    print(f"Best Val Loss: {best_val_loss:.4f}")
    print(f"{'='*60}")

if __name__ == "__main__":
    main()

üöÄ TRAINING TRANSFORMER CTC MODEL (IMPROVED)
‚úÖ Loaded 3000 samples
‚úÖ Found 30 gesture classes

üîç Checking data validity...
‚úÖ All samples have valid length ratios

üìä Data split:
   Train: 2100 samples
   Val: 450 samples
   Test: 450 samples

üîß Model config:
   Model dim: 128
   Layers: 2
   Heads: 4
   FF dim: 256
   Classes: 31
   Device: cpu
   Learning rate: 0.0001

üî• STARTING TRAINING



Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:43<00:00,  3.03it/s]



üìä Epoch 1/50
   Train Loss: 23.6606 | CTC: 23.3261 | Blank: 83.61%
   Val Loss: 4.4216 | CTC: 4.0249
   ‚úÖ Best model saved! (Val Loss: 4.4216)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:44<00:00,  2.94it/s]



üìä Epoch 2/50
   Train Loss: 4.1862 | CTC: 3.7920 | Blank: 98.53%
   Val Loss: 4.0888 | CTC: 3.6923
   ‚úÖ Best model saved! (Val Loss: 4.0888)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:52<00:00,  2.52it/s]



üìä Epoch 3/50
   Train Loss: 3.8614 | CTC: 3.4670 | Blank: 98.59%
   Val Loss: 3.7447 | CTC: 3.3479
   ‚úÖ Best model saved! (Val Loss: 3.7447)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:32<00:00,  4.05it/s]



üìä Epoch 4/50
   Train Loss: 3.5583 | CTC: 3.1636 | Blank: 98.66%
   Val Loss: 3.4363 | CTC: 3.0396
   ‚úÖ Best model saved! (Val Loss: 3.4363)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:45<00:00,  2.91it/s]



üìä Epoch 5/50
   Train Loss: 3.3048 | CTC: 2.9100 | Blank: 98.68%
   Val Loss: 3.1817 | CTC: 2.7850
   ‚úÖ Best model saved! (Val Loss: 3.1817)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:53<00:00,  2.48it/s]



üìä Epoch 6/50
   Train Loss: 3.0716 | CTC: 2.6768 | Blank: 98.70%
   Val Loss: 2.9156 | CTC: 2.5191
   ‚úÖ Best model saved! (Val Loss: 2.9156)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:57<00:00,  2.31it/s]



üìä Epoch 7/50
   Train Loss: 2.8405 | CTC: 2.4456 | Blank: 98.71%
   Val Loss: 2.6301 | CTC: 2.2343
   ‚úÖ Best model saved! (Val Loss: 2.6301)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:33<00:00,  3.95it/s]



üìä Epoch 8/50
   Train Loss: 2.6109 | CTC: 2.2162 | Blank: 98.67%
   Val Loss: 2.4128 | CTC: 2.0165
   ‚úÖ Best model saved! (Val Loss: 2.4128)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:35<00:00,  3.67it/s]



üìä Epoch 9/50
   Train Loss: 2.3686 | CTC: 1.9742 | Blank: 98.60%
   Val Loss: 2.0622 | CTC: 1.6676
   ‚úÖ Best model saved! (Val Loss: 2.0622)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:38<00:00,  3.44it/s]



üìä Epoch 10/50
   Train Loss: 2.1236 | CTC: 1.7295 | Blank: 98.52%
   Val Loss: 1.8461 | CTC: 1.4508
   ‚úÖ Best model saved! (Val Loss: 1.8461)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:35<00:00,  3.76it/s]



üìä Epoch 11/50
   Train Loss: 1.9246 | CTC: 1.5310 | Blank: 98.41%
   Val Loss: 1.6004 | CTC: 1.2061
   ‚úÖ Best model saved! (Val Loss: 1.6004)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:48<00:00,  2.75it/s]



üìä Epoch 12/50
   Train Loss: 1.7311 | CTC: 1.3376 | Blank: 98.36%
   Val Loss: 1.3734 | CTC: 0.9799
   ‚úÖ Best model saved! (Val Loss: 1.3734)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:59<00:00,  2.21it/s]



üìä Epoch 13/50
   Train Loss: 1.5530 | CTC: 1.1597 | Blank: 98.31%
   Val Loss: 1.1804 | CTC: 0.7872
   ‚úÖ Best model saved! (Val Loss: 1.1804)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:59<00:00,  2.23it/s]



üìä Epoch 14/50
   Train Loss: 1.3763 | CTC: 0.9833 | Blank: 98.27%
   Val Loss: 1.0440 | CTC: 0.6506
   ‚úÖ Best model saved! (Val Loss: 1.0440)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [01:00<00:00,  2.17it/s]



üìä Epoch 15/50
   Train Loss: 1.2354 | CTC: 0.8424 | Blank: 98.25%
   Val Loss: 0.9491 | CTC: 0.5561
   ‚úÖ Best model saved! (Val Loss: 0.9491)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:48<00:00,  2.73it/s]



üìä Epoch 16/50
   Train Loss: 1.1280 | CTC: 0.7350 | Blank: 98.24%
   Val Loss: 0.8664 | CTC: 0.4731
   ‚úÖ Best model saved! (Val Loss: 0.8664)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:32<00:00,  4.04it/s]



üìä Epoch 17/50
   Train Loss: 1.0333 | CTC: 0.6403 | Blank: 98.25%
   Val Loss: 0.8243 | CTC: 0.4306
   ‚úÖ Best model saved! (Val Loss: 0.8243)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:32<00:00,  4.04it/s]



üìä Epoch 18/50
   Train Loss: 0.9577 | CTC: 0.5647 | Blank: 98.25%
   Val Loss: 0.7731 | CTC: 0.3809
   ‚úÖ Best model saved! (Val Loss: 0.7731)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:32<00:00,  4.04it/s]



üìä Epoch 19/50
   Train Loss: 0.8900 | CTC: 0.4969 | Blank: 98.25%
   Val Loss: 0.6992 | CTC: 0.3059
   ‚úÖ Best model saved! (Val Loss: 0.6992)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:32<00:00,  4.06it/s]



üìä Epoch 20/50
   Train Loss: 0.8316 | CTC: 0.4385 | Blank: 98.27%
   Val Loss: 0.6957 | CTC: 0.3018
   ‚úÖ Best model saved! (Val Loss: 0.6957)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:32<00:00,  4.04it/s]



üìä Epoch 21/50
   Train Loss: 0.7798 | CTC: 0.3866 | Blank: 98.29%
   Val Loss: 0.6399 | CTC: 0.2463
   ‚úÖ Best model saved! (Val Loss: 0.6399)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:32<00:00,  4.05it/s]



üìä Epoch 22/50
   Train Loss: 0.7438 | CTC: 0.3506 | Blank: 98.30%
   Val Loss: 0.6139 | CTC: 0.2205
   ‚úÖ Best model saved! (Val Loss: 0.6139)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:39<00:00,  3.30it/s]



üìä Epoch 23/50
   Train Loss: 0.7017 | CTC: 0.3084 | Blank: 98.32%
   Val Loss: 0.5946 | CTC: 0.2011
   ‚úÖ Best model saved! (Val Loss: 0.5946)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:59<00:00,  2.20it/s]



üìä Epoch 24/50
   Train Loss: 0.6836 | CTC: 0.2905 | Blank: 98.28%
   Val Loss: 0.5734 | CTC: 0.1802
   ‚úÖ Best model saved! (Val Loss: 0.5734)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:55<00:00,  2.37it/s]



üìä Epoch 25/50
   Train Loss: 0.6491 | CTC: 0.2558 | Blank: 98.32%
   Val Loss: 0.5745 | CTC: 0.1806


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:32<00:00,  4.07it/s]



üìä Epoch 26/50
   Train Loss: 0.6251 | CTC: 0.2318 | Blank: 98.32%
   Val Loss: 0.5503 | CTC: 0.1566
   ‚úÖ Best model saved! (Val Loss: 0.5503)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:32<00:00,  4.00it/s]



üìä Epoch 27/50
   Train Loss: 0.6047 | CTC: 0.2114 | Blank: 98.33%
   Val Loss: 0.5447 | CTC: 0.1512
   ‚úÖ Best model saved! (Val Loss: 0.5447)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:33<00:00,  3.96it/s]



üìä Epoch 28/50
   Train Loss: 0.5806 | CTC: 0.1872 | Blank: 98.34%
   Val Loss: 0.5301 | CTC: 0.1363
   ‚úÖ Best model saved! (Val Loss: 0.5301)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:33<00:00,  3.92it/s]



üìä Epoch 29/50
   Train Loss: 0.5658 | CTC: 0.1724 | Blank: 98.33%
   Val Loss: 0.5304 | CTC: 0.1371


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:32<00:00,  4.04it/s]



üìä Epoch 30/50
   Train Loss: 0.5515 | CTC: 0.1581 | Blank: 98.34%
   Val Loss: 0.5043 | CTC: 0.1109
   ‚úÖ Best model saved! (Val Loss: 0.5043)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:33<00:00,  3.96it/s]



üìä Epoch 31/50
   Train Loss: 0.5370 | CTC: 0.1436 | Blank: 98.33%
   Val Loss: 0.5059 | CTC: 0.1122


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:32<00:00,  4.03it/s]



üìä Epoch 32/50
   Train Loss: 0.5254 | CTC: 0.1321 | Blank: 98.33%
   Val Loss: 0.5033 | CTC: 0.1096
   ‚úÖ Best model saved! (Val Loss: 0.5033)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:32<00:00,  4.02it/s]



üìä Epoch 33/50
   Train Loss: 0.5173 | CTC: 0.1240 | Blank: 98.34%
   Val Loss: 0.4918 | CTC: 0.0985
   ‚úÖ Best model saved! (Val Loss: 0.4918)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:32<00:00,  4.00it/s]



üìä Epoch 34/50
   Train Loss: 0.5114 | CTC: 0.1181 | Blank: 98.33%
   Val Loss: 0.4871 | CTC: 0.0937
   ‚úÖ Best model saved! (Val Loss: 0.4871)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:32<00:00,  4.02it/s]



üìä Epoch 35/50
   Train Loss: 0.4971 | CTC: 0.1037 | Blank: 98.34%
   Val Loss: 0.4826 | CTC: 0.0892
   ‚úÖ Best model saved! (Val Loss: 0.4826)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:33<00:00,  3.92it/s]



üìä Epoch 36/50
   Train Loss: 0.4924 | CTC: 0.0990 | Blank: 98.35%
   Val Loss: 0.4764 | CTC: 0.0831
   ‚úÖ Best model saved! (Val Loss: 0.4764)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:59<00:00,  2.21it/s]



üìä Epoch 37/50
   Train Loss: 0.4853 | CTC: 0.0920 | Blank: 98.32%
   Val Loss: 0.4736 | CTC: 0.0800
   ‚úÖ Best model saved! (Val Loss: 0.4736)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:52<00:00,  2.52it/s]



üìä Epoch 38/50
   Train Loss: 0.4711 | CTC: 0.0777 | Blank: 98.34%
   Val Loss: 0.4832 | CTC: 0.0895


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:32<00:00,  4.06it/s]



üìä Epoch 39/50
   Train Loss: 0.4701 | CTC: 0.0768 | Blank: 98.33%
   Val Loss: 0.4564 | CTC: 0.0630
   ‚úÖ Best model saved! (Val Loss: 0.4564)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:36<00:00,  3.65it/s]



üìä Epoch 40/50
   Train Loss: 0.4637 | CTC: 0.0704 | Blank: 98.31%
   Val Loss: 0.4605 | CTC: 0.0669


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:33<00:00,  3.90it/s]



üìä Epoch 41/50
   Train Loss: 0.4601 | CTC: 0.0667 | Blank: 98.35%
   Val Loss: 0.4519 | CTC: 0.0582
   ‚úÖ Best model saved! (Val Loss: 0.4519)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:32<00:00,  4.01it/s]



üìä Epoch 42/50
   Train Loss: 0.4564 | CTC: 0.0631 | Blank: 98.31%
   Val Loss: 0.4627 | CTC: 0.0688


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:32<00:00,  4.09it/s]



üìä Epoch 43/50
   Train Loss: 0.4567 | CTC: 0.0635 | Blank: 98.32%
   Val Loss: 0.4682 | CTC: 0.0746


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:33<00:00,  3.99it/s]



üìä Epoch 44/50
   Train Loss: 0.4531 | CTC: 0.0599 | Blank: 98.29%
   Val Loss: 0.4609 | CTC: 0.0672


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:33<00:00,  4.00it/s]



üìä Epoch 45/50
   Train Loss: 0.4506 | CTC: 0.0572 | Blank: 98.34%
   Val Loss: 0.4492 | CTC: 0.0558
   ‚úÖ Best model saved! (Val Loss: 0.4492)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:32<00:00,  4.03it/s]



üìä Epoch 46/50
   Train Loss: 0.4385 | CTC: 0.0451 | Blank: 98.36%
   Val Loss: 0.4534 | CTC: 0.0599


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:33<00:00,  3.99it/s]



üìä Epoch 47/50
   Train Loss: 0.4487 | CTC: 0.0556 | Blank: 98.28%
   Val Loss: 0.4510 | CTC: 0.0580


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:32<00:00,  4.04it/s]



üìä Epoch 48/50
   Train Loss: 0.4394 | CTC: 0.0464 | Blank: 98.26%
   Val Loss: 0.4532 | CTC: 0.0601


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:33<00:00,  3.99it/s]



üìä Epoch 49/50
   Train Loss: 0.4428 | CTC: 0.0496 | Blank: 98.28%
   Val Loss: 0.4448 | CTC: 0.0515
   ‚úÖ Best model saved! (Val Loss: 0.4448)


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 132/132 [00:32<00:00,  4.02it/s]



üìä Epoch 50/50
   Train Loss: 0.4359 | CTC: 0.0430 | Blank: 98.22%
   Val Loss: 0.4478 | CTC: 0.0546

‚úÖ TRAINING COMPLETED!
Best Val Loss: 0.4448
