# Baseline CAPTCHA Recognition Model

This notebook implements a simple CNN baseline model for CAPTCHA recognition. The baseline uses a classic convolutional neural network architecture to demonstrate the baseline performance, which highlights the superiority of our custom approach.

In [10]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os
import numpy as np
from tqdm import tqdm
import string

## Dataset Configuration

Define the character set and dataset parameters for CAPTCHA recognition.

In [11]:
# Define character set (digits + lowercase letters)
CHARACTERS = string.digits + string.ascii_lowercase  # '0123456789abcdefghijklmnopqrstuvwxyz'
NUM_CLASSES = len(CHARACTERS)  # 36 classes
MAX_LENGTH = 8  # Maximum CAPTCHA length

# Create character to index mapping
char_to_idx = {char: idx for idx, char in enumerate(CHARACTERS)}
idx_to_char = {idx: char for idx, char in enumerate(CHARACTERS)}

# Data paths
TRAIN_DIR = 'processed/train'
TEST_DIR = 'processed/test'

print(f"Number of character classes: {NUM_CLASSES}")
print(f"Character set: {CHARACTERS}")
print(f"Maximum CAPTCHA length: {MAX_LENGTH}")

Number of character classes: 36
Character set: 0123456789abcdefghijklmnopqrstuvwxyz
Maximum CAPTCHA length: 8


## Custom Dataset Class

Create a PyTorch Dataset for loading CAPTCHA images and labels.

In [12]:
class CaptchaDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_files = [f for f in os.listdir(data_dir) if f.endswith('.png')]
        
    def __len__(self):
        return len(self.image_files)
    
    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        img_path = os.path.join(self.data_dir, img_name)
        
        # Load image
        image = Image.open(img_path).convert('RGB')
        
        # Extract label from filename (format: label-0.png)
        label_str = img_name.split('-')[0].lower()
        
        # Convert label string to list of indices
        label_indices = [char_to_idx[c] for c in label_str]
        
        if self.transform:
            image = self.transform(image)
            
        return image, label_indices, label_str

# Define transforms
transform = transforms.Compose([
    transforms.Resize((64, 200)),  # Resize to standard size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Custom collate function for variable length labels
def collate_fn(batch):
    images, labels, label_strs = zip(*batch)
    images = torch.stack(images, 0)
    
    # Get label lengths
    label_lengths = torch.tensor([len(label) for label in labels], dtype=torch.long)
    
    # Flatten all labels into one tensor
    labels = [item for sublist in labels for item in sublist]
    labels = torch.tensor(labels, dtype=torch.long)
    
    return images, labels, label_lengths, label_strs

print("Dataset class created successfully")

Dataset class created successfully


## Simplified Baseline CRNN Model with CTC Loss

Define a simplified CRNN architecture appropriate for ~8K training samples. Uses fewer conv blocks and a single-layer LSTM to reduce model complexity and prevent overfitting.

In [None]:
class CRNN(nn.Module):
    def __init__(self, img_height=64, num_classes=NUM_CLASSES):
        super(CRNN, self).__init__()
        self.num_classes = num_classes
        
        # Simplified CNN for feature extraction (4 blocks instead of 6)
        self.cnn = nn.Sequential(
            # Conv block 1
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),  # 32x100
            
            # Conv block 2
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),  # 16x50
            
            # Conv block 3
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 1)),  # 8x50
            
            # Conv block 4
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 1)),  # 4x50
        )
        
        # Calculate the height after convolutions: 64 -> 32 -> 16 -> 8 -> 4 = 4
        self.rnn_input_size = 256 * 4  # 1024
        
        # Single-layer Bidirectional LSTM with smaller hidden size
        self.rnn = nn.LSTM(
            self.rnn_input_size,
            128,  # Reduced from 256 to 128
            num_layers=1,  # Reduced from 2 to 1
            bidirectional=True,
            batch_first=True,
            dropout=0.0
        )
        
        # Fully connected layer
        self.fc = nn.Linear(128 * 2, num_classes + 1)  # +1 for CTC blank
        
    def forward(self, x):
        # CNN feature extraction
        conv = self.cnn(x)  # [batch, 256, 4, width]
        
        # Reshape for RNN: [batch, width, channels*height]
        batch, channel, height, width = conv.size()
        conv = conv.permute(0, 3, 1, 2)  # [batch, width, channel, height]
        conv = conv.contiguous().view(batch, width, channel * height)
        
        # RNN
        rnn_out, _ = self.rnn(conv)  # [batch, width, 256]
        
        # Fully connected
        output = self.fc(rnn_out)  # [batch, width, num_classes+1]
        
        # For CTC loss: [width, batch, num_classes+1]
        output = output.permute(1, 0, 2)
        
        return output

# Initialize model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CRNN(img_height=64, num_classes=NUM_CLASSES).to(device)

print(f"Model initialized on device: {device}")
print(f"Total parameters: {sum(p.numel() for p in model.parameters()):,}")
print(f"Model architecture simplified for {len(train_dataset) if 'train_dataset' in dir() else '~8K'} training samples")

Model initialized on device: cuda
Total parameters: 10,821,285


## Training Setup

Initialize data loaders, loss function, and optimizer.

In [14]:
# Create datasets
train_dataset = CaptchaDataset(TRAIN_DIR, transform=transform)
test_dataset = CaptchaDataset(TEST_DIR, transform=transform)

# Create data loaders with custom collate function
BATCH_SIZE = 64
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, 
                         num_workers=0, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, 
                        num_workers=0, collate_fn=collate_fn)

# CTC Loss and optimizer
criterion = nn.CTCLoss(blank=NUM_CLASSES, zero_infinity=True)
optimizer = optim.Adam(model.parameters(), lr=0.0005)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True)

print(f"Training samples: {len(train_dataset)}")
print(f"Test samples: {len(test_dataset)}")
print(f"Batch size: {BATCH_SIZE}")
print(f"Using CTC Loss with blank index: {NUM_CLASSES}")

Training samples: 7836
Test samples: 2000
Batch size: 64
Using CTC Loss with blank index: 36




## Training Function

Define the training loop for one epoch.

In [15]:
def train_epoch(model, train_loader, criterion, optimizer, device):
    model.train()
    total_loss = 0
    
    progress_bar = tqdm(train_loader, desc='Training')
    
    for images, labels, label_lengths, _ in progress_bar:
        images = images.to(device)
        labels = labels.to(device)
        label_lengths = label_lengths.to(device)
        
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(images)  # [T, N, C] where T=width, N=batch, C=classes
        
        # Calculate input lengths (all same for this model)
        input_lengths = torch.full(size=(images.size(0),), 
                                   fill_value=outputs.size(0), 
                                   dtype=torch.long,
                                   device=device)
        
        # CTC Loss
        loss = criterion(outputs.log_softmax(2), labels, input_lengths, label_lengths)
        
        # Backward pass
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5)  # Gradient clipping
        optimizer.step()
        
        total_loss += loss.item()
        
        # Update progress bar
        progress_bar.set_postfix({'loss': total_loss / (progress_bar.n + 1)})
    
    avg_loss = total_loss / len(train_loader)
    
    return avg_loss

print("Training function defined")

Training function defined


## Evaluation Function

Define the evaluation function to test the model and return predictions as strings.

In [16]:
def decode_predictions(outputs):
    """Decode CTC outputs to strings"""
    # outputs: [T, N, C]
    _, preds = outputs.max(2)  # [T, N]
    preds = preds.transpose(1, 0).contiguous()  # [N, T]
    
    decoded = []
    for i in range(preds.size(0)):
        pred = preds[i]
        # Remove blanks and duplicates
        pred_text = []
        prev_char = None
        for char_idx in pred:
            char_idx = char_idx.item()
            if char_idx != NUM_CLASSES and char_idx != prev_char:  # Not blank and not duplicate
                pred_text.append(idx_to_char[char_idx])
            prev_char = char_idx
        decoded.append(''.join(pred_text))
    
    return decoded

def evaluate(model, test_loader, criterion, device):
    model.eval()
    total_loss = 0
    correct_sequences = 0
    total_sequences = 0
    correct_chars = 0
    total_chars = 0
    
    with torch.no_grad():
        progress_bar = tqdm(test_loader, desc='Evaluating')
        
        for images, labels, label_lengths, label_strs in progress_bar:
            images = images.to(device)
            labels = labels.to(device)
            label_lengths = label_lengths.to(device)
            
            # Forward pass
            outputs = model(images)
            
            # Calculate input lengths
            input_lengths = torch.full(size=(images.size(0),), 
                                      fill_value=outputs.size(0), 
                                      dtype=torch.long,
                                      device=device)
            
            # CTC Loss
            loss = criterion(outputs.log_softmax(2), labels, input_lengths, label_lengths)
            total_loss += loss.item()
            
            # Decode predictions
            pred_strs = decode_predictions(outputs)
            
            # Calculate accuracy
            for pred, true in zip(pred_strs, label_strs):
                if pred == true:
                    correct_sequences += 1
                
                # Character-level accuracy
                min_len = min(len(pred), len(true))
                for i in range(min_len):
                    if pred[i] == true[i]:
                        correct_chars += 1
                total_chars += len(true)
            
            total_sequences += len(label_strs)
            
            seq_acc = 100 * correct_sequences / total_sequences
            char_acc = 100 * correct_chars / total_chars if total_chars > 0 else 0
            
            progress_bar.set_postfix({
                'loss': total_loss / (progress_bar.n + 1),
                'char_acc': char_acc,
                'seq_acc': seq_acc
            })
    
    avg_loss = total_loss / len(test_loader)
    char_accuracy = 100 * correct_chars / total_chars if total_chars > 0 else 0
    seq_accuracy = 100 * correct_sequences / total_sequences
    
    return avg_loss, char_accuracy, seq_accuracy

def predict_captcha(model, image, device):
    """Predict CAPTCHA text from an image and return as string"""
    model.eval()
    with torch.no_grad():
        image = image.unsqueeze(0).to(device)
        outputs = model(image)
        pred_strs = decode_predictions(outputs)
        return pred_strs[0]

print("Evaluation functions defined")

Evaluation functions defined


## Training Loop

Train the baseline model for multiple epochs.

In [None]:
# Training configuration
NUM_EPOCHS = 15  # Reduced from 20 to prevent overfitting on smaller dataset

# Training history
history = {
    'train_loss': [],
    'test_loss': [],
    'test_char_acc': [],
    'test_seq_acc': []
}

print("Starting training...")
best_seq_acc = 0

for epoch in range(NUM_EPOCHS):
    print(f"\nEpoch {epoch+1}/{NUM_EPOCHS}")
    print("-" * 50)
    
    # Train
    train_loss = train_epoch(model, train_loader, criterion, optimizer, device)
    history['train_loss'].append(train_loss)
    
    # Evaluate
    test_loss, test_char_acc, test_seq_acc = evaluate(model, test_loader, criterion, device)
    history['test_loss'].append(test_loss)
    history['test_char_acc'].append(test_char_acc)
    history['test_seq_acc'].append(test_seq_acc)
    
    print(f"\nTrain Loss: {train_loss:.4f}")
    print(f"Test Loss: {test_loss:.4f} | Test Char Acc: {test_char_acc:.2f}% | Test Seq Acc: {test_seq_acc:.2f}%")
    
    # Learning rate scheduling
    scheduler.step(test_loss)
    
    # Save best model
    if test_seq_acc > best_seq_acc:
        best_seq_acc = test_seq_acc
        torch.save(model.state_dict(), 'baseline_best_model.pth')
        print(f"✓ Best model saved with sequence accuracy: {best_seq_acc:.2f}%")

print("\n" + "=" * 50)
print("Training completed!")
print(f"Best Test Sequence Accuracy: {best_seq_acc:.2f}%")

Starting training...

Epoch 1/20
--------------------------------------------------


Training: 100%|██████████| 123/123 [00:43<00:00,  2.84it/s, loss=4.32]
Evaluating: 100%|██████████| 32/32 [00:08<00:00,  4.00it/s, loss=3.93, char_acc=0, seq_acc=0]



Train Loss: 4.3167
Test Loss: 3.9270 | Test Char Acc: 0.00% | Test Seq Acc: 0.00%

Epoch 2/20
--------------------------------------------------


Training: 100%|██████████| 123/123 [00:39<00:00,  3.14it/s, loss=3.93]
Training: 100%|██████████| 123/123 [00:39<00:00,  3.14it/s, loss=3.93]
Evaluating: 100%|██████████| 32/32 [00:07<00:00,  4.57it/s, loss=3.93, char_acc=0, seq_acc=0]
Evaluating: 100%|██████████| 32/32 [00:07<00:00,  4.57it/s, loss=3.93, char_acc=0, seq_acc=0]



Train Loss: 3.9312
Test Loss: 3.9291 | Test Char Acc: 0.00% | Test Seq Acc: 0.00%

Epoch 3/20
--------------------------------------------------


Training: 100%|██████████| 123/123 [00:38<00:00,  3.17it/s, loss=3.93]
Training: 100%|██████████| 123/123 [00:38<00:00,  3.17it/s, loss=3.93]
Evaluating: 100%|██████████| 32/32 [00:06<00:00,  4.59it/s, loss=3.93, char_acc=0, seq_acc=0]
Evaluating: 100%|██████████| 32/32 [00:06<00:00,  4.59it/s, loss=3.93, char_acc=0, seq_acc=0]



Train Loss: 3.9315
Test Loss: 3.9289 | Test Char Acc: 0.00% | Test Seq Acc: 0.00%

Epoch 4/20
--------------------------------------------------


Training: 100%|██████████| 123/123 [00:38<00:00,  3.20it/s, loss=3.93]
Training: 100%|██████████| 123/123 [00:38<00:00,  3.20it/s, loss=3.93]
Evaluating: 100%|██████████| 32/32 [00:06<00:00,  4.69it/s, loss=3.93, char_acc=0, seq_acc=0]
Evaluating: 100%|██████████| 32/32 [00:06<00:00,  4.69it/s, loss=3.93, char_acc=0, seq_acc=0]



Train Loss: 3.9302
Test Loss: 3.9260 | Test Char Acc: 0.00% | Test Seq Acc: 0.00%

Epoch 5/20
--------------------------------------------------


Training: 100%|██████████| 123/123 [00:38<00:00,  3.20it/s, loss=3.93]
Training: 100%|██████████| 123/123 [00:38<00:00,  3.20it/s, loss=3.93]
Evaluating: 100%|██████████| 32/32 [00:06<00:00,  4.58it/s, loss=3.93, char_acc=0, seq_acc=0]
Evaluating: 100%|██████████| 32/32 [00:06<00:00,  4.58it/s, loss=3.93, char_acc=0, seq_acc=0]



Train Loss: 3.9294
Test Loss: 3.9259 | Test Char Acc: 0.00% | Test Seq Acc: 0.00%

Epoch 6/20
--------------------------------------------------


Training: 100%|██████████| 123/123 [00:38<00:00,  3.18it/s, loss=3.93]
Training: 100%|██████████| 123/123 [00:38<00:00,  3.18it/s, loss=3.93]
Evaluating: 100%|██████████| 32/32 [00:07<00:00,  4.56it/s, loss=3.92, char_acc=0, seq_acc=0]
Evaluating: 100%|██████████| 32/32 [00:07<00:00,  4.56it/s, loss=3.92, char_acc=0, seq_acc=0]



Train Loss: 3.9263
Test Loss: 3.9213 | Test Char Acc: 0.00% | Test Seq Acc: 0.00%

Epoch 7/20
--------------------------------------------------


Training: 100%|██████████| 123/123 [00:39<00:00,  3.15it/s, loss=3.92]
Training: 100%|██████████| 123/123 [00:39<00:00,  3.15it/s, loss=3.92]
Evaluating: 100%|██████████| 32/32 [00:07<00:00,  4.45it/s, loss=3.92, char_acc=0, seq_acc=0]
Evaluating: 100%|██████████| 32/32 [00:07<00:00,  4.45it/s, loss=3.92, char_acc=0, seq_acc=0]



Train Loss: 3.9203
Test Loss: 3.9183 | Test Char Acc: 0.00% | Test Seq Acc: 0.00%

Epoch 8/20
--------------------------------------------------


Training: 100%|██████████| 123/123 [00:38<00:00,  3.18it/s, loss=3.92]
Training: 100%|██████████| 123/123 [00:38<00:00,  3.18it/s, loss=3.92]
Evaluating: 100%|██████████| 32/32 [00:07<00:00,  4.45it/s, loss=3.91, char_acc=0, seq_acc=0]
Evaluating: 100%|██████████| 32/32 [00:07<00:00,  4.45it/s, loss=3.91, char_acc=0, seq_acc=0]



Train Loss: 3.9192
Test Loss: 3.9103 | Test Char Acc: 0.00% | Test Seq Acc: 0.00%

Epoch 9/20
--------------------------------------------------


Training: 100%|██████████| 123/123 [00:38<00:00,  3.17it/s, loss=3.92]
Training: 100%|██████████| 123/123 [00:38<00:00,  3.17it/s, loss=3.92]
Evaluating: 100%|██████████| 32/32 [00:07<00:00,  4.43it/s, loss=3.92, char_acc=0, seq_acc=0]




Train Loss: 3.9200
Test Loss: 3.9164 | Test Char Acc: 0.00% | Test Seq Acc: 0.00%

Epoch 10/20
--------------------------------------------------


Training: 100%|██████████| 123/123 [00:38<00:00,  3.18it/s, loss=3.92]
Training: 100%|██████████| 123/123 [00:38<00:00,  3.18it/s, loss=3.92]
Evaluating: 100%|██████████| 32/32 [00:06<00:00,  4.63it/s, loss=3.91, char_acc=0, seq_acc=0]
Evaluating: 100%|██████████| 32/32 [00:06<00:00,  4.63it/s, loss=3.91, char_acc=0, seq_acc=0]



Train Loss: 3.9156
Test Loss: 3.9099 | Test Char Acc: 0.00% | Test Seq Acc: 0.00%

Epoch 11/20
--------------------------------------------------


Training: 100%|██████████| 123/123 [00:39<00:00,  3.15it/s, loss=3.92]
Training: 100%|██████████| 123/123 [00:39<00:00,  3.15it/s, loss=3.92]
Evaluating: 100%|██████████| 32/32 [00:07<00:00,  4.42it/s, loss=3.91, char_acc=0, seq_acc=0]
Evaluating: 100%|██████████| 32/32 [00:07<00:00,  4.42it/s, loss=3.91, char_acc=0, seq_acc=0]



Train Loss: 3.9156
Test Loss: 3.9083 | Test Char Acc: 0.00% | Test Seq Acc: 0.00%

Epoch 12/20
--------------------------------------------------


Training: 100%|██████████| 123/123 [00:38<00:00,  3.17it/s, loss=3.91]
Training: 100%|██████████| 123/123 [00:38<00:00,  3.17it/s, loss=3.91]
Evaluating: 100%|██████████| 32/32 [00:07<00:00,  4.54it/s, loss=3.91, char_acc=0, seq_acc=0]
Evaluating: 100%|██████████| 32/32 [00:07<00:00,  4.54it/s, loss=3.91, char_acc=0, seq_acc=0]



Train Loss: 3.9136
Test Loss: 3.9101 | Test Char Acc: 0.00% | Test Seq Acc: 0.00%

Epoch 13/20
--------------------------------------------------


Training: 100%|██████████| 123/123 [00:39<00:00,  3.08it/s, loss=3.91]
Training: 100%|██████████| 123/123 [00:39<00:00,  3.08it/s, loss=3.91]
Evaluating: 100%|██████████| 32/32 [00:07<00:00,  4.43it/s, loss=3.91, char_acc=0, seq_acc=0]
Evaluating: 100%|██████████| 32/32 [00:07<00:00,  4.43it/s, loss=3.91, char_acc=0, seq_acc=0]



Train Loss: 3.9136
Test Loss: 3.9096 | Test Char Acc: 0.00% | Test Seq Acc: 0.00%

Epoch 14/20
--------------------------------------------------


Training: 100%|██████████| 123/123 [00:38<00:00,  3.18it/s, loss=3.91]
Training: 100%|██████████| 123/123 [00:38<00:00,  3.18it/s, loss=3.91]
Evaluating: 100%|██████████| 32/32 [00:06<00:00,  4.59it/s, loss=3.91, char_acc=0, seq_acc=0]
Evaluating: 100%|██████████| 32/32 [00:06<00:00,  4.59it/s, loss=3.91, char_acc=0, seq_acc=0]



Train Loss: 3.9149
Test Loss: 3.9102 | Test Char Acc: 0.00% | Test Seq Acc: 0.00%

Epoch 15/20
--------------------------------------------------


Training: 100%|██████████| 123/123 [00:39<00:00,  3.12it/s, loss=3.91]
Training: 100%|██████████| 123/123 [00:39<00:00,  3.12it/s, loss=3.91]
Evaluating: 100%|██████████| 32/32 [00:07<00:00,  4.05it/s, loss=3.91, char_acc=0, seq_acc=0]
Evaluating: 100%|██████████| 32/32 [00:07<00:00,  4.05it/s, loss=3.91, char_acc=0, seq_acc=0]



Train Loss: 3.9139
Test Loss: 3.9097 | Test Char Acc: 0.00% | Test Seq Acc: 0.00%

Epoch 16/20
--------------------------------------------------


Training:  57%|█████▋    | 70/123 [00:22<00:17,  3.09it/s, loss=3.91]



KeyboardInterrupt: 

## Test Predictions

Test the model on sample images and display predictions as strings.

In [None]:
# Load best model
model.load_state_dict(torch.load('baseline_best_model.pth'))
model.eval()

# Test on a few samples
import matplotlib.pyplot as plt

# Get some test samples
test_images = []
test_labels = []
for i in range(10):
    img, _, label_str = test_dataset[i]
    test_images.append(img)
    test_labels.append(label_str)

# Make predictions
print("Sample Predictions:")
print("=" * 60)
correct_count = 0
for i, (img, true_label) in enumerate(zip(test_images, test_labels)):
    predicted_text = predict_captcha(model, img, device)
    match = "✓" if predicted_text == true_label else "✗"
    if predicted_text == true_label:
        correct_count += 1
    print(f"{match} Sample {i+1}: True: '{true_label}' | Predicted: '{predicted_text}'")

print("\n" + "=" * 60)
print(f"Accuracy on samples: {100 * correct_count / len(test_images):.1f}%")

## Results Summary

Display the training history and final performance metrics.

In [None]:
# Plot training history
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# Loss
axes[0].plot(history['train_loss'], label='Train Loss', marker='o')
axes[0].plot(history['test_loss'], label='Test Loss', marker='s')
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Loss')
axes[0].set_title('Training and Test Loss')
axes[0].legend()
axes[0].grid(True)

# Character Accuracy
axes[1].plot(history['test_char_acc'], label='Test Char Acc', marker='s', color='orange')
axes[1].set_xlabel('Epoch')
axes[1].set_ylabel('Accuracy (%)')
axes[1].set_title('Character-Level Accuracy')
axes[1].legend()
axes[1].grid(True)

# Sequence Accuracy
axes[2].plot(history['test_seq_acc'], label='Test Seq Acc', marker='s', color='green')
axes[2].set_xlabel('Epoch')
axes[2].set_ylabel('Accuracy (%)')
axes[2].set_title('Sequence-Level Accuracy')
axes[2].legend()
axes[2].grid(True)

plt.tight_layout()
plt.savefig('baseline_training_history.png', dpi=150, bbox_inches='tight')
plt.show()

# Print final results
print("\n" + "=" * 60)
print("SIMPLIFIED BASELINE CRNN MODEL FINAL RESULTS")
print("=" * 60)
print(f"Best Test Character Accuracy: {max(history['test_char_acc']):.2f}%")
print(f"Best Test Sequence Accuracy: {max(history['test_seq_acc']):.2f}%")
print(f"Final Test Character Accuracy: {history['test_char_acc'][-1]:.2f}%")
print(f"Final Test Sequence Accuracy: {history['test_seq_acc'][-1]:.2f}%")
print("=" * 60)
print("\nThis simplified baseline (~2-3M params) is appropriate for ~8K training samples")
print("and serves as a reasonable comparison point for your custom approach.")