# ECOQCODE OCR CNN Model Training

## 1. Data Generation (Synthetic Dataset)

In [17]:
import os
import random
from PIL import Image, ImageDraw, ImageFont
from pathlib import Path

# Configuration
bg_dir = './real_backgrounds'
out_dir = './eco_dataset/images'
label_file = './eco_dataset/labels.txt'
font_path = 'arial.ttf'  # Change as per your system (e.g., 'NotoSansCJK-Regular.ttc')
text = 'ECOQCODE'
num_samples = 1000  # Number of images to generate
image_size = (224, 224)  # Output image size

os.makedirs(out_dir, exist_ok=True)

# List of background image files
bg_files = [str(p) for p in Path(bg_dir).glob('*') if p.suffix.lower() in ['.jpg', '.png', '.jpeg']]
if not bg_files:
    # Fail before the label file is truncated instead of crashing inside
    # random.choice() with an unhelpful IndexError.
    raise RuntimeError(f"No background images (.jpg/.jpeg/.png) found in: {bg_dir}")

# Load font once up front purely as an early check that the font file is usable;
# the fonts actually drawn are loaded per size below.
try:
    font = ImageFont.truetype(font_path, size=24)
except OSError as err:
    raise RuntimeError(f"Font file not found: {font_path}") from err

# Fonts keyed by pixel size, so each size is read from disk only once.
font_cache = {}

# Generate the images and save one "<path>\t<label>" line per image
with open(label_file, 'w', encoding='utf-8') as lf:
    for i in range(num_samples):
        bg_path = random.choice(bg_files)
        # The context manager closes the background's file handle; convert()
        # and resize() each return a new image that stays valid afterwards.
        with Image.open(bg_path) as bg:
            img = bg.convert("RGB").resize(image_size)

        draw = ImageDraw.Draw(img)

        # Set label for binary classification
        has_text = random.random() < 0.5  # 50% chance to insert ECOQCODE
        label = 1 if has_text else 0

        if has_text:
            # Determine text size
            for _ in range(10):  # Retry 10 times if text is larger than image
                text_size = random.randint(6, 8)
                font = font_cache.get(text_size)
                if font is None:
                    font = font_cache[text_size] = ImageFont.truetype(font_path, text_size)
                text_width = int(text_size * 0.6 * len(text))  # Approximate width calculation
                text_height = text_size

                if text_width < image_size[0] and text_height < image_size[1]:
                    break
            else:
                # Skip this sample entirely (no image, no label line) if the
                # text cannot be made to fit.
                continue

            # Top-left corner chosen so the (approximate) text box stays inside the image
            x = random.randint(0, image_size[0] - text_width)
            y = random.randint(0, image_size[1] - text_height)

            draw.text((x, y), text, font=font, fill=(0, 0, 0))

        # Save file
        filename = f"img_{i:05d}.jpg"
        save_path = os.path.join(out_dir, filename)
        img.save(save_path)

        # Record label
        lf.write(f"{save_path}\t{label}\n")


## 2. Dataset Class Definition

In [11]:
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image

class ECOQDataset(Dataset):
    """Image dataset backed by a tab-separated label file.

    Each non-blank line of the label file must be ``<image path>\\t<label>``,
    where the label is an integer literal.

    Args:
        label_path: Path to the label file (read as UTF-8).
        transform: Optional callable applied to the loaded RGB PIL image.

    Raises:
        ValueError: If a non-blank line does not have exactly two
            tab-separated fields or its label is not an integer.
    """

    def __init__(self, label_path, transform=None):
        self.samples = []
        with open(label_path, encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line:
                    # Blank or trailing empty lines carry no sample; skip them
                    # instead of failing on the tuple unpacking below.
                    continue
                path, label = line.split("\t")
                self.samples.append((path, int(label)))
        self.transform = transform

    def __len__(self):
        """Return the number of samples listed in the label file."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``; ``label`` is an int."""
        path, label = self.samples[idx]
        # convert() returns a new image, so the file handle can be closed
        # as soon as the pixel data has been converted.
        with Image.open(path) as img:
            image = img.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label


## 3. Image Transformations

In [12]:
import torchvision.transforms as T

# Preprocessing pipeline: resize to 224x224, convert to a tensor, then
# normalize each of the three RGB channels with mean 0.5 and std 0.5.
_channel_mean = [0.5, 0.5, 0.5]
_channel_std = [0.5, 0.5, 0.5]

transform = T.Compose(
    [
        T.Resize((224, 224)),
        T.ToTensor(),
        T.Normalize(_channel_mean, _channel_std),
    ]
)


## 4. Model Architecture (CNN Classifier)

In [13]:
import torch.nn as nn
import torchvision.models as models

def get_transfer_model():
    """Build a ResNet18 with ImageNet weights and a fresh single-logit head.

    Every pre-trained parameter is frozen; the final fully connected layer is
    then replaced by a new ``nn.Linear`` with one output, which is created
    after the freeze and therefore remains trainable.

    Returns:
        The modified ResNet18 model.
    """
    backbone = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

    # Freeze first: the replacement head below is created afterwards, so it
    # is not affected by this loop.
    for weight in backbone.parameters():
        weight.requires_grad = False

    # One output unit for binary classification
    backbone.fc = nn.Linear(backbone.fc.in_features, 1)

    return backbone


## 5. Data Splitting (Train/Validation)

In [14]:
from sklearn.model_selection import train_test_split

label_file = "./eco_dataset/labels.txt"

# Load the label list. Blank lines are dropped so they cannot end up as
# samples, and every kept line is forced to end in exactly one newline so
# writelines() below never glues two records together.
with open(label_file, encoding="utf-8") as f:
    lines = [line.rstrip("\n") + "\n" for line in f if line.strip()]

# Split 8:2. The fixed seed makes the split (and therefore the reported
# validation metrics) reproducible across runs.
train_lines, val_lines = train_test_split(
    lines, test_size=0.2, shuffle=True, random_state=42
)

# Write each partition to its own label file
with open("eco_dataset/train_labels.txt", "w", encoding="utf-8") as f:
    f.writelines(train_lines)

with open("eco_dataset/val_labels.txt", "w", encoding="utf-8") as f:
    f.writelines(val_lines)


## 6. Dataset and DataLoader Initialization

In [15]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

# Per-channel statistics used to normalize inputs for the ImageNet-pretrained model
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

_input_size = (224, 224)


def _tensor_steps():
    """Return fresh ToTensor + Normalize steps shared by both pipelines."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(imagenet_mean, imagenet_std),
    ]


# Training pipeline: resize, random augmentation, then tensor conversion
# and normalization.
# NOTE(review): RandomHorizontalFlip mirrors any rendered text - confirm
# this augmentation is intended for a text-presence classifier.
train_transform = transforms.Compose(
    [
        transforms.Resize(_input_size),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        *_tensor_steps(),
    ]
)

# Validation pipeline: the same resize and normalization, no augmentation.
val_transform = transforms.Compose(
    [
        transforms.Resize(_input_size),
        *_tensor_steps(),
    ]
)

# Custom Dataset Class
class ECOQDataset(Dataset):
    """Image dataset backed by a tab-separated label file.

    Each non-blank line of the label file must be ``<image path>\\t<label>``.
    Samples are stored as ``[path, label]`` lists of strings; the label is
    converted to a tensor when the item is fetched.

    Args:
        label_file: Path to the label file (read as UTF-8).
        transform: Optional callable applied to the loaded RGB PIL image.

    Raises:
        ValueError: If a non-blank line does not have exactly two
            tab-separated fields.
    """

    def __init__(self, label_file, transform=None):
        self.samples = []
        with open(label_file, 'r', encoding='utf-8') as f:
            for line_no, raw_line in enumerate(f, start=1):
                line = raw_line.strip()
                if not line:
                    # Blank lines would otherwise become [''] samples that
                    # inflate len() and only crash later in __getitem__.
                    continue
                fields = line.split('\t')
                if len(fields) != 2:
                    raise ValueError(
                        f"{label_file}:{line_no}: expected '<path>\\t<label>', got {line!r}"
                    )
                self.samples.append(fields)
        self.transform = transform

    def __len__(self):
        """Return the number of samples listed in the label file."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, label)``; ``label`` is a float32 tensor of shape ``[1]``."""
        img_path, label = self.samples[idx]
        # convert() returns a new image, so the file handle can be closed
        # as soon as the pixel data has been converted.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        if self.transform:
            image = self.transform(image)
        label = torch.tensor([int(label)], dtype=torch.float32)
        return image, label

# Label files for the two partitions
train_label_path = './eco_dataset/train_labels.txt'
val_label_path = './eco_dataset/val_labels.txt'

# Training data: augmented transform, reshuffled every epoch
train_dataset = ECOQDataset(train_label_path, transform=train_transform)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

# Validation data: plain transform, fixed order
val_dataset = ECOQDataset(val_label_path, transform=val_transform)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)


## 7. Model Training and Validation

In [16]:
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Get the transfer learning model
model = get_transfer_model().to(device)

# The model emits a single raw logit per image, so the loss applies the
# sigmoid internally.
criterion = nn.BCEWithLogitsLoss()

# Only the parameters of the final layer are handed to the optimizer
optimizer = optim.Adam(model.fc.parameters(), lr=1e-3)

# Decay LR by a factor of 0.1 every 5 epochs
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)

for epoch in range(1, 11):  # Train for 10 epochs
    model.train()
    train_loss = 0
    for imgs, labels in tqdm(train_loader, desc=f"Epoch {epoch}"):
        imgs, labels = imgs.to(device), labels.float().to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward. Labels are passed through unchanged: the loss requires
        # them to have the same shape as the logits.
        preds = model(imgs)
        loss = criterion(preds, labels)

        # Backward + optimize
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    scheduler.step()

    # max(..., 1) avoids a ZeroDivisionError when the loader is empty
    print(f"Epoch {epoch} | Train Loss: {train_loss / max(len(train_loader), 1):.4f}")

    # Validation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs = imgs.to(device)
            # Flatten to 1-D with reshape(-1) rather than squeeze(): an
            # argument-less squeeze() also removes the batch dimension when
            # a batch holds a single sample, which makes labels.size(0) fail.
            labels = labels.to(device).float().reshape(-1)
            outputs = model(imgs)
            preds = (torch.sigmoid(outputs).reshape(-1) > 0.5).float()
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    print(f"Validation Accuracy: {correct / max(total, 1) * 100:.2f}%")


Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 10.47it/s]


Epoch 1 | Train Loss: 0.6962
Validation Accuracy: 50.00%


Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 11.68it/s]


Epoch 2 | Train Loss: 0.6732
Validation Accuracy: 64.50%


Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 11.59it/s]


Epoch 3 | Train Loss: 0.6390
Validation Accuracy: 50.00%


Epoch 4: 100%|██████████| 25/25 [00:02<00:00, 11.18it/s]


Epoch 4 | Train Loss: 0.6506
Validation Accuracy: 50.00%


Epoch 5: 100%|██████████| 25/25 [00:02<00:00, 11.60it/s]


Epoch 5 | Train Loss: 0.6228
Validation Accuracy: 50.00%


Epoch 6: 100%|██████████| 25/25 [00:02<00:00, 11.20it/s]


Epoch 6 | Train Loss: 0.6112
Validation Accuracy: 50.00%


Epoch 7: 100%|██████████| 25/25 [00:02<00:00, 11.18it/s]


Epoch 7 | Train Loss: 0.6042
Validation Accuracy: 51.00%


Epoch 8: 100%|██████████| 25/25 [00:02<00:00, 11.33it/s]


Epoch 8 | Train Loss: 0.5875
Validation Accuracy: 53.50%


Epoch 9: 100%|██████████| 25/25 [00:02<00:00, 11.40it/s]


Epoch 9 | Train Loss: 0.6005
Validation Accuracy: 50.00%


Epoch 10: 100%|██████████| 25/25 [00:02<00:00, 11.23it/s]


Epoch 10 | Train Loss: 0.5988
Validation Accuracy: 50.50%


## 8. Evaluation and ONNX Export

In [8]:
from sklearn.metrics import classification_report

all_preds = []
all_labels = []

# Put the model in inference mode explicitly rather than relying on whatever
# mode an earlier cell left it in; this matters for both the evaluation
# below and the ONNX export.
model.eval()

with torch.no_grad():
    for imgs, labels in val_loader:
        imgs = imgs.to(device)
        # reshape(-1) always yields a 1-D tensor; an argument-less squeeze()
        # would produce a 0-dim tensor for a single-sample batch, which
        # cannot be iterated by extend() below.
        labels = labels.to(device).float().reshape(-1)
        outputs = model(imgs)
        preds = (torch.sigmoid(outputs).reshape(-1) > 0.5).float()

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

print(classification_report(all_labels, all_preds, digits=4))

# Export the trained model to ONNX format
output_onnx_path = "ecoq_classifier.onnx"
dummy_input = torch.randn(1, 3, 224, 224).to(device) # Example input: batch_size=1, channels=3, height=224, width=224

torch.onnx.export(model,                   # trained model
                   dummy_input,             # example input for tracing
                   output_onnx_path,        # where to save the ONNX model
                   export_params=True,      # store the trained parameter weights inside the model file
                   opset_version=11,        # the ONNX version to export the model to
                   do_constant_folding=True, # whether to execute constant folding for optimization
                   input_names = ['input'],   # the model's input names
                   output_names = ['output'], # the model's output names
                   dynamic_axes={'input' : {0 : 'batch_size'},    # variable length axes
                                 'output' : {0 : 'batch_size'}})

print(f"Model successfully exported to {output_onnx_path}")

              precision    recall  f1-score   support

         0.0     0.8534    1.0000    0.9209        99
         1.0     1.0000    0.8317    0.9081       101

    accuracy                         0.9150       200
   macro avg     0.9267    0.9158    0.9145       200
weighted avg     0.9275    0.9150    0.9145       200

Model successfully exported to ecoq_classifier.onnx
