Data Setup

Dataset builder- Supervised

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!unzip /content/drive/MyDrive/"CPSC599 Training Data"/unsupervised.zip

Archive:  /content/drive/MyDrive/CPSC599 Training Data/unsupervised.zip
replace unsupervised/Adding_noise2400.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
!unzip /content/drive/MyDrive/"CPSC599 Training Data"/unsupervised_synthetic.zip

In [None]:
!pip install keras-ocr

In [7]:
# Label all the data

import os
import keras_ocr

pipeline = keras_ocr.pipeline.Pipeline()

def process_images(image_dir, image_names):
    images = []
    for img in image_names:
        img_path = os.path.join(image_dir, img)
        image = keras_ocr.tools.read(img_path)
        images.append(image)
    return images

batch_size = 10
root_dir = "/content/unsupervised"
total_images = os.listdir(root_dir)
num_batches = (len(total_images) + batch_size - 1) // batch_size

images_real = []
labels_real = []

for batch_idx in range(num_batches):
    start_idx = batch_idx * batch_size
    end_idx = (batch_idx + 1) * batch_size
    batch_images = process_images(root_dir, total_images[start_idx:end_idx])

    # Process batch_images as needed
    batch_labels = pipeline.recognize(batch_images)
    images_real.extend(batch_images)
    labels_real.extend(batch_labels)

    del batch_images
    del batch_labels

root_dir = "/content/unsupervised_synthetic"
total_images = os.listdir(root_dir)
num_batches = (len(total_images) + batch_size - 1) // batch_size

images_synth = []
labels_synth = []

for batch_idx in range(num_batches):
    start_idx = batch_idx * batch_size
    end_idx = (batch_idx + 1) * batch_size
    batch_images = process_images(root_dir, total_images[start_idx:end_idx])

    # Process batch_images as needed
    batch_labels = pipeline.recognize(batch_images)
    images_synth.extend(batch_images)
    labels_synth.extend(batch_labels)

    del batch_images
    del batch_labels

images = images_real + images_synth
labels = labels_real + labels_synth

del images_real
del labels_real
del images_synth
del labels_synth

Looking for /root/.keras-ocr/craft_mlt_25k.h5
Downloading /root/.keras-ocr/craft_mlt_25k.h5


Instructions for updating:
Use `tf.image.resize(...method=ResizeMethod.BILINEAR...)` instead.


Looking for /root/.keras-ocr/crnn_kurapan.h5


In [8]:
labels[0][0][0]

'u'

In [9]:
# Format labels list

import csv

chars_list = ['a','b','c','d','e','f','g','h','i','j','k','l','m',
              'n','o','p','q','r','s','t','u','v','w','x','y','z']

labels_list = []
null_indices = []

for i in range(len(labels)):
  if (labels[i] == []):
    null_indices.append(i)
  elif (labels[i][0][0].lower() not in chars_list):
    null_indices.append(i)
  else:
    labels_list.append(labels[i][0][0].lower())

with open('chars_labels.csv', 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile, delimiter=' ',
                            quotechar='|', quoting=csv.QUOTE_MINIMAL)
    for item in labels_list:
      csvwriter.writerow([item])

with open('null_indices.csv', 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile, delimiter=' ',
                            quotechar='|', quoting=csv.QUOTE_MINIMAL)
    for item in null_indices:
      csvwriter.writerow([item])

In [44]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
le.fit(chars_list)

labels_list = le.transform(labels_list)

In [45]:
images_list = []

for i in range(len(images)):
  if (i not in null_indices):
    images_list.append(images[i])

In [46]:
# Split data into training, validation, and testing sets

from sklearn.model_selection import train_test_split

images_train, images_test, labels_train, labels_test = train_test_split(
    images_list, labels_list, test_size=0.2, random_state=42
)

images_train, images_val, labels_train, labels_val = train_test_split(
    images_train, labels_train, test_size=0.2, random_state=42
)

In [47]:
import torch
from torch import nn

In [70]:
import os
import csv
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision import transforms


class SupervisedDataset(Dataset):
    def __init__(self, images, labels, transform = None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        image = self.images[idx]
        if self.transform:
            image = self.transform(image)
        label = self.labels[idx]
        return image, label

# Define the transform
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((256,256)),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
])

train_data = SupervisedDataset(images_train, labels_train, transform)
val_data = SupervisedDataset(images_val, labels_val, transform)

# Create DataLoaders
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False)

letter_counts = {chr(ord('a') + i): 0 for i in range(26)}

# Iterate through the dataset and update letter counts
for _, label in train_data:
    letter_counts[le.inverse_transform([label])[0]] += 1

# Print the counts for each letter
for letter, count in letter_counts.items():
    print(f"{letter}: {count}")

class_counts = torch.tensor([letter_counts[letter] for letter in sorted(letter_counts.keys())], dtype=torch.float32)

# Calculate class weights
total_samples = len(train_data)
class_weights = total_samples / (26 * class_counts)

# Print the calculated class weights
print("Class Weights:", class_weights)





a: 395
b: 298
c: 411
d: 446
e: 336
f: 310
g: 345
h: 369
i: 101
j: 165
k: 338
l: 337
m: 283
n: 414
o: 288
p: 339
q: 221
r: 287
s: 433
t: 431
u: 213
v: 207
w: 334
x: 247
y: 312
z: 253
Class Weights: tensor([0.7900, 1.0471, 0.7592, 0.6996, 0.9287, 1.0066, 0.9045, 0.8456, 3.0895,
        1.8911, 0.9232, 0.9259, 1.1026, 0.7537, 1.0835, 0.9205, 1.4119, 1.0872,
        0.7206, 0.7240, 1.4650, 1.5074, 0.9342, 1.2633, 1.0001, 1.2334])


In [71]:
class DeepFont(nn.Module):
  def __init__(self, num_channels, num_classes):
    super().__init__()

    # self.baby = nn.Linear(in_features=256*256*num_channels, out_features=num_classes)

    self.conv1 = nn.Conv2d(
        in_channels=num_channels,
        out_channels=64,
        kernel_size=11,
        padding=1,
        stride=2
    )
    self.conv2 = nn.Conv2d(
        in_channels=64,
        out_channels=128,
        kernel_size=5,
        padding=2
    )
    self.conv3 = nn.Conv2d(
        in_channels=128,
        out_channels=256,
        kernel_size=3,
        padding=1
    )
    self.conv4 = nn.Conv2d(
        in_channels=256,
        out_channels=256,
        kernel_size=3,
        padding=1
    )
    self.conv5 = nn.Conv2d(
        in_channels=256,
        out_channels=256,
        kernel_size=3,
        padding=1
    )
    self.fc6 = nn.Linear(in_features=31*31*256, out_features=4096) # assuming input image size of 256x256. change in_feats for different sample size
    self.fc7 = nn.Linear(in_features=4096, out_features=4096)
    self.fc8 = nn.Linear(in_features=4096, out_features=num_classes)
    self.norm1 = nn.BatchNorm2d(num_features=64)
    self.norm2 = nn.BatchNorm2d(num_features=128)
    self.dropout = nn.Dropout(0.5)
    self.maxpool = nn.MaxPool2d(2)
    self.relu = nn.ReLU()
    self.flatten = nn.Flatten()
    self.softmax = nn.Softmax()


  def forward(self, x):
    # x = self.flatten(x)
    # x = self.baby(x)

    x = self.conv1(x)
    x = self.norm1(x)
    x = self.maxpool(x)
    x = self.relu(x)

    x = self.conv2(x)
    x = self.norm2(x)
    x = self.maxpool(x)
    x = self.relu(x)

    x = self.conv3(x)
    x = self.relu(x)

    x = self.conv4(x)
    x = self.relu(x)

    x = self.conv5(x)
    x = self.relu(x)

    x = self.flatten(x)

    x = self.dropout(self.fc6(x))
    x = self.relu(x)

    x = self.dropout(self.fc7(x))
    x = self.relu(x)

    x = self.fc8(x)

    return self.softmax(x)

In [72]:
import torch
from torch.utils.data import DataLoader

def evaluation(model, dataloader, criterion, device, phase='Validation'):
    model.eval()
    predictions = []
    ground_truth = []

    true_positives = 0
    true_negatives = 0
    false_positives = 0
    false_negatives = 0

    misclassified_examples = []

    with torch.no_grad():
        total_loss = 0
        total_samples = 0

        for _, (images, labels) in enumerate(dataloader):
            images = images.to(device)
            labels_tensor = torch.tensor([label for label in labels])
            labels = labels_tensor.to(device)

            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            #print(loss)
            total_loss += loss.item() * images.size(0)
            total_samples += images.size(0)

            # Update multi-class metrics
            true_positives += (preds * labels).sum().item()
            true_negatives += ((1 - labels) * (1 - preds)).sum().item()
            false_positives += ((1 - labels) * preds).sum().item()
            false_negatives += (labels * (1 - preds)).sum().item()

            # Collect misclassified examples


            predictions.extend(preds.cpu().numpy())
            ground_truth.extend(labels.cpu().numpy())

        # Calculate multi-class metrics
        precision = true_positives / (true_positives + false_positives + 1e-10)
        recall = true_positives / (true_positives + false_negatives + 1e-10)
        f1_score = 2 * (precision * recall) / (precision + recall + 1e-10)

        accuracy = (true_positives + true_negatives) / (total_samples + 1e-10)
        loss = total_loss / total_samples

        print(f'{phase}\tF1-Score={f1_score:<10.4f}' +
              f'\t\tLoss= {loss:<10.4f}' +
              f'\t\tPrecision: {precision:<10.4f}' +
              f'\t\tRecall: {recall:<10.4f}' +
              f'\t\tAccuracy: {accuracy:<10.4f}')

        return {'loss': loss,
                'f1_score': f1_score,
                'precision': precision,
                'recall': recall,
                'accuracy': accuracy,
                'ground_truth': ground_truth,
                'predictions': predictions}

# Example usage:
# Replace 'your_model' and 'your_dataloader' with your actual model and dataloader
# Replace 'your_device' with 'cuda' or 'cpu' depending on your setup
# evaluation_results = evaluation(your_model, your_dataloader, criterion, your_device)
# misclassified_examples = evaluation_results['misclassified_examples']

In [73]:
from torch.optim import lr_scheduler

def training_supervised(model, train_loader, val_loader, criterion, optimizer, scheduler, device, epochs, best_model_path):
    model = model.to(device)
    model.train()
    best_loss = torch.inf
    best_results = None
    # youre_on_thin_ice_buster = False
    #misclassified_examples = []

    for epoch in range(epochs):
        total_loss = 0
        total_samples = 0

        # New variables for multi-label metrics
        true_positives = 0
        true_negatives = 0
        false_positives = 0
        false_negatives = 0

        for batch_idx, (images, labels) in enumerate(train_loader):
            optimizer.zero_grad()
            images = images.to(device)
            labels_tensor = torch.tensor([label for label in labels])
            labels = labels_tensor.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()

            optimizer.step()

            total_loss += loss.item() * images.size(0)
            total_samples += images.size(0)

            _, preds = torch.max(outputs, 1)

            # Update multi-class metrics
            true_positives += (preds * labels).sum().item()
            true_negatives += ((1 - labels) * (1 - preds)).sum().item()
            false_positives += ((1 - labels) * preds).sum().item()
            false_negatives += (labels * (1 - preds)).sum().item()

        # this is outside of batch loop
        scheduler.step()

        # Calculate multi-class metrics
        precision = true_positives / (true_positives + false_positives + 1e-10)
        recall = true_positives / (true_positives + false_negatives + 1e-10)
        f1_score = 2 * (precision * recall) / (precision + recall + 1e-10)

        accuracy = (true_positives + true_negatives) / (total_samples + 1e-10)
        loss = total_loss / total_samples

        print(f'{epoch:<4}\tTrain\tF1-Score={f1_score:<10.4f}' +
              f'\t\tLoss= {loss:<10.4f}' +
              f'\t\tPrecision: {precision:<10.4f}' +
              f'\t\tRecall: {recall:<10.4f}' +
              f'\t\tAccuracy: {accuracy:<10.4f}')

        results = evaluation(model, val_loader, criterion, device)
        model.train()

        # early stopping:
        if results['loss'] < best_loss:             # we are still improving
            torch.save(model.state_dict(), best_model_path)
            best_loss = results['loss']
            best_results = results
        #     youre_on_thin_ice_buster = False
        # elif youre_on_thin_ice_buster:              # we didn't improve last time and we didn't improve this time
        #     break
        # else:                                       # we didn't improve this time, but it was the first time in a while
        #     youre_on_thin_ice_buster = True

        #if epoch == epochs - 1:  # Check if it's the last epoch
         # misclassified_examples = find_misclassified_examples(model, val_loader, device)
          #print("Misclassified Examples:")
          #for example in misclassified_examples:
           #   print(example)

        for name, param in model.named_parameters():
              if param.requires_grad and param.grad is not None:
               # print(f"Layer: {name}, Gradient Norm: {param.grad.norm().item()}")
               pass
        print()


    # Print misclassified examples after the last epoch

    # Print misclassified examples after the last epoch
    #print("Misclassified Examples:")
    #for example in misclassified_examples:
     #   print(example)

    return best_results

def find_misclassified_examples(model, data_loader, device):
    model.eval()
    misclassified_examples = []

    with torch.no_grad():
        for images, labels in data_loader:
            images = images.to(device)
            labels_tensor = torch.tensor([label for label in labels])
            labels = labels_tensor.to(device)

            outputs = model(images)
            _, preds = torch.max(outputs, 1)

            misclassified_mask = (preds != labels)
            misclassified_indices = torch.nonzero(misclassified_mask).squeeze()

            for idx in misclassified_indices:
                # Append to misclassified examples without moving to CPU
                misclassified_examples.append({
                    'image': images[idx].clone(),  # Use clone to avoid modifying the original tensor
                    'predicted_label': preds[idx].clone(),
                    'true_label': labels[idx].clone()
                })

    return misclassified_examples

In [74]:
print(len(train_data))


8113


In [80]:
import torch.nn as nn
import torch.optim as optim

device = torch.device("cpu")

# Train the unsupervised sub-network IS NOT IN THIS NOTEBOOK ANYMORE GO SEE OTHER NOTEBOOK


# Train the supervised sub-network

# Hyperparameters
learning_rate = 0.01 # we don't have all day
momentum = 0.95
weight_decay = 1e-4
epochs = 16 # CHANGED BY XANNA
criterion = torch.nn.CrossEntropyLoss()


In [76]:
# Stops colab from breaking sometimes
# Only works sometimes
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [77]:
# define supervised model
supervised_model = DeepFont(
    num_channels=3, num_classes=26
)  # one class per letter (not case-sensitive)

# Freeze the convolutional layers from SCAE
for param in supervised_model.conv1.parameters():
    param.requires_grad = False
for param in supervised_model.conv2.parameters():
    param.requires_grad = False

# unfreeze layers                                   # something something when i removed coconunt.jpg the whole project broke and we don;t even wanna know if this is a coconut
for param in supervised_model.conv3.parameters():
    param.requires_grad = True
for param in supervised_model.conv4.parameters():
    param.requires_grad = True
for param in supervised_model.conv5.parameters():
    param.requires_grad = True
for param in supervised_model.fc6.parameters():
    param.requires_grad = True
for param in supervised_model.fc7.parameters():
    param.requires_grad = True
for param in supervised_model.fc8.parameters():
    param.requires_grad = True




In [78]:
# define optimizer and scheduler :) thank u xanna. ur welcome
optimizer = optim.SGD(supervised_model.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)
scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

In [81]:
#torch.cuda.empty_cache()

# TODO: CHANGE THE i VALUE BEFORE YOU TRAIN (xanna 0 and 1; seher 2 and 3; quyanna 4 and 5; allison 6)
# AND ALSO RERUN THE PREVIOUS 2 CELLS (REDEFINE THE SUPERVISED MODEL)
best_model_path = "char_classifier.pt"
best_results = training_supervised(
    supervised_model,
    train_loader,
    val_loader,
    criterion,
    optimizer,
    scheduler,
    device,
    epochs,
    best_model_path,
)


  return self._call_impl(*args, **kwargs)


0   	Train	F1-Score=8.7182    		Loss= 3.2540    		Precision: 11.9939   		Recall: 6.8480    		Accuracy: 146.1574  
Validation	F1-Score=3.4166    		Loss= 3.2540    		Precision: 11.7117   		Recall: 2.0000    		Accuracy: 34.1350   


RuntimeError: ignored

Create the test_dataloader

In [None]:
import os
from PIL import Image
from torchvision import transforms

#Define transforms for testing data
test_transform = transforms.Compose([
    transforms.ToTensor()
])

test_dataset = SupervisedDataset(images_test, labels_test, test_transform)

#Create dataloader for test dataset
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

#dataloader with shuffling enabled for visualization/testing, for debugging dataloader logic
vis_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)

Test the test_loader

In [None]:
import matplotlib.pyplot as plt

def show_images(images, labels, num_images=4):
    plt.figure(figsize=(15, 10))
    for i in range(num_images):
        plt.subplot(1, num_images, i + 1)
        plt.imshow(images[i].numpy().transpose(1, 2, 0))  # Convert tensor to image format
        plt.title('Label: ' + labels[i])
        plt.axis('off')
    plt.show()

# Get a random batch of images and labels
for images, labels in vis_loader:
    text_labels = [le.inverse_transform([label])[0] for label in labels]
    show_images(images, text_labels)
    break  # Display only the first batch




In [None]:
# Testing our models
import torch.nn as nn
criterion = nn.CrossEntropyLoss()
device = torch.device("cpu")

# ...

model_path = "char_classifier.pt"

# Create an instance of your model
model = DeepFont(num_channels=3, num_classes=26)

# Load the state dictionary into the model
model.load_state_dict(torch.load(model_path, map_location=device))

results = evaluation(model, test_loader, criterion, device, 'Test')