Data Setup

In [29]:
import shutil
import os
from pathlib import Path
import zipfile as zipfile


# Target folders that will hold each extracted dataset
supervised_dest_dir = 'supervised'
unsupervised_synthetic_dest_dir = 'unsupervised_synthetic'
unsupervised_dest_dir = 'unsupervised'

# Make sure every target folder is present (a no-op when it already exists)
for _dest_dir in (supervised_dest_dir, unsupervised_synthetic_dest_dir, unsupervised_dest_dir):
    os.makedirs(_dest_dir, exist_ok=True)

# Helper: move the entries of one directory into another, merging where needed
def _merge_tree(src_dir, dst_dir):
    """Place every entry of `src_dir` into `dst_dir`.

    Files are copied over any existing file of the same name. A sub-directory
    is moved as a whole when `dst_dir` has no directory of that name, and is
    merged entry by entry when it does. A plain ``shutil.move`` onto an
    existing directory would instead nest the source inside it.
    """
    for item in os.listdir(src_dir):
        s = os.path.join(src_dir, item)
        d = os.path.join(dst_dir, item)
        if os.path.isdir(s):
            if os.path.isdir(d):
                _merge_tree(s, d)
            else:
                shutil.move(s, d)
        else:
            shutil.copy2(s, d)

# Function to extract a local zip archive and flatten its wrapper folder
# (nothing is downloaded here; the archive must already be on disk)
def download_folder(zip_name,destination):
    """Extract `zip_name` into `destination` and flatten its top-level folder.

    The archive is expected to wrap its contents in a folder named after the
    zip file's stem (e.g. ``data.zip`` -> ``data/``). The contents of that
    folder are moved directly into `destination` and the wrapper is removed.
    If the archive has no such wrapper folder, the extracted files are left
    where `extractall` put them.

    Calling the function again with the same arguments overwrites files and
    merges directories instead of nesting them.

    Args:
        zip_name: Path to the zip archive on the local file system.
        destination: Directory that receives the extracted contents.
    """
    with zipfile.ZipFile(zip_name, 'r') as zip_ref:
        zip_ref.extractall(destination)

    extracted_folder = os.path.join(destination, Path(zip_name).stem)
    if not os.path.isdir(extracted_folder):
        # No wrapper folder in the archive: contents already sit in destination
        return

    # Move the contents of the extracted folder up into the destination
    _merge_tree(extracted_folder, destination)

    # Clean up the now-redundant wrapper folder (the zip itself is kept)
    #os.remove(zip_name)
    shutil.rmtree(extracted_folder)

# Supervised archive: extraction currently disabled
#download_folder('supervised_data.zip', supervised_dest_dir)

# Unpack the unsupervised archive into its destination folder
download_folder(zip_name="unsupervised_data.zip", destination=unsupervised_dest_dir)

# Synthetic unsupervised archive: extraction currently disabled
#download_folder("unsupervised_synthetic_data.zip", unsupervised_synthetic_dest_dir)


In [35]:
import os
import hashlib
import shutil

def get_file_checksum(file_path):
    """Return the SHA-256 hex digest of the file at `file_path`."""
    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        # Consume the file in 4K pieces so it is never held in memory at once
        while True:
            chunk = stream.read(4096)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()

def remove_duplicate_images(folder_path):
    """Delete files in `folder_path` whose content duplicates an earlier file.

    Files are compared by checksum (see `get_file_checksum`). Entries are
    visited in sorted name order, so for each group of identical files the one
    whose name sorts first is kept and the others are deleted. Sub-directories
    are ignored. A message is printed for every file removed.

    Args:
        folder_path: Directory to scan (not recursive).
    """
    # Maps each checksum seen so far to the path of the file that is kept
    checksums = {}

    # os.listdir gives no ordering guarantee; sort so the survivor is predictable
    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)

        # Only regular files are candidates; directories are skipped
        if not os.path.isfile(file_path):
            continue

        checksum = get_file_checksum(file_path)

        if checksum in checksums:
            # Same content as a file we already kept: remove this copy
            print(f"Removing duplicate: {file_path}")
            os.remove(file_path)
        else:
            checksums[checksum] = file_path

def rename_files(folder_path):
    """Strip the '(1)' marker from file names in `folder_path`.

    Every regular file whose name contains '(1)' is renamed to the same name
    with all occurrences of '(1)' removed. If something already exists under
    the target name, the file is left untouched and a message is printed,
    so existing data is never overwritten.

    Args:
        folder_path: Directory to scan (not recursive).
    """
    # Sorted for a reproducible processing order
    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)

        # Only regular files with the '(1)' marker are renamed
        if not os.path.isfile(file_path) or '(1)' not in file_name:
            continue

        new_file_name = file_name.replace('(1)', '')
        new_file_path = os.path.join(folder_path, new_file_name)

        # os.rename would silently replace the target on POSIX and raise on
        # Windows; refuse explicitly instead
        if os.path.exists(new_file_path):
            print(f"Skipped (target exists): {file_path} -> {new_file_path}")
            continue

        os.rename(file_path, new_file_path)
        print(f"Renamed: {file_path} to {new_file_path}")

if __name__ == "__main__":
    # Clean the synthetic 'times' image folder: drop duplicates first, then
    # strip the '(1)' marker from the names that remain
    folder_path = "unsupervised_synthetic/times_unsupervised_images/"
    for cleanup_step in (remove_duplicate_images, rename_files):
        cleanup_step(folder_path)


Removing duplicate: unsupervised_synthetic/times_unsupervised_images/times-918.jpg
Removing duplicate: unsupervised_synthetic/times_unsupervised_images/times-919.jpg
Removing duplicate: unsupervised_synthetic/times_unsupervised_images/times-920.jpg
Removing duplicate: unsupervised_synthetic/times_unsupervised_images/times-921.jpg
Removing duplicate: unsupervised_synthetic/times_unsupervised_images/times-922.jpg
Removing duplicate: unsupervised_synthetic/times_unsupervised_images/times-923.jpg
Removing duplicate: unsupervised_synthetic/times_unsupervised_images/times-924.jpg
Removing duplicate: unsupervised_synthetic/times_unsupervised_images/times-925.jpg
Removing duplicate: unsupervised_synthetic/times_unsupervised_images/times-926.jpg
Removing duplicate: unsupervised_synthetic/times_unsupervised_images/times-927.jpg
Renamed: unsupervised_synthetic/times_unsupervised_images/times-918(1).jpg to unsupervised_synthetic/times_unsupervised_images/times-918.jpg
Renamed: unsupervised_synthet

In [12]:
import torch
from torch import nn

Dataset builder - Supervised

In [28]:
import os
import csv
from torch.utils.data import Dataset, DataLoader
from PIL import Image

class SupervisedDataset(Dataset):
    """Image/label dataset backed by a folder of images and a CSV label file.

    Each non-empty CSV row is read as ``[filename, text in image]``; the
    filename is resolved relative to `root_dir`.

    Args:
        root_dir: Directory containing the image files.
        labels_path: Path to the CSV file listing filename/label rows.
        transform: Optional callable applied to the RGB PIL image before it is
            returned. With the default ``None`` the PIL image is returned as is.
    """

    def __init__(self, root_dir, labels_path, transform=None):
        self.root_dir = root_dir
        self.labels_path = labels_path
        self.transform = transform
        with open(labels_path, newline='') as labels_file:
            # csv.reader yields [] for blank lines; keeping those rows would
            # raise IndexError later in __getitem__
            self.data = [row for row in csv.reader(labels_file) if row]

    def __len__(self):
        """Number of labelled rows read from the CSV file."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row `idx`; the label is the raw CSV string."""
        row = self.data[idx]
        img_path = os.path.join(self.root_dir, row[0])
        # convert() returns an independent image, so the file handle opened by
        # Image.open can be closed straight away
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        label = row[1]

        if self.transform:
            image = self.transform(image)

        return image, label

# Set your root directory
root_dir = 'supervised'
#subfolder = 'supervised_data'

# Create datasets for each model (one entry per font name)
models = ['arial', 'bradhitc', 'century_schoolbook', 'comic', 'cour', 'papyrus', 'times']
train_datasets, val_datasets = [], []

# Number of samples held out for validation per font
val_size = 100

for model in models:
    model_dir = os.path.join(root_dir, f"{model}_images")
    labels_path = os.path.join(root_dir, f"{model}.csv")
    all_data = SupervisedDataset(model_dir, labels_path)

    # Split data into training and validation sets. The training size is
    # derived from the dataset so the two sizes always sum to len(all_data),
    # which random_split requires (999 when a font has 1099 rows).
    train_size = len(all_data) - val_size
    train_data, val_data = torch.utils.data.random_split(all_data, [train_size, val_size])

    train_datasets.append(train_data)
    val_datasets.append(val_data)

# Create DataLoaders
# NOTE(review): the datasets built above yield PIL images and string labels;
# the default DataLoader collate function cannot batch PIL images, so a
# tensor transform / collate_fn is presumably still needed before iterating.
train_loaders = [DataLoader(dataset, batch_size=32, shuffle=True) for dataset in train_datasets]
val_loaders = [DataLoader(dataset, batch_size=32, shuffle=False) for dataset in val_datasets]

Dataset builder - Unsupervised

In [36]:
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os

class CustomFontDataset(Dataset):
    """Combined dataset of real unsupervised images and one font's synthetic images.

    Indices ``[0, len(unsupervised))`` address the files in
    ``<root>/unsupervised`` (label 0); the remaining indices address the files
    in ``<root>/unsupervised_synthetic/<font_name>_unsupervised_images``
    (label 1).

    Args:
        root: Directory that contains both data folders.
        font_name: Font whose synthetic image folder is loaded.
        transform: Optional callable applied to each RGB PIL image.
    """

    def __init__(self, root, font_name, transform=None):
        self.root = root
        self.font_name = font_name
        self.transform = transform
        self.unsupervised_data = self.load_unsupervised_data()
        self.synthetic_data = self.load_synthetic_data()

    @staticmethod
    def _list_files(directory):
        """Return the regular files directly inside `directory`, in sorted name order."""
        # Sub-directories are skipped because Image.open cannot read them;
        # sorting makes the index -> file mapping reproducible across runs
        candidates = (os.path.join(directory, name) for name in sorted(os.listdir(directory)))
        return [path for path in candidates if os.path.isfile(path)]

    def load_unsupervised_data(self):
        """List the image paths under ``<root>/unsupervised``."""
        unsupervised_path = os.path.join(self.root, 'unsupervised')
        return self._list_files(unsupervised_path)

    def load_synthetic_data(self):
        """List the image paths of this font's synthetic image folder."""
        synthetic_path = os.path.join(self.root, f'unsupervised_synthetic/{self.font_name}_unsupervised_images')
        return self._list_files(synthetic_path)

    def __len__(self):
        """Total number of unsupervised plus synthetic images."""
        return len(self.unsupervised_data) + len(self.synthetic_data)

    def __getitem__(self, index):
        """Return ``(image, label)``; label is 0 for unsupervised and 1 for synthetic data."""
        if index < len(self.unsupervised_data):
            img_path = self.unsupervised_data[index]
            label = 0  # You can set the label for unsupervised data to 0 or any other value
        else:
            adjusted_index = index - len(self.unsupervised_data)
            img_path = self.synthetic_data[adjusted_index]
            label = 1  # You can set the label for synthetic data to 1 or any other value

        # convert() returns an independent image, so the underlying file can
        # be closed as soon as the conversion is done
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label

# Image preprocessing pipeline (currently just PIL image -> tensor)
transform = transforms.Compose([transforms.ToTensor()])

# The data folders are resolved relative to the current working directory
current_working_directory = os.getcwd()

# Build one shuffled dataloader per font, keyed by font name
dataloaders = {}
for font_name in models:
    font_dataset = CustomFontDataset(
        root=current_working_directory,
        font_name=font_name,
        transform=transform,
    )
    font_dataloader = DataLoader(font_dataset, shuffle=True, batch_size=32)
    dataloaders[font_name] = font_dataloader

# Report how many samples each font's dataset holds
for font_name, dataloader in dataloaders.items():
    sample_count = len(dataloader.dataset)
    print(f"Font: {font_name}, Number of samples: {sample_count}")


Font: arial, Number of samples: 5918
Font: bradhitc, Number of samples: 5918
Font: century_schoolbook, Number of samples: 5918
Font: comic, Number of samples: 5918
Font: cour, Number of samples: 5918
Font: papyrus, Number of samples: 5918
Font: times, Number of samples: 5918


In [None]:
# Stacked Convolutional Auto-Encoder (the unsupervised sub-network)
class SCAE(nn.Module):
  """Convolutional auto-encoder that reconstructs its input image.

  Encoder: conv1 -> max-pool -> ReLU -> conv2 -> ReLU.
  Decoder: deconv1 -> max-unpool -> ReLU -> deconv2 -> ReLU.

  The output has the same shape as the input, so it can be compared with the
  input directly by a reconstruction loss.

  Args:
    num_channels: Number of channels of the input (and reconstructed) images.
  """

  def __init__(self, num_channels):
    super().__init__()

    self.conv1 = nn.Conv2d(
        in_channels=num_channels,
        out_channels=64,
        kernel_size=11,
        padding=1,
        stride=2
    )
    self.conv2 = nn.Conv2d(
        in_channels=64,
        out_channels=128,
        kernel_size=5,
        padding=2
    )
    self.deconv1 = nn.ConvTranspose2d(
        in_channels = 128,
        out_channels = 64,
        kernel_size = 5,
        padding = 2
    )
    # The reconstruction must have as many channels as the input image
    self.deconv2 = nn.ConvTranspose2d(
        in_channels=64,
        out_channels=num_channels,
        kernel_size=11,
        padding=1,
        stride=2
    )
    self.maxpool = nn.MaxPool2d(2, return_indices=True)
    self.unpool = nn.MaxUnpool2d(2)
    self.relu = nn.ReLU()

  def forward(self, x):
    """Encode and decode `x` (N, C, H, W); returns a tensor of the same shape."""
    input_hw = x.shape[-2:]

    x = self.conv1(x)
    pre_pool_hw = x.shape[-2:]
    # With return_indices=True the pool returns (values, indices); the indices
    # are what MaxUnpool2d needs to put values back in place
    x, pool_indices = self.maxpool(x)
    x = self.relu(x)

    x = self.conv2(x)
    x = self.relu(x)

    x = self.deconv1(x)
    # output_size restores the exact pre-pool size when it was odd
    x = self.unpool(x, pool_indices, output_size=pre_pool_hw)
    x = self.relu(x)

    # output_size resolves the stride-2 size ambiguity so the reconstruction
    # matches the input's height and width
    x = self.deconv2(x, output_size=input_hw)
    x = self.relu(x)

    return x

In [None]:
class DeepFont(nn.Module):
  """Convolutional classifier: five conv layers followed by three linear layers.

  conv1 and conv2 have the same shapes as the SCAE encoder layers, so their
  weights can be loaded from an SCAE state dict.

  Args:
    num_channels: Number of channels of the input images.
    num_classes: Number of output classes.
    input_size: Height/width of the (square) input images. It determines the
      number of input features of fc6; the default of 105 gives 12*12*256.
  """

  def __init__(self, num_channels, num_classes, input_size=105):
    super().__init__()

    self.conv1 = nn.Conv2d(
        in_channels=num_channels,
        out_channels=64,
        kernel_size=11,
        padding=1,
        stride=2
    )
    self.conv2 = nn.Conv2d(
        in_channels=64,
        out_channels=128,
        kernel_size=5,
        padding=2
    )
    self.conv3 = nn.Conv2d(
        in_channels=128,
        out_channels=256,
        kernel_size=3,
        padding=1
    )
    self.conv4 = nn.Conv2d(
        in_channels=256,
        out_channels=256,
        kernel_size=3,
        padding=1
    )
    # A separate layer: assigning self.conv4 here would make conv4 and conv5
    # one module sharing a single set of weights
    self.conv5 = nn.Conv2d(
        in_channels=256,
        out_channels=256,
        kernel_size=3,
        padding=1
    )
    # Spatial side after conv1 (kernel 11, padding 1, stride 2) and the two
    # 2x2 max-pools; conv2..conv5 keep the spatial size unchanged
    feature_side = ((input_size + 2 * 1 - 11) // 2 + 1) // 2 // 2
    self.fc6 = nn.Linear(in_features=feature_side*feature_side*256, out_features=4096)
    self.fc7 = nn.Linear(in_features=4096, out_features=4096)
    self.fc8 = nn.Linear(in_features=4096, out_features=num_classes)
    self.norm1 = nn.BatchNorm2d(num_features=64)
    self.norm2 = nn.BatchNorm2d(num_features=128)
    self.dropout = nn.Dropout(0.5)
    self.maxpool = nn.MaxPool2d(2)
    self.relu = nn.ReLU()
    self.flatten = nn.Flatten()
    # Normalize over the class dimension of the (N, num_classes) output
    self.softmax = nn.Softmax(dim=1)

  def forward(self, x):
    """Return class probabilities of shape (N, num_classes) for images `x`."""
    x = self.conv1(x)
    x = self.norm1(x)
    x = self.maxpool(x)
    x = self.relu(x)

    x = self.conv2(x)
    x = self.norm2(x)
    x = self.maxpool(x)
    x = self.relu(x)

    x = self.conv3(x)
    x = self.relu(x)

    x = self.conv4(x)
    x = self.relu(x)

    x = self.conv5(x)
    x = self.relu(x)

    x = self.flatten(x)

    x = self.dropout(self.fc6(x))
    x = self.relu(x)

    x = self.dropout(self.fc7(x))
    x = self.relu(x)

    # NOTE(review): the ReLU before the softmax is kept from the original
    # layer order. nn.CrossEntropyLoss applies log-softmax itself, so if that
    # loss is used, consider returning the fc8 output (raw logits) instead.
    x = self.fc8(x)
    x = self.relu(x)

    x = self.softmax(x)

    return x

In [None]:
def training_unsupervised(model, dataloader, criterion, optimizer, device, epochs, model_path):
    """Train a reconstruction model and checkpoint its best epoch.

    For every batch the model output is compared with the input images
    themselves using `criterion`. After each epoch the per-sample average loss
    is computed; whenever it improves on the best value so far, the model's
    state dict is saved to `model_path`.

    Args:
        model: Network whose output has the same shape as its input.
        dataloader: Iterable of ``(images, label)`` batches; labels are ignored.
        criterion: Loss called as ``criterion(outputs, images)``.
        optimizer: Optimizer over `model`'s parameters.
        device: Device the model and batches are moved to.
        epochs: Number of passes over `dataloader`.
        model_path: File the best state dict is written to. Missing parent
            directories are created.

    Returns:
        The best (lowest) per-sample average epoch loss, or ``inf`` if no
        sample was seen.
    """
    model = model.to(device)
    model.train()

    # torch.save does not create missing parent directories
    checkpoint_dir = os.path.dirname(model_path)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    best_loss = float('inf')
    for epoch in range(epochs):
        total_loss = 0.0
        total_samples = 0
        for images, _ in dataloader:
            optimizer.zero_grad()
            images = images.to(device)
            outputs = model(images)
            loss = criterion(outputs, images)
            # Weight by batch size so a smaller final batch is averaged fairly
            total_loss += loss.item() * images.size(0)
            total_samples += images.size(0)
            loss.backward()
            optimizer.step()

        if total_samples == 0:
            # Empty dataloader: nothing to average or checkpoint
            break

        # total_loss is sample-weighted, so divide by samples, not by batches
        avg_loss = total_loss / total_samples
        if avg_loss < best_loss:
            best_loss = avg_loss
            torch.save(model.state_dict(), model_path)
    return best_loss

In [None]:
# Supervised model training function adapted from code provided by Dr. Farhad Maleki in MNISTFasion_CNN.ipynb
def training_supervised(model, train_loader, val_loader, criterion, optimizer, device,
             epochs, best_model_path):
    """Train a classifier and keep the model with the lowest validation loss.

    After each training epoch the train accuracy and per-sample loss are
    printed and the model is scored with `evaluation` (defined outside this
    function). Whenever the validation loss improves, the whole model object
    is saved to `best_model_path`.

    Args:
        model: Network producing ``(N, num_classes)`` scores.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Validation data passed through to `evaluation`.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer over `model`'s parameters.
        device: Device the model and batches are moved to.
        epochs: Number of training epochs.
        best_model_path: File the best model is saved to.

    Returns:
        The `evaluation` result of the best epoch (a mapping that contains at
        least a ``'loss'`` entry), or ``None`` if no epoch improved on ``inf``.
    """
    model = model.to(device)
    best_loss = torch.inf
    best_results = None
    for epoch in range(epochs):
        # Re-enter training mode every epoch: evaluation presumably switches
        # the model to eval mode (TODO confirm), which would otherwise leave
        # dropout/batch-norm in inference behavior for all later epochs
        model.train()
        total_loss = 0
        total  = 0
        correct = 0
        for batch_index, (images, labels) in enumerate(train_loader):
            optimizer.zero_grad()
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch figures are per-sample averages
            total_loss += loss.item() * images.size(0)
            total += images.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
        accuracy = correct / total
        # Separate name so the batch loss tensor above is not shadowed
        epoch_loss = total_loss / total
        print(f'{epoch:<4} Train Accuracy={accuracy:<10.4f}  Loss= {epoch_loss:<10.4f}')
        results = evaluation(model, val_loader, criterion, device)
        if results['loss'] < best_loss:
            torch.save(model, best_model_path)
            best_loss = results['loss']
            best_results = results
        print()
    return best_results

In [None]:
import torch.nn as nn
import torch.optim as optim

models_dir = 'models'
# torch.save does not create missing folders, so make sure this one exists
os.makedirs(models_dir, exist_ok=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Train the unsupervised sub-network
# Hyperparameters
learning_rate = 0.01
momentum = 0.9
weight_decay = 5e-4
epochs = 10

scae_model = SCAE(num_channels=3)
criterion = nn.MSELoss()
optimizer = optim.SGD(scae_model.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)

for font_name, dataloader in dataloaders.items():
    model_path = os.path.join(models_dir, f"{font_name}_SCAE.pt")
    training_unsupervised(scae_model, dataloader, criterion, optimizer, device, epochs, model_path)

# Train the supervised sub-network
supervised_model = DeepFont(num_channels=3, num_classes=1099) # each image contains a unique word, so each is its own class
# The classifier needs its own loss and optimizer: the MSE criterion above is a
# reconstruction loss, and the optimizer above only updates scae_model's weights
supervised_criterion = nn.CrossEntropyLoss()
supervised_optimizer = optim.SGD(supervised_model.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)
for i, font_name in enumerate(models):
    # Import the convolutional layers of the SCAE as conv1 and conv2
    # (strict=False ignores the SCAE's decoder-only keys)
    scae_path = os.path.join(models_dir, f"{font_name}_SCAE.pt")
    supervised_model.load_state_dict(torch.load(scae_path, map_location=device), strict=False)
    train_loader = train_loaders[i]
    val_loader = val_loaders[i]
    best_model_path = os.path.join(models_dir, f"{font_name}_model.pt")
    # Pass the network itself; the bare name `model` is the font-name string
    # left over from the dataset-building loop
    best_results = training_supervised(supervised_model, train_loader, val_loader, supervised_criterion, supervised_optimizer, device, epochs, best_model_path)