### Training and evaluation code for the bold-text classifier

In [1]:
from generator import RenderedTextGenerator
from model import *
import os
import torch
import copy
import time
import numpy as np
import torch.optim as optim
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from skimage import io, img_as_float
import albumentations as A
import cv2
from albumentations.pytorch import ToTensorV2
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [2]:
class DatasetGenerator(Dataset):
    """Dataset that renders a fresh synthetic sample on every access.

    Every item is produced by calling ``text_generator.render_text()``, so the
    index passed to ``__getitem__`` is ignored and the "length" only controls
    how many samples make up one pass over the dataset.

    Args:
        text_generator: Object exposing ``render_text()`` that returns an
            ``(imgs, labels)`` pair.
        transform: Optional callable invoked as ``transform(image=...)`` and
            returning a mapping with an ``'image'`` entry (the albumentations
            calling convention). When ``None`` the raw images are returned.
        num_words: Number of images taken from ``imgs`` when a transform is
            applied.
        length: Number of samples reported by ``len()``. Defaults to 124, the
            value that used to be hard-coded.
    """

    def __init__(self, text_generator, transform=None, num_words: int = 8,
                 length: int = 124):
        super().__init__()
        self.text_generator = text_generator
        self.transform = transform
        self.num_words = num_words
        self.length = length

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        # `idx` is intentionally unused: each access renders new data.
        imgs, labels = self.text_generator.render_text()

        if self.transform is None:
            return torch.tensor(imgs), torch.tensor(labels)

        # Assumes `imgs` is indexable as (word, H, W, C); only the first three
        # channels of each word image are handed to the transform.
        transformed = [
            self.transform(image=imgs[i, :, :, :3])['image'].float()
            for i in range(self.num_words)
        ]
        return torch.stack(transformed), torch.tensor(labels)
    
def worker_init_fn(worker_id):
    """Seed Python's and NumPy's global RNGs for one DataLoader worker.

    The seed is derived from ``torch.initial_seed()`` plus ``worker_id`` so
    that different workers do not draw identical random sequences.

    Args:
        worker_id: Integer id of the worker, as passed by ``DataLoader``.
    """
    # Local import: `random` is not imported explicitly at the top of the
    # file, so relying on a global name would raise NameError in workers.
    import random

    # Fold the torch seed below 2**30 so that seed + worker_id stays under
    # the 2**32 limit accepted by np.random.seed.
    seed = torch.initial_seed() % 2**30 + worker_id
    random.seed(seed)
    np.random.seed(seed)
    
    
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25):
    """Train ``model`` and keep the weights with the best validation accuracy.

    Each epoch runs a ``'train'`` phase followed by a ``'val'`` phase. The
    per-batch loss is the sum of ``criterion(outputs[i], labels[i])`` over
    the samples in the batch. Predictions are the argmax over dimension 2 of
    the model output, so the output is expected to be at least 3-D
    (presumably ``(batch, words, classes)``) with ``labels`` matching the
    first two dimensions.

    Args:
        model: ``torch.nn.Module`` to optimise; batches are moved to the
            device of its first parameter (CPU if it has no parameters).
        dataloaders: Mapping with ``'train'`` and ``'val'`` iterables that
            yield ``(inputs, labels)`` tensor pairs.
        criterion: Loss callable applied per sample.
        optimizer: Optimizer updating ``model``'s parameters.
        num_epochs: Number of train/val epochs to run.

    Returns:
        ``(model, val_acc_history)`` where ``model`` has the best validation
        weights loaded and ``val_acc_history`` is a list with one 0-d NumPy
        array (validation accuracy) per epoch.
    """
    since = time.time()

    # Use the model's own device instead of depending on a module-level
    # `device` variable being defined before this function is called.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            running_corrects = torch.zeros((), dtype=torch.long, device=device)
            samples_seen = 0
            labels_seen = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)

                    # Batch loss = sum of the per-sample losses.
                    loss = torch.zeros(1, device=device)
                    for sample_out, sample_labels in zip(outputs, labels):
                        loss = loss + criterion(sample_out, sample_labels)

                    preds = outputs.argmax(dim=2)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # `loss` is already summed over the batch, so it must not be
                # multiplied by the batch size again.
                running_loss += loss.item()
                running_corrects += (preds == labels).sum()
                samples_seen += inputs.size(0)
                labels_seen += labels.numel()

            # Normalise by what was actually processed rather than by a
            # hard-coded number of labels per sample.
            epoch_loss = running_loss / max(samples_seen, 1)
            epoch_acc = running_corrects.double() / max(labels_seen, 1)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'val':
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                val_acc_history.append(epoch_acc.detach().cpu().clone().numpy())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # Restore the weights from the best validation epoch before returning.
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [3]:
# One text renderer is shared by every dataset created in this notebook.
rtg = RenderedTextGenerator(
    batch_size=1,
    fontsize_range=(10, 22),
    fonts_folder='fonts/',
    img_shape=(224, 224),
    fonttype='bold',
)

input_size = 224

# Channel statistics passed to A.Normalize in both pipelines.
norm_kwargs = dict(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))

# Training pipeline: light geometric / colour augmentation, resize,
# normalisation, then conversion to a tensor.
train_transform = A.Compose([
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=0.05, p=0.5),
    A.SmallestMaxSize(max_size=input_size),
    #A.RandomCrop(height=input_size, width=input_size),
    A.RGBShift(r_shift_limit=0.7, g_shift_limit=0.7, b_shift_limit=0.7, p=0.5),
    A.Normalize(**norm_kwargs),
    ToTensorV2(),
])

# Validation pipeline: same resize / normalisation, no augmentation.
val_transform = A.Compose([
    A.SmallestMaxSize(max_size=input_size),
    #A.CenterCrop(height=input_size, width=input_size),
    A.Normalize(**norm_kwargs),
    ToTensorV2(),
])

dataset_gen = DatasetGenerator(rtg, train_transform)
val_dataset_gen = DatasetGenerator(rtg, val_transform)

train_dataloader = DataLoader(dataset_gen, batch_size=16, worker_init_fn=worker_init_fn)
val_dataloader = DataLoader(val_dataset_gen, batch_size=16, worker_init_fn=worker_init_fn)

# Phase name -> loader, in the form train_model() expects.
dataloaders_dict = dict(train=train_dataloader, val=val_dataloader)

model = BoldClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Use the first GPU when one is available, otherwise the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

In [None]:
# Put the model on the selected device (parameters cast with .float()),
# then train for 20 epochs; `history` holds the per-epoch validation accuracy.
model = model.to(device)
model = model.float()
model, history = train_model(
    model, dataloaders_dict, criterion, optimizer, num_epochs=20,
)

In [None]:
# Sanity check of the loss: one sample with two class scores where the
# target class (index 1) has by far the larger score, so the value
# displayed by the last line should be close to zero.
a = torch.tensor([0., 100.]).unsqueeze(0)
b = torch.tensor([1])
criterion(a, b)

In [4]:
# The checkpoint loaded below is expected to come from:
#torch.save(model.state_dict(), 'model.pth')
model = BoldClassifier()
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine also loads on a CPU-only one.
model.load_state_dict(torch.load('model.pth', map_location=device))
model.eval()  # inference mode for the evaluation cells that follow
model = model.to(device).float()

In [5]:
# Evaluation pipeline: resize, normalise, convert to tensor (no augmentation).
test_transform = A.Compose([
        A.SmallestMaxSize(max_size=input_size),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
])

test_dataset_gen = DatasetGenerator(rtg, test_transform)
test_dataloader = DataLoader(test_dataset_gen, batch_size=10, worker_init_fn=worker_init_fn)

# No gradients are needed for evaluation; disabling autograd avoids building
# a graph for every forward pass.
with torch.no_grad():
    for imgs, labels in test_dataloader:
        imgs = imgs.to(device)
        labels = labels.to(device)
        out = model(imgs)
        res = out.argmax(dim=2)  # predicted class for every word image
        # Fraction of correctly classified words in this batch.
        print(torch.sum(res == labels).float() / (res.shape[0] * res.shape[1]))

tensor(1., device='cuda:0')
tensor(0.9625, device='cuda:0')
tensor(0.9750, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.9625, device='cuda:0')
tensor(0.9875, device='cuda:0')
tensor(0.9625, device='cuda:0')
tensor(0.9750, device='cuda:0')
tensor(0.9750, device='cuda:0')
tensor(0.9875, device='cuda:0')
tensor(0.9875, device='cuda:0')
tensor(1., device='cuda:0')
tensor(1., device='cuda:0')


In [6]:
# Average of the per-batch accuracies printed by the evaluation loop above.
np.mean([
    1, 0.9625, 0.9750, 1, 0.9625, 0.9875, 0.9625,
    0.9750, 0.9750, 0.9875, 0.9875, 1, 1,
])


0.9826923076923079