### Training Model

In [1]:
from dataset_generator import *
from model import *
import os
import torch
import copy
import time
import numpy as np
import torch.optim as optim
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from skimage import io, img_as_float
import albumentations as A
import cv2
from albumentations.pytorch import ToTensorV2
from collections import defaultdict
import random
#-----------------------
from const import VOCABS
from Font import Font
from WordGenerator import WordGenerator
from WordRenderer import WordRenderer
#-----------------------
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [2]:
class VisualAugmenterNothing(IVisualAugmenter):
    def augment(self, img: np.array, *args, **kwrgs) -> np.array:
        return np.array(img).astype(np.uint8)

#### Function for creating fotns dict. Dict's key is font name without modifications(without bold, italic and e.t) 

In [3]:
def create_fonts_dict(fonts_folder) -> Dict[str, Union[List[IFont], np.array]]:
    fonts_path = [f'{fonts_folder}/{fontname}' for fontname in os.listdir(fonts_folder)]
    fonts_dict = defaultdict(list)
    fonts = defaultdict(list)
    for i in range(len(fonts_path)):
        font = Font()
        font.load_font(fonts_path[i])
        font_name = os.path.basename(fonts_path[i])
        fonts_dict[font_name[:font_name.find('_')]].append(font)
    for key in fonts_dict.keys():
        fonts[key] = [
            fonts_dict[key],
            np.ones(len(fonts_dict[key])) / len(fonts_dict[key])
        ]
    return fonts

#### Random seed for torch workers

In [4]:
def worker_init_fn(worker_id):
    torch_seed = torch.initial_seed()
    if torch_seed >= 2**30:  # make sure torch_seed + workder_id < 2**32
        torch_seed = torch_seed % 2**30
    random.seed(torch_seed + worker_id)
    np.random.seed(torch_seed + worker_id)

#### Training finction

In [5]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25):
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  
            else:
                model.eval()   

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                
                optimizer.zero_grad()
                
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = torch.tensor([0.])
                    loss = loss.to(device)
                    for i in range(outputs.shape[0]):
                        loss += criterion(outputs[i], labels[i])#C
                        
                    _, preds = torch.max(outputs, 2)
                    #print("Preds is: ",preds)
                    #print("Labels is :", labels)
                        
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset) / 32

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                pass
                val_acc_history.append(epoch_acc.detach().cpu().clone().numpy())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    
    model.load_state_dict(best_model_wts)
    return model, val_acc_history    

In [6]:
%cd ..
%cd DataGenerator/fonts

/home/ivan/pdfparser-3
/home/ivan/pdfparser-3/DataGenerator/fonts


In [7]:
vocab = VOCABS['french']
fonts_folder = '/home/ivan/pdfparser-3/DataGenerator/fonts'
fonts = create_fonts_dict(fonts_folder)
word_generator = WordGenerator()
word_generator.word_storage_load('/home/ivan/pdfparser-3/DataGenerator/word_storage/wiki_qa_corpus/wiki_qa.wl',vocab)
word_renderer = WordRenderer()
text_generator = SequenceGenerator(
    fonts= fonts,#(fonts, np.ones((len(fonts))) / len(fonts)),
    font_sizes= ([x for x in range(10, 20)], np.ones(10) / 10),
    augmenters= [VisualAugmenterNothing()],
    word_generator= word_generator,
    word_renderer= word_renderer
)

In [8]:
input_size = 224
train_transform = A.Compose([
    A.ShiftScaleRotate(shift_limit = 0.05, scale_limit = 0.05, rotate_limit = 0.05, p = 0.5),
    A.SmallestMaxSize(max_size=input_size),
    #A.RandomCrop(height=input_size, width=input_size),
    A.RGBShift(r_shift_limit=0.7, g_shift_limit=0.7, b_shift_limit=0.7, p = 0.5),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])

dataset_gen = DatasetGenerator(text_generator, train_transform)

val_transform = A.Compose([
        A.SmallestMaxSize(max_size=input_size),
        #A.CenterCrop(height=input_size, width=input_size),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), 
        ToTensorV2(),
])

val_dataset_gen = DatasetGenerator(text_generator, val_transform)

train_dataloader = DataLoader(dataset_gen, batch_size=4,worker_init_fn=worker_init_fn)
val_dataloader = DataLoader(val_dataset_gen, batch_size=4, worker_init_fn=worker_init_fn)

dataloaders_dict = {'train' : train_dataloader,
                  'val': val_dataloader
}

model = BoldClassifier() 

criterion = nn.CrossEntropyLoss()

optimizer = optim.Adam(model.parameters(), lr=0.001)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [9]:
model = model.to(device).float()
model, history= train_model(model,
                              dataloaders_dict, 
                              criterion,
                              optimizer,
                              num_epochs=5
)

Epoch 0/4
----------
train Loss: 1.6329 Acc: 0.8413
val Loss: 9.6473 Acc: 0.7329

Epoch 1/4
----------
train Loss: 0.5553 Acc: 0.9458
val Loss: 0.5959 Acc: 0.9546

Epoch 2/4
----------
train Loss: 0.3621 Acc: 0.9692
val Loss: 0.8502 Acc: 0.9219

Epoch 3/4
----------
train Loss: 0.3136 Acc: 0.9736
val Loss: 0.8784 Acc: 0.9512

Epoch 4/4
----------
train Loss: 0.2378 Acc: 0.9800
val Loss: 0.9507 Acc: 0.9258

Training complete in 9m 2s
Best val Acc: 0.954590


In [13]:
#text_generator.generate_sequence(32)
%cd ..
%cd ..
%cd BoldClassifier

/home/ivan/pdfparser-3/DataGenerator
/home/ivan/pdfparser-3
/home/ivan/pdfparser-3/BoldClassifier


In [11]:
#torch.save(model.state_dict(), 'model.pth')