# Training a simple PyTorch classifier on the German Character Recognition Dataset
The following Jupyter-Notebook shows how to train a simple PyTorch classifier on the [German Character Recognition Dataset](https://www.kaggle.com/datasets/thomassedlmeyr/german-character-recognition-dataset). The trained network achieves an ACC- and MCC-value of roughly 0.99.   

First we define some global variables which are used for the whole training process

In [2]:
import torch
# Change the paths accordingly
path_train_csv = "../train.csv"
path_test_csv = "../test.csv"
# First we have to select the classes on which we would like to train on
classes = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
# All available classes which are contained in the dataset
#classes = ['!','$','&','(',')','+','0','1','2','3','4','5','6','7','8','9','<','>','?','A','B','C','D','E','F','G','H',
#           'I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f','g','h','i',
#           'j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','~','ß','α','β','π','φ','€','∑','√','∞',
#           '∫']
dict_classes_to_numbers = dict(zip(classes, range(len(classes))))
dict_numbers_to_classes = dict(zip(range(len(classes)), classes))
num_classes = len(classes)
print("Num classes: " + str(num_classes))
num_val_samples_per_class = 250
# Standard DL-parameters
batch_size_train = 128
batch_size_val = 256
num_workers = 2
lr = 0.001
hparams = {"num_epochs": 100, "early_stopping_patience": 5, "early_stopping_threshold": 0.001}
# For getting reproducible results
seed = 0
torch.manual_seed(seed)

Num classes: 10


<torch._C.Generator at 0x7f763da86830>

We define some helper functions for the training

Then we can define the train loop

In [3]:
import copy
from tqdm import tqdm
from torchvision import transforms

from train_utils import EpochInformation
from train_utils import EarlyStopper

def train_model(data_loaders, model, loss_func, optimizer, device):
    print("training started")
    num_epochs = hparams["num_epochs"]
    information = EpochInformation(model, device, num_classes, dataset_sizes)
    early_stopper = EarlyStopper(patience=hparams["early_stopping_patience"],
                             min_delta=hparams["early_stopping_threshold"],
                             model_weights=copy.deepcopy(model.state_dict()))
    strop_training = False
    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)
        if strop_training == True:
            break
        # Each epoch has a training and validation phase
        for phase in ['val', 'train']:
            if phase == 'train':
                model.train()  
            else:
                model.eval()  
            information.reset_metrics()
            
            if phase == 'train':
                print("training...")
            else:
                print("validating...")                
            data_loader = tqdm(data_loaders[phase])
            for inputs, labels in data_loader:
                inputs = inputs.to(device, non_blocking=True)
                labels = labels.to(device, non_blocking=True)
                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = loss_func(outputs, labels)
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                information.update_metrics_for_batch(outputs, loss, inputs, labels)

            result_dict = information.calculate_metrics(phase)
            # prints the all metrics of the training and validation phase
            print(" ".join(name + ": " + str(round(value, 4)) for name, value in result_dict.items()))

            if phase == 'val':
                if early_stopper.early_stop(result_dict["mcc"], copy.deepcopy(model.state_dict())):
                    print('early stopping')
                    strop_training = True
    # load best model
    model.load_state_dict(early_stopper.best_model_weights) 
    return model

For loading the data we need some helper methods. As stated in the description of the dataset, the representation of each class of the train data set is the same for each class. We also want to make sure that the validation data has the same distribution as the test data, so we need a function which takes a certain amount of samples from each class of the train data set and puts them into the validation data set. To optimize the run time, we save the indices of the train and validation data set in a numpy array. This approach eliminates the need to regenerate the data split each time, thereby significantly reducing processing time.

In [4]:
import numpy as np
from random import random

from train_utils import get_train_and_val_set, get_class_counts_of_data_loader
from data_pytorch import GermanCharacterRecognitionDS

# We normalize with the men and std of the train set
standard_transforms = [transforms.ToTensor(),transforms.Normalize(35.37502147246886, 75.87412766890324)]
test_set = GermanCharacterRecognitionDS(path_test_csv, dict_classes_to_numbers, transform=transforms.Compose(standard_transforms), classes=classes,
                                        num_channels=1)
train_set = GermanCharacterRecognitionDS(path_train_csv, dict_classes_to_numbers, transform=None, classes=classes,
                                         num_channels=1)
num_train = len(train_set)
num_test = len(test_set)
print("sum whole ds: " + str(num_train + num_test))
# TODO comment the following line after the first run
train_set, val_set = get_train_and_val_set(train_set, classes, dict_numbers_to_classes)
# TODO uncomment this line if you want to use the precalculated indnum_val_samples_per_classices which speeds up the run time
#train_set, val_set = split_train_set_from_indices(train_set, np.load("train_indices.npy"), np.load("val_indices.npy"))

train_transforms = standard_transforms + [transforms.RandomRotation(30), transforms.RandomGrayscale(p=0.1), 
                                          transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0))]
train_set.dataset.transform = transforms.Compose(train_transforms)
val_set.dataset.transform = transforms.Compose(standard_transforms)

g = torch.Generator()
g.manual_seed(seed)

train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size_train, shuffle=True, num_workers=num_workers,                                                   generator=g)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size_val, shuffle=False, num_workers=num_workers,
                                         generator=g)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size_val, shuffle=False, num_workers=num_workers,
                                          generator=g)

class_counts_train = get_class_counts_of_data_loader(train_loader, classes, dict_numbers_to_classes)
class_counts_val = get_class_counts_of_data_loader(val_loader, classes, dict_numbers_to_classes)
class_counts_test = get_class_counts_of_data_loader(test_loader, classes, dict_numbers_to_classes)

print("train_loader: " + str(class_counts_train))
print("val_loader: " + str(class_counts_val))
print("test_loader: " + str(class_counts_test))

data_loaders = {"train": train_loader, "val": val_loader, "test": test_loader}
dataset_sizes = {"train": len(train_loader.dataset), "val": len(val_loader.dataset), "test": len(test_loader.dataset)}

sum whole ds: 49251
Splitting train- and val-data ...
Splitting done
train_loader: {'0': 4541, '1': 4203, '2': 4168, '3': 4120, '4': 4019, '5': 3966, '6': 4235, '7': 4161, '8': 4213, '9': 4125}
val_loader: {'0': 250, '1': 250, '2': 250, '3': 250, '4': 250, '5': 250, '6': 250, '7': 250, '8': 250, '9': 250}
test_loader: {'0': 500, '1': 500, '2': 500, '3': 500, '4': 500, '5': 500, '6': 500, '7': 500, '8': 500, '9': 500}


We also calculate the class weights in order to use a weighted loss function

In [5]:
class_weights = []
number_train_values = len(train_loader.dataset)
for class_label in classes:
    weight = float(number_train_values) / class_counts_train[class_label]
    class_weights.append(weight)
class_weights = torch.tensor(class_weights)
sum_class_weights = torch.sum(class_weights)
class_weights = class_weights / sum_class_weights
print("class weights: ", str(dict(zip(classes, class_weights.tolist()))))

class weights:  {'0': 0.09183375537395477, '1': 0.09921891242265701, '2': 0.10005208849906921, '3': 0.10121773928403854, '4': 0.10376140475273132, '5': 0.10514803230762482, '6': 0.09846921265125275, '7': 0.10022040456533432, '8': 0.09898340702056885, '9': 0.101095050573349}


Building the model

In [7]:
from torchsummary import summary
from network_pytorch import PyTorchClassifier

model = PyTorchClassifier(len(classes))
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
model.to(device)
# print the model
summary(model, (1, 40, 40))

cuda:0
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 40, 40]           1,184
         MaxPool2d-2           [-1, 32, 20, 20]               0
            Conv2d-3           [-1, 64, 20, 20]          32,832
         MaxPool2d-4           [-1, 64, 10, 10]               0
            Conv2d-5          [-1, 128, 10, 10]         131,200
         MaxPool2d-6            [-1, 128, 5, 5]               0
            Conv2d-7            [-1, 256, 5, 5]         131,328
            Conv2d-8            [-1, 256, 4, 4]         262,400
         MaxPool2d-9            [-1, 256, 2, 2]               0
          Dropout-10                 [-1, 1024]               0
           Linear-11                  [-1, 256]         262,400
          Dropout-12                  [-1, 256]               0
           Linear-13                   [-1, 10]           2,570
Total params: 823,914
Trainable 

Now we can start the training

In [8]:
class_weights = class_weights.to(device)
optimizer = torch.optim.NAdam(model.parameters(), lr=lr)
loss_func = torch.nn.CrossEntropyLoss(weight=class_weights)
model = train_model(data_loaders, model, loss_func, optimizer, device)

training started
Epoch 0/99
----------
validating...


100%|██████████| 10/10 [00:00<00:00, 16.07it/s]


loss: 2.304 acc: 0.1 mcc: 0 auc: 0.4339
training...


100%|██████████| 327/327 [00:08<00:00, 39.32it/s]


loss: 0.3854 acc: 0.8749 mcc: 0.861 auc: 0.9908 l2_grad: 1.0043 l2_weights: 23.6107
Epoch 1/99
----------
validating...


100%|██████████| 10/10 [00:00<00:00, 16.18it/s]


loss: 0.0679 acc: 0.9832 mcc: 0.9813 auc: 0.9997
training...


100%|██████████| 327/327 [00:08<00:00, 36.62it/s]


loss: 0.0774 acc: 0.9766 mcc: 0.974 auc: 0.9994 l2_grad: 4.4325 l2_weights: 25.8208
Epoch 2/99
----------
validating...


100%|██████████| 10/10 [00:00<00:00, 15.97it/s]


loss: 0.4098 acc: 0.8956 mcc: 0.8891 auc: 0.9967
training...


100%|██████████| 327/327 [00:08<00:00, 37.45it/s]


loss: 0.0675 acc: 0.9802 mcc: 0.978 auc: 0.9995 l2_grad: 0.0764 l2_weights: 28.1896
Epoch 3/99
----------
validating...


100%|██████████| 10/10 [00:00<00:00, 15.80it/s]


loss: 0.0651 acc: 0.986 mcc: 0.9845 auc: 0.9996
training...


100%|██████████| 327/327 [00:11<00:00, 28.83it/s]


loss: 0.0487 acc: 0.9866 mcc: 0.9851 auc: 0.9997 l2_grad: 0.3222 l2_weights: 29.9702
Epoch 4/99
----------
validating...


100%|██████████| 10/10 [00:00<00:00, 11.55it/s]


loss: 0.0559 acc: 0.986 mcc: 0.9845 auc: 0.9997
training...


100%|██████████| 327/327 [00:12<00:00, 26.93it/s]


loss: 0.0429 acc: 0.9878 mcc: 0.9865 auc: 0.9998 l2_grad: 0.193 l2_weights: 31.8139
Epoch 5/99
----------
validating...


100%|██████████| 10/10 [00:00<00:00, 11.31it/s]


loss: 0.0571 acc: 0.9856 mcc: 0.984 auc: 0.9998
training...


100%|██████████| 327/327 [00:12<00:00, 26.77it/s]


loss: 0.0387 acc: 0.9894 mcc: 0.9883 auc: 0.9998 l2_grad: 0.0021 l2_weights: 33.9051
Epoch 6/99
----------
validating...


100%|██████████| 10/10 [00:01<00:00,  7.69it/s]


loss: 0.057 acc: 0.9872 mcc: 0.9858 auc: 0.9998
training...


100%|██████████| 327/327 [00:11<00:00, 27.90it/s]


loss: 0.0322 acc: 0.991 mcc: 0.99 auc: 0.9998 l2_grad: 0.0222 l2_weights: 35.5847
Epoch 7/99
----------
validating...


100%|██████████| 10/10 [00:01<00:00,  8.94it/s]


loss: 0.0494 acc: 0.9876 mcc: 0.9862 auc: 0.9998
training...


100%|██████████| 327/327 [00:11<00:00, 28.76it/s]


loss: 0.0276 acc: 0.9911 mcc: 0.9902 auc: 0.9999 l2_grad: 0.0418 l2_weights: 37.5314
Epoch 8/99
----------
validating...


100%|██████████| 10/10 [00:00<00:00, 13.80it/s]


loss: 0.0546 acc: 0.9888 mcc: 0.9876 auc: 0.9997
training...


100%|██████████| 327/327 [00:10<00:00, 31.54it/s]


loss: 0.0288 acc: 0.9917 mcc: 0.9908 auc: 0.9999 l2_grad: 0.0243 l2_weights: 39.6079
Epoch 9/99
----------
validating...


100%|██████████| 10/10 [00:00<00:00, 12.44it/s]


loss: 0.0502 acc: 0.988 mcc: 0.9867 auc: 0.9998
training...


100%|██████████| 327/327 [00:10<00:00, 30.30it/s]


loss: 0.0251 acc: 0.9925 mcc: 0.9917 auc: 0.9999 l2_grad: 0.0147 l2_weights: 41.6525
Epoch 10/99
----------
validating...


100%|██████████| 10/10 [00:00<00:00, 12.07it/s]


loss: 0.054 acc: 0.9868 mcc: 0.9853 auc: 0.9997
training...


100%|██████████| 327/327 [00:12<00:00, 26.68it/s]


loss: 0.0236 acc: 0.9926 mcc: 0.9918 auc: 0.9999 l2_grad: 0.0173 l2_weights: 43.5441
Epoch 11/99
----------
validating...


100%|██████████| 10/10 [00:00<00:00, 10.17it/s]


loss: 0.073 acc: 0.988 mcc: 0.9867 auc: 0.9996
training...


100%|██████████| 327/327 [00:13<00:00, 24.96it/s]


loss: 0.0243 acc: 0.9928 mcc: 0.992 auc: 0.9999 l2_grad: 0.0728 l2_weights: 45.747
Epoch 12/99
----------
validating...


100%|██████████| 10/10 [00:00<00:00, 10.59it/s]


loss: 0.0574 acc: 0.988 mcc: 0.9867 auc: 0.9998
training...


100%|██████████| 327/327 [00:10<00:00, 30.06it/s]


loss: 0.0224 acc: 0.9934 mcc: 0.9927 auc: 0.9999 l2_grad: 0.7372 l2_weights: 47.5216
Epoch 13/99
----------
validating...


100%|██████████| 10/10 [00:00<00:00, 11.93it/s]


loss: 0.0693 acc: 0.9876 mcc: 0.9862 auc: 0.9997
training...


100%|██████████| 327/327 [00:13<00:00, 24.23it/s]


loss: 0.0202 acc: 0.994 mcc: 0.9934 auc: 1.0 l2_grad: 0.0013 l2_weights: 49.2705
Epoch 14/99
----------
validating...


100%|██████████| 10/10 [00:00<00:00, 11.74it/s]


loss: 0.0711 acc: 0.986 mcc: 0.9845 auc: 0.9996
early stopping
training...


100%|██████████| 327/327 [00:11<00:00, 27.49it/s]

loss: 0.0166 acc: 0.9949 mcc: 0.9943 auc: 1.0 l2_grad: 0.785 l2_weights: 50.8286
Epoch 15/99
----------





After the training we evaluate the model

In [9]:
information_test = EpochInformation(model, device, num_classes, dataset_sizes)
model.eval()
for inputs, labels in data_loaders["test"]:
    inputs = inputs.to(device, non_blocking=True)
    labels = labels.to(device, non_blocking=True)
    optimizer.zero_grad()
    with torch.set_grad_enabled(False):
        outputs = model(inputs)
        loss = loss_func(outputs, labels)
    information_test.update_metrics_for_batch(outputs, loss, inputs, labels)

result_dict = information_test.calculate_metrics("test")
print("Test metrics:")
print(" ".join(name + ": " + str(round(value, 4)) for name, value in result_dict.items()))

Test metrics:
loss: 0.0494 acc: 0.9888 mcc: 0.9876 auc: 0.9997
