<b>HW 6: Character classification using KNN with PyTorch

Author:
</b> Brian Erichsen Fagundes


In [69]:
# Step 1: Data acquision + clenup
import pandas as pd
import numpy as np

# loads data into variable
data = pd.read_csv('ARIAL.csv')

# selects which columns to keep m_label and all the r{x} c{y}
columns_to_keep = ['m_label']
columns_to_keep += [f'r{r}c{c}' for r in range(0, 20) for c in range(0, 20)]
filtered_data = data[columns_to_keep]

# funtion that transforms dataframe returns 2 numpy arrays
# x sample x 20 x 20 has pixel val, y #samples x 1 array has ascii for each char
def transform_data(data_frame):
    # extract the pixel val and normalize data
    # . values converts from pandas to numpy array
    Xs = data_frame[[f'r{r}c{c}' for r in range(0, 20) for c in range(0, 20)]].values
    # makes it samples x 20 x 20 D / 256.0
    Xs = Xs.reshape(-1, 20, 20) / 256.0

    # extrac the ascii value for each char
    Ys = data_frame['m_label'].values
    # makes samples# x 1 Dim
    Ys = Ys.reshape(-1, 1)

    return Xs, Ys

Xs, Ys = transform_data(filtered_data)

# dictionary for label conversion - using set (collection of unique elements)
unique_chars = sorted(set(filtered_data['m_label']))
# maps each char to unique index
char_to_index = {char: idx for idx, char in enumerate(unique_chars)}
# maps each index back to char
index_to_char = {idx: char for char, idx in char_to_index.items()}

# convert labels to indices
Ys = np.array([char_to_index[char] for char in Ys.flatten()])

<b>Step 2: Build a Pytorch network</b>

In [70]:
# Step 2: Build a Pytorch network where its archecture is
    # Convolution 2D layer (relu)
    # Max pooling layer
    # Convolution, another Max pooling
    # Dense layer (relu), dense layer
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import TensorDataset, DataLoader

# Tensor is numpy multi dim array
# Convert data to PyTorch tensors
Xs = torch.tensor(Xs, dtype=torch.float32).reshape(-1, 1, 20, 20) # between 0 and 1
Ys = torch.tensor(Ys, dtype=torch.long) # can be long int

# So we can iterate over batches
dataset = TensorDataset(Xs, Ys)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# Network as a class with a constructor and forward method
class Net(nn.Module):
    def __init__(self):
        # parent class
        super(Net, self).__init__()
        # 1d input, 6 outputs and 3 x 3 pixels kernel filter
        self.conv1 = nn.Conv2d(1, 6, 3)
        # kernel size of 2, reduces spatial dim by half, with stride of 2 for 2x2 kernel
        self.pool = nn.MaxPool2d(2, 2)
        #conv1 output - ((input size - kernel size + 2 x Padding) / Stride)+1
        # 20 - 3 / 1 + 1 -- 18 x 18
        # after first pooling -- 9 x 9 size instead of 18 x 18
        # 6 from the 6 output layer in the 1st convolution layer
        self.conv2 = nn.Conv2d(6, 16, 3)
        # 9 - 3 / 1 + 1 -- 7 x 7
        # after second layer of pooling - 3 x 3
        # first dense layer has 16 * 3 * 3 input features and 120 neurons (output features)
        # after second pooling layer, we have 16 channels 3 x 3
        self.fc1 = nn.Linear(16 * 3 * 3, 120)
        #self.fc2 = nn.Linear(120, 84)
                            # (84, num of classes)
        self.fc3 = nn.Linear(120, len(unique_chars))

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))# conv1 -> relu -> max pool
        x = self.pool(F.relu(self.conv2(x)))# conv2 -> relu -> max pool
        x = x.view(-1, 16 * 3 * 3)# flattens the tensor back to 1 D
        x = F.relu(self.fc1(x)) # FC1 -> relu
        #x = F.relu(self.fc2(x)) # FC2 -> relu
        x = self.fc3(x) # last dense layer
        return x
    
    # Initialize model, loss function, and optimizer
# remember that cuda is using GPU with parallelism
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu")
net = Net().to(device)
# measures error for classification
criterion = nn.CrossEntropyLoss()
# uses ADAM optimizer to find the best weights
optmizer = optim.Adam(net.parameters(), lr=0.001)

# training function
def train(model,train_loader, optmizer, criterion, epochs):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0

        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            #inputs, labels = inputs.to(device), labels.to(device)
            # zero the param gradients
            optmizer.zero_grad()
            outputs = model(inputs) # predict the output with training data
            loss = criterion(outputs, labels) # see how well we did
            loss.backward() # see how to change weight to do better
            optmizer.step() # actually changes the weights
            running_loss += loss.item()
            # prints every 200 batch statistics
            if i % 200 == 199:
                print(f'Epoch [{epoch + 1}], Step [{i + 1}], Loss: {running_loss / 200:.4f}')
                running_loss = 0.0
    print('Finished Training')

train(net,train_loader, optmizer, criterion, 4)

Epoch [1], Step [200], Loss: 6.9383
Epoch [1], Step [400], Loss: 5.7744
Epoch [1], Step [600], Loss: 5.3444
Epoch [1], Step [800], Loss: 4.9814
Epoch [2], Step [200], Loss: 4.1703
Epoch [2], Step [400], Loss: 3.6756
Epoch [2], Step [600], Loss: 3.3063
Epoch [2], Step [800], Loss: 2.9941
Epoch [3], Step [200], Loss: 2.3600
Epoch [3], Step [400], Loss: 2.2110
Epoch [3], Step [600], Loss: 2.0563
Epoch [3], Step [800], Loss: 1.9921
Epoch [4], Step [200], Loss: 1.6616
Epoch [4], Step [400], Loss: 1.6788
Epoch [4], Step [600], Loss: 1.6410
Epoch [4], Step [800], Loss: 1.6299
Finished Training


In [71]:
# Evaluate function
import matplotlib.pyplot as plt

def evaluate(model):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            #images, labels = images.to(device), labels.to(device).view(-1)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Accuracy of the network: { 100 * correct / total:.2f}%')

evaluate(net)

Accuracy of the network: 60.80%


<b>Step 3: Exploration and Evaluation</b>

In [72]:
# Evaluate the network using cross validation
# (splitting data into training/testing). What is its accuracy?

from sklearn.model_selection import train_test_split
# random number is arbitrary
x_train, x_test, y_train, y_test = train_test_split(Xs, Ys, test_size=0.2, random_state=42)
train_data = TensorDataset(x_train, y_train)
test_data = TensorDataset(x_test, y_test)

train_loader = DataLoader(train_data, batch_size=32, shuffle=True, drop_last=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False, drop_last=True)

# function to properly train NN and do a Evaluation with Cross-Validation
def validade_CV(model, test_dataset):
    model.eval()
    correct = 0
    total = 0
    test_loss = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            #images, labels = images.to(device), labels.to(device).view(-1)
            outputs = model(images)
            loss = criterion(outputs, labels)
            test_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    test_loss /= len(test_dataset)
    accuracy = 100 * correct / total
    print(f'Validation Loss: {accuracy:.2f}%')

In [73]:
train(net,train_loader, optmizer, criterion , 4)
validade_CV(net, test_loader)

Epoch [1], Step [200], Loss: 1.4098
Epoch [1], Step [400], Loss: 1.4298
Epoch [1], Step [600], Loss: 1.4352
Epoch [2], Step [200], Loss: 1.2303
Epoch [2], Step [400], Loss: 1.2976
Epoch [2], Step [600], Loss: 1.2794
Epoch [3], Step [200], Loss: 1.1240
Epoch [3], Step [400], Loss: 1.1802
Epoch [3], Step [600], Loss: 1.2106
Epoch [4], Step [200], Loss: 1.0479
Epoch [4], Step [400], Loss: 1.1104
Epoch [4], Step [600], Loss: 1.1287
Finished Training
Validation Loss: 59.17%


In [74]:
# Lets create and train a different topology, adding more convolutiuon layers
class NetImproved(nn.Module):
    def __init__(self):
        super(NetImproved, self).__init__()
        a = 1 # solve for a ...
        # 1d input, 6 outputs and 3 x 3 pixels kernel filter
        c1Out = 6
        c2Out = 16
        c3Out = 32
        self.conv1 = nn.Conv2d(1, c1Out, 3)
        # convoluted layer 1 output -> 20 - 3 + 1 --18 x 18
        # first pooling layer -- 9 x 9
        self.conv2 = nn.Conv2d(c1Out, c2Out, 3)
        # convoluted layer 2 output -> 9 - 3 + 1 -- 7 x 7
        # second pooling layer -- 3 x 3
        self.pool = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(c2Out, c3Out, 3)
        # convoluted layer 3 output -> 3 - 3 + 1 -- 1
        self.pooledOutputSize = c3Out * a * a
        #self.fc1 = nn.Linear(self.pooledOutputSize, 120)
        self.fc1 = nn.Sequential(
            nn.Linear(self.pooledOutputSize, 120),
            nn.Dropout(0.5)
        )
        self.fc2 = nn.Sequential(
            nn.Linear(120, len(unique_chars)),
            nn.Dropout(0.5)
        )

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = x.view(-1, self.pooledOutputSize)
        x = F.relu(self. fc1(x))
        x = self.fc2(x)

        return x

net_improved = NetImproved().to(device)
optimizer_improved = optim.Adam(net_improved.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()


In [75]:
train(net_improved,train_loader, optimizer_improved, criterion, 10)
validade_CV(net_improved, test_loader)
evaluate(net_improved)

Epoch [1], Step [200], Loss: 7.8146
Epoch [1], Step [400], Loss: 7.4588
Epoch [1], Step [600], Loss: 7.2730
Epoch [2], Step [200], Loss: 7.1072
Epoch [2], Step [400], Loss: 7.0128
Epoch [2], Step [600], Loss: 6.9022
Epoch [3], Step [200], Loss: 6.8987
Epoch [3], Step [400], Loss: 6.8536
Epoch [3], Step [600], Loss: 6.9197
Epoch [4], Step [200], Loss: 6.7854
Epoch [4], Step [400], Loss: 6.8172
Epoch [4], Step [600], Loss: 6.7597
Epoch [5], Step [200], Loss: 6.7439
Epoch [5], Step [400], Loss: 6.6930
Epoch [5], Step [600], Loss: 6.7581
Epoch [6], Step [200], Loss: 6.7291
Epoch [6], Step [400], Loss: 6.6647
Epoch [6], Step [600], Loss: 6.6543
Epoch [7], Step [200], Loss: 6.6839
Epoch [7], Step [400], Loss: 6.7037
Epoch [7], Step [600], Loss: 6.6192
Epoch [8], Step [200], Loss: 6.5805
Epoch [8], Step [400], Loss: 6.6784
Epoch [8], Step [600], Loss: 6.5894
Epoch [9], Step [200], Loss: 6.5938
Epoch [9], Step [400], Loss: 6.6070
Epoch [9], Step [600], Loss: 6.5617
Epoch [10], Step [200], Loss

I noticed that sometimes adding more layers can decrease the overall accuracy of the model.

In [76]:
# Lets try a different topology since the previous attempt yielded a low
# Here I used same number of convoluted layers and fully connected layers as well
# but added batch normalization for stabalize and accelerate training
# and dropout as well after each FC layer to prevent overfitting

# Network as a class with a constructor and forward method
class NetImproved2(nn.Module):
    def __init__(self):
        # parent class
        super(NetImproved2, self).__init__()
        # 1d input, 6 outputs and 3 x 3 pixels kernel filter
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.batch_norm1 = nn.BatchNorm2d(6)
        # kernel size of 2, reduces spatial dim by half, with stride of 2 for 2x2 kernel
        self.pool = nn.MaxPool2d(2, 2)
        #conv1 output - ((input size - kernel size + 2 x Padding) / Stride)+1
        # 20 - 3 / 1 + 1 -- 18 x 18
        # after first pooling -- 9 x 9 size instead of 18 x 18
        # 6 from the 6 output layer in the 1st convolution layer
        self.conv2 = nn.Conv2d(6, 16, 3)
        self.batch_norm2 = nn.BatchNorm2d(16)
        # 9 - 3 / 1 + 1 -- 7 x 7
        # after second layer of pooling - 3 x 3
        # first dense layer has 16 * 3 * 3 input features and 120 neurons (output features)
        # after second pooling layer, we have 16 channels 3 x 3
        self.fc1 = nn.Linear(16 * 3 * 3, 120)
        #self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(120, len(unique_chars))
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.batch_norm1(self.conv1(x))))# conv1 -> relu -> max pool
        x = self.pool(F.relu(self.batch_norm2(self.conv2(x))))# conv2 -> relu -> max pool
        x = x.view(-1, 16 * 3 * 3)# flattens the tensor back to 1 D
        x = self.dropout(F.relu(self.fc1(x))) # FC1 -> relu
        #x = F.relu(self.fc2(x)) # FC2 -> relu
        x = self.fc3(x) # last dense layer
        return x

net_improved = NetImproved2().to(device)
optimizer_improved = optim.Adam(net_improved.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()


In [77]:
train(net_improved,train_loader, optimizer_improved, criterion, 10)
validade_CV(net_improved, test_loader)
evaluate(net_improved)

Epoch [1], Step [200], Loss: 6.5024
Epoch [1], Step [400], Loss: 5.5669
Epoch [1], Step [600], Loss: 5.3243
Epoch [2], Step [200], Loss: 4.7789
Epoch [2], Step [400], Loss: 4.6169
Epoch [2], Step [600], Loss: 4.3285
Epoch [3], Step [200], Loss: 3.9455
Epoch [3], Step [400], Loss: 3.8206
Epoch [3], Step [600], Loss: 3.6748
Epoch [4], Step [200], Loss: 3.3406
Epoch [4], Step [400], Loss: 3.1526
Epoch [4], Step [600], Loss: 3.0366
Epoch [5], Step [200], Loss: 2.7670
Epoch [5], Step [400], Loss: 2.7337
Epoch [5], Step [600], Loss: 2.7113
Epoch [6], Step [200], Loss: 2.3823
Epoch [6], Step [400], Loss: 2.3738
Epoch [6], Step [600], Loss: 2.3875
Epoch [7], Step [200], Loss: 2.1171
Epoch [7], Step [400], Loss: 2.1198
Epoch [7], Step [600], Loss: 2.1720
Epoch [8], Step [200], Loss: 1.9933
Epoch [8], Step [400], Loss: 1.9430
Epoch [8], Step [600], Loss: 1.9560
Epoch [9], Step [200], Loss: 1.8002
Epoch [9], Step [400], Loss: 1.7667
Epoch [9], Step [600], Loss: 1.8112
Epoch [10], Step [200], Loss

By adding batch normalization for stabalize and accelerate training

and dropout as well after each fully connected layer to prevent overfitting we were

able to increase accuracy of the network to 81.03%.

In [78]:
# Lets try a slightly different topology using bigger kernel size for
# the convolution layers


# Network as a class with a constructor and forward method
class NetImproved3(nn.Module):
    def __init__(self):
        # parent class
        super(NetImproved3, self).__init__()
        # 1d input, 6 outputs and 5 x 5 pixels kernel filter
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.batch_norm1 = nn.BatchNorm2d(6)
        # kernel size of 2, reduces spatial dim by half, with stride of 2 for 2x2 kernel
        self.pool = nn.MaxPool2d(2, 2)
        #conv1 output - ((input size - kernel size + 2 x Padding) / Stride)+1
        # 20 - 5 / 1 + 1 -- 16 x 16
        # after first pooling -- 8 x 8 size instead of 16 x 16
        # 6 from the 6 output layer in the 1st convolution layer
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.batch_norm2 = nn.BatchNorm2d(16)
        # 8 - 5 / 1 + 1 -- 4 x 4
        # after second layer of pooling - 2 x 2
        # first dense layer has 16 * 3 * 3 input features and 120 neurons (output features)
        # after second pooling layer, we have 16 channels 3 x 3
        self.fc1 = nn.Linear(16 * 2 * 2, 120)
        #self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(120, len(unique_chars))
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.batch_norm1(self.conv1(x))))# conv1 -> relu -> max pool
        x = self.pool(F.relu(self.batch_norm2(self.conv2(x))))# conv2 -> relu -> max pool
        x = x.view(-1, 16 * 2 * 2)# flattens the tensor back to 1 D
        x = self.dropout(F.relu(self.fc1(x))) # FC1 -> relu
        #x = F.relu(self.fc2(x)) # FC2 -> relu
        x = self.fc3(x) # last dense layer
        return x

net_improved = NetImproved3().to(device)
optimizer_improved = optim.Adam(net_improved.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [79]:
train(net_improved,train_loader, optimizer_improved, criterion, 10)
validade_CV(net_improved, test_loader)
evaluate(net_improved)

Epoch [1], Step [200], Loss: 6.4814
Epoch [1], Step [400], Loss: 5.5625
Epoch [1], Step [600], Loss: 5.2458
Epoch [2], Step [200], Loss: 4.6857
Epoch [2], Step [400], Loss: 4.4841
Epoch [2], Step [600], Loss: 4.2978
Epoch [3], Step [200], Loss: 3.8488
Epoch [3], Step [400], Loss: 3.7360
Epoch [3], Step [600], Loss: 3.5587
Epoch [4], Step [200], Loss: 3.2072
Epoch [4], Step [400], Loss: 3.1462
Epoch [4], Step [600], Loss: 3.0437
Epoch [5], Step [200], Loss: 2.7302
Epoch [5], Step [400], Loss: 2.7330
Epoch [5], Step [600], Loss: 2.6570
Epoch [6], Step [200], Loss: 2.4078
Epoch [6], Step [400], Loss: 2.4030
Epoch [6], Step [600], Loss: 2.3576
Epoch [7], Step [200], Loss: 2.2048
Epoch [7], Step [400], Loss: 2.1345
Epoch [7], Step [600], Loss: 2.1486
Epoch [8], Step [200], Loss: 1.9463
Epoch [8], Step [400], Loss: 2.0055
Epoch [8], Step [600], Loss: 2.0360
Epoch [9], Step [200], Loss: 1.8452
Epoch [9], Step [400], Loss: 1.8752
Epoch [9], Step [600], Loss: 1.8392
Epoch [10], Step [200], Loss

Lets test the accuracy with char inputs from different front set and see how it performs

In [80]:
# Here we load ARIAL data into model and check accuracy for comparison

# loads data into variable
data = pd.read_csv('ARIAL.csv')

# selects which columns to keep m_label and all the r{x} c{y}
columns_to_keep = ['m_label']
columns_to_keep += [f'r{r}c{c}' for r in range(0, 20) for c in range(0, 20)]
filtered_data = data[columns_to_keep]

Xs, Ys = transform_data(filtered_data)

# convert labels to indices
Ys = np.array([char_to_index[char] for char in Ys.flatten()])

# Tensor is numpy multi dim array
# Convert data to PyTorch tensors
Xs = torch.tensor(Xs, dtype=torch.float32).reshape(-1, 1, 20, 20) # between 0 and 1
Ys = torch.tensor(Ys, dtype=torch.long) # can be long int

# random number is arbitrary
x_train, x_test, y_train, y_test = train_test_split(Xs, Ys, test_size=0.2, random_state=42)
train_data = TensorDataset(x_train, y_train)
test_data = TensorDataset(x_test, y_test)

train_loader = DataLoader(train_data, batch_size=32, shuffle=True, drop_last=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False, drop_last=True)

    # Initialize model, loss function, and optimizer
# remember that cuda is using GPU with parallelism
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu")
net = Net().to(device)
# measures error for classification
criterion = nn.CrossEntropyLoss()
# uses ADAM optimizer to find the best weights
optimizer = optim.Adam(net.parameters(), lr=0.001)

train(net, train_loader, optimizer, criterion, 10)
validade_CV(net, test_loader)
evaluate(net)

Epoch [1], Step [200], Loss: 6.9750
Epoch [1], Step [400], Loss: 5.9262
Epoch [1], Step [600], Loss: 5.3206
Epoch [2], Step [200], Loss: 4.6045
Epoch [2], Step [400], Loss: 4.1982
Epoch [2], Step [600], Loss: 3.7747
Epoch [3], Step [200], Loss: 2.9396
Epoch [3], Step [400], Loss: 2.7455
Epoch [3], Step [600], Loss: 2.5312
Epoch [4], Step [200], Loss: 2.0059
Epoch [4], Step [400], Loss: 1.9819
Epoch [4], Step [600], Loss: 1.9337
Epoch [5], Step [200], Loss: 1.5943
Epoch [5], Step [400], Loss: 1.6361
Epoch [5], Step [600], Loss: 1.6479
Epoch [6], Step [200], Loss: 1.3833
Epoch [6], Step [400], Loss: 1.4287
Epoch [6], Step [600], Loss: 1.4282
Epoch [7], Step [200], Loss: 1.2718
Epoch [7], Step [400], Loss: 1.2640
Epoch [7], Step [600], Loss: 1.3043
Epoch [8], Step [200], Loss: 1.1324
Epoch [8], Step [400], Loss: 1.1955
Epoch [8], Step [600], Loss: 1.1993
Epoch [9], Step [200], Loss: 1.0510
Epoch [9], Step [400], Loss: 1.1292
Epoch [9], Step [600], Loss: 1.1314
Epoch [10], Step [200], Loss

Using the different font set TIMES instead of ARIAL, using the 1st topology of NN the accuracy of the model seemed to decrease accuracy slightly to when training with arial instead.

In [81]:
net_improved = NetImproved2().to(device)
optimizer_improved = optim.Adam(net_improved.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

train(net_improved,train_loader, optimizer_improved, criterion, 10)
validade_CV(net_improved, test_loader)
evaluate(net_improved)

net_improved = NetImproved3().to(device)
optimizer_improved = optim.Adam(net_improved.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

train(net_improved,train_loader, optimizer_improved, criterion, 10)
validade_CV(net_improved, test_loader)
evaluate(net_improved)

Epoch [1], Step [200], Loss: 6.5198
Epoch [1], Step [400], Loss: 5.6791
Epoch [1], Step [600], Loss: 5.3626
Epoch [2], Step [200], Loss: 4.8440
Epoch [2], Step [400], Loss: 4.6748
Epoch [2], Step [600], Loss: 4.3630
Epoch [3], Step [200], Loss: 4.0299
Epoch [3], Step [400], Loss: 3.9295
Epoch [3], Step [600], Loss: 3.7600
Epoch [4], Step [200], Loss: 3.4349
Epoch [4], Step [400], Loss: 3.3536
Epoch [4], Step [600], Loss: 3.2090
Epoch [5], Step [200], Loss: 3.0044
Epoch [5], Step [400], Loss: 2.8628
Epoch [5], Step [600], Loss: 2.8007
Epoch [6], Step [200], Loss: 2.6093
Epoch [6], Step [400], Loss: 2.5349
Epoch [6], Step [600], Loss: 2.5076
Epoch [7], Step [200], Loss: 2.3189
Epoch [7], Step [400], Loss: 2.3326
Epoch [7], Step [600], Loss: 2.2736
Epoch [8], Step [200], Loss: 2.0962
Epoch [8], Step [400], Loss: 2.0970
Epoch [8], Step [600], Loss: 2.0992
Epoch [9], Step [200], Loss: 1.9371
Epoch [9], Step [400], Loss: 1.9337
Epoch [9], Step [600], Loss: 1.9322
Epoch [10], Step [200], Loss

Lets try to see some results with training the NN with 2 different fonts.

In [83]:
# concatenate both files data
arial_data = pd.read_csv('ARIAL.csv')
times_data = pd.read_csv('TIMES.csv')

data = pd.concat([arial_data, times_data])

# selects which columns to keep m_label and all the r{x} c{y}
columns_to_keep = ['m_label']
columns_to_keep += [f'r{r}c{c}' for r in range(0, 20) for c in range(0, 20)]
filtered_data = data[columns_to_keep]

Xs, Ys = transform_data(filtered_data)
# convert labels to indices
Ys = np.array([char_to_index[char] for char in Ys.flatten()])

# Convert data to PyTorch tensors
Xs = torch.tensor(Xs, dtype=torch.float32).reshape(-1, 1, 20, 20) # between 0 and 1
Ys = torch.tensor(Ys, dtype=torch.long) # can be long int

x_train, x_test, y_train, y_test = train_test_split(Xs, Ys, test_size=0.2, random_state=42)
train_data = TensorDataset(x_train, y_train)
test_data = TensorDataset(x_test, y_test)

train_loader = DataLoader(train_data, batch_size=32, shuffle=True, drop_last=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False, drop_last=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu")
net = Net().to(device)
# measures error for classification
criterion = nn.CrossEntropyLoss()
# uses ADAM optimizer to find the best weights
optimizer = optim.Adam(net.parameters(), lr=0.001)

train(net, train_loader, optimizer, criterion, 10)
validade_CV(net, test_loader)
evaluate(net)

Epoch [1], Step [200], Loss: 7.2909
Epoch [1], Step [400], Loss: 6.6004
Epoch [1], Step [600], Loss: 6.0636
Epoch [1], Step [800], Loss: 5.6946
Epoch [2], Step [200], Loss: 4.7245
Epoch [2], Step [400], Loss: 4.2984
Epoch [2], Step [600], Loss: 3.8616
Epoch [2], Step [800], Loss: 3.5163
Epoch [3], Step [200], Loss: 2.7176
Epoch [3], Step [400], Loss: 2.5537
Epoch [3], Step [600], Loss: 2.4847
Epoch [3], Step [800], Loss: 2.3618
Epoch [4], Step [200], Loss: 1.9630
Epoch [4], Step [400], Loss: 1.9675
Epoch [4], Step [600], Loss: 1.9750
Epoch [4], Step [800], Loss: 1.8956
Epoch [5], Step [200], Loss: 1.6597
Epoch [5], Step [400], Loss: 1.6433
Epoch [5], Step [600], Loss: 1.7062
Epoch [5], Step [800], Loss: 1.6699
Epoch [6], Step [200], Loss: 1.4613
Epoch [6], Step [400], Loss: 1.4835
Epoch [6], Step [600], Loss: 1.5076
Epoch [6], Step [800], Loss: 1.4733
Epoch [7], Step [200], Loss: 1.2882
Epoch [7], Step [400], Loss: 1.3369
Epoch [7], Step [600], Loss: 1.3553
Epoch [7], Step [800], Loss: