In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import argparse
import numpy as np
import time
import math
from copy import deepcopy # Add Deepcopy for args
import seaborn as sns 
import matplotlib.pyplot as plt
from torchvision.models import resnet50

Training

In [None]:
"""
Hyperparameters
"""
# Number of full passes over the training loader.
num_epochs = 10
# Mini-batch sizes handed to the train / test DataLoaders.
batch_size_train = 100
batch_size_test = 1000
# Base learning rate given to the optimizer; the training cell also uses it
# as the upper bound when it re-draws the learning rate after a
# non-improving epoch.
learning_rate = 0.005
# NOTE(review): the optimizer cell constructs Adam with only `lr`, so this
# value does not appear to be consumed anywhere visible — confirm before
# relying on it.
momentum = 0.5
# Progress-logging interval, presumably measured in training steps — confirm
# against the training cell.
log_interval = 500

In [None]:
"""
Loading data from EMNIST
"""
# Normalisation shared by both splits (same mean / std constants as before).
normalize = transforms.Normalize((0.1307,), (0.3081,))

# Training images are randomly warped and rotated by up to 10 degrees
# (uncovered corners filled with 0) before tensor conversion.
train_transform = transforms.Compose([
    transforms.RandomPerspective(),
    transforms.RandomRotation(10, fill=(0,)),
    transforms.ToTensor(),
    normalize,
])

# Test images are only converted and normalised — no augmentation.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    normalize,
])

train_set = torchvision.datasets.EMNIST(
    '/files/', split='letters', train=True, download=True,
    transform=train_transform,
)
test_set = torchvision.datasets.EMNIST(
    '/files/', split='letters', train=False, download=True,
    transform=test_transform,
)

train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size_train, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=batch_size_test, shuffle=True)

In [None]:
"""
Printing example data. First data is how the processed image will look like
"""
# Pull a single (index, (images, targets)) item from the test loader so the
# following cell has a batch to visualise.
examples = enumerate(test_loader)
first_batch = next(examples)
batch_idx = first_batch[0]
example_data, example_targets = first_batch[1]

print(example_data.shape)


In [None]:
# Binarise a COPY of the first example (pixels below 2 -> -1, otherwise +1).
# The previous version edited the array returned by `.numpy()`, which shares
# memory with `example_data`; re-running the cell therefore re-thresholded an
# already binarised image and turned it completely blank.
ex = example_data[0][0].numpy().copy()
ex = np.where(ex < 2, -1.0, 1.0).astype(ex.dtype)

fig = plt.figure(figsize=(15, 9))
for i in range(6):
    plt.subplot(2, 3, i + 1)
    # Panel 1 shows what a processed image looks like; the remaining panels
    # show the normalised images exactly as the loader produced them.
    panel = ex if i == 0 else example_data[i][0]
    plt.imshow(panel, cmap='gray', interpolation='none')
    plt.title("Ground Truth: {}".format(example_targets[i]))
plt.tight_layout()
plt.show()

In [None]:
class VGG(nn.Module):
    """VGG-style CNN for single-channel images that returns log-probabilities.

    Four convolutional stages, each ending in a 2x2 max-pool, feed a two-layer
    fully connected head. The head's first ``Linear`` takes 256 features, so
    the last stage must produce a 256x1x1 map; a 28x28 input satisfies this
    (28 -> 14 -> 7 -> 3 -> 1 across the four pools).

    Based on - https://github.com/kkweon/mnist-competition
    from: https://github.com/ranihorev/Kuzushiji_MNIST/blob/master/KujuMNIST.ipynb
    """

    def _conv_stage(self, in_channels, widths):
        """Build ``len(widths)`` Conv3x3-BatchNorm-ReLU triples plus a max-pool.

        All layers are created first and only then re-initialised, in the same
        order the stage lists them.
        """
        layers = []
        previous = in_channels
        for width in widths:
            layers.extend([
                nn.Conv2d(previous, width, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(width),
                nn.ReLU(inplace=True),
            ])
            previous = width
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        stage = nn.Sequential(*layers)

        for layer in stage.children():
            if isinstance(layer, nn.Conv2d):
                # Normal init with variance 2 / (kernel_h * kernel_w * out_channels).
                kernel_h, kernel_w = layer.kernel_size
                fan_out = kernel_h * kernel_w * layer.out_channels
                layer.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(layer, nn.BatchNorm2d):
                # Start batch-norm as the identity transform.
                layer.weight.data.fill_(1)
                layer.bias.data.zero_()
        return stage

    def two_conv_pool(self, in_channels, f1, f2):
        """Return a stage of two conv blocks (``f1`` then ``f2`` channels) and a pool."""
        return self._conv_stage(in_channels, (f1, f2))

    def three_conv_pool(self, in_channels, f1, f2, f3):
        """Return a stage of three conv blocks (``f1``, ``f2``, ``f3`` channels) and a pool."""
        return self._conv_stage(in_channels, (f1, f2, f3))

    def __init__(self, num_classes=62):
        super().__init__()
        self.l1 = self.two_conv_pool(1, 64, 64)
        self.l2 = self.two_conv_pool(64, 128, 128)
        self.l3 = self.three_conv_pool(128, 256, 256, 256)
        self.l4 = self.three_conv_pool(256, 256, 256, 256)

        head = [
            nn.Dropout(p=0.5),
            nn.Linear(256, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(512, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Run the four stages and the head; return per-class log-probabilities."""
        for stage in (self.l1, self.l2, self.l3, self.l4):
            x = stage(x)
        flat = x.view(x.size(0), -1)
        logits = self.classifier(flat)
        return F.log_softmax(logits, dim=1)

In [None]:
# NOTE(review): neither of these widths is referenced by any other cell
# visible in this notebook — confirm whether they are still needed.
Half_width =128
layer_width =128

In [None]:
# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on CPU-only runtimes instead of failing at `.to(device)`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
def update_lr(optimizer, lr):
    """Overwrite the ``'lr'`` entry of every parameter group of ``optimizer``."""
    for group in optimizer.param_groups:
        group.update(lr=lr)

In [None]:
# Number of mini-batches the training loader yields per epoch.
total_step = len(train_loader)
# Current learning rate; starts at the configured base rate.
curr_lr = learning_rate

# Build the network with its default number of output classes and move its
# parameters to the selected device.
model = VGG().to(device)

In [None]:
# NOTE(review): VGG.forward already returns log_softmax output, and
# nn.CrossEntropyLoss applies log-softmax to its input again; nn.NLLLoss is
# the usual pairing for log-probabilities — confirm this is intended.
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters, starting at the base learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) 

In [None]:
def binarize(images, threshold=2.0):
    """Map every pixel to -1 (value < ``threshold``) or +1 (otherwise).

    Works on a batch of any size. The threshold of 2 on the normalised images
    is the image processing for EMNIST that was determined by experimentation.
    """
    return torch.where(images < threshold,
                       -torch.ones_like(images),
                       torch.ones_like(images))


def evaluate(model, loader):
    """Return ``(correct, total)`` prediction counts of ``model`` over ``loader``.

    Leaves the model in eval mode; the caller switches back to train mode.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            # Binarise the WHOLE batch (the test batch size differs from the
            # training batch size, so no fixed sample count may be assumed).
            images = binarize(images.to(device))
            labels = labels.to(device)

            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct, total


total_step = len(train_loader)

best_accuracy = 0
# Make sure dropout / batch-norm are in training mode even when this cell is
# re-run after a later cell has called model.eval().
model.train()
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = binarize(images.to(device))
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Log every `log_interval` steps (first report is still at step 500).
        if (i + 1) % log_interval == 0:
            print ("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Test the model
    correct, total = evaluate(model, test_loader)

    if best_accuracy >= correct / total:
        # No improvement: re-draw the learning rate as base_lr * u**3 with
        # u ~ U[0, 1), which strongly favours small values.
        curr_lr = learning_rate * (np.random.rand(1) ** 3).item()
        update_lr(optimizer, curr_lr)
        print('Test Accuracy: {} % Best: {} %'.format(100 * correct / total, 100*best_accuracy))
    else:
        best_accuracy = correct / total
        # Snapshot the best weights; a plain assignment would only alias the
        # live model, which keeps changing during later epochs.
        net_opt = deepcopy(model)
        print('Test Accuracy: {} % (improvement)'.format(100 * correct / total))

    model.train()

Main Code: classify an uploaded letter image with the trained model

In [None]:
from google.colab import files
from io import BytesIO
from PIL import Image
from scipy import misc
import imageio

In [None]:
# Ask the user to upload a file through Colab. The result is indexed by file
# name in the next cell, and the value found there is read as raw image bytes.
uploaded = files.upload()

In [None]:
if not uploaded:
    raise ValueError("No file was uploaded")

# Keep using the historical file name when present; otherwise take the first
# uploaded file instead of failing with a KeyError.
filename = 'sample_letter2.PNG'
if filename not in uploaded:
    filename = next(iter(uploaded))

im = Image.open(BytesIO(uploaded[filename]))
im = im.resize((28, 28))
im.save('resized2.PNG')

# Force three colour channels so grayscale, palette and RGBA images all work,
# then collapse to luminance with the usual 0.299 / 0.587 / 0.114 weights.
rgb = np.asarray(im.convert('RGB'), dtype=float)
img = np.dot(rgb, [0.299, 0.587, 0.114])
plt.imshow(img, cmap = plt.get_cmap('gray'))
plt.show()

In [None]:
def binarize_letter(gray, border=2):
    """Turn a grayscale image into a -1 / +1 image.

    Steps:
      1. Invert the image (``peak - pixel``) so the darkest input pixels get
         the largest values.
      2. Centre and scale by the mean of the inverted image, then mark a pixel
         as +1 when the scaled value is at least 1, i.e. when the inverted
         value is at least twice the inverted mean.
      3. Force a frame of ``border`` pixels on every side to -1.
      4. Remove +1 pixels that have no +1 pixel directly above, below, left or
         right of them.

    A perfectly uniform image has no stroke and yields an all -1 result
    (instead of dividing by zero).
    """
    gray = np.asarray(gray, dtype=float)
    peak = gray.max()
    inverted = peak - gray
    mean_inverted = peak - gray.mean()
    if mean_inverted == 0:
        return -np.ones_like(gray)

    scaled = (inverted - mean_inverted) / mean_inverted
    binary = np.where(scaled < 1, -1.0, 1.0)

    # Clear the frame first so border pixels never count as neighbours.
    binary[:border, :] = -1
    binary[-border:, :] = -1
    binary[:, :border] = -1
    binary[:, -border:] = -1

    # Count the "on" 4-neighbours of every pixel with shifted views.
    on = binary > 0
    neighbours = np.zeros(on.shape, dtype=int)
    neighbours[1:, :] += on[:-1, :]
    neighbours[:-1, :] += on[1:, :]
    neighbours[:, 1:] += on[:, :-1]
    neighbours[:, :-1] += on[:, 1:]
    binary[on & (neighbours == 0)] = -1
    return binary


# NOTE: this overwrites `img`; re-run the image-loading cell before running
# this cell a second time.
img = binarize_letter(img)

plt.imshow(img, cmap = plt.get_cmap('gray'))
plt.show()

In [None]:
img = img.astype(float)
# Add leading batch and channel axes (a fresh copy, not a view of `img`)
# before handing the array to torch.
batch = img[np.newaxis, np.newaxis].copy()
t = torch.from_numpy(batch).to(device)


In [None]:
# Inference only: switch to eval mode and disable gradient tracking.
model.eval()
with torch.no_grad():
    output = model(t.float())
    # Largest log-probability per row together with its class index.
    best = torch.max(output.data, 1)
    print(best)