In [37]:
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from imgaug import augmenters as iaa
import os
import torch as th
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
%matplotlib inline

In [3]:
augmentation = iaa.Sequential([iaa.WithHueAndSaturation(iaa.WithChannels(0, iaa.Add((-5, 5)))),
                              iaa.WithBrightnessChannels(iaa.Add((-30, 30))),
                              iaa.AdditiveGaussianNoise(scale=(0,50)),
                              iaa.Rotate((-8,8)),
                              iaa.Resize({"height":1600, "width":1200})])
augmentation_flipped = iaa.Sequential([iaa.WithHueAndSaturation(iaa.WithChannels(0, iaa.Add((-185, -175)))),
                              iaa.WithBrightnessChannels(iaa.Add((-30, 30))),
                              iaa.AdditiveGaussianNoise(scale=(0,50)),
                              iaa.Rotate((-8,8)),
                              iaa.Resize({"height":1600, "width":1200})])

In [None]:
for card in [file.split(".")[0] for file in os.listdir("Cards")]:
    print(card)
    img = Image.open("Cards/" + card + ".png", 'r').convert("RGB")
    img = np.array(img)
    for i in range(100):
        transform = Image.fromarray(augmentation(image=img))
        transform.save("AugmentedCards/" + str(card) + "_" + str(i) + ".png")
    for i in range(100):
        transform = Image.fromarray(augmentation_flipped(image=img))
        transform.save("AugmentedCards/" + str(card) + "_" + str(i) + "_" + "f" + ".png")

In [2]:
# 60/20/20 test/valid/train
cards = np.array(os.listdir("AugmentedCards"))
test_idx = np.random.choice(np.arange(len(cards)), int(len(cards) * 0.2), replace=False)
test = cards[test_idx]
cards_remaining = np.delete(cards, test_idx)
valid_idx = np.random.choice(np.arange(len(cards_remaining)), int(len(cards) * 0.2), replace=False)
valid = cards_remaining[valid_idx]
train = np.delete(cards_remaining, [valid_idx])

In [16]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 3 input image channels, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 5*5 from image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 26)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), kernal_size=2)
        # If the size is a square, you can specify with a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), kernal_size=2)
        x = torch.flatten(x, 1) # flatten all dimensions except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.sigmoid(self.fc3(x))
        return x


net = Net()
print(net)

Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=26, bias=True)
)


In [58]:
net = nn.Sequential(
    nn.Conv2d(3, 6, 5),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Conv2d(6,16,5),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Flatten(),
    nn.Linear(1886544, 120),  # 5*5 from image dimension
    nn.ReLU(),
    nn.Linear(120, 84),
    nn.ReLU(),
    nn.Linear(84, 30),
    nn.Softmax()
)
print(net)

Sequential(
  (0): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (4): ReLU()
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Flatten(start_dim=1, end_dim=-1)
  (7): Linear(in_features=1886544, out_features=120, bias=True)
  (8): ReLU()
  (9): Linear(in_features=120, out_features=84, bias=True)
  (10): ReLU()
  (11): Linear(in_features=84, out_features=30, bias=True)
  (12): Softmax(dim=None)
)


In [59]:
np.array(Image.open("AugmentedCards/" + "A_1.png", 'r').convert("RGB")).shape
letter_codes = {"A": 1, "B": 2, "C": 3, "D": 4, "E": 5, "F": 6, "G": 7, "H": 8, "I":9, "J":10, "K":11, "L": 12, "M":13, "N":14, "O":15, "P":16, "Q":17, "R":18, "S":19, "T":20, "U":21, "V":22, "W":23, "X":24, "Y":25, "Z":26, "TH":27, "IN":28, "CL":29, "QU":30}
def letter_to_code(l):
    return letter_codes[l]

In [74]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01)

for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    images_per_batch = 2 # 6200 total images
    
    for batch in range(len(train) // images_per_batch):
        train_x = []
        train_y = []
        for card in train[batch * images_per_batch:(batch + 1) * images_per_batch]:
            data = np.array(Image.open("AugmentedCards/" + card, 'r').convert("RGB"))
            train_x.append(data)
            train_y.append(card.split("_")[0])
        inputs = th.Tensor(train_x).permute(0,3,1,2)
        labels = th.Tensor([letter_to_code(i) for i in train_y]).long()

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        print(outputs, labels)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if batch % 5 == 0:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {batch + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]],
       grad_fn=<SoftmaxBackward0>) tensor([1, 1])
[1,     1] loss: 0.002
tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 3.3617e-42, 1.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,

KeyboardInterrupt: 

In [None]:
train_x = []
train_y = []
valid_x = []
valid_y = []
test_x = []
test_y = []

for card in train:
    data = np.array(Image.open("AugmentedCards/" + card, 'r').convert("RGB"))
    train_x.append(data)
    train_y.append(card.split("_")[0])
train_x = np.array(train_x)
train_y = np.array(train_y)

for card in valid:
    data = np.array(Image.open("AugmentedCards/" + card, 'r').convert("RGB"))
    valid_x.append(data)
    valid_y.append(card.split("_")[0])
valid_x = np.array(valid_x)
valid_y = np.array(valid_y)

for card in test:
    data = np.array(Image.open("AugmentedCards/" + card, 'r').convert("RGB"))
    test_x.append(data)
    test_y.append(card.split("_")[0])
test_x = np.array(test_x)
test_y = np.array(test_y)