In [187]:
# Preprocess images
path = "/home/user/workspace3/Dataset/Test/A/"

dirs = os.listdir(path)
print(dirs)
def resize():
    for item in dirs:
        if os.path.isfile(path + item):
            print(item)
            im = Image.open(path+item).convert('L')
            f, e = os.path.splitext(path + item)
            imResize = im.resize((28, 28), Image.ANTIALIAS)
            imResize.save(f + ".jpg", 'JPEG', quality=90)
            if(os.path.isfile(f)):
                os.remove(f)
            if(os.path.isfile(f + ".JPG")):
                os.remove(f + ".JPG")
resize()

['9 no.jpg', '6 no.jpg', '17 no.jpg', '2 no.jpg', '23 no.jpg', '24 no.jpg', '12 no.jpg', '15 no.jpg', '4 no.jpg', '11 no.jpg', '18 no.jpg', '20 no.jpg', '22 no.jpg', '19 no.jpg', '13 no.jpg', '14 no.jpg', '7 no.jpg', '5 no.jpg', '222 no.jpg', '1 no.jpg', '10 no.jpg', '111 no.jpg', '21 no.jpg', '8 no.jpg']
9 no.jpg
6 no.jpg
17 no.jpg
2 no.jpg
23 no.jpg
24 no.jpg
12 no.jpg
15 no.jpg
4 no.jpg
11 no.jpg
18 no.jpg
20 no.jpg
22 no.jpg
19 no.jpg
13 no.jpg
14 no.jpg
7 no.jpg
5 no.jpg
222 no.jpg
1 no.jpg
10 no.jpg
111 no.jpg
21 no.jpg
8 no.jpg


In [216]:
import os, sys
import torch
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
%matplotlib inline

from torch.utils.data import DataLoader
from PIL import Image
from torch.utils.data.dataset import Dataset
from imageio import imread
from torch import Tensor

In [217]:
class notMNIST(Dataset):

    # The init method is called when this class will be instantiated.
    def __init__(self, root):
        Images, Y = [], []
        folders = os.listdir(root)

        for folder in folders:
            folder_path = os.path.join(root, folder)
            for ims in os.listdir(folder_path):
                try:
                    img_path = os.path.join(folder_path, ims)
                    Images.append(np.array(imread(img_path)))
                    Y.append(ord(folder) - 65)
                except:
                    # Some images in the dataset are damaged
                    print("File {}/{} is broken".format(folder, ims))
        data = [(x, y) for x, y in zip(Images, Y)]
        self.data = data

    # The number of items in the dataset
    def __len__(self):
        return len(self.data)

    # The Dataloader is a generator that repeatedly calls the getitem method.
    # getitem is supposed to return (X, Y) for the specified index.
    def __getitem__(self, index):
        img = self.data[index][0]
        img = img.reshape(28, 28) / 255.0
        # 8 bit images. Scale between [0,1]. This helps speed up our training
        # img = img.reshape(28, 28) / 255.0

        # Input for Conv2D should be Channels x Height x Width
        img_tensor = Tensor(img).view(1, 28, 28).float()
        label = self.data[index][1]
        return (img_tensor, label)


In [218]:
class Model(torch.nn.Module):

    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 2)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))

        # Reshaping the tensor to BATCH_SIZE x 320. Torch infers this from other dimensions when one of the parameter is -1.
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x)
        x = self.fc2(x)
        return x


In [219]:
# Parameters
MODEL_NAME = 'conv2_layer_28x28'
BATCH_SIZE = 4
N_EPOCHS = 50
root = "/home/user/workspace3"

# Load data
train_dataset = notMNIST(os.path.join(root, 'Dataset/Train'))

# Create dataloader
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Create model
net = Model()

# Define loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters())

loss_history = []

In [220]:
def train(epoch):
    epoch_loss = 0
    n_batches = len(train_dataset) // BATCH_SIZE

    for step, data in enumerate(train_loader, 0):
        train_x, train_y = data
        y_hat = net.forward(train_x)
        train_y = torch.Tensor(np.array(train_y))

        # CrossEntropyLoss requires arg2 to be torch.LongTensor
        loss = criterion(y_hat, train_y.long())
        epoch_loss += loss.item()
        optimizer.zero_grad()

        # Backpropagation
        loss.backward()
        optimizer.step()
        # There are len(dataset)/BATCH_SIZE batches.
        # We print the epoch loss when we reach the last batch.
        if step % n_batches == 0 and step != 0:
            epoch_loss = epoch_loss / n_batches
            loss_history.append(epoch_loss)
            print("Epoch {}, loss {}".format(epoch, epoch_loss))
            epoch_loss = 0

In [221]:
for epoch in range(1, N_EPOCHS + 1):
    train(epoch)

# Saving the model
torch.save(net, 'models/{}.pt'.format(MODEL_NAME))
print("Saved model...")

Saved model...


In [222]:
test_dataset = notMNIST(os.path.join(root, 'Dataset/Test'))
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=True)
classifier = torch.load('models/{}.pt'.format(MODEL_NAME)).eval()
correct = 0

for _, data in enumerate(test_loader, 0):
    test_x, test_y = data
    pred = classifier.forward(test_x)
    y_hat = np.argmax(pred.data)
    if y_hat == test_y:
        correct += 1

print("Accuracy={}".format(correct / len(test_dataset)))


Accuracy=0.8409090909090909
