In [1]:
import torch
import torchvision
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import numpy as np

In [2]:
import random
from PIL import Image


class CustomDataset(Dataset):
    def __init__(self, train, to_tensor=True):
        self.mnist = torchvision.datasets.MNIST(
            "files/mnist", train=train, download=True
        )
        self.fashion_mnist = torchvision.datasets.FashionMNIST(
            "files/fashion_mnist", train=train, download=True
        )
        self.to_tensor_transform = transforms.Compose([transforms.ToTensor()])

        self.dataset = []
        self.mnist_size = len(self.mnist)

        for i in range(self.mnist_size):
            number_img, number_label = self.mnist[i]
            left_img, left_label = random.choice(self.fashion_mnist)

            while True:
                right_img, right_label = random.choice(self.fashion_mnist)
                if left_label != right_label:
                    break

            img = self.__concat_images(left_img, number_img, right_img)
            label = left_label if number_label % 2 == 0 else right_label

            if to_tensor:
                img = self.to_tensor_transform(img)

            self.dataset.append((img, label))

    def __len__(self):
        return self.mnist_size

    def __getitem__(self, idx):
        return self.dataset[idx]

    def __concat_images(self, left, center, right):
        IMG_SIZE = 28
        img = Image.new("L", (IMG_SIZE * 3, IMG_SIZE))
        img.paste(left, (0, 0))
        img.paste(center, (IMG_SIZE, 0))
        img.paste(right, (2 * IMG_SIZE, 0))
        return img

    @staticmethod
    def decode_label(label):
        mapping = {
            0: "T-shirt/Top",
            1: "Trouser",
            2: "Pullover",
            3: "Dress",
            4: "Coat",
            5: "Sandal",
            6: "Shirt",
            7: "Sneaker",
            8: "Bag",
            9: "Ankle Boot",
        }

        return mapping[label]

In [3]:
train = CustomDataset(train=True)
test = CustomDataset(train=False)

In [4]:
print(test[1][1])
test[1][0]

2


tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]])

In [5]:
train_loader = DataLoader(train, batch_size=128)


In [6]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()

        self.conv = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Flatten(0, -1),
            nn.Dropout(0.5),
        )

        self.fc = nn.Sequential(
            nn.Linear(512, 32),
            nn.ReLU(),
            nn.Linear(32, 10),
            nn.Softmax()
        )

    def forward(self, x):
        x = self.conv(x)
        x = x.view(-1, 512)
        return self.fc(x)

In [7]:
model = CNN()
error = nn.CrossEntropyLoss()
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
epochs = 15

In [8]:
print(model)

CNN(
  (conv): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Flatten(start_dim=0, end_dim=-1)
    (10): Dropout(p=0.5, inplace=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=512, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=10, bias=True)
    (3): Softmax(dim=None)
  )
)


In [9]:
for images, labels in train_loader:
    print(images.shape)
    print(len(labels))
    break

model(images[:, :, :, :])

torch.Size([128, 1, 28, 84])
128


  return self._call_impl(*args, **kwargs)


tensor([[0.0916, 0.1112, 0.0875,  ..., 0.1162, 0.1001, 0.1171],
        [0.0937, 0.1100, 0.0870,  ..., 0.1168, 0.0982, 0.1197],
        [0.0920, 0.1106, 0.0887,  ..., 0.1175, 0.0999, 0.1167],
        ...,
        [0.0945, 0.1086, 0.0891,  ..., 0.1191, 0.0983, 0.1168],
        [0.0942, 0.1115, 0.0836,  ..., 0.1164, 0.1015, 0.1147],
        [0.0938, 0.1104, 0.0872,  ..., 0.1181, 0.0984, 0.1175]],
       grad_fn=<SoftmaxBackward0>)

In [10]:
# loss_min = np.Inf

model.train()

for e in range(epochs):
    running_loss = 0

    for images, labels in train_loader:
        optimizer.zero_grad()
        log_ps = model(images)
        loss = error(log_ps, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * images.size(0)
    
    running_loss = running_loss / len(train_loader.sampler)

    print(f"Epoch {e+1} Training Loss: {running_loss:.6f}")

        



Epoch 1 Training Loss: 2.149119
Epoch 2 Training Loss: 2.049256
Epoch 3 Training Loss: 2.008715
Epoch 4 Training Loss: 1.903554
Epoch 5 Training Loss: 1.817834
Epoch 6 Training Loss: 1.777758
Epoch 7 Training Loss: 1.756065
Epoch 8 Training Loss: 1.739413
Epoch 9 Training Loss: 1.731407
Epoch 10 Training Loss: 1.721715
Epoch 11 Training Loss: 1.716482
Epoch 12 Training Loss: 1.711226
Epoch 13 Training Loss: 1.707252
Epoch 14 Training Loss: 1.702682
Epoch 15 Training Loss: 1.696619


In [11]:
test_loader = DataLoader(test, batch_size=128)

In [12]:
model.eval()
test_loss = 0
class_correct = list(0. for _ in range(10))
class_total = list(0. for _ in range(10))

with torch.no_grad():
    for images, labels in test_loader:
        output = model(images)
        loss = error(output, labels)

        test_loss += loss.item() * images.size(0)

        _, pred = torch.max(output, 1)

        correct = np.squeeze(pred.eq(labels.data.view_as(pred)))

        for i in range(len(labels)):
            label = labels.data[i]
            class_correct[label] += correct[i].item()
            class_total[label] += 1

test_loss = test_loss / len(test_loader.sampler)
print(f"Test Loss: {test_loss:.6f}")

for i in range(10):
    if class_total[i] > 10:
        print(f"Test Accuracy of {i}: {100*class_correct[i] / class_total[i]}%")
    else:
        print(f"Test Accuracy of {i}: N/A")

print(f"\nTest Accuracy (Overall): {100*sum(class_correct) / sum(class_total)}%")


Test Loss: 1.674518
Test Accuracy of 0: 81.68870803662259%
Test Accuracy of 1: 92.58130081300813%
Test Accuracy of 2: 76.87747035573122%
Test Accuracy of 3: 86.54618473895583%
Test Accuracy of 4: 70.5940594059406%
Test Accuracy of 5: 91.83477425552354%
Test Accuracy of 6: 8.864541832669323%
Test Accuracy of 7: 93.04174950298211%
Test Accuracy of 8: 93.8894277400582%
Test Accuracy of 9: 90.88960342979635%

Test Accuracy (Overall): 78.64%


In [36]:
converter = transforms.ToPILImage()

img: Image = converter(test[0][0])
img.width
img.resize((40 * img.width, 40 * img.height), Image.NEAREST)
pred = model(test[0][0])
torch.argmax(pred, 1)
test[0][1]




9