In [1]:
################################################################################
# CNN based Pokemon Classfier                                                  #
# ---------------------------------------------------------------------------- #
# ~600 images of 20 pokemon are used to classify them.                         #
# A simple convnet is used to classify the images.                             #
# Compared with EigenFaces on a smaller dataset.                               #
# ---------------------------------------------------------------------------- #
# NOTE: The first version of my model is put in v1_scripts directory.          #
################################################################################

%pylab inline
from PIL import Image, ImageOps
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from time import time
import os

"""
Read images as features and labels
"""
numpy_X = []
numpy_y = []
labels = []

for root, dirs, files in os.walk('dataset/'):
    labels.append(root.split('/')[1])
    for f in files:
        imgF = Image.open(os.path.join(root, f)).convert('RGB') # Open the file in RGB mode
        imgF = ImageOps.fit(image=imgF, size=(64, 64), method=Image.ANTIALIAS) # Crop the image
        
        numpy_X.append(np.array(imgF))
        numpy_y.append(root.split('/')[1])
        
# remove empty entry from labels
labels = labels[1:]

# Convert y from labels to indexes
for i in range(len(numpy_y)):
    numpy_y[i] = labels.index(numpy_y[i])

# Convert to np.array
numpy_X = np.array(numpy_X).reshape(-1, 3, 64, 64)
numpy_y = np.array(numpy_y)

print(numpy_X.shape, numpy_y[:5])
print(labels)

Populating the interactive namespace from numpy and matplotlib
(596, 3, 64, 64) [0 0 0 0 0]
['abra', 'alakazam', 'articuno', 'blastoise', 'bulbasaur', 'charizard', 'charmander', 'charmeleon', 'gengar', 'ivysaur', 'magikarp', 'meowth', 'mew', 'mewtwo', 'moltres', 'pikachu', 'squirtle', 'venusaur', 'wartortle', 'zapdos']


In [2]:
"""
Numpy to Tensors (CUDA)
"""
X = torch.from_numpy(numpy_X).float().cuda()
y = torch.from_numpy(numpy_y).cuda()

print(X.type(), y.type())

# PyTorch dataset
dataset = torch.utils.data.TensorDataset(X, y)

# Test train split
total = len(dataset)
trainSize = int(total * 0.75)
testSize = total - trainSize
train, test = torch.utils.data.random_split(dataset, (trainSize, testSize))

print("Train Size: ", len(train))
print("Test Size: ", len(test))

torch.cuda.FloatTensor torch.cuda.LongTensor
Train Size:  447
Test Size:  149


In [3]:
"""
Train Batch Loader for Batch SGD
"""
BATCH_SIZE=30
train_loader = torch.utils.data.DataLoader(train, batch_size = BATCH_SIZE, shuffle = False)

In [4]:
"""
ConvNet
"""
class SimpleConvNet(nn.Module):
    def __init__(self):
        super(SimpleConvNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3)
        self.fc1 = nn.Linear(3136, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 20)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))

        return x

    # copied from torch example
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
    
device = torch.device("cuda")
cnn = SimpleConvNet().to(device)
print(cnn)

SimpleConvNet(
  (conv1): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=3136, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=20, bias=True)
)


In [5]:
"""
Training
"""
NUM_EPOCHS = 60
optimizer = optim.SGD(cnn.parameters(), lr=0.001)
error = nn.CrossEntropyLoss()

for epoch in range(NUM_EPOCHS):
    correct = 0
    for batch_idx, (X_batch, y_batch) in enumerate(train_loader):
        optimizer.zero_grad()
        output = cnn(X_batch)
        loss = error(output, y_batch)
        loss.backward()
        optimizer.step()
        
        predicted = torch.max(output.data, 1)[1]
        correct += (predicted == y_batch.cuda()).sum()
        if batch_idx % 2 == 0:
            print('Epoch : {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t Accuracy:{:.3f}%'.format(
                    epoch, batch_idx*len(X_batch), len(train_loader.dataset), 100.*batch_idx / len(train_loader),
                    loss.data.item(), float(correct*100) / float(BATCH_SIZE*(batch_idx+1))))









In [6]:
correct = 0
for i in range(len(test)):
    if cnn(test[i][0].reshape(1, 3, 64, 64))[0].max(0)[1] == test[i][1]:
        correct += 1
        
print(correct / len(test))

0.18791946308724833
