In [29]:
#Load libraries
import os
import numpy as np
import torch
import glob
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import torchvision
import pathlib

In [30]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cpu


In [31]:
transformer = transforms.Compose([
    transforms.Resize((150,150)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomInvert(),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], 
                         [0.5, 0.5, 0.5])
])

In [32]:
pokemon = ['Abra', 'Aerodactyl', 'Alakazam', 'Arbok',
          'Arcanine', 'Articuno', 'Beedrill', 'Bellsprout', 'Blastoise',
          'Bulbasaur', 'Butterfree', 'Caterpie', 'Chansey', 'Charizard',
          'Charmander', 'Charmeleon', 'Clefable', 'Clefairy', 'Cloyster',
          'Cubone', 'Dewgong', 'Diglett', 'Ditto', 'Dodrio', 'Doduo', 'Dragonair',
          'Dragonite', 'Dratini', 'Drowzee', 'Dugtrio', 'Eevee', 'Ekans',
          'Electabuzz', 'Electrode', 'Exeggute', 'Exeggutor','Farfechd', 'Fearow',
          'Flareon', 'Gastly', 'Gengar', 'Geodude', 'Gloom', 'Golbat', 'Goldeen',
          'Golduck', 'Golem', 'Graveler', 'Grimer', 'Growlithe', 'Gyarados',
          'Haunter', 'Hitmonchan', 'Hitmonlee', 'Horsea', 'Hypno', 'Ivyasaur',
          'Jigglypuff', 'Jolteon', 'Jynx', 'Kabuto', 'Kabutops', 'Kadabra',
          'Kakuna', 'Kangaskhan', 'Kingler', 'Koffing', 'Krabby', 'Lapras',
          'Likitung', 'Machamp', 'Machoke', 'Machop', 'Magikarp', 'Magmar',
          'Magnemite', 'Magneton', 'Mankey', 'Marowak', 'Meowth', 'Metapod',
          'Mew', 'Mewtwo', 'Moltres', 'Mr.Mime', 'Muk', 'Nidoking', 'Nidoqueen', 
          'Nidoran(female)','Nidoran(male)', 'Nidorina', 'Nidorino', 'Ninetales',
          'Oddish', 'Omanyte', 'Omastar', 'Onix', 'Paras', 'Parasect', 'Persian',
          'Pidgeot', 'Pidgeotto', 'Pidgey', 'Pikachu', 'Pinsir', 'Poliwag',
          'Poliwhirl', 'Poliwrath', 'Ponyta', 'Porygon', 'Primeape', 'Psyduck',
          'Raichu', 'Rapidash', 'Raticate', 'Rattata', 'Rhydon', 'Rhyhorn',
          'Sandshrew', 'Sandslash', 'Scyther', 'Seadra', 'Seaking', 'Seel',
          'Shellder', 'Slowbro', 'Slowpoke', 'Snorlax', 'Spearow', 'Squirtle',
          'Starmie', 'Staryu', 'Tangela', 'Tauros', 'Tentacool', 'Tentacruel',
          'Vapooreon', 'Venomoth', 'Venonat', 'Venusaur', 'Victreebel',
          'Vileplume', 'Voltorb', 'Vulpix', 'Wartortle', 'Weedle', 'Weepinbell',
          'Weezing', 'Wigglytuff', 'Zapdos', 'Zubat']

In [33]:
#Data Loader

train_loader = DataLoader(
    torchvision.datasets.ImageFolder('train', transform = transformer),
    batch_size = 128, shuffle = True
)

test_loader = DataLoader(
    torchvision.datasets.ImageFolder('test', transform = transformer),
    batch_size = 128, shuffle = True
)

In [34]:
root = pathlib.Path('train')

In [35]:
#CNN Network

class ConvNet(nn.Module):
    def __init__(self, num_pokemon = 151):
        super(ConvNet,self).__init__()
    
        #Input shape = (128, 3, 150, 150)
    
        #Output size after convolution = 150
    
        self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 12, kernel_size = 3, stride = 1, padding = 1)
        #Shape = (128, 12, 150, 150)
        self.bn1 = nn.BatchNorm2d(num_features = 12)
        #Shape = (128,12,150,150)
        self.relu1 = nn.ReLU()
    
        self.pool = nn.MaxPool2d(kernel_size = 2)
        #Reduce the image size by factor of 2
        #Shape = (128, 12, 75,75)
    
        self.conv2 = nn.Conv2d(in_channels = 12, out_channels = 20, kernel_size = 3, stride = 1, padding = 1)
        #Shape = (128, 20, 75, 75)
        self.relu2 = nn.ReLU()
        #Shape = (128,20,75,75)
    
        self.conv3 = nn.Conv2d(in_channels = 20, out_channels = 32, kernel_size = 3, stride = 1, padding = 1)
        #Shape = (128, 32, 75, 75)
        self.bn3 = nn.BatchNorm2d(num_features = 32)
        #Shape = (128,32,75,75)
        self.relu3 = nn.ReLU()
        #Shape = (128, 32, 75, 75)
    
        self.fc = nn.Linear(in_features = 32*75*75, out_features = num_pokemon)
    
        #Feed forward function
    
    def forward(self, input):
        output = self.conv1(input)
        output = self.bn1(output)
        output = self.relu1(output)
        
        output = self.pool(output)
        
        output = self.conv2(output)
        output = self.relu2(output)
        
        output = self.conv3(output)
        output = self.bn3(output)
        output = self.relu3(output)
        
        #Above outpput will be in matrix form, with shape (128, 32, 75, 75)
        
        output = output.view(-1, 32*75*75)
        
        output = self.fc(output)
        
        return output
    

In [36]:
model = ConvNet(num_pokemon = 151).to(device)

In [37]:
#Optimizer and loss function
optimizer = Adam(model.parameters(),lr = 0.0001, weight_decay = 0.0001)
loss_function = nn.CrossEntropyLoss()

In [38]:
num_epochs = 20

In [39]:
#Calculating the size of training and testing images
train_count = len(glob.glob('train'+'/**/*.jpg'))
test_count = len(glob.glob('test'+'/**/*.jpg'))

In [40]:
print(train_count, test_count)

6643 448


In [41]:
#Model training and saving best model

best_accuracy = 0.0

for epoch in range(num_epochs):
    
    #Evaluation and training on training dataset
    model.train()
    train_accuracy = 0.0
    train_loss = 0.0
    
    for x, (images, labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
        
        optimizer.zero_grad()
        
        outputs = model(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.cpu().data*images.size(0)
        _,prediction = torch.max(outputs.data, 1)
        
        train_accuracy += int(torch.sum(prediction == labels.data))
    
    train_accuracy = train_accuracy/train_count
    train_loss = train_loss/train_count
    
    
    #evaluation on testing data set
    model.eval()
    
    test_accuracy = 0.0
    for x, (images, labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
            
        outputs = model(images)
        _,prediction = torch.max(outputs.data, 1)
        test_accuracy += int(torch.sum(prediction==labels.data))
    
    test_accuracy = test_accuracy/test_count
    
    print('epoch: ' + str(epoch) + ' Train Loss: ' + str(float(train_loss)) + ' Train Accuracy: ' + str(train_accuracy) + 'Test Accuracy: ' + str(test_accuracy))
    
    #Save the best model
    if test_accuracy > best_accuracy:
        torch.save(model.state_dict(), 'best_checkpoint.model')
        best_accuracy = test_accuracy
    

epoch: 0 Train Loss: 6.959683895111084 Train Accuracy: 0.04786993828089719Test Accuracy: 0.14508928571428573
epoch: 1 Train Loss: 4.23647928237915 Train Accuracy: 0.2214360981484269Test Accuracy: 0.30580357142857145
epoch: 2 Train Loss: 3.286867618560791 Train Accuracy: 0.361282553063375Test Accuracy: 0.43080357142857145
epoch: 3 Train Loss: 2.816659688949585 Train Accuracy: 0.45280746650609666Test Accuracy: 0.42857142857142855
epoch: 4 Train Loss: 2.371396064758301 Train Accuracy: 0.533493903356917Test Accuracy: 0.4888392857142857
epoch: 5 Train Loss: 1.9223923683166504 Train Accuracy: 0.6105675146771037Test Accuracy: 0.47544642857142855
epoch: 6 Train Loss: 1.7145153284072876 Train Accuracy: 0.6558783682071353Test Accuracy: 0.5066964285714286
epoch: 7 Train Loss: 1.4988880157470703 Train Accuracy: 0.6950173114556676Test Accuracy: 0.515625
epoch: 8 Train Loss: 1.2659780979156494 Train Accuracy: 0.7400270961914798Test Accuracy: 0.5066964285714286
epoch: 9 Train Loss: 1.102349042892456 