In [1]:
import pandas as pd
import os

# kaggle datasets download -d vishalsubbiah/pokemon-images-and-types
# unzip into data/pokemon
folder_path = "data/pokemon"
csv_file_path = os.path.join(folder_path, "pokemon.csv")

if os.path.exists(csv_file_path):
    # Load the CSV file into a Pandas DataFrame
    df = pd.read_csv(csv_file_path)
    df = df.drop('Type2', axis=1)
    df = df.drop('Evolution', axis=1)
    # Display the first few rows of the DataFrame
    print(df.head(100))

else:
    print(f"Error: The file '{csv_file_path}' was not found.")
    print("Please make sure you have downloaded and unzipped the dataset correctly.")


          Name     Type1
0    bulbasaur     Grass
1      ivysaur     Grass
2     venusaur     Grass
3   charmander      Fire
4   charmeleon      Fire
..         ...       ...
95     drowzee   Psychic
96       hypno   Psychic
97      krabby     Water
98     kingler     Water
99     voltorb  Electric

[100 rows x 2 columns]


In [2]:
import torch.nn as nn
import pandas as pd
import torch
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.is_available()

True

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
# Split Train and Validation dataframes
train_df, valid_df = train_test_split(df, test_size=0.2, stratify=df['Type1'], random_state=42)

In [5]:
train_df.head()

Unnamed: 0,Name,Type1
749,mudsdale,Ground
640,tornadus-incarnate,Flying
550,sandile,Ground
702,carbink,Rock
128,magikarp,Water


In [6]:
# Create dataset

IMG_WIDTH = 256
IMG_HEIGHT = 256
IMG_CHS = 3

from torchvision.transforms import v2
transform = v2.Compose([
    v2.Resize((256, 256)),
    v2.ColorJitter(brightness=.2, contrast=.5),
    v2.RandomRotation(25),
    v2.RandomResizedCrop((IMG_WIDTH, IMG_HEIGHT), scale=(.8, 1), ratio=(1, 1)),
    v2.RandomHorizontalFlip(),
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True)
])

from PIL import Image

# from sklearn.preprocessing import LabelEncoder
# label_encoder = LabelEncoder()
# df['Type1_encoded'] = label_encoder.fit_transform(df['Type1'])
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
df['Type1_encoded'] = label_encoder.fit_transform(df['Type1'])

# Update the train/test split to use encoded labels for stratification
train_df, valid_df = train_test_split(df, test_size=0.2, stratify=df['Type1_encoded'], random_state=42)

class ImageDataset(Dataset):
    def __init__(self, dataframe, img_dir, transform=None):
        self.df = dataframe.reset_index(drop=True)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = self.df.loc[idx, 'Name'] + ".png"
        label = self.df.loc[idx, 'Type1_encoded']  # Use encoded labels

        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(label, dtype=torch.long)

In [7]:
BATCH_SIZE = 32

train_data = ImageDataset(train_df, "data/pokemon/images", transform=transform)
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
train_N = len(train_loader.dataset)

valid_data = ImageDataset(valid_df, "data/pokemon/images", transform=transform)
valid_loader = DataLoader(valid_data, batch_size=BATCH_SIZE)
valid_N = len(valid_loader.dataset)

In [8]:
num_classes = df['Type1'].nunique()
num_classes

18

In [9]:
kernel_size = 3
IMAGE_WIDTH = 256
IMAGE_HEIGHT = 256
IMG_CHS = 3
flattened_img_size = IMG_CHS * IMAGE_WIDTH * IMAGE_HEIGHT

model = nn.Sequential(
    # First convolution
    nn.Conv2d(IMG_CHS, 25, kernel_size, stride=1, padding=1),  # 25 x 28 x 28
    nn.BatchNorm2d(25),
    nn.ReLU(),
    nn.MaxPool2d(2, stride=2),  # 25 x 14 x 14
    # Second convolution
    nn.Conv2d(25, 50, kernel_size, stride=1, padding=1),  # 50 x 14 x 14
    nn.BatchNorm2d(50),
    nn.ReLU(),
    nn.Dropout(.5),
    nn.MaxPool2d(2, stride=2),  # 50 x 7 x 7
    # Third convolution
    nn.Conv2d(50, 75, kernel_size, stride=1, padding=1),  # 75 x 7 x 7
    nn.BatchNorm2d(75),
    nn.ReLU(),
    nn.MaxPool2d(2, stride=2),  # 75 x 3 x 3
    # Flatten to Dense
    nn.Flatten(),
    nn.Linear(75 * 32 * 32, 512),
    nn.Dropout(.5),
    nn.ReLU(),
    nn.Linear(512, num_classes)
)

In [10]:
model = torch.compile(model.to(device))

In [11]:
loss_function = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=1e-5)

def get_batch_accuracy(output, y, N):
    pred = output.argmax(dim=1, keepdim=True)
    correct = pred.eq(y.view_as(pred)).sum().item()
    return correct / N

def validate():
    loss = 0
    accuracy = 0

    model.eval()
    with torch.no_grad():
        for x, y in valid_loader:
            x = x.to(device)  # Move images to GPU
            y = y.to(device)
            output = model(x)

            loss += loss_function(output, y).item()
            accuracy += get_batch_accuracy(output, y, valid_N)
    print('Valid - Loss: {:.4f} Accuracy: {:.4f}'.format(loss, accuracy))


def train():
    loss = 0
    accuracy = 0

    model.train()
    for x, y in train_loader:
        x = x.to(device)  # Move images to GPU
        y = y.to(device)
        output = model(x)
        optimizer.zero_grad()
        batch_loss = loss_function(output, y)
        batch_loss.backward()
        optimizer.step()

        loss += batch_loss.item()
        accuracy += get_batch_accuracy(output, y, train_N)
    print('Train - Loss: {:.4f} Accuracy: {:.4f}'.format(loss, accuracy))

In [12]:
epochs = 50

for epoch in range(epochs):
    print('Epoch: {}'.format(epoch))
    train()
    validate()

Epoch: 0
Train - Loss: 59.2662 Accuracy: 0.1267
Valid - Loss: 16.8539 Accuracy: 0.1420
Epoch: 1
Train - Loss: 57.1927 Accuracy: 0.1546
Valid - Loss: 16.6870 Accuracy: 0.1420
Epoch: 2
Train - Loss: 54.9249 Accuracy: 0.2087
Valid - Loss: 16.3346 Accuracy: 0.1975
Epoch: 3
Train - Loss: 54.7498 Accuracy: 0.1932
Valid - Loss: 16.1868 Accuracy: 0.2037
Epoch: 4
Train - Loss: 54.0016 Accuracy: 0.2102
Valid - Loss: 16.0407 Accuracy: 0.1852
Epoch: 5
Train - Loss: 53.6773 Accuracy: 0.2257
Valid - Loss: 15.9829 Accuracy: 0.2222
Epoch: 6
Train - Loss: 52.6612 Accuracy: 0.2365
Valid - Loss: 15.9414 Accuracy: 0.2469
Epoch: 7
Train - Loss: 52.6842 Accuracy: 0.2396
Valid - Loss: 15.7059 Accuracy: 0.2160
Epoch: 8
Train - Loss: 51.7162 Accuracy: 0.2550
Valid - Loss: 15.8497 Accuracy: 0.2407
Epoch: 9
Train - Loss: 50.9706 Accuracy: 0.2488
Valid - Loss: 15.9500 Accuracy: 0.2840
Epoch: 10
Train - Loss: 51.1560 Accuracy: 0.2504
Valid - Loss: 15.5527 Accuracy: 0.2593
Epoch: 11
Train - Loss: 50.8804 Accuracy: 

In [13]:
# import IPython
# app = IPython.Application.instance()
# app.kernel.do_shutdown(True)