In [39]:
import torch
from torchvision import datasets, transforms, models
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as fn
from torch.utils.data import DataLoader, random_split, Subset
from sklearn.model_selection import train_test_split
import kagglehub

In [40]:
# Download latest version
path = kagglehub.dataset_download("lantian773030/pokemonclassification")

print("Path to dataset files:", path)

Path to dataset files: /home/kronendieb/.cache/kagglehub/datasets/lantian773030/pokemonclassification/versions/1


In [41]:
width = height = 244
epochs = 50
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
output_model_path = "PokemonModel.pth"
print(device)

cuda


In [42]:
transform = transforms.Compose([
    transforms.Lambda(lambda img: img.convert("RGB") if img.mode != "RGB" else img),
    transforms.Resize((width, height)),
    transforms.AutoAugment(policy=transforms.AutoAugmentPolicy.IMAGENET),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

dataset = datasets.ImageFolder(root=path+"/PokemonData", transform=transform)
num_labels = len(dataset.classes)

print(f"Number of classes: {num_labels}")

Number of classes: 150


In [35]:
labels = [label for _, label in dataset]

train_val_indices, test_indices = train_test_split(
    range(len(dataset)),
    test_size=0.1,
    stratify=labels,
)

train_indices, val_indices = train_test_split(
    train_val_indices,
    test_size=0.1,
    stratify=[labels[i] for i in train_val_indices],
)

train_dataset = Subset(dataset, train_indices)
val_dataset = Subset(dataset, val_indices)
test_dataset = Subset(dataset, test_indices)

print(f"Stratified loading sizes\nTraining Size: {len(train_dataset)}, Validation Size: {len(val_dataset)}, Test Size: {len(test_dataset)}")

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)


Stratified loading sizes
Training Size: 5524, Validation Size: 614, Test Size: 682


In [36]:
class PokeModel(nn.Module):
    def __init__(self, num_labels) -> None:
        super(PokeModel, self).__init__()
        self.resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_labels)
    
    def forward(self, x):
        return self.resnet(x)

In [None]:
model = PokeModel(num_labels).to(device)

In [38]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [43]:
best_val_loss = float("inf")
for epoch in range(epochs):
    print(f"Epoch: {epoch+1}/{epochs}")
    model.train()
    train_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    
    train_loss /= len(train_loader.dataset)
    train_acc = correct / total

    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    val_loss /= len(val_loader.dataset)
    val_acc = correct / total

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save({
            "epoch": epoch + 1,
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "loss": val_loss,
        }, output_model_path)

    print(f"    Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"    Val Loss:   {val_loss:.4f} | Val Acc:   {val_acc:.4f}")

print(f"Training complete, best validation loss: {best_val_loss:.4f}")

Epoch: 1/50


RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same

In [24]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (preds == labels).sum().item()

print(f"Test Accuracy: {correct/total:.4f}")

Test Accuracy: 0.8680


In [25]:
torch.save(model.state_dict(), "PokemonModel.pth")

In [28]:
lmodel = PokeModel(num_labels=num_labels)
lmodel.load_state_dict(torch.load("PokemonModel.pth"))
lmodel.to(device)

  lmodel.load_state_dict(torch.load("PokemonModel.pth"))


PokeModel(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
         

In [29]:
lmodel.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = lmodel(inputs)
        _, preds = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (preds == labels).sum().item()

print(f"Test Accuracy: {correct/total:.4f}")

Test Accuracy: 0.8768
