In [3]:
import os
directory_path = '.'

files = os.listdir(directory_path)

print("Files and Directories in '", directory_path, "' :")
for item in files:
    print(item)

Files and Directories in ' . ' :
Kaggle Sprint.ipynb
myenv
test data
train_data


In [7]:
import torch
print(torch.__version__)
print("CUDA Available:", torch.cuda.is_available())

2.6.0+cu124
CUDA Available: True


In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
from PIL import Image

In [25]:
train_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomRotation(20),
    transforms.RandomHorizontalFlip(),
    transforms.RandomAffine(0, shear=20, scale=(0.8, 1.2)),
    transforms.RandomResizedCrop(128, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

train_dataset = datasets.ImageFolder(root="train_data/train_data", transform=train_transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [16]:
class CNNModel(nn.Module):
    def __init__(self, num_classes=149):
        super(CNNModel, self).__init__()
        
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.drop1 = nn.Dropout(0.25)

        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.drop2 = nn.Dropout(0.3)

        self.conv5 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv6 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.drop3 = nn.Dropout(0.35)

        self.conv7 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.conv8 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.drop4 = nn.Dropout(0.4)

        self.conv9 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(512)
        self.conv10 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.pool5 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.drop5 = nn.Dropout(0.5)

        self.global_avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Linear(512, 512)
        self.drop6 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.conv2(x))
        x = self.pool1(x)
        x = self.drop1(x)

        x = F.relu(self.bn2(self.conv3(x)))
        x = F.relu(self.conv4(x))
        x = self.pool2(x)
        x = self.drop2(x)

        x = F.relu(self.bn3(self.conv5(x)))
        x = F.relu(self.conv6(x))
        x = self.pool3(x)
        x = self.drop3(x)

        x = F.relu(self.bn4(self.conv7(x)))
        x = F.relu(self.conv8(x))
        x = self.pool4(x)
        x = self.drop4(x)

        x = F.relu(self.bn5(self.conv9(x)))
        x = F.relu(self.conv10(x))
        x = self.pool5(x)
        x = self.drop5(x)

        x = self.global_avg_pool(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.drop6(x)
        x = self.fc2(x)
        
        return x

model = CNNModel(num_classes=149)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

print(model)

CNNModel(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (drop1): Dropout(p=0.25, inplace=False)
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (drop2): Dropout(p=0.3, inplace=False)
  (conv5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv6): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (

In [17]:
device = torch.device("cuda")

model = CNNModel(num_classes=149).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

num_epochs = 20
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    epoch_loss = running_loss / len(train_loader)
    epoch_acc = correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")



Epoch [1/20], Loss: 4.7855, Accuracy: 0.0659
Epoch [2/20], Loss: 4.4718, Accuracy: 0.0901
Epoch [3/20], Loss: 4.2532, Accuracy: 0.1125
Epoch [4/20], Loss: 4.0947, Accuracy: 0.1221
Epoch [5/20], Loss: 3.9714, Accuracy: 0.1295
Epoch [6/20], Loss: 3.8435, Accuracy: 0.1413
Epoch [7/20], Loss: 3.7101, Accuracy: 0.1495
Epoch [8/20], Loss: 3.5953, Accuracy: 0.1650
Epoch [9/20], Loss: 3.5099, Accuracy: 0.1748
Epoch [10/20], Loss: 3.4327, Accuracy: 0.1843
Epoch [11/20], Loss: 3.3253, Accuracy: 0.2025
Epoch [12/20], Loss: 3.2739, Accuracy: 0.2015
Epoch [13/20], Loss: 3.1873, Accuracy: 0.2271
Epoch [14/20], Loss: 3.1043, Accuracy: 0.2380
Epoch [15/20], Loss: 3.0457, Accuracy: 0.2466
Epoch [16/20], Loss: 2.9679, Accuracy: 0.2589
Epoch [17/20], Loss: 2.9116, Accuracy: 0.2735
Epoch [18/20], Loss: 2.8530, Accuracy: 0.2814
Epoch [19/20], Loss: 2.7966, Accuracy: 0.2972
Epoch [20/20], Loss: 2.7293, Accuracy: 0.3119


In [18]:
torch.save(model, "model.pth")

In [19]:
torch.save(model.state_dict(), "model_weights.pth")

In [26]:
from PIL import Image
import numpy as np
import os
from tqdm import tqdm
import io
from svglib.svglib import svg2rlg
from reportlab.graphics.renderPM import drawToPIL
import torch

test_dir = 'test data/test data'
test_images = sorted(os.listdir(test_dir))
predictions = []

for img_name in tqdm(test_images):
    img_path = os.path.join(test_dir, img_name)

    try:
        if img_name.lower().endswith('.svg'):
            drawing = svg2rlg(img_path)
            img = drawToPIL(drawing)

        else:
            img = Image.open(img_path).convert('RGB')

        img = img.resize((128, 128))
        img_array = np.array(img) / 255.0
        img_array = np.transpose(img_array, (2, 0, 1))
        img_tensor = torch.tensor(img_array, dtype=torch.float32).unsqueeze(0).to(device)

        with torch.no_grad():
            pred = model(img_tensor)
        pred_class = torch.argmax(pred, dim=1).item()
        class_label = list(train_dataset.class_to_idx.keys())[pred_class]

        predictions.append([img_name, class_label])

    except Exception as e:
        print(f"Error processing {img_name}: {e}")

for img_name, class_label in predictions:
    print(f"Image: {img_name} | Predicted Class: {class_label}")

 13%|█▎        | 296/2195 [00:03<00:21, 89.78it/s] 

Error processing 1254.svg: cannot import desired renderPM backend rlPyCairo
Seek advice at the users list see
https://pairlist2.pair.net/mailman/listinfo/reportlab-users


 64%|██████▍   | 1409/2195 [00:15<00:07, 102.50it/s]

Error processing 277.svg: cannot import desired renderPM backend rlPyCairo
Seek advice at the users list see
https://pairlist2.pair.net/mailman/listinfo/reportlab-users


 66%|██████▌   | 1443/2195 [00:15<00:07, 100.08it/s]

Error processing 31.svg: cannot import desired renderPM backend rlPyCairo
Seek advice at the users list see
https://pairlist2.pair.net/mailman/listinfo/reportlab-users


 73%|███████▎  | 1605/2195 [00:17<00:05, 104.36it/s]

Error processing 450.svg: cannot import desired renderPM backend rlPyCairo
Seek advice at the users list see
https://pairlist2.pair.net/mailman/listinfo/reportlab-users


 80%|███████▉  | 1755/2195 [00:18<00:04, 89.65it/s] 

Error processing 590.svg: cannot import desired renderPM backend rlPyCairo
Seek advice at the users list see
https://pairlist2.pair.net/mailman/listinfo/reportlab-users


100%|██████████| 2195/2195 [00:23<00:00, 92.18it/s] 

Image: 1.jpg | Predicted Class: Hitmonchan
Image: 10.jpg | Predicted Class: Horsea
Image: 100.jpg | Predicted Class: Jynx
Image: 1000.jpg | Predicted Class: Lapras
Image: 1001.jpg | Predicted Class: Pidgeot
Image: 1002.jpg | Predicted Class: Omastar
Image: 1003.jpg | Predicted Class: Magnemite
Image: 1004.jpg | Predicted Class: Horsea
Image: 1005.jpg | Predicted Class: Squirtle
Image: 1006.jpg | Predicted Class: Squirtle
Image: 1007.png | Predicted Class: Slowbro
Image: 1008.jpg | Predicted Class: Pidgeot
Image: 1009.jpg | Predicted Class: Seaking
Image: 101.jpg | Predicted Class: Shellder
Image: 1010.jpg | Predicted Class: Shellder
Image: 1011.jpg | Predicted Class: Pidgeot
Image: 1012.jpg | Predicted Class: Pidgeot
Image: 1013.jpg | Predicted Class: Ninetales
Image: 1014.png | Predicted Class: Electrode
Image: 1015.jpg | Predicted Class: Weedle
Image: 1016.jpg | Predicted Class: Gyarados
Image: 1017.jpg | Predicted Class: Squirtle
Image: 1018.jpg | Predicted Class: Electrode
Image: 1


