In [1]:
# from torchvision import datasets, transforms

# dataset = datasets.ImageFolder(
#     root="data_to_use/dataset_2025-11-03_17-45-40",
#     transform=transforms.ToTensor()
# )

import torch
import os
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

dataset = datasets.ImageFolder(
    root="data_to_use/dataset_2025-11-03_17-45-40",
    transform=transforms.ToTensor()
)

from torch.utils.data import random_split
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_data, test_data = random_split(dataset, [train_size, test_size])


train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
test_loader  = DataLoader(test_data, batch_size=16, shuffle=False)

print(f"Classes found: {dataset.classes}")

Using device: cuda
Classes found: ['dark_center', 'dark_head', 'dark_none', 'dark_rock', 'dark_side', 'fire_center', 'fire_head', 'fire_none', 'fire_rock', 'fire_side', 'ice_center', 'ice_head', 'ice_none', 'ice_rock', 'ice_side', 'no_monster', 'none_center', 'none_head', 'none_none', 'none_rock', 'none_side', 'robot_center', 'robot_head', 'robot_none', 'robot_rock', 'robot_side']


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    """
    Simple 3-layer CNN for 128x128 RGB images.
    Automatically builds correct FC layer size.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Input: (3, 128, 128)
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        # Output: (16, 128, 128)
        self.pool1 = nn.MaxPool2d(2, 2)
        # Output after pool1: (16, 64, 64)

        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        # Output: (32, 64, 64)
        self.pool2 = nn.MaxPool2d(2, 2)
        # Output after pool2: (32, 32, 32)

        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        # Output: (64, 32, 32)
        self.pool3 = nn.MaxPool2d(2, 2)
        # Output after pool3: (64, 16, 16)

        self.dropout = nn.Dropout(0.3)

        # Dynamically determine fc input size
        self.fc1 = None
        self.fc2 = None
        self.num_classes = num_classes


    def forward_features(self, x):
        """
        Convolutional feature extraction.
        Prints the intermediate shapes for clarity.
        """
        x = F.relu(self.conv1(x))
        x = self.pool1(x)   # (16, 64, 64)
        # print("After pool1:", x.shape)

        x = F.relu(self.conv2(x))
        x = self.pool2(x)   # (32, 32, 32)
        # print("After pool2:", x.shape)

        x = F.relu(self.conv3(x))
        x = self.pool3(x)   # (64, 16, 16)
        # print("After pool3:", x.shape)

        return x


    def forward(self, x):
        # Extract features
        x = self.forward_features(x)

        # Flatten for fully connected layers
        x = x.view(x.size(0), -1)  # shape: (batch_size, flatten_size)

        # Build fully connected layers dynamically (first forward only)
        if self.fc1 is None:
            in_features = x.shape[1]
            self.fc1 = nn.Linear(in_features, 128).to(x.device)
            self.fc2 = nn.Linear(128, self.num_classes).to(x.device)
            print(f"[INFO] Built FC layers dynamically: fc1 input = {in_features}")

        # Forward through FC layers
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)

        return x


model = SimpleCNN(num_classes=26).to("cuda" if torch.cuda.is_available() else "cpu")
dummy = torch.randn(1, 3, 128, 128).to(next(model.parameters()).device)
out = model(dummy)
print("Final output:", out.shape)


[INFO] Built FC layers dynamically: fc1 input = 16384
Final output: torch.Size([1, 26])


In [3]:
import torch.optim as optim

model = SimpleCNN(num_classes=len(dataset.classes)).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# optimizer = optim.Adam(model.parameters(), lr=0.01)

epochs = 5
for epoch in range(epochs):
    model.train()

    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss+=loss.item()
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{epochs}] Loss: {avg_loss:.4f}")


: 

In [None]:
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Accuracy: {accuracy:.2f}%")