In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import json
import os
from torch.utils.data import random_split
import csv
from PIL import Image

<div style="font-size: 40px">
data preparation
</div>

In [8]:
train_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.1, contrast=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

full_dataset = ImageFolder("train/", transform=train_transform)

dataset_size = len(full_dataset)
train_size = int(dataset_size * 0.92)
val_size = dataset_size - train_size
trainset, valset = random_split(full_dataset, [train_size, val_size])

trainloader = DataLoader(trainset, batch_size=32, shuffle=True, num_workers=4)
valloader = DataLoader(valset, batch_size=32, shuffle=False, num_workers=4)

print(f"Number of classes: {len(full_dataset.classes)}")
print(f"Class names: {full_dataset.classes}")
print(f"training images count: {len(trainset)}")
print(f"testing images count: {len(valset)}")

Liczba klas: 50
Nazwy klas: ['acoustic', 'antenna', 'bacteria', 'battery', 'bean', 'beetle', 'bicycle', 'birch', 'bird', 'bomb', 'bread', 'bridge', 'camera', 'carbon', 'cat', 'corn', 'crab', 'crocodilian', 'echinoderm', 'egg', 'elephant', 'fish', 'flower', 'frog', 'fungus', 'gauge', 'hammer', 'icecream', 'kangaroo', 'memorial', 'monkey', 'motor', 'nest', 'palm', 'pizza', 'pot', 'printer', 'saw', 'snake', 'spice', 'spider', 'spoon', 'squash', 'swine', 'tea', 'tomato', 'towel', 'truck', 'turtle', 'worm']
Liczba obrazów treningowych: 88011


<div style="font-size: 40px">
model architecture
</div>

In [None]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()

        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        residual = x

        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        out += self.shortcut(residual)
        out = F.relu(out)

        return out

In [9]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        self.shortcut = nn.Sequential()

        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        residual = x
        
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        
        out += self.shortcut(residual)
        out = F.relu(out)
        
        return out

In [10]:
class CNNModel(nn.Module):
    def __init__(self, num_classes):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.res1 = ResidualBlock(32, 64, stride=1)
        self.res2 = ResidualBlock(64, 128, stride=1)
        self.res3 = ResidualBlock(128, 256, stride=1)
        self.res4 = ResidualBlock(256, 512, stride=1)

        self.fc1 = nn.Linear(512 * 2 * 2, 1024)
        self.dropout = nn.Dropout(0.4)
        self.fc2 = nn.Linear(1024, num_classes)
        
    def forward(self, x):
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))
        x = self.pool3(F.relu(self.bn3(self.conv3(x))))
        x = self.pool4(F.relu(self.bn4(self.conv4(x))))
        x = self.pool5(F.relu(self.bn5(self.conv5(x))))

        x = x.view(-1, 512 * 7 * 7)

        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        
        return x

In [11]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Used device: {device}")


Używane urządzenie: cuda:0


In [None]:
num_classes = len(full_dataset.classes)
model = CNNModel(num_classes).to(device)

model_path = 'model.pth'
if os.path.exists(model_path):
    print(f"Loading existing model from {model_path}")
    model.load_state_dict(torch.load(model_path))
else:
    print("No existing model found. Initializing new model.")

<div style="font-size: 40px">
main training loop
</div>

In [13]:
num_classes = len(trainset.classes)
model = CNNModel(num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5)

def train_model(model, trainloader, valloader, criterion, optimizer, scheduler, start_epoch=0, num_epochs=10):
    os.makedirs("models", exist_ok=True)

    model.train()

    training_losses = []
    training_accuracies = []
    val_losses = []
    val_accuracies = []

    for epoch in range(start_epoch, start_epoch + num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for i, data in enumerate(trainloader, 0):
            inputs, labels = data[0].to(device), data[1].to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            if i % 100 == 99:
                elapsed_time = time.time() - start_time
                accuracy = 100 * correct / total
                print(f'Epoch {epoch+1}, Batch {i+1}, Loss: {running_loss/100:.3f}, Accuracy: {accuracy:.2f}%, Time: {elapsed_time:.2f}s')
                running_loss = 0.0

        print(f'Accuracy after Epoch {epoch+1}: {100 * correct / total:.2f}%')

train_model(model, trainloader, criterion, optimizer, num_epochs=5)

history = train_model(model, trainloader, valloader, criterion, optimizer, scheduler, start_epoch=50, num_epochs=20)

Accuracy after Epoch 1: 18.18% Loss: 0.096%
Accuracy after Epoch 2: 31.29% Loss: 0.079%
Accuracy after Epoch 2: 31.29% Loss: 0.079%
Accuracy after Epoch 3: 38.51% Loss: 0.070%
Accuracy after Epoch 3: 38.51% Loss: 0.070%
Accuracy after Epoch 4: 44.16% Loss: 0.064%
Accuracy after Epoch 4: 44.16% Loss: 0.064%
Accuracy after Epoch 5: 48.86% Loss: 0.058%
Accuracy after Epoch 5: 48.86% Loss: 0.058%
Accuracy after Epoch 6: 52.48% Loss: 0.054%
Accuracy after Epoch 6: 52.48% Loss: 0.054%
Accuracy after Epoch 7: 55.31% Loss: 0.050%
Accuracy after Epoch 7: 55.31% Loss: 0.050%
Accuracy after Epoch 8: 58.22% Loss: 0.047%
Accuracy after Epoch 8: 58.22% Loss: 0.047%
Accuracy after Epoch 9: 60.58% Loss: 0.044%
Accuracy after Epoch 9: 60.58% Loss: 0.044%
Accuracy after Epoch 10: 62.79% Loss: 0.042%
Accuracy after Epoch 10: 62.79% Loss: 0.042%
Accuracy after Epoch 11: 64.59% Loss: 0.039%
Accuracy after Epoch 11: 64.59% Loss: 0.039%
Accuracy after Epoch 12: 66.28% Loss: 0.037%
Accuracy after Epoch 12: 66

<div style="font-size: 40px">
plot training history
</div>

In [None]:
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.plot(history['train_losses'], 'b-', label='Training')
plt.plot(history['val_losses'], 'r-', label='Validation')
plt.title('Loss During Training')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history['train_accs'], 'b-', label='Training')
plt.plot(history['val_accs'], 'r-', label='Validation')
plt.title('Accuracy During Training')
plt.xlabel('Epochs')
plt.ylabel('Accuracy (%)')
plt.legend()

plt.tight_layout()
plt.show()

<div style="font-size: 40px">
generate predictions
</div>

In [None]:
model.load_state_dict(torch.load('main.pth'))
model.eval()

test_dir = "dane/test_all/"
test_files = [f for f in os.listdir(test_dir)
              if f.lower().endswith(('.jpeg', '.jpg', '.png', '.JPEG'))]

print(f"Found {len(test_files)} test images")
print(f"Classes in training set: {full_dataset.classes}")

In [None]:

predictions = []
filenames = []

with torch.no_grad():
    for file in test_files:
        img_path = os.path.join(test_dir, file)
        image = Image.open(img_path).convert('RGB')
        image_tensor = test_transform(image).unsqueeze(0).to(device)

        output = model(image_tensor)
        _, pred = torch.max(output, 1)

        base_name = os.path.splitext(file)[0]
        filename_jpeg = f"{base_name}.JPEG"

        predictions.append(pred.item())
        filenames.append(filename_jpeg)

        if len(predictions) % 100 == 0:
            print(f"Processed {len(predictions)}/{len(test_files)} images")

In [None]:
with open('pred.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    for filename, pred in zip(filenames, predictions):
        writer.writerow([filename, pred])