In [1]:
!wget https://github.com/SVizor42/ML_Zoomcamp/releases/download/straight-curly-data/data.zip
!unzip data.zip

--2025-12-09 01:48:58--  https://github.com/SVizor42/ML_Zoomcamp/releases/download/straight-curly-data/data.zip
Resolving github.com (github.com)... 140.82.114.3
Connecting to github.com (github.com)|140.82.114.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://release-assets.githubusercontent.com/github-production-release-asset/405934815/e712cf72-f851-44e0-9c05-e711624af985?sp=r&sv=2018-11-09&sr=b&spr=https&se=2025-12-09T02%3A46%3A03Z&rscd=attachment%3B+filename%3Ddata.zip&rsct=application%2Foctet-stream&skoid=96c2d410-5711-43a1-aedd-ab1947aa7ab0&sktid=398a6654-997b-47e9-b12b-9515b896b4de&skt=2025-12-09T01%3A45%3A50Z&ske=2025-12-09T02%3A46%3A03Z&sks=b&skv=2018-11-09&sig=ownIC3NEHsVbdi3RqKmJGys3oZyet7LhE3wt69egjJQ%3D&jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmVsZWFzZS1hc3NldHMuZ2l0aHVidXNlcmNvbnRlbnQuY29tIiwia2V5Ijoia2V5MSIsImV4cCI6MTc2NTI0NjczOCwibmJmIjoxNzY1MjQ0OTM4LCJwYXRoIjoicmVsZWFzZWFzc2V0cHJvZHVjdGlvbi5ibG

In [8]:
!ls data

test  train


In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import statistics

import numpy as np
import torch

SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [16]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=(3,3), stride=1, padding=0)
        self.pool  = nn.MaxPool2d(kernel_size=(2,2))

        flattened_size = 32 * 99 * 99  # after conv+pool from 200x200

        self.fc1 = nn.Linear(flattened_size, 64)
        self.fc2 = nn.Linear(64, 1)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        return x


In [17]:
train_transforms = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

train_dir = "./data/train"
test_dir  = "./data/test"

full_train_dataset = datasets.ImageFolder(train_dir, transform=train_transforms)
test_dataset       = datasets.ImageFolder(test_dir,  transform=train_transforms)

train_size = int(0.8 * len(full_train_dataset))
val_size   = len(full_train_dataset) - train_size
train_dataset, validation_dataset = random_split(full_train_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=20, shuffle=True,  num_workers=2)
validation_loader = DataLoader(validation_dataset, batch_size=20, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=20, shuffle=False, num_workers=2)


In [18]:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleCNN().to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.002, momentum=0.8)

num_epochs = 10
history = {'acc': [], 'loss': [], 'val_acc': [], 'val_loss': [],
           'test_acc': [], 'test_loss': []}

for epoch in range(num_epochs):
    # ---- TRAIN ----
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.float().unsqueeze(1).to(device)  # (batch,1)

        optimizer.zero_grad()
        outputs = model(images)  # logits
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        correct_train += (predicted == labels).sum().item()
        total_train += labels.size(0)

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = correct_train / total_train

    history['loss'].append(epoch_loss)
    history['acc'].append(epoch_acc)

    model.eval()
    val_running_loss = 0.0
    correct_val = 0
    total_val = 0

    with torch.no_grad():
        for images, labels in validation_loader:
            images = images.to(device)
            labels = labels.float().unsqueeze(1).to(device)

            outputs = model(images)  # logits
            loss = criterion(outputs, labels)

            val_running_loss += loss.item() * images.size(0)
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            correct_val += (predicted == labels).sum().item()
            total_val += labels.size(0)

    val_epoch_loss = val_running_loss / len(validation_dataset)
    val_epoch_acc = correct_val / total_val

    history['val_loss'].append(val_epoch_loss)
    history['val_acc'].append(val_epoch_acc)

    test_running_loss = 0.0
    correct_test = 0
    total_test = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.float().unsqueeze(1).to(device)

            outputs = model(images)  # logits
            loss = criterion(outputs, labels)

            test_running_loss += loss.item() * images.size(0)
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            correct_test += (predicted == labels).sum().item()
            total_test += labels.size(0)

    test_epoch_loss = test_running_loss / len(test_dataset)
    test_epoch_acc = correct_test / total_test

    history['test_loss'].append(test_epoch_loss)
    history['test_acc'].append(test_epoch_acc)

    print(
        f"Epoch {epoch+1}/{num_epochs} | "
        f"Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.4f} | "
        f"Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.4f} | "
        f"Test Loss: {test_epoch_loss:.4f}, Test Acc: {test_epoch_acc:.4f}"
    )

median_train_acc = statistics.median(history['acc'])
std_train_loss   = statistics.pstdev(history['loss'])
mean_test_loss   = statistics.mean(history['test_loss'])
avg_last5_test_acc = statistics.mean(history['test_acc'][-5:])

print("\n=== FINAL METRICS ===")
print("Train acc per epoch:", history['acc'])
print("Train loss per epoch:", history['loss'])
print("Test acc per epoch :", history['test_acc'])
print("Test loss per epoch:", history['test_loss'])

print("\nQ3 median train acc:", median_train_acc)
print("Q4 std train loss:", std_train_loss)
print("Q5 mean test loss:", mean_test_loss)
print("Q6 avg test acc last 5 epochs:", avg_last5_test_acc)


Epoch 1/10 | Train Loss: 0.6488, Train Acc: 0.6547 | Val Loss: 0.6079, Val Acc: 0.6687 | Test Loss: 0.6071, Test Acc: 0.6318
Epoch 2/10 | Train Loss: 0.5513, Train Acc: 0.7203 | Val Loss: 0.5983, Val Acc: 0.6937 | Test Loss: 0.5914, Test Acc: 0.6468
Epoch 3/10 | Train Loss: 0.5266, Train Acc: 0.7078 | Val Loss: 0.6057, Val Acc: 0.7125 | Test Loss: 0.5895, Test Acc: 0.6617
Epoch 4/10 | Train Loss: 0.4792, Train Acc: 0.7594 | Val Loss: 0.6405, Val Acc: 0.6937 | Test Loss: 0.6565, Test Acc: 0.6070
Epoch 5/10 | Train Loss: 0.4326, Train Acc: 0.7906 | Val Loss: 0.6521, Val Acc: 0.6937 | Test Loss: 0.6463, Test Acc: 0.6716
Epoch 6/10 | Train Loss: 0.3668, Train Acc: 0.8422 | Val Loss: 0.6288, Val Acc: 0.7000 | Test Loss: 0.6257, Test Acc: 0.6816
Epoch 7/10 | Train Loss: 0.3133, Train Acc: 0.8641 | Val Loss: 0.8562, Val Acc: 0.6937 | Test Loss: 0.8437, Test Acc: 0.6418
Epoch 8/10 | Train Loss: 0.3350, Train Acc: 0.8438 | Val Loss: 0.5827, Val Acc: 0.7063 | Test Loss: 0.6199, Test Acc: 0.6716
