In [23]:
import kagglehub

path = kagglehub.dataset_download("karakaggle/kaggle-cat-vs-dog-dataset")

print("Path to dataset files:", path)

  from .autonotebook import tqdm as notebook_tqdm


Path to dataset files: C:\Users\Uporabnik\.cache\kagglehub\datasets\karakaggle\kaggle-cat-vs-dog-dataset\versions\1


In [None]:
import os
import torch
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision.datasets import ImageFolder
from torchvision.datasets.folder import IMG_EXTENSIONS
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np

# Albumentations transforms
basic_transform = A.Compose([
    A.Resize(64, 64),
    A.RandomCrop(64, 64),
    ToTensorV2()
])
advanced_transform = A.Compose([
    A.Resize(64, 64),
    A.RandomCrop(64, 64),
    A.HorizontalFlip(),
    A.RandomBrightnessContrast(),
    A.RandomRotate90(),
    A.HueSaturationValue(),
    ToTensorV2()
])
# Albumentations wrapper
class AlbumentationsTransform:
    def __init__(self, transform):
        self.transform = transform

    def __call__(self, img):
        img = np.array(img)  # Pretvoriti iz PIL fotografije v NumPy array
        augmented = self.transform(image=img)
        return augmented["image"]
# Filtered ImageFolder da izločimo neveljavne datoteke
class FilteredImageFolder(ImageFolder):
    def __init__(self, root, transform=None, target_transform=None):
        super().__init__(root, transform, target_transform)
        self.samples = [
            (path, class_idx) for path, class_idx in self.samples
            if path.lower().endswith(tuple(IMG_EXTENSIONS))
        ]
        self.targets = [s[1] for s in self.samples]
class CombinedDataset(Dataset):
    def __init__(self, dataset_path, basic_transform, advanced_transform):
        self.image_folder = FilteredImageFolder(root=dataset_path)
        self.basic_transform = AlbumentationsTransform(basic_transform)
        self.advanced_transform = AlbumentationsTransform(advanced_transform)

    def __len__(self):
        return 2 * len(self.image_folder)

    def __getitem__(self, idx):
        original_idx = idx % len(self.image_folder)
        img, label = self.image_folder[original_idx]

        if idx < len(self.image_folder):
            img = self.basic_transform(img)
        else:
            img = self.advanced_transform(img)

        return img, label

dataset_path = os.path.join(
    'C:\\Users\\Uporabnik\\.cache\\kagglehub\\datasets\\karakaggle\\kaggle-cat-vs-dog-dataset\\versions\\1\\kagglecatsanddogs_3367a',
    'PetImages'
)

combined_dataset = CombinedDataset(dataset_path, basic_transform, advanced_transform)
train_size = int(0.9 * len(combined_dataset))
val_size = len(combined_dataset) - train_size
train_dataset, val_dataset = random_split(combined_dataset, [train_size, val_size])

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [32]:
import matplotlib.pyplot as plt
from torchvision.utils import make_grid

def show_image(image, label, classes):
    plt.imshow(image.permute(1, 2, 0))
    plt.title(classes[label])
    plt.show()
def show_images(dataloader, dataset):
    images, labels = next(iter(dataloader))
    grid = make_grid(images, nrow=8)
    plt.figure(figsize=(20, 20))
    plt.imshow(grid.permute(1, 2, 0))
    plt.title([dataset.image_folder.classes[i] for i in labels])
    plt.show()

# Fetch Batcha
train_features_batch, train_labels_batch = next(iter(train_dataloader))
print(train_features_batch.shape, train_labels_batch.shape)

#show_image(train_features_batch[0], train_labels_batch[0], combined_dataset.image_folder.classes)
#show_images(train_dataloader, combined_dataset)


torch.Size([32, 3, 64, 64]) torch.Size([32])


In [103]:
print(f"Length of train_dataloader is {len(train_dataloader)} with batches of size {train_dataloader.batch_size}")

Length of train_dataloader is 1404 with batches of size 32


In [210]:
from torch import nn
class DogOrCatModelV0(nn.Module):
    def __init__(self, 
                 input_shape: int, 
                 hidden_units: int, 
                 output_shape: int):
        super().__init__()
        self.block_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_shape, 
                      out_channels=hidden_units, 
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units, 
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.BatchNorm2d(hidden_units), # samo normalizira, ne spreminja oblike. Ni nekaj pomembnega, lahko izpustimo samo I think da bi bli podatki slabši oz. tak je razloženo, nimam časa naštudirati njegove podrobnosti
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        second_hidden_units = hidden_units * 2
        self.block_2 = nn.Sequential(
            nn.Conv2d(in_channels=hidden_units, 
                      out_channels=second_hidden_units, 
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=second_hidden_units, 
                      out_channels=second_hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.BatchNorm2d(second_hidden_units), 
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        third_hidden_units = hidden_units * 4
        self.block_3 = nn.Sequential(
            nn.Conv2d(in_channels=second_hidden_units, 
                      out_channels=third_hidden_units, 
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=third_hidden_units, 
                      out_channels=third_hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.BatchNorm2d(third_hidden_units),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            # Fully connected layer; in_features is calculated dynamically
            nn.Linear(in_features=hidden_units * 4 * 8 * 8,# na začetku dat na hidden_units*0, da najdemo napake kar se tiče dimenzij, potem pa spremenimo
                      out_features=128), 
            nn.Tanh(),
            nn.Linear(128, output_shape),
            nn.Softmax(dim=1)
        )
    
    def forward(self, x: torch.Tensor):
        x = self.block_1(x)
        print(f"E: {x.shape}")
        x = self.block_2(x)
        print(f"E: {x.shape}")
        x = self.block_3(x)
        print(f"E: {x.shape}")
        x = self.classifier(x)
        return x

In [211]:
torch.manual_seed(42)
model = DogOrCatModelV0(input_shape=3, hidden_units=16, output_shape=2)

In [212]:
torch.manual_seed(42)

images = torch.randn(size=(1, 3, 64, 64))
model(images)

E: torch.Size([1, 16, 32, 32])
E: torch.Size([1, 32, 16, 16])
E: torch.Size([1, 64, 8, 8])


tensor([[0.3888, 0.6112]], grad_fn=<SoftmaxBackward0>)

In [123]:
torch.manual_seed(42)

#num_classes = combined_dataset.image_folder.classes
#print(f"Number of classes: {num_classes}")  # Should output 2

model_v0 = DogOrCatModelV0(
    input_shape=64*64*3, 
    hidden_units=10, 
    output_shape=len(combined_dataset.image_folder.classes)
    ).to("cpu")

In [124]:
# troubleshoot da vidim ce dimensions delajo
dummy_x = torch.randn([4, 3, 64, 64])
model_v0(dummy_x)

tensor([[0.0000, 0.1484],
        [0.0655, 0.0047],
        [0.0000, 0.0000],
        [0.0475, 0.1204]], grad_fn=<ReluBackward0>)

In [129]:
#from helper_functions import accuracy_fn

loss_fn = nn.CrossEntropyLoss()
#optimizer = torch.optim.Adam(model_v0.parameters(), lr=0.0001)
optimizer = torch.optim.SGD(params=model_v0.parameters(), 
                            lr=0.0001)

In [130]:
def accuracy_fn(y_true, y_pred):
    """Calculates accuracy between truth labels and predictions.

    Args:
        y_true (torch.Tensor): Truth labels for predictions.
        y_pred (torch.Tensor): Predictions to be compared to predictions.

    Returns:
        [torch.float]: Accuracy value between y_true and y_pred, e.g. 78.45
    """
    correct = torch.eq(y_true, y_pred).sum().item()
    acc = (correct / len(y_pred)) * 100
    return acc


In [131]:
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [138]:
def train_step(model: torch.nn.Module, 
               data_loader: torch.utils.data.DataLoader, 
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               accuracy_fn,
               device: torch.device = device):
    train_loss, train_acc = 0, 0
    model.train()
    for batch, (X, y) in enumerate(data_loader): # Iti skozi vse batche
        X = X.float()
        # Podatke na napravo
        X, y = X.to(device), y.to(device)

        # Forward pass
        y_pred = model(X)
        
        # Loss
        loss = loss_fn(y_pred, y)
        train_loss += loss # Dodaj loss v train_loss
        # Kalkulacija točnosti
        train_acc += accuracy_fn(y_true=y, 
                                y_pred=y_pred.argmax(dim=1))

        # Optimizer zero grad
        optimizer.zero_grad()

        # Loss backward
        loss.backward()

        # Optimizer step
        optimizer.step()
    
    # Loss avg za batch
    train_loss /= len(val_dataloader)
    # Točnost avg za batch
    train_acc /= len(val_dataloader)
    
    print(f"\nTrain loss: {train_loss:.5f} | Training Acc: {train_acc:.2f}%")


def test_step(data_loader: torch.utils.data.DataLoader,
              model: torch.nn.Module,
              loss_fn: torch.nn.Module,
              accuracy_fn,
              device: torch.device = device):
    test_loss, test_acc = 0, 0
    model.to(device)
    model.eval()
    with torch.inference_mode():
        for X, y in data_loader:
            X = X.float()
            X, y = X.to(device), y.to(device)

            test_pred = model(X)

            test_loss += loss_fn(test_pred, y)
            test_acc += accuracy_fn(y_true=y, 
                                    y_pred=test_pred.argmax(dim=1))
        
        test_loss /= len(data_loader)
        test_acc /= len(data_loader)
        print(f"Test loss: {test_loss:.5f} | Test Acc: {test_acc:.2f}%")
  

In [139]:
torch.manual_seed(42)
from timeit import default_timer as timer
train_time_start = timer()

epochs = 3

from tqdm.auto import tqdm
for epoch in tqdm(range(epochs)):
    print(f"Epoch {epoch + 1}\n-------------------------------")
    train_step(model=model_v0, 
               data_loader=train_dataloader, 
               loss_fn=loss_fn,
               optimizer=optimizer,
               accuracy_fn=accuracy_fn,
               device=device)
    test_step(model=model_v0,
              data_loader=val_dataloader,
              loss_fn=loss_fn,
              accuracy_fn=accuracy_fn,
              device=device)

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch 1
-------------------------------

Train loss: 6.23844 | Training Acc: 450.12%


 33%|███▎      | 1/3 [02:25<04:51, 145.53s/it]

Test loss: 0.69315 | Test Acc: 50.30%
Epoch 2
-------------------------------

Train loss: 6.23844 | Training Acc: 450.12%


 67%|██████▋   | 2/3 [06:25<03:20, 200.88s/it]

Test loss: 0.69315 | Test Acc: 50.30%
Epoch 3
-------------------------------

Train loss: 6.23844 | Training Acc: 450.12%


100%|██████████| 3/3 [10:55<00:00, 218.65s/it]

Test loss: 0.69315 | Test Acc: 50.30%



