In [23]:
import kagglehub

path = kagglehub.dataset_download("karakaggle/kaggle-cat-vs-dog-dataset")

print("Path to dataset files:", path)

  from .autonotebook import tqdm as notebook_tqdm


Path to dataset files: C:\Users\Uporabnik\.cache\kagglehub\datasets\karakaggle\kaggle-cat-vs-dog-dataset\versions\1


In [None]:
import os
import torch
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision.datasets import ImageFolder
from torchvision.datasets.folder import IMG_EXTENSIONS
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np

# Albumentations transforms
basic_transform = A.Compose([
    A.Resize(64, 64),
    A.RandomCrop(64, 64),
    ToTensorV2()
])
advanced_transform = A.Compose([
    A.Resize(64, 64),
    A.RandomCrop(64, 64),
    A.HorizontalFlip(),
    A.RandomBrightnessContrast(),
    A.RandomRotate90(),
    A.HueSaturationValue(),
    ToTensorV2()
])
# Albumentations wrapper
class AlbumentationsTransform:
    def __init__(self, transform):
        self.transform = transform

    def __call__(self, img):
        img = np.array(img)  # Pretvoriti iz PIL fotografije v NumPy array
        augmented = self.transform(image=img)
        return augmented["image"]
# Filtered ImageFolder da izločimo neveljavne datoteke
class FilteredImageFolder(ImageFolder):
    def __init__(self, root, transform=None, target_transform=None):
        super().__init__(root, transform, target_transform)
        self.samples = [
            (path, class_idx) for path, class_idx in self.samples
            if path.lower().endswith(tuple(IMG_EXTENSIONS))
        ]
        self.targets = [s[1] for s in self.samples]
class CombinedDataset(Dataset):
    def __init__(self, dataset_path, basic_transform, advanced_transform):
        self.image_folder = FilteredImageFolder(root=dataset_path)
        self.basic_transform = AlbumentationsTransform(basic_transform)
        self.advanced_transform = AlbumentationsTransform(advanced_transform)

    def __len__(self):
        return 2 * len(self.image_folder)

    def __getitem__(self, idx):
        original_idx = idx % len(self.image_folder)
        img, label = self.image_folder[original_idx]

        if idx < len(self.image_folder):
            img = self.basic_transform(img)
        else:
            img = self.advanced_transform(img)

        return img, label

dataset_path = os.path.join(
    'C:\\Users\\Uporabnik\\.cache\\kagglehub\\datasets\\karakaggle\\kaggle-cat-vs-dog-dataset\\versions\\1\\kagglecatsanddogs_3367a',
    'PetImages'
)

combined_dataset = CombinedDataset(dataset_path, basic_transform, advanced_transform)
train_size = int(0.9 * len(combined_dataset))
val_size = len(combined_dataset) - train_size
train_dataset, val_dataset = random_split(combined_dataset, [train_size, val_size])

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [32]:
import matplotlib.pyplot as plt
from torchvision.utils import make_grid

def show_image(image, label, classes):
    plt.imshow(image.permute(1, 2, 0))
    plt.title(classes[label])
    plt.show()
def show_images(dataloader, dataset):
    images, labels = next(iter(dataloader))
    grid = make_grid(images, nrow=8)
    plt.figure(figsize=(20, 20))
    plt.imshow(grid.permute(1, 2, 0))
    plt.title([dataset.image_folder.classes[i] for i in labels])
    plt.show()

# Fetch Batcha
train_features_batch, train_labels_batch = next(iter(train_dataloader))
print(train_features_batch.shape, train_labels_batch.shape)

#show_image(train_features_batch[0], train_labels_batch[0], combined_dataset.image_folder.classes)
#show_images(train_dataloader, combined_dataset)


torch.Size([32, 3, 64, 64]) torch.Size([32])


In [103]:
print(f"Length of train_dataloader is {len(train_dataloader)} with batches of size {train_dataloader.batch_size}")

Length of train_dataloader is 1404 with batches of size 32


In [104]:
from torch import nn
class DogOrCatModelV0(nn.Module):
    def __init__(self, 
                 input_shape: int, 
                 hidden_units: int, 
                 output_shape: int):
        super().__init__()
        self.layer_stack = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=input_shape, out_features=hidden_units),
            nn.Linear(in_features=hidden_units, out_features=output_shape)
        )
    def forward(self, x):
        return self.layer_stack(x)

In [105]:
flatten_model = nn.Flatten()
x = train_features_batch[0]
output = flatten_model(x)

x.shape, output.shape

(torch.Size([3, 64, 64]), torch.Size([3, 4096]))

In [106]:
torch.manual_seed(42)

#num_classes = combined_dataset.image_folder.classes
#print(f"Number of classes: {num_classes}")  # Should output 2

model_v0 = DogOrCatModelV0(
    input_shape=64*64*3, 
    hidden_units=22, 
    output_shape=len(combined_dataset.image_folder.classes)
    ).to("cpu")

In [107]:
# troubleshoot da vidim ce dimensions delajo
dummy_x = torch.randn([4, 3, 64, 64])
model_v0(dummy_x)

tensor([[-0.7286,  0.5637],
        [-0.5120,  0.6398],
        [-0.5710,  0.6762],
        [-0.5161,  0.5698]], grad_fn=<AddmmBackward0>)

In [108]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_v0.parameters(), lr=0.0001)

In [109]:
def accuracy_fn(y_true, y_pred):
    """Calculates accuracy between truth labels and predictions.

    Args:
        y_true (torch.Tensor): Truth labels for predictions.
        y_pred (torch.Tensor): Predictions to be compared to predictions.

    Returns:
        [torch.float]: Accuracy value between y_true and y_pred, e.g. 78.45
    """
    correct = torch.eq(y_true, y_pred).sum().item()
    acc = (correct / len(y_pred)) * 100
    return acc


In [110]:
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [111]:
from tqdm.auto import tqdm

torch.manual_seed(42)

epochs = 3

for epoch in tqdm(range(epochs)):
    print(f"Epoch {epoch}\n---------")
    train_loss = 0
    # X je image, y je label
    for batch, (X, y) in enumerate(train_dataloader): # Iti skozi vse batche
        model_v0.train()
        # Forward pass
        X = X.float()
        y_pred = model_v0(X)
        # Loss
        loss = loss_fn(y_pred, y)
        train_loss += loss # Dodaj loss v train_loss
        # Optimizer zero grad
        optimizer.zero_grad()
        # Loss backward
        loss.backward()
        # Optimizer step
        optimizer.step()
        if batch % 400 == 0:
            print(f"Looked at {batch * len(X)}/{len(train_dataloader.dataset)} samples")
    
    train_loss /= len(train_dataloader)

    # Testiranje modela
    test_loss, test_acc = 0, 0
    model_v0.eval()
    with torch.inference_mode():
        for X_test, y_test in val_dataloader:
            # Forward pass
            X_test = X_test.float()
            test_pred = model_v0(X_test)
            # Akumulirana kalkulacia lossa 
            test_loss += loss_fn(test_pred, y_test)
            # Kalkulacija točnosti
            test_acc += accuracy_fn(y_true=y_test, y_pred=test_pred.argmax(dim=1))
        # Loss avg za batch
        test_loss /= len(val_dataloader)
        # Točnost avg za batch
        test_acc /= len(val_dataloader)
    
    print(f"\nEpoch: {epoch}, Training Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f},Test Accuracy: {test_acc:.4f}")

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch 0
---------
Looked at 0/44926 samples




Looked at 12800/44926 samples
Looked at 25600/44926 samples
Looked at 38400/44926 samples


 33%|███▎      | 1/3 [03:06<06:13, 186.91s/it]


Epoch: 0, Training Loss: 5.7820, Test Loss: 2.7756,Test Accuracy: 54.1667
Epoch 1
---------
Looked at 0/44926 samples
Looked at 12800/44926 samples
Looked at 25600/44926 samples
Looked at 38400/44926 samples


 67%|██████▋   | 2/3 [07:10<03:40, 220.46s/it]


Epoch: 1, Training Loss: 2.5564, Test Loss: 1.4374,Test Accuracy: 56.8309
Epoch 2
---------
Looked at 0/44926 samples
Looked at 12800/44926 samples
Looked at 25600/44926 samples
Looked at 38400/44926 samples


100%|██████████| 3/3 [11:00<00:00, 220.14s/it]


Epoch: 2, Training Loss: 1.2028, Test Loss: 0.8053,Test Accuracy: 54.6074



