In [30]:
import kagglehub

path = kagglehub.dataset_download("karakaggle/kaggle-cat-vs-dog-dataset")

print("Path to dataset files:", path)

Path to dataset files: C:\Users\Uporabnik\.cache\kagglehub\datasets\karakaggle\kaggle-cat-vs-dog-dataset\versions\1


In [31]:
import os
import torch
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision.datasets import ImageFolder
from torchvision.datasets.folder import IMG_EXTENSIONS
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
import warnings

# Albumentations transforms
basic_transform = A.Compose([
    A.Resize(64, 64),
    A.RandomCrop(64, 64),
    ToTensorV2()
])
advanced_transform = A.Compose([
    A.Resize(64, 64),
    A.RandomCrop(64, 64),
    A.HorizontalFlip(),
    A.RandomBrightnessContrast(),
    A.RandomRotate90(),
    A.HueSaturationValue(),
    ToTensorV2()
])

# Albumentations wrapper
class AlbumentationsTransform:
    def __init__(self, transform):
        self.transform = transform

    def __call__(self, img):
        img = np.array(img)  # Convert from PIL image to NumPy array
        augmented = self.transform(image=img)
        return augmented["image"]

# Filtered ImageFolder to handle invalid files
class FilteredImageFolder(ImageFolder):
    def __init__(self, root, transform=None, target_transform=None):
        super().__init__(root, transform, target_transform)
        valid_samples = []
        for path, class_idx in self.samples:
            try:
                # Try opening the image to check for validity
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")  # Suppress warnings
                    img = Image.open(path)
                    img.verify()  # Verify image integrity
                valid_samples.append((path, class_idx))
            except (IOError, SyntaxError, ValueError) as e:
                print(f"Invalid image file {path}: {e}")
        self.samples = valid_samples
        self.targets = [s[1] for s in self.samples]

# Combined dataset for basic and advanced transforms
class CombinedDataset(Dataset):
    def __init__(self, dataset_path, basic_transform, advanced_transform):
        self.image_folder = FilteredImageFolder(root=dataset_path)
        self.basic_transform = AlbumentationsTransform(basic_transform)
        self.advanced_transform = AlbumentationsTransform(advanced_transform)

    def __len__(self):
        return 2 * len(self.image_folder)

    def __getitem__(self, idx):
        original_idx = idx % len(self.image_folder)
        img, label = self.image_folder[original_idx]

        if idx < len(self.image_folder):
            img = self.basic_transform(img)
        else:
            img = self.advanced_transform(img)

        return img, label

# Dataset path
dataset_path = os.path.join(
    'C:\\Users\\Uporabnik\\.cache\\kagglehub\\datasets\\karakaggle\\kaggle-cat-vs-dog-dataset\\versions\\1\\kagglecatsanddogs_3367a',
    'PetImages'
)

# Create datasets and dataloaders
combined_dataset = CombinedDataset(dataset_path, basic_transform, advanced_transform)
train_size = int(0.9 * len(combined_dataset))
val_size = len(combined_dataset) - train_size
train_dataset, val_dataset = random_split(combined_dataset, [train_size, val_size])

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)


In [None]:
# show img and imgs
import matplotlib.pyplot as plt
from torchvision.utils import make_grid

def show_image(image, label, classes):
    plt.imshow(image.permute(1, 2, 0))
    plt.title(classes[label])
    plt.show()
def show_images(dataloader, dataset):
    images, labels = next(iter(dataloader))
    grid = make_grid(images, nrow=8)
    plt.figure(figsize=(20, 20))
    plt.imshow(grid.permute(1, 2, 0))
    plt.title([dataset.image_folder.classes[i] for i in labels])
    plt.show()

# Fetch Batcha
train_features_batch, train_labels_batch = next(iter(train_dataloader))
print(train_features_batch.shape, train_labels_batch.shape)

#show_image(train_features_batch[0], train_labels_batch[0], combined_dataset.image_folder.classes)
#show_images(train_dataloader, combined_dataset)


torch.Size([32, 3, 64, 64]) torch.Size([32])


In [33]:
print(f"Length of train_dataloader is {len(train_dataloader)} with batches of size {train_dataloader.batch_size}")

Length of train_dataloader is 1404 with batches of size 32


In [104]:
# Model

from torch import nn
import torch
class DogOrCatModelV0(nn.Module):
    def __init__(self, 
                 input_shape: int, 
                 hidden_units: int, 
                 output_shape: int):
        super().__init__()
        self.block_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_shape, 
                      out_channels=hidden_units, 
                      kernel_size=3,
                      stride=1,
                      padding=1),
            #nn.BatchNorm2d(hidden_units),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units, 
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.BatchNorm2d(hidden_units), # samo normalizira, ne spreminja oblike. Ni nekaj pomembnega, lahko izpustimo samo I think da bi bli podatki slabši oz. tak je razloženo, nimam časa naštudirati njegove podrobnosti
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        second_hidden_units = hidden_units * 2
        self.block_2 = nn.Sequential(
            nn.Conv2d(in_channels=hidden_units, 
                      out_channels=second_hidden_units, 
                      kernel_size=3,
                      stride=1,
                      padding=1),
            #nn.BatchNorm2d(second_hidden_units),
            nn.ReLU(),
            nn.Conv2d(in_channels=second_hidden_units, 
                      out_channels=second_hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.BatchNorm2d(second_hidden_units), 
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        third_hidden_units = hidden_units * 4
        self.block_3 = nn.Sequential(
            nn.Conv2d(in_channels=second_hidden_units, 
                      out_channels=third_hidden_units, 
                      kernel_size=3,
                      stride=1,
                      padding=1),
            #nn.BatchNorm2d(third_hidden_units),
            nn.ReLU(),
            nn.Conv2d(in_channels=third_hidden_units, 
                      out_channels=third_hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.BatchNorm2d(third_hidden_units),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            # Fully connected layer; in_features is calculated dynamically
            nn.Linear(in_features=hidden_units * 4 * 8 * 8,# na začetku dat na hidden_units*0, da najdemo napake kar se tiče dimenzij, potem pa spremenimo
                      out_features=128), 
            nn.Tanh(),
            nn.Linear(128, output_shape)
        )
    
    def forward(self, x: torch.Tensor):
        x = self.block_1(x)
        #print(f"E: {x.shape}")
        x = self.block_2(x)
        #print(f"E: {x.shape}")
        x = self.block_3(x)
        #print(f"E: {x.shape}")
        x = self.classifier(x)
        return x


In [121]:
torch.manual_seed(42)
model = DogOrCatModelV0(input_shape=3, hidden_units=16, output_shape=2)

In [122]:
# Test to see if it works
torch.manual_seed(42)

images = torch.randn(size=(1, 3, 64, 64))
# model(images) # zagon in izpis modela

In [123]:
# Funkcija za izračun točnosti
def accuracy_fn(y_true, y_pred):
    """Calculates accuracy between truth labels and predictions.

    Args:
        y_true (torch.Tensor): Truth labels for predictions.
        y_pred (torch.Tensor): Predictions to be compared to predictions.

    Returns:
        [torch.float]: Accuracy value between y_true and y_pred, e.g. 78.45
    """
    correct = torch.eq(y_true, y_pred).sum().item()
    acc = (correct / len(y_pred)) * 100
    return acc

In [124]:
# Loss in optimizacija

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
#optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)

In [125]:
# Cuda, če je na voljo

import torch
device = "cuda" if torch.cuda.is_available() else "cpu"
#device # izpis naprave

In [126]:
from torch.utils.tensorboard import SummaryWriter
import os

# Set the log directory for TensorBoard
log_dir = os.path.join(os.getcwd(), 'runs')  # Full path to the 'runs' folder
writer = SummaryWriter(log_dir=log_dir)

In [127]:
# train and test step

def train_step(model: torch.nn.Module, 
               data_loader: torch.utils.data.DataLoader, 
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               accuracy_fn,
               device: torch.device = device,
               epoch: int = 0):
    train_loss, train_acc = 0, 0
    model.train()
    for batch, (X, y) in enumerate(data_loader): # Iti skozi vse batche
        X = X.float()
        # Podatke na napravo
        X, y = X.to(device), y.to(device)

        # Forward pass
        y_pred = model(X)
        
        # Loss
        loss = loss_fn(y_pred, y)
         # Dodaj loss v train_loss
        train_loss += loss.item()
        # Kalkulacija točnosti
        train_acc += accuracy_fn(y_true=y, 
                                y_pred=y_pred.argmax(dim=1))

        # Optimizer zero grad
        optimizer.zero_grad()

        # Loss backward
        loss.backward()

        # Optimizer step
        optimizer.step()
    
    # Loss avg za batch
    train_loss /= len(val_dataloader)
    writer.add_scalar("Loss/train", train_loss, epoch)
    # Točnost avg za batch
    train_acc /= len(val_dataloader)
    
    print(f"\nTrain loss: {train_loss:.5f} | Training Acc: {train_acc:.2f}%")


def test_step(data_loader: torch.utils.data.DataLoader,
              model: torch.nn.Module,
              loss_fn: torch.nn.Module,
              accuracy_fn,
              device: torch.device = device,
              epoch: int = 0):
    test_loss, test_acc = 0, 0
    model.to(device)
    model.eval()
    with torch.inference_mode():
        for X, y in data_loader:
            X = X.float()
            X, y = X.to(device), y.to(device)

            test_pred = model(X)

            test_loss += loss_fn(test_pred, y).item()
            test_acc += accuracy_fn(y_true=y, 
                                    y_pred=test_pred.argmax(dim=1))
        
        test_loss /= len(data_loader)
        writer.add_scalar("Loss/test", test_loss, epoch)
        test_acc /= len(data_loader)
        print(f"Test loss: {test_loss:.5f} | Test Acc: {test_acc:.2f}%")

In [128]:
# save and load model
def save_checkpoint(state, filename="checkpoint.pth.tar"):
    print("=> Saving checkpoint")
    torch.save(state, filename)

def load_checkpoint(checkpoint):
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"], strict=False)
    optimizer.load_state_dict(checkpoint["optimizer"])

In [129]:
import torch
from timeit import default_timer as timer
from tqdm.auto import tqdm

# Set the random seed for reproducibility
torch.manual_seed(42)

# Training setup
train_time_start = timer()
epochs = 1
load_model = False

if load_model:
    try:
        checkpoint = torch.load("checkpoint_9_1.pth.tar")
        load_checkpoint(checkpoint)
    except FileNotFoundError:
        print("Checkpoint file not found. Starting from scratch.")
else:
    # Training loop
    for epoch in tqdm(range(epochs)):
        print(f"Epoch {epoch}\n-------------------------------")

        # Perform training step and get the training loss
        train_loss = train_step(
            model=model, 
            data_loader=train_dataloader, 
            loss_fn=loss_fn,
            optimizer=optimizer,
            accuracy_fn=accuracy_fn,
            device=device,
            epoch=epoch
        )
        
        # Perform testing step and get the test loss
        test_loss = test_step(
            model=model,
            data_loader=val_dataloader,
            loss_fn=loss_fn,
            accuracy_fn=accuracy_fn,
            device=device,
            epoch=epoch
        )
        
        checkpoint = {
            'state_dict': model.state_dict(), 
            'optimizer': optimizer.state_dict()
        }
        if (epoch+1) % 1 == 0:
            save_checkpoint(checkpoint, filename=f"checkpoint_{epoch+1}.pth.tar")
        
        writer.flush()

    # Calculate and print training time
    train_time_end = timer()
    print(f"Training time: {train_time_end - train_time_start:.2f}s")
    writer.close()

  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 0
-------------------------------

Train loss: 6.42745 | Training Acc: 488.95%


100%|██████████| 1/1 [07:08<00:00, 428.51s/it]

Test loss: 0.67016 | Test Acc: 58.57%
=> Saving checkpoint


100%|██████████| 1/1 [07:08<00:00, 428.51s/it]

Training time: 428.50s





In [130]:
# Evaluate model
torch.manual_seed(42)
def eval_model(model: torch.nn.Module, 
               data_loader: torch.utils.data.DataLoader, 
               loss_fn: torch.nn.Module, 
               accuracy_fn):
    """Returns a dictionary containing the results of model predicting on data_loader.

    Args:
        model (torch.nn.Module): A PyTorch model capable of making predictions on data_loader.
        data_loader (torch.utils.data.DataLoader): The target dataset to predict on.
        loss_fn (torch.nn.Module): The loss function of model.
        accuracy_fn: An accuracy function to compare the models predictions to the truth labels.

    Returns:
        (dict): Results of model making predictions on data_loader.
    """
    loss, acc = 0, 0
    model.eval()
    with torch.inference_mode():
        for X, y in data_loader:
            # Make predictions with the model
            X = X.float()
            y_pred = model(X)
            
            # Accumulate the loss and accuracy values per batch
            loss += loss_fn(y_pred, y)
            acc += accuracy_fn(y_true=y, 
                                y_pred=y_pred.argmax(dim=1)) # For accuracy, need the prediction labels (logits -> pred_prob -> pred_labels)
        
        # Scale loss and acc to find the average loss/acc per batch
        loss /= len(data_loader)
        acc /= len(data_loader)
        
    return {"model_name": model.__class__.__name__, # only works when model was created with a class
            "model_loss": loss.item(),
            "model_acc": acc}

In [131]:
# Calculate model results on test dataset
model_0_results = eval_model(model=model, data_loader=val_dataloader,
    loss_fn=loss_fn, accuracy_fn=accuracy_fn
)
model_0_results

{'model_name': 'DogOrCatModelV0',
 'model_loss': 0.668354332447052,
 'model_acc': 59.39503205128205}

In [135]:
import torchvision
from torchvision import transforms

image_path = "test_imgs/11.jpg"
image = torchvision.io.read_image(image_path).type(torch.float32) / 255.0

min_dim = min(image.shape[1], image.shape[2])  # image.shape is (C, H, W)
image_transform = transforms.Compose([
    transforms.CenterCrop(min_dim),  # Crop to the smallest dimension (1:1 ratio)
    transforms.Resize((64, 64))      # Resize to 64x64
])
image = image_transform(image)

with torch.inference_mode():
    custom_image_prediction = model(image.unsqueeze(0).to(device)) # Add batch dimension, make sure it's on the right device

"""
# show the image with the label and no grid
def show_image(image, label, classes):
    plt.imshow(image.permute(1, 2, 0))
    plt.title(classes[label])
    plt.axis("off")
    plt.show()

# Show the image with the prediction
show_image(image, custom_image_prediction.argmax(dim=1).item(), combined_dataset.image_folder.classes)
"""

custom_image_prediction


tensor([[1.0694, 0.4352]])