In [11]:
import numpy as np
import torch
import torchvision
import matplotlib.pyplot as plt

In [None]:
# Report the library versions in use so results can be tied to an environment.
for lib_label, lib in (
    ("numpy version:", np),
    ("torch version:", torch),
    ("torchvision version:", torchvision),
):
    print(lib_label, lib.__version__)

# Use the GPU when torch reports one as available, otherwise fall back to CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = 'cpu'
print("device:", DEVICE)

numpy version: 2.0.2
torch version: 2.9.0+cu126
torchvision version: 0.24.0+cu126
device: cuda


In [12]:
from pathlib import Path
import sys

PROJECT_ROOT = Path("/content/drive/MyDrive/cats-vs-dogs-cnn")

# Put the project root at the front of the import search path so that
# "import src..." resolves. The membership check keeps re-runs of this cell
# from adding the same entry again.
root_entry = str(PROJECT_ROOT)
if root_entry not in sys.path:
    sys.path.insert(0, root_entry)

# Quick sanity report: working directory, whether the root is reachable,
# and which entry currently sits first on the import path.
print("cwd:", Path.cwd())
print("PROJECT_ROOT exists:", PROJECT_ROOT.exists())
print("sys.path[0]:", sys.path[0])

cwd: /content/drive/MyDrive/cats-vs-dogs-cnn
PROJECT_ROOT exists: True
sys.path[0]: /content/drive/MyDrive/cats-vs-dogs-cnn


In [13]:
# Collect the image paths under datasets/PetImages, keyed by class name.
# The import lives in this cell because "src" is only importable once
# PROJECT_ROOT has been placed on sys.path by the setup cell.
from src.utils.data_utils import get_paths_by_class

# Derive the dataset location from PROJECT_ROOT instead of repeating the
# absolute Drive path, so relocating the project needs a single edit.
DATASET_DIR = PROJECT_ROOT / "datasets" / "PetImages"

paths_by_class = get_paths_by_class(DATASET_DIR)

In [14]:
# Preview the first two collected cat image paths.
paths_by_class['cat'][:2]

[PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Cat/9104.jpg'),
 PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Cat/9126.jpg')]

In [15]:
# Preview the first two collected dog image paths.
paths_by_class['dog'][:2]

[PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Dog/9075.jpg'),
 PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Dog/9052.jpg')]

In [16]:
# Number of collected paths per class, shown as (cat, dog).
len(paths_by_class['cat']), len(paths_by_class['dog'])

(12500, 12500)

In [17]:
# Fix the seed before any sampling or shuffling happens.
# NOTE(review): presumably seeds the global RNGs (random / numpy / torch) --
# confirm in src/utils/seed_utils.py.
from src.utils.seed_utils import set_seed
set_seed(42)

In [20]:
from src.utils.sample_utils import sample_paths_by_class

In [21]:
# Draw a smaller working subset from the full path lists. With
# sample_size=200 the recorded run ends up with 200 paths for each class.
sample_paths = sample_paths_by_class(
    paths_by_class=paths_by_class,
    sample_size=200,
    seed=42
)

In [22]:
# Preview the first two sampled cat paths.
sample_paths['cat'][:2]

[PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Cat/11846.jpg'),
 PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Cat/8969.jpg')]

In [23]:
# Preview the first two sampled dog paths.
sample_paths['dog'][:2]

[PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Dog/11754.jpg'),
 PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Dog/8923.jpg')]

In [24]:
# Number of sampled paths per class, shown as (cat, dog).
len(sample_paths['cat']), len(sample_paths['dog'])

(200, 200)

In [94]:
from src.utils.split_utils import split_paths
from src.utils.combine_utils import combine_shuffle_paths

In [95]:
# One ratio tuple shared by both classes so they are always split the same
# way. In the recorded run it yields 150 / 20 / 30 paths under the
# 'train' / 'val' / 'test' keys for a 200-path class.
split_ratios = (0.75, 0.10, 0.15)

sample_cat = split_paths(sample_paths['cat'], split_ratios)
sample_dog = split_paths(sample_paths['dog'], split_ratios)

In [96]:
# Split sizes for the cat class, shown as (train, val, test).
len(sample_cat['train']), len(sample_cat['val']), len(sample_cat['test'])

(150, 20, 30)

In [97]:
# Split sizes for the dog class, shown as (train, val, test).
# All three lengths must come from sample_dog; reading val/test from
# sample_cat would hide a wrong dog split.
len(sample_dog['train']), len(sample_dog['val']), len(sample_dog['test'])

(150, 20, 30)

In [98]:
# Pair each class name with its own train/val/test split.
splits_by_class = dict(cat=sample_cat, dog=sample_dog)

# Merge the per-class splits with a fixed seed. In the recorded run this
# gives 300 / 40 / 60 paths with cat and dog images interleaved.
combined = combine_shuffle_paths(splits_by_class, seed=42)


In [99]:
# Print the size of each combined split, one per line, in train/val/test order.
for split_name in ("train", "val", "test"):
    print(len(combined[split_name]))

300
40
60


In [100]:
# Preview the first ten entries of the combined training split.
combined["train"][:10]

[PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Cat/5771.jpg'),
 PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Dog/9172.jpg'),
 PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Cat/1808.jpg'),
 PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Cat/9106.jpg'),
 PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Cat/2773.jpg'),
 PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Dog/3255.jpg'),
 PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Dog/10497.jpg'),
 PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Cat/7891.jpg'),
 PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Cat/10111.jpg'),
 PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Dog/8667.jpg')]

In [113]:
from src.utils.label_utils import build_labeled_splits

In [115]:
# Turn the combined splits into labelled samples. In the recorded preview
# each entry is a (path, label) tuple, with Cat paths carrying 0 and Dog
# paths carrying 1.
data_labeled = build_labeled_splits(combined)

In [116]:
# Preview the first five labelled training samples.
data_labeled['train'][:5]

[(PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Cat/5771.jpg'),
  0),
 (PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Dog/9172.jpg'),
  1),
 (PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Cat/1808.jpg'),
  0),
 (PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Cat/9106.jpg'),
  0),
 (PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Cat/2773.jpg'),
  0)]