In [27]:
import numpy as np
import torch
import torchvision
import matplotlib.pyplot as plt

In [28]:
# Record the library versions this notebook was executed with.
print("numpy version:", np.__version__)
print("torch version:", torch.__version__)
print("torchvision version:", torchvision.__version__)

# Use the GPU when torch reports CUDA as available, otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("device:", DEVICE)

numpy version: 2.0.2
torch version: 2.9.0+cu126
torchvision version: 0.24.0+cu126
device: cuda


In [78]:
from pathlib import Path
import sys

PROJECT_ROOT = Path("/content/drive/MyDrive/cats-vs-dogs-cnn")

# Put the project root at the front of the import search path so that
# "import src..." resolves; skipped when the entry is already present, which
# keeps re-running this cell from adding duplicates.
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path[:0] = [str(PROJECT_ROOT)]

# Sanity output: working directory, whether the root exists, and the first
# import path entry.
print("cwd:", Path.cwd())
print("PROJECT_ROOT exists:", PROJECT_ROOT.exists())
print("sys.path[0]:", sys.path[0])

cwd: /content/drive/MyDrive/cats-vs-dogs-cnn/src/utils
PROJECT_ROOT exists: True
sys.path[0]: /content/drive/MyDrive/cats-vs-dogs-cnn


In [99]:
# Smoke-test the dataset helper at notebook level: collect the image paths
# found under the PetImages folder, grouped by class.
from src.utils.data_utils import get_paths_by_class

# Build the dataset location from PROJECT_ROOT instead of repeating the
# absolute Drive path; the resulting Path is equal to the previous literal.
paths_by_class = get_paths_by_class(PROJECT_ROOT / "datasets" / "PetImages")

In [100]:
# Peek at the first two entries stored under the 'cat' key.
paths_by_class['cat'][:2]

[PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Cat/9104.jpg'),
 PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Cat/9126.jpg')]

In [101]:
# Peek at the first two entries stored under the 'dog' key.
paths_by_class['dog'][:2]

[PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Dog/9075.jpg'),
 PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Dog/9052.jpg')]

In [102]:
# Number of paths collected per class, as a (cat, dog) tuple.
len(paths_by_class['cat']), len(paths_by_class['dog'])

(12500, 12500)

In [94]:
# Call the project's seeding helper with a fixed value of 42.
# NOTE(review): presumably this seeds the global RNGs for reproducibility;
# which libraries it covers is defined in src/utils/seed_utils.py — confirm.
from src.utils.seed_utils import set_seed
set_seed(42)

In [121]:
from pathlib import Path
import random
from typing import Optional



def sample_list(
    paths: list[Path],
    sample_size: Optional[int],
    seed: int = 42
) -> list[Path]:
    """Return a reproducible random subset of ``paths``.

    Args:
        paths: Non-empty list to draw from.
        sample_size: Number of items to draw without replacement, or ``None``
            to keep every item.
        seed: Seed for the private random generator used for the draw.

    Returns:
        A new list. With ``sample_size=None`` it is a shallow copy of
        ``paths`` in the original order; otherwise it holds ``sample_size``
        items chosen by ``random.Random(seed).sample``.

    Raises:
        TypeError: If ``paths`` is not a list, or ``sample_size`` is neither
            ``None`` nor a (non-bool) int.
        ValueError: If ``paths`` is empty, or ``sample_size`` is negative or
            larger than ``len(paths)``.
    """
    if not isinstance(paths, list):
        raise TypeError("paths must be a list of Path objects")

    if not paths:
        raise ValueError("paths list is empty")

    if sample_size is None:
        # Hand back a copy rather than the caller's own list, so mutating the
        # result cannot change the input; the sampled branch below already
        # returns a fresh list.
        return list(paths)

    # bool is a subclass of int, so True/False would otherwise be accepted
    # and silently treated as 1/0.
    if isinstance(sample_size, bool) or not isinstance(sample_size, int):
        raise TypeError(f"sample_size must be int or None; got {type(sample_size)}")

    if sample_size < 0:
        raise ValueError(f"sample_size must be non-negative; got {sample_size}")

    if sample_size > len(paths):
        raise ValueError(
            f"sample_size ({sample_size}) exceeds number of paths ({len(paths)})"
        )

    # A dedicated generator keeps the draw reproducible for a given seed and
    # leaves the global ``random`` module state untouched.
    rng = random.Random(seed)
    return rng.sample(paths, sample_size)


def sample_paths_by_class(
    paths_by_class: dict[str, list[Path]],
    sample_size: Optional[int],
    seed: int = 42
) -> dict[str, list[Path]]:
    """Apply ``sample_list`` to every class in ``paths_by_class``.

    Args:
        paths_by_class: Mapping from class name to its list of paths.
        sample_size: Per-class sample size, forwarded unchanged to
            ``sample_list``.
        seed: Seed forwarded unchanged to ``sample_list`` for every class.

    Returns:
        A new dict with the same keys, in the same order, whose values are
        the results of ``sample_list`` for each class.

    Raises:
        TypeError: If ``paths_by_class`` is not a dict.
        Any exception raised by ``sample_list`` for an individual class.
    """
    if not isinstance(paths_by_class, dict):
        raise TypeError("paths_by_class must be a dict")

    # NOTE(review): the same seed is used for each class, so classes whose
    # lists have equal length are sampled at the same positions — confirm
    # this is intended.
    return {
        label: sample_list(paths=class_paths, sample_size=sample_size, seed=seed)
        for label, class_paths in paths_by_class.items()
    }

In [122]:
# Draw 200 paths per class with a fixed seed.
sample_paths = sample_paths_by_class(paths_by_class, sample_size=200, seed=42)

In [124]:
# Peek at the first two sampled entries under the 'cat' key.
sample_paths['cat'][:2]

[PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Cat/11846.jpg'),
 PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Cat/8969.jpg')]

In [125]:
# Peek at the first two sampled entries under the 'dog' key.
sample_paths['dog'][:2]

[PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Dog/11754.jpg'),
 PosixPath('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Dog/8923.jpg')]

In [126]:
# Sampled path count per class, as a (cat, dog) tuple.
len(sample_paths['cat']), len(sample_paths['dog'])

(200, 200)

In [132]:
# Set the git author identity used for commits made from this runtime.
# NOTE(review): --global writes to the runtime user's git configuration, and
# the address below is stored in the notebook in plain text — confirm that is
# acceptable before sharing the notebook.
!git config --global user.email "sh.sevada@gmail.com"
!git config --global user.name "Sevada Shaghoolian"

In [138]:
# Show the working-tree state of the repository containing the current directory.
!git status

On branch main
Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	modified:   ../../notebooks/00_project_setup.ipynb

no changes added to commit (use "git add" and/or "git commit -a")
