In [24]:
import numpy as np
import torch
import torchvision
import matplotlib.pyplot as plt

In [34]:
print("numpy version:", np.__version__)
print("torch version:", torch.__version__)
print("torchvision version:", torchvision.__version__)
DEVICE = "cuda" if torch.cuda.is_available() else 'cpu'
print("device:", DEVICE)

numpy version: 2.0.2
torch version: 2.9.0+cu126
torchvision version: 0.24.0+cu126
device: cuda


In [86]:
!pwd

/content/drive/MyDrive/cats-vs-dogs-cnn


In [95]:
from pathlib import Path

dataset_path = Path('/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/')

cat_dir = dataset_path / 'Cat'
dog_dir = dataset_path / 'Dog'

print(cat_dir)
print(dog_dir)

/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Cat
/content/drive/MyDrive/cats-vs-dogs-cnn/datasets/PetImages/Dog


In [96]:
print(dataset_path.exists())
print(cat_dir.exists())
print(dog_dir.exists())

True
True
True


In [98]:
# check file types in directories

from collections import Counter

file_types = Counter()

for img in cat_dir.rglob('*'):
  if img.is_file():
    file_types[img.suffix.lower()] += 1

for img in dog_dir.rglob('*'):
  if img.is_file():
    file_types[img.suffix.lower()] += 1

print(file_types)


Counter({'.jpg': 25000, '.db': 2})


In [99]:
# collecting image paths

def collect_image_paths(img_dir):
  img_paths = []
  for img in img_dir.rglob('*'):
    if img.is_file() and img.suffix.lower() == '.jpg':
      img_paths.append(img)
  return img_paths

cat_paths = collect_image_paths(cat_dir)
dog_paths = collect_image_paths(dog_dir)

print(len(cat_paths))
print(len(dog_paths))

12500
12500


In [108]:
# collect sample datasets
import random

SEED = 42
sample_size = 200
rng = random.Random(SEED)

cat_sample = rng.sample(cat_paths, sample_size)
dog_sample = rng.sample(dog_paths, sample_size)

print(len(cat_sample))
print(len(dog_sample))

200
200


In [117]:
# train set, validation set, test set

train_ratio = 0.75
validation_ratio = 0.15
test_ratio = 0.1

def split_data(data, train_ratio, validation_ratio, test_ratio, SEED=42):
  data_shuffeld = list(data)
  rng = random.Random(SEED)
  rng.shuffle(data_shuffeld)

  train_size = int(train_ratio * len(data_shuffeld))
  validation_size = int(validation_ratio * len(data_shuffeld))

  train_data = data_shuffeld[:train_size]
  validation_data = data_shuffeld[train_size:train_size+validation_size]
  test_data = data_shuffeld[train_size+validation_size:]

  return train_data, validation_data, test_data

cat_train_sample, cat_validation_sample, cat_test_sample = split_data(cat_sample, train_ratio, validation_ratio, test_ratio)
dog_train_sample, dog_validation_sample, dog_test_sample = split_data(dog_sample, train_ratio, validation_ratio, test_ratio)

print("CATS  :", len(cat_train_sample), len(cat_validation_sample), len(cat_test_sample))
print("DOGS  :", len(dog_train_sample), len(dog_validation_sample), len(dog_test_sample))


CATS  : 150 30 20
DOGS  : 150 30 20


In [118]:
train_paths = list(cat_train_sample) + list(dog_train_sample)
val_paths   = list(cat_validation_sample) + list(dog_validation_sample)
test_paths  = list(cat_test_sample) + list(dog_test_sample)

SEED = 42
rng = random.Random(SEED)

rng.shuffle(train_paths)
rng.shuffle(val_paths)
rng.shuffle(test_paths)

print("train/val/test sizes:", len(train_paths), len(val_paths), len(test_paths))

train/val/test sizes: 300 60 40


In [119]:
# Label mapping (explicit) ---
class_to_idx = {"cat": 0, "dog": 1}
idx_to_class = {v: k for k, v in class_to_idx.items()}

print(idx_to_class)

{0: 'cat', 1: 'dog'}


In [121]:
# Assumes your paths look like .../Cat/xxx.jpg or .../Dog/yyy.jpg
def label_from_path(p: Path) -> int:
    folder = p.parent.name.lower()  # ("Cat"/"Dog")
    if folder not in class_to_idx:
        raise ValueError(f"Unexpected class folder '{p.parent.name}' for file: {p}")
    return class_to_idx[folder]

for p in train_paths[:5]:
    p = Path(p)
    print(p.name, "->", label_from_path(p), "(", idx_to_class[label_from_path(p)], ")")

10103.jpg -> 0 ( cat )
10180.jpg -> 1 ( dog )
9794.jpg -> 0 ( cat )
2877.jpg -> 0 ( cat )
11457.jpg -> 0 ( cat )


SyntaxError: invalid syntax (ipython-input-1949294663.py, line 1)