<a href="https://colab.research.google.com/github/Gokul160704/Training/blob/main/chapter_appendix-tools-for-deep-learning/jupyter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
TRAIN_DIR = "/content/drive/MyDrive/Gender/Training"
VAL_DIR   = "/content/drive/MyDrive/Gender/Validation"
print(TRAIN_DIR, VAL_DIR)

/content/drive/MyDrive/Gender/Training /content/drive/MyDrive/Gender/Validation


In [2]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

IMG_SIZE = 224
BATCH = 16 if device.type == "cpu" else 32
NUM_WORKERS = 0 if device.type == "cpu" else 2

train_tf = transforms.Compose([
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

eval_tf = transforms.Compose([
    transforms.Resize(int(IMG_SIZE*1.15)),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

train_ds = datasets.ImageFolder(TRAIN_DIR, transform=train_tf)
val_ds   = datasets.ImageFolder(VAL_DIR,   transform=eval_tf)

# sanity check: class order must match across splits
assert train_ds.classes == val_ds.classes, f"class mismatch: {train_ds.classes} vs {val_ds.classes}"
print("Classes:", train_ds.classes)
print("Train images:", len(train_ds), "| Val images:", len(val_ds))

train_loader = DataLoader(train_ds, batch_size=BATCH, shuffle=True,
                          num_workers=NUM_WORKERS, pin_memory=(device.type=="cuda"))
val_loader   = DataLoader(val_ds,   batch_size=BATCH, shuffle=False,
                          num_workers=NUM_WORKERS, pin_memory=(device.type=="cuda"))


Using device: cuda
Classes: ['female', 'male']
Train images: 6148 | Val images: 1084


In [3]:
from pathlib import Path
import shutil, random

TRAIN_DIR = Path("/content/drive/MyDrive/Gender/Training")
VAL_DIR   = Path("/content/drive/MyDrive/Gender/Validation")

valid_ext = {".jpg",".jpeg",".png",".webp",".jfif",".bmp",".tif",".tiff",".ppm",".pgm"}

def list_imgs(p: Path):
    return [x for x in p.iterdir() if x.is_file() and x.suffix.lower() in valid_ext]

def ensure_dir(p: Path):
    p.mkdir(parents=True, exist_ok=True)

# Make sure subfolders exist
for cls in ["male","female"]:
    ensure_dir(TRAIN_DIR/cls)
    ensure_dir(VAL_DIR/cls)

# Count current files
counts = {}
for split, root in [("train", TRAIN_DIR), ("val", VAL_DIR)]:
    for cls in ["male","female"]:
        counts[(split,cls)] = len(list_imgs(root/cls))

print("Before:")
print({k:v for k,v in counts.items()})

# If a validation class is empty, move 15% (or at least 100) from training
random.seed(42)
for cls in ["male","female"]:
    val_cls = VAL_DIR/cls
    train_cls = TRAIN_DIR/cls
    val_files = list_imgs(val_cls)
    train_files = list_imgs(train_cls)
    if len(val_files) == 0 and len(train_files) > 0:
        move_n = max(100, int(0.15 * len(train_files)))
        random.shuffle(train_files)
        to_move = train_files[:move_n]
        for p in to_move:
            shutil.move(str(p), str(val_cls/p.name))
        print(f"Moved {len(to_move)} {cls} images from Training → Validation.")

# Show new counts
for split, root in [("train", TRAIN_DIR), ("val", VAL_DIR)]:
    for cls in ["male","female"]:
        counts[(split,cls)] = len(list_imgs(root/cls))

print("After:")
print({k:v for k,v in counts.items()})


Before:
{('train', 'male'): 3176, ('train', 'female'): 2972, ('val', 'male'): 560, ('val', 'female'): 524}
After:
{('train', 'male'): 3176, ('train', 'female'): 2972, ('val', 'male'): 560, ('val', 'female'): 524}


In [4]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

IMG_SIZE = 224
BATCH = 16 if device.type == "cpu" else 32
NUM_WORKERS = 0 if device.type == "cpu" else 2

train_tf = transforms.Compose([
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

eval_tf = transforms.Compose([
    transforms.Resize(int(IMG_SIZE*1.15)),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

def valid_file(x):
    x=x.lower()
    return x.endswith((".jpg",".jpeg",".png",".webp",".jfif",".bmp",".tif",".tiff",".ppm",".pgm"))

train_ds = datasets.ImageFolder(TRAIN_DIR, transform=train_tf, is_valid_file=valid_file)
val_ds   = datasets.ImageFolder(VAL_DIR,   transform=eval_tf,   is_valid_file=valid_file)

assert train_ds.classes == val_ds.classes, f"class mismatch: {train_ds.classes} vs {val_ds.classes}"
print("Classes:", train_ds.classes)
print("Train images:", len(train_ds), "| Val images:", len(val_ds))

train_loader = DataLoader(train_ds, batch_size=BATCH, shuffle=True,
                          num_workers=NUM_WORKERS, pin_memory=(device.type=="cuda"))
val_loader   = DataLoader(val_ds,   batch_size=BATCH, shuffle=False,
                          num_workers=NUM_WORKERS, pin_memory=(device.type=="cuda"))



Using device: cuda
Classes: ['female', 'male']
Train images: 6148 | Val images: 1084


In [5]:
import torch, torch.nn as nn, torch.optim as optim
from torchvision.models import mobilenet_v3_large
from tqdm.auto import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model
model = mobilenet_v3_large(weights="DEFAULT")
in_features = model.classifier[-1].in_features
model.classifier[-1] = nn.Linear(in_features, 2)        # ['female','male']
model = model.to(device)

# Loss/optim/scheduler
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=3e-4, weight_decay=0.05)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

EPOCHS = 12
best_val_acc, best_path = 0.0, "/content/gender_mobilenetv3.pth"
scaler = torch.cuda.amp.GradScaler(enabled=(device.type=="cuda"))

def acc(logits, y): return (logits.argmax(1)==y).float().mean().item()

for epoch in range(1, EPOCHS+1):
    # ---- train ----
    model.train()
    tr_loss = tr_acc = 0.0
    for xb, yb in tqdm(train_loader, desc=f"Epoch {epoch}/{EPOCHS} • train"):
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=(device.type=="cuda")):
            logits = model(xb)
            loss = criterion(logits, yb)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        tr_loss += loss.item()*xb.size(0)
        tr_acc  += acc(logits.detach(), yb)*xb.size(0)

    # ---- validate ----
    model.eval()
    val_loss = val_acc = 0.0
    with torch.no_grad(), torch.cuda.amp.autocast(enabled=(device.type=="cuda")):
        for xb, yb in tqdm(val_loader, desc=f"Epoch {epoch}/{EPOCHS} • val"):
            xb, yb = xb.to(device), yb.to(device)
            logits = model(xb)
            loss = criterion(logits, yb)
            val_loss += loss.item()*xb.size(0)
            val_acc  += acc(logits, yb)*xb.size(0)

    tr_loss /= len(train_ds); tr_acc /= len(train_ds)
    val_loss /= len(val_ds);   val_acc /= len(val_ds)
    scheduler.step()
    print(f"Epoch {epoch}: train {tr_loss:.4f}/{tr_acc:.4f} | val {val_loss:.4f}/{val_acc:.4f}")

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save({"model": model.state_dict(), "classes": train_ds.classes}, best_path)
        print(f"✅ Saved best to {best_path} (val_acc={val_acc:.4f})")



  scaler = torch.cuda.amp.GradScaler(enabled=(device.type=="cuda"))


Epoch 1/12 • train:   0%|          | 0/193 [00:00<?, ?it/s]

  with torch.cuda.amp.autocast(enabled=(device.type=="cuda")):
  with torch.no_grad(), torch.cuda.amp.autocast(enabled=(device.type=="cuda")):


Epoch 1/12 • val:   0%|          | 0/34 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c7dc0e28360>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1647, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c7dc0e28360>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 16

Epoch 1: train 0.2099/0.9188 | val 0.1691/0.9428
✅ Saved best to /content/gender_mobilenetv3.pth (val_acc=0.9428)


Epoch 2/12 • train:   0%|          | 0/193 [00:00<?, ?it/s]

Epoch 2/12 • val:   0%|          | 0/34 [00:00<?, ?it/s]

Epoch 2: train 0.1168/0.9571 | val 0.1311/0.9493
✅ Saved best to /content/gender_mobilenetv3.pth (val_acc=0.9493)


Epoch 3/12 • train:   0%|          | 0/193 [00:00<?, ?it/s]

Epoch 3/12 • val:   0%|          | 0/34 [00:00<?, ?it/s]

Epoch 3: train 0.0754/0.9727 | val 0.1606/0.9345


Epoch 4/12 • train:   0%|          | 0/193 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c7dc0e28360>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1647, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c7dc0e28360>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 16

Epoch 4/12 • val:   0%|          | 0/34 [00:00<?, ?it/s]

Epoch 4: train 0.0667/0.9766 | val 0.1172/0.9566
✅ Saved best to /content/gender_mobilenetv3.pth (val_acc=0.9566)


Epoch 5/12 • train:   0%|          | 0/193 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c7dc0e28360>
Traceback (most recent call last):
Exception ignored in:   File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    <function _MultiProcessingDataLoaderIter.__del__ at 0x7c7dc0e28360>self._shutdown_workers()

Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1647, in _shutdown_workers
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
        if w.is_alive():
self._shutdown_workers() 
   File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1647, in _shutdown_workers
       if w.is_alive():
    ^ ^^  ^ ^^ ^^^^^^^^^^
^  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
^^    assert self._parent_pid == os.getpid(), 'can only test a child process'^^
^^^
  File "/usr/lib/python

Epoch 5/12 • val:   0%|          | 0/34 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c7dc0e28360>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1647, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Exception ignored in: AssertionError<function _MultiProcessingDataLoaderIter.__del__ at 0x7c7dc0e28360>: can only test a child process
Traceback (most recent call last):

  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()Exception ignored in: 
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/d

Epoch 5: train 0.0413/0.9852 | val 0.1437/0.9576
✅ Saved best to /content/gender_mobilenetv3.pth (val_acc=0.9576)


Epoch 6/12 • train:   0%|          | 0/193 [00:00<?, ?it/s]

Epoch 6/12 • val:   0%|          | 0/34 [00:00<?, ?it/s]

Epoch 6: train 0.0361/0.9868 | val 0.1084/0.9649
✅ Saved best to /content/gender_mobilenetv3.pth (val_acc=0.9649)


Epoch 7/12 • train:   0%|          | 0/193 [00:00<?, ?it/s]

Epoch 7/12 • val:   0%|          | 0/34 [00:00<?, ?it/s]

Epoch 7: train 0.0243/0.9912 | val 0.1164/0.9677
✅ Saved best to /content/gender_mobilenetv3.pth (val_acc=0.9677)


Epoch 8/12 • train:   0%|          | 0/193 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c7dc0e28360>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1647, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c7dc0e28360>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    Exception ignored in: self._shutdown_workers()
<function _MultiProcessingDataLoaderIter.__del__ at 0x7c7dc0e28360

Epoch 8/12 • val:   0%|          | 0/34 [00:00<?, ?it/s]

Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c7dc0e28360><function _MultiProcessingDataLoaderIter.__del__ at 0x7c7dc0e28360>

Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    Exception ignored in:     self._shutdown_workers()<function _MultiProcessingDataLoaderIter.__del__ at 0x7c7dc0e28360>self._shutdown_workers()


Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1647, in _shutdown_workers
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1647, in _shutdown_workers
            self._shutdown_worke

Epoch 8: train 0.0177/0.9935 | val 0.1283/0.9659


Epoch 9/12 • train:   0%|          | 0/193 [00:00<?, ?it/s]

Epoch 9/12 • val:   0%|          | 0/34 [00:00<?, ?it/s]

Epoch 9: train 0.0142/0.9954 | val 0.1182/0.9686
✅ Saved best to /content/gender_mobilenetv3.pth (val_acc=0.9686)


Epoch 10/12 • train:   0%|          | 0/193 [00:00<?, ?it/s]

Epoch 10/12 • val:   0%|          | 0/34 [00:00<?, ?it/s]

Epoch 10: train 0.0066/0.9987 | val 0.1177/0.9714
✅ Saved best to /content/gender_mobilenetv3.pth (val_acc=0.9714)


Epoch 11/12 • train:   0%|          | 0/193 [00:00<?, ?it/s]

Epoch 11/12 • val:   0%|          | 0/34 [00:00<?, ?it/s]

Epoch 11: train 0.0076/0.9979 | val 0.1186/0.9714


Epoch 12/12 • train:   0%|          | 0/193 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c7dc0e28360>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1647, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c7dc0e28360>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 16

Epoch 12/12 • val:   0%|          | 0/34 [00:00<?, ?it/s]

Epoch 12: train 0.0097/0.9967 | val 0.1187/0.9742
✅ Saved best to /content/gender_mobilenetv3.pth (val_acc=0.9742)


In [10]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
