In [None]:
# NOTE(review): DATA_DIR is reassigned further down (to the split output
# directory) before any visible cell reads it, so this value looks like a
# leftover from another environment -- confirm and remove if unused.
DATA_DIR = "/data/input/hagrid-sample-30k-384p"

In [None]:
import os
import random
import shutil
import torch
import torchvision
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import numpy as np

In [None]:
# Input folder (one sub-folder per class) and the output folder that will
# receive the train/val/test copy of the data.
SOURCE_DIR = "/kaggle/input/hagrid-sample-30k-384p/hagrid-sample-30k-384p/hagrid_30k"
TARGET_DIR = "/kaggle/working/hagrid_split"

# Fraction of each class's images assigned to each split.
TRAIN_RATIO = 0.7
VAL_RATIO = 0.15
TEST_RATIO = 0.15

# The split cell only reads TRAIN_RATIO and VAL_RATIO and gives whatever is
# left to the test split, so TEST_RATIO is informational. Fail fast if the
# three values stop describing the same partition.
assert abs(TRAIN_RATIO + VAL_RATIO + TEST_RATIO - 1.0) < 1e-9, (
    "TRAIN_RATIO + VAL_RATIO + TEST_RATIO must sum to 1"
)

# Seed Python's global `random` module so later uses of it are repeatable
# after a kernel restart.
random.seed(42)

In [None]:
# Create TARGET_DIR/<split>/<class>/ for every class folder in SOURCE_DIR.
for split in ["train", "val", "test"]:
    for cls in os.listdir(SOURCE_DIR):
        # os.listdir also returns plain files (metadata, hidden files, ...).
        # Only real sub-directories are classes; creating a folder for a stray
        # file would add a bogus class, because ImageFolder treats every
        # sub-directory of its root as one.
        if not os.path.isdir(os.path.join(SOURCE_DIR, cls)):
            continue
        os.makedirs(os.path.join(TARGET_DIR, split, cls), exist_ok=True)

In [None]:
# Split every class folder into train/val/test and copy the files into
# TARGET_DIR/<split>/<class>/.

# A dedicated, locally seeded generator makes this cell idempotent: re-running
# it assigns every image to the same split again. With the shared global RNG a
# second run would draw a different shuffle and copy some images into a second
# split on top of the first run's output.
split_rng = random.Random(42)

# os.listdir returns entries in arbitrary order and may include plain files;
# keep only directories and sort them so the result does not depend on the
# filesystem.
class_dirs = sorted(
    entry for entry in os.listdir(SOURCE_DIR)
    if os.path.isdir(os.path.join(SOURCE_DIR, entry))
)

for cls in class_dirs:
    cls_path = os.path.join(SOURCE_DIR, cls)

    # Sort before shuffling: a seeded shuffle is only reproducible when it
    # starts from a deterministic order.
    images = sorted(
        f for f in os.listdir(cls_path)
        if f.lower().endswith((".jpg", ".png", ".jpeg"))
    )
    split_rng.shuffle(images)

    # Cut points: the first TRAIN_RATIO share goes to train, the next
    # VAL_RATIO share to val, and the remainder to test.
    total = len(images)
    train_end = int(total * TRAIN_RATIO)
    val_end = int(total * (TRAIN_RATIO + VAL_RATIO))

    train_imgs = images[:train_end]
    val_imgs = images[train_end:val_end]
    test_imgs = images[val_end:]

    for split, split_imgs in (
        ("train", train_imgs),
        ("val", val_imgs),
        ("test", test_imgs),
    ):
        dest_dir = os.path.join(TARGET_DIR, split, cls)
        # Create the destination here as well so this cell does not depend on
        # the directory-creation cell having been run first.
        os.makedirs(dest_dir, exist_ok=True)
        for img in split_imgs:
            shutil.copy(os.path.join(cls_path, img), os.path.join(dest_dir, img))

    print(f"{cls}: train={len(train_imgs)}, val={len(val_imgs)}, test={len(test_imgs)}")

In [None]:
def count_images(path):
    """Return the total number of files found anywhere under ``path``.

    The tree rooted at ``path`` is traversed with ``os.walk`` and the file
    count of every visited directory is added up. Every file is counted,
    whatever its extension; directories themselves are not counted.
    """
    file_total = 0
    for _dirpath, _dirnames, filenames in os.walk(path):
        file_total += len(filenames)
    return file_total

# Report how many files ended up in each split directory.
for caption, split_name in (
    ("Train images:", "train"),
    ("Validation images:", "val"),
    ("Test images:", "test"),
):
    print(caption, count_images(os.path.join(TARGET_DIR, split_name)))

In [None]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [None]:
from torchvision import datasets

In [None]:
# Root of the train/val/test folder tree read by the ImageFolder datasets
# below. This is the same literal path as TARGET_DIR in the split section.
DATA_DIR = "/kaggle/working/hagrid_split"

# Passed to every DataLoader as batch_size / num_workers.
BATCH_SIZE = 16
NUM_WORKERS = 2
# Images are resized to IMG_SIZE x IMG_SIZE by both transform pipelines.
IMG_SIZE = 224

In [None]:
# Training-time preprocessing: resize to a fixed square, apply light random
# augmentation (horizontal flip, rotation, brightness/contrast jitter), then
# convert to a tensor and normalise each channel.
train_transforms = transforms.Compose(
    [
        transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=15),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
        transforms.ToTensor(),
        # These mean/std values match the ImageNet statistics that torchvision
        # documents for its pretrained models.
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

In [None]:
# Evaluation-time preprocessing: the same resize and normalisation as the
# training pipeline, with no random augmentation.
val_test_transforms = transforms.Compose(
    [
        transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

In [None]:
# One ImageFolder per split. Only the training split uses the augmenting
# transform; validation and test share the deterministic pipeline.
train_dataset = datasets.ImageFolder(f"{DATA_DIR}/train", transform=train_transforms)
val_dataset = datasets.ImageFolder(f"{DATA_DIR}/val", transform=val_test_transforms)
test_dataset = datasets.ImageFolder(f"{DATA_DIR}/test", transform=val_test_transforms)

In [None]:
# Number of samples in the train, val and test datasets (in that order),
# followed by the number of classes and their names as seen by the train split.
for split_dataset in (train_dataset, val_dataset, test_dataset):
    print(len(split_dataset))
print(len(train_dataset.classes))
print(train_dataset.classes)

In [None]:
# Settings common to all three loaders.
loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)

# Only the training loader shuffles; validation and test keep dataset order.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

In [None]:
# Sanity check: show the class list, then pull a single batch from the
# training loader and print the shapes of its image and label tensors.
print("Classes:", train_dataset.classes)
print("Number of classes:", len(train_dataset.classes))

# NOTE: `images` is rebound here; in the split section the same name held a
# list of file names.
# Expected image batch shape is presumably (BATCH_SIZE, 3, IMG_SIZE, IMG_SIZE)
# -- TODO confirm against the printed output.
images, labels = next(iter(train_loader))
print("Batch shape:", images.shape)
print("Labels shape:", labels.shape)