In [3]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset, WeightedRandomSampler
import numpy as np
import os


# Root folder holding the `train` (and optionally `val`) image directories.
# Set the PLANT_DATA_ROOT environment variable to run this notebook on another
# machine; the default keeps the original author's local layout.
DATA_ROOT = os.environ.get(
    "PLANT_DATA_ROOT",
    r"C:\Users\alann\OneDrive\Desktop\plantdisease\dataset\images",
)

TRAIN_DIR = os.path.join(DATA_ROOT, "train")

# May not exist; the loading cell then falls back to splitting TRAIN_DIR.
VAL_DIR = os.path.join(DATA_ROOT, "val")


# Per-channel mean / std used to normalize image tensors in both pipelines.
# NOTE(review): presumably chosen to match an ImageNet-pretrained backbone —
# confirm against the model cell.
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]


# Training pipeline: fixed-size resize, random geometric / photometric
# augmentation, then tensor conversion and normalization.
_train_steps = [
    transforms.Resize(size=(224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
]
train_transforms = transforms.Compose(_train_steps)


# Validation pipeline: deterministic — same resize and normalization as
# training, with no random augmentation.
_val_steps = [
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
]
val_transforms = transforms.Compose(_val_steps)

In [4]:
# Load the dataset
# Fixed seed so the fallback 80/20 split selects the same images on every
# Restart & Run All (the split was previously different on each run).
SPLIT_SEED = 42

if os.path.isdir(VAL_DIR):
    # A dedicated validation folder exists: use both folders as-is.
    train_dataset = datasets.ImageFolder(TRAIN_DIR, transform=train_transforms)
    val_dataset = datasets.ImageFolder(VAL_DIR, transform=val_transforms)
else:
    # Split training data 80/20 if no val folder exists
    full_dataset = datasets.ImageFolder(TRAIN_DIR)
    train_size = int(0.8 * len(full_dataset))
    val_size = len(full_dataset) - train_size

    # Shuffle the sample positions once with a seeded generator and keep them
    # as plain Python lists, so `Subset.indices` is an ordinary index sequence
    # for anything that inspects it later (e.g. the sampler).
    split_generator = torch.Generator().manual_seed(SPLIT_SEED)
    shuffled_positions = torch.randperm(len(full_dataset), generator=split_generator).tolist()
    train_indices = shuffled_positions[:train_size]
    val_indices = shuffled_positions[train_size:]

    # Create datasets with specific transforms: two ImageFolder views of the
    # same directory so augmentation is applied to the training subset only.
    train_dataset = Subset(datasets.ImageFolder(TRAIN_DIR, transform=train_transforms), train_indices)
    val_dataset = Subset(datasets.ImageFolder(TRAIN_DIR, transform=val_transforms), val_indices)

print(f"Train samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")

Train samples: 43444
Validation samples: 10861


In [5]:
def _collect_targets(dataset):
    """Return the labels of ``dataset`` as a 1-D NumPy array, following ``Subset`` wrappers."""
    if isinstance(dataset, Subset):
        parent_targets = _collect_targets(dataset.dataset)
        # `indices` may be a list, a tensor, or even another Subset (what
        # random_split over a range returns); iterating normalizes all of them.
        positions = np.fromiter((int(i) for i in dataset.indices), dtype=np.int64)
        return parent_targets[positions]
    return np.asarray(dataset.targets).ravel()


def create_sampler(dataset):
    """Build a class-balancing ``WeightedRandomSampler`` for ``dataset``.

    Each sample is weighted by the inverse of its class frequency within
    ``dataset``, so every class present is drawn with roughly equal
    probability.

    Args:
        dataset: An object exposing ``.targets`` (e.g. ``ImageFolder``), or a
            (possibly nested) ``Subset`` of such an object.

    Returns:
        A ``WeightedRandomSampler`` that draws ``len(dataset)`` samples per
        epoch, with replacement.

    Raises:
        ValueError: If ``dataset`` contains no samples.
    """
    targets = _collect_targets(dataset)
    if targets.size == 0:
        raise ValueError("create_sampler() requires a non-empty dataset")

    # `class_index` maps every sample to its row in `class_count`, so the
    # lookup stays correct when label ids are not contiguous (for example when
    # a class is absent from this subset). Indexing the counts by the raw
    # label value would mis-weight samples or raise IndexError in that case.
    _, class_index, class_count = np.unique(
        targets, return_inverse=True, return_counts=True
    )
    samples_weight = torch.from_numpy(1.0 / class_count[class_index.ravel()])

    return WeightedRandomSampler(
        samples_weight.double(),
        num_samples=len(samples_weight),
        replacement=True,
    )

# Only the training set gets a weighted sampler; it is passed to the training
# DataLoader via `sampler=` in the next cell.
train_sampler = create_sampler(train_dataset)

In [6]:

# Loader settings shared by the training and validation loaders.
BATCH_SIZE = 32
NUM_WORKERS = 2
# Page-locked host memory only helps host-to-GPU copies; on a CPU-only machine
# PyTorch warns about it on every new iterator, so enable it only with CUDA.
PIN_MEMORY = torch.cuda.is_available()

# Training batches are drawn by the class-balancing sampler. `shuffle` must be
# left unset because DataLoader rejects `shuffle=True` together with `sampler`.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    sampler=train_sampler,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
)

# Validation iterates every sample once, in a fixed order.
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
)

# Smoke test: pull one batch to confirm the pipeline yields tensors of the
# expected shape before starting a long training run.
images, labels = next(iter(train_loader))
print(f"Batch shape: {images.shape}")

  super().__init__(loader)


Batch shape: torch.Size([32, 3, 224, 224])
