### Data Normalization

In [None]:

import os
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image

# Dataset locations: one folder per split.
AUGMENT_DIR = r"C:\Users\USER\Documents\Thesis Dataset\Processed Dataset\train_augmented"
VAL_DIR     = r"C:\Users\USER\Documents\Thesis Dataset\Processed Dataset\val"
TEST_DIR    = r"C:\Users\USER\Documents\Thesis Dataset\Processed Dataset\test"

# File suffixes (compared against the lower-cased file name) that count as images.
IMG_EXTENSIONS = (".jpg", ".jpeg", ".png")

# Normalization parameters: per-channel mean/std and the square input size.
IMG_SIZE = 224
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Data transforms
# Validation/test pipeline: deterministic resize -> tensor -> normalize.
val_test_transforms = transforms.Compose([
    transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

# Training pipeline: same resize/normalize steps with random rotation, flip and
# colour jitter applied in between (before conversion to a tensor).
train_transforms = transforms.Compose([
    transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
    transforms.RandomRotation(degrees=15),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(
        brightness=0.2,
        contrast=0.2,
        saturation=0.2,
        hue=0.1,
    ),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

# Dataset that indexes images stored as <data_dir>/<crop>/<disease>/<image file>
class PlantDiseaseDataset(Dataset):
    """Image dataset read from a two-level ``crop/disease`` folder tree.

    Every ``<data_dir>/<crop>/<disease>`` directory becomes one class named
    ``"<crop>_<disease>"``. Files directly inside such a directory whose
    lower-cased name ends with one of the accepted extensions become samples.

    Args:
        data_dir: Root directory of one split.
        transform: Optional callable applied to the RGB PIL image in
            ``__getitem__``.
        class_to_idx: Optional existing ``{class name: index}`` mapping to
            reuse (for example the training split's mapping) so that labels
            agree across splits. When omitted, indices are assigned in sorted
            folder order starting at 0.
        extensions: Optional suffix or iterable of suffixes to accept.
            Defaults to the module-level ``IMG_EXTENSIONS``.

    Raises:
        ValueError: If ``class_to_idx`` is supplied and a class folder is
            found that is not part of it.
    """

    def __init__(self, data_dir, transform=None, class_to_idx=None, extensions=None):
        self.data_dir = data_dir
        self.transform = transform
        self.images = []
        self.labels = []

        # Copy a supplied mapping so the caller's dict is never mutated.
        fixed_mapping = class_to_idx is not None
        self.class_to_idx = dict(class_to_idx) if fixed_mapping else {}

        if extensions is None:
            extensions = IMG_EXTENSIONS
        elif isinstance(extensions, str):
            extensions = (extensions,)
        # str.endswith needs a tuple; file names are lower-cased before matching.
        extensions = tuple(ext.lower() for ext in extensions)

        for crop_type in sorted(os.listdir(data_dir)):
            crop_path = os.path.join(data_dir, crop_type)
            if not os.path.isdir(crop_path):
                continue

            for disease_class in sorted(os.listdir(crop_path)):
                class_path = os.path.join(crop_path, disease_class)
                if not os.path.isdir(class_path):
                    continue

                full_class_name = f"{crop_type}_{disease_class}"
                if full_class_name not in self.class_to_idx:
                    if fixed_mapping:
                        raise ValueError(
                            f"Class '{full_class_name}' found in {data_dir!r} "
                            "is missing from the supplied class_to_idx mapping"
                        )
                    # Next free index == number of classes registered so far.
                    self.class_to_idx[full_class_name] = len(self.class_to_idx)
                label = self.class_to_idx[full_class_name]

                # os.listdir returns entries in arbitrary order; sort so the
                # sample order is reproducible across runs and machines.
                for img_file in sorted(os.listdir(class_path)):
                    if img_file.lower().endswith(extensions):
                        self.images.append(os.path.join(class_path, img_file))
                        self.labels.append(label)

        print(f"Dataset loaded: {len(self.images)} images, {len(self.class_to_idx)} classes")
        print(f"Classes: {list(self.class_to_idx.keys())}")

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is opened, converted to RGB and, if a transform was given,
        passed through it. ``label`` is the integer class index.
        """
        img_path = self.images[idx]
        label = self.labels[idx]
        # convert() loads the pixel data and returns a new image, so the file
        # handle can be closed as soon as the with-block ends.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label

# Normalization Phase
print("🔄 Creating normalized datasets...")

train_dataset = PlantDiseaseDataset(AUGMENT_DIR, transform=train_transforms)
val_dataset = PlantDiseaseDataset(VAL_DIR, transform=val_test_transforms)
test_dataset = PlantDiseaseDataset(TEST_DIR, transform=val_test_transforms)

# Each split derives its class -> index mapping from its own folder names.
# If a split is missing a class folder (or has an extra one), the same disease
# can end up with a different integer label than in training, which would
# silently corrupt evaluation. Fail loudly instead.
for split_name, split_dataset in (("validation", val_dataset), ("test", test_dataset)):
    mismatched = sorted(
        name
        for name, idx in split_dataset.class_to_idx.items()
        if train_dataset.class_to_idx.get(name) != idx
    )
    if mismatched:
        raise ValueError(
            f"{split_name} split labels disagree with the training split for classes: {mismatched}"
        )

BATCH_SIZE = 32

# On Windows, DataLoader workers are separate spawned processes that must
# re-import the Dataset class. A class defined in a notebook cell lives in
# __main__ and is not importable from the workers, so load in the main process
# there. Move the Dataset into a .py module to use workers on Windows.
NUM_WORKERS = 0 if os.name == "nt" else 4

# Pinned host memory only helps when batches are copied to a CUDA device.
PIN_MEMORY = torch.cuda.is_available()

loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
)

# Only the training loader is shuffled; validation/test keep a fixed order.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

print("✅ Normalization setup complete!")
print(f"Training samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")
print(f"Test samples: {len(test_dataset)}")
print(f"Number of classes: {len(train_dataset.class_to_idx)}")
print(f"Batch size: {BATCH_SIZE}")

# Verify whether the normalization works
def verify_normalization(loader=None):
    """Print summary statistics of the first batch produced by a loader.

    Reports the batch tensor shape, its min/max, and the per-channel mean and
    standard deviation (reduced over the batch, height and width dimensions).
    The statistics come from a single batch, so they only approximate the
    dataset-wide values.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches where
            ``images`` is a 4-D tensor. Defaults to the module-level
            ``train_loader``.

    Raises:
        ValueError: If the loader yields no batches.
    """
    if loader is None:
        loader = train_loader

    first_batch = next(iter(loader), None)
    if first_batch is None:
        raise ValueError("Cannot verify normalization: the loader produced no batches")
    images, _ = first_batch

    print("\n📊 Normalization Verification:")
    print(f"Image tensor shape: {images.shape}")
    print(f"Image tensor range: [{images.min():.3f}, {images.max():.3f}]")
    # Reduce over batch (0), height (2) and width (3) to get one value per channel.
    print(f"Mean per channel: {images.mean(dim=[0,2,3])}")
    print(f"Std per channel: {images.std(dim=[0,2,3])}")
    print("✅ If values are close to mean≈[0,0,0] and std≈[1,1,1], normalization is working!")

# Run the check once on a training batch so its statistics appear in the cell output.
verify_normalization()


🔄 Creating normalized datasets...
Dataset loaded: 66156 images, 18 classes
Classes: ['Banana_Leaf_Disease_Dataset_Bangladesh_Healthy', 'Banana_Leaf_Disease_Dataset_Bangladesh_Panama Disease', 'Banana_Leaf_Disease_Dataset_Bangladesh_Yellow and Black Sigatoka', 'Banana_Leaf_Disease_Dataset_Bangladesh_cordana', 'Banana_Leaf_Disease_Dataset_Bangladesh_pestalotiopsis', 'Coconut Tree Disease Dataset_Bud Root Dropping', 'Coconut Tree Disease Dataset_Bud Rot', 'Coconut Tree Disease Dataset_Gray Leaf Spot', 'Coconut Tree Disease Dataset_Healthy_Leaves', 'Coconut Tree Disease Dataset_Leaf Rot', 'Coconut Tree Disease Dataset_WCLWD_DryingofLeaflets', 'Coconut Tree Disease Dataset_WCLWD_Flaccidity', 'Coconut Tree Disease Dataset_WCLWD_Yellowing', 'Sugarcane Leaf Disease Dataset_Healthy', 'Sugarcane Leaf Disease Dataset_Mosaic', 'Sugarcane Leaf Disease Dataset_RedRot', 'Sugarcane Leaf Disease Dataset_Rust', 'Sugarcane Leaf Disease Dataset_Yellow']
Dataset loaded: 2365 images, 18 classes
Classes: ['B

### Train CNN-ViT