# Data Augmentation Pipeline for Computer Vision

This notebook demonstrates advanced data augmentation techniques for image classification on NVIDIA GPUs.

**Run in Google Colab:** [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Rishav-raj-github/End-to-End-Computer-Vision-Pipeline-EfficientNet-on-NVIDIA-GPUs/blob/main/colab_notebooks/01_Data_Augmentation_Colab.ipynb)

In [None]:
# Mount Google Drive (if needed)
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# Install required dependencies (-q keeps pip's output quiet).
# NOTE(review): timm, albumentations and tensorboard are not imported by the
# cells visible here -- presumably needed by later cells/notebooks; confirm.
!pip install -q torch torchvision timm albumentations tensorboard

In [None]:
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
from torchvision import transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader

# Report the runtime environment: library version, and the GPU name when
# a CUDA device is visible to PyTorch.
cuda_ready = torch.cuda.is_available()
print(f'PyTorch version: {torch.__version__}')
print(f'CUDA Available: {cuda_ready}')
if cuda_ready:
    print(f'GPU: {torch.cuda.get_device_name(0)}')

In [None]:
class DataAugmentationPipeline:
    """Build CIFAR-10 data loaders with train-time augmentation.

    Training images go through random flips, a small rotation, colour
    jitter, a random translation and RandAugment before being converted to
    tensors and normalised. Validation images are only converted and
    normalised. Mixup and CutMix operate on whole batches and are not
    applied by this class.

    Args:
        augmentation_strength: RandAugment magnitude. Values outside
            0..30 are clamped, because torchvision's RandAugment uses 31
            magnitude bins by default.
    """

    # Highest magnitude accepted by RandAugment with its default 31 bins.
    _MAX_MAGNITUDE = 30

    def __init__(self, augmentation_strength=9):
        # The caller's value is kept as given; clamping happens where the
        # transforms are built.
        self.augmentation_strength = augmentation_strength
        self.setup_transforms()

    def setup_transforms(self):
        """Create ``self.train_transforms`` and ``self.val_transforms``."""
        magnitude = int(
            min(max(self.augmentation_strength, 0), self._MAX_MAGNITUDE)
        )
        # (x - 0.5) / 0.5 per channel; both splits must share it so the
        # model sees the same value range at train and validation time.
        normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

        self.train_transforms = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.RandomRotation(15),
            transforms.ColorJitter(brightness=0.2, contrast=0.2),
            transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
            # Placed before ToTensor so it runs on the PIL image.
            transforms.RandAugment(num_ops=2, magnitude=magnitude),
            transforms.ToTensor(),
            normalize,
        ])

        self.val_transforms = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])

    def get_train_loader(self, batch_size=32, num_workers=2):
        """Return a shuffled DataLoader over the CIFAR-10 training split.

        The dataset is stored under ``./data`` and downloaded if missing.

        Args:
            batch_size: Number of samples per batch.
            num_workers: Number of DataLoader worker processes.
        """
        dataset = CIFAR10(
            root='./data',
            train=True,
            download=True,
            transform=self.train_transforms,
        )
        return DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=num_workers,
        )

    def get_val_loader(self, batch_size=32, num_workers=2):
        """Return an unshuffled DataLoader over the CIFAR-10 test split.

        The dataset is stored under ``./data`` and downloaded if missing.

        Args:
            batch_size: Number of samples per batch.
            num_workers: Number of DataLoader worker processes.
        """
        dataset = CIFAR10(
            root='./data',
            train=False,
            download=True,
            transform=self.val_transforms,
        )
        return DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers,
        )

In [None]:
# Build the augmentation pipeline with default settings and obtain the
# training DataLoader (64 images per batch).
augmentation = DataAugmentationPipeline()
train_loader = augmentation.get_train_loader(batch_size=64)

# Summarise the loader: dataset size, batch size and batches per epoch.
print(
    f'Training samples: {len(train_loader.dataset)}',
    f'Batch size: {train_loader.batch_size}',
    f'Number of batches: {len(train_loader)}',
    sep='\n',
)

In [None]:
# Visualize augmented images
def visualize_batch(loader, num_images=8):
    """Plot the first ``num_images`` images of one batch drawn from ``loader``.

    Images are laid out in a grid of up to four columns. If the batch holds
    fewer than ``num_images`` samples, only the available ones are drawn.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches, where each
            image is a channel-first tensor. Pixel values are assumed to be
            normalised with mean 0.5 and std 0.5 per channel, as the
            transforms in this notebook do.
        num_images: Maximum number of images to display.

    Raises:
        ValueError: If ``num_images`` is smaller than 1.
    """
    if num_images < 1:
        raise ValueError('num_images must be at least 1')

    images, labels = next(iter(loader))
    # Never index past the end of a short batch.
    shown = min(num_images, len(images))
    cols = min(4, shown)
    rows = -(-shown // cols)  # ceiling division
    # squeeze=False keeps `axes` 2-D even for a single row or column.
    _, axes = plt.subplots(
        rows, cols, figsize=(3 * cols, 3 * rows), squeeze=False
    )
    for i, ax in enumerate(axes.flat):
        ax.axis('off')
        if i >= shown:
            continue  # leave surplus grid cells blank
        # Channel-first -> channel-last, the layout imshow expects.
        img = images[i].permute(1, 2, 0).numpy()
        img = (img + 1) / 2  # undo Normalize(0.5, 0.5): [-1, 1] -> [0, 1]
        ax.imshow(np.clip(img, 0, 1))
        ax.set_title(f'Label: {labels[i].item()}')
    plt.tight_layout()
    plt.show()

# Preview one batch from the training loader (augmentations applied).
visualize_batch(train_loader)