<a href="https://colab.research.google.com/github/OneFineStarstuff/OneFineStarstuff/blob/main/_Technology_(Deep_Learning_CNN_Optimizations_and_Transfer_Learning).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.optim.lr_scheduler import CosineAnnealingLR

# Per-channel statistics handed to Normalize (presumably CIFAR-10 mean/std;
# confirm against the dataset if these are ever changed).
channel_mean = (0.4914, 0.4822, 0.4465)
channel_std = (0.247, 0.243, 0.261)

# Training-time augmentation. The order matters: the crop and flip run on the
# loaded image, ToTensor converts it, Normalize standardizes the channels, and
# RandomErasing is applied last to the already-normalized tensor.
transform = transforms.Compose(
    [
        transforms.RandomResizedCrop(size=224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
        transforms.RandomErasing(
            p=0.5,
            scale=(0.02, 0.2),
            ratio=(0.3, 3.3),
        ),
    ]
)

# CIFAR-10 training split, fetched into ./data on first use.
train_data = datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

# Reshuffled mini-batches of 32 augmented samples.
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=32,
    shuffle=True,
)

# Train on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Start from ImageNet-pretrained ResNet-18 weights and swap the classifier
# head for a fresh 10-way linear layer (one output per CIFAR-10 class).
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=10)

# Parameters are moved to the target device before the optimizer is built.
model.to(device)

# Loss, optimizer and learning-rate schedule.
# NOTE: T_max is hard-coded to 10 and has to be kept equal to the number of
# training epochs for the cosine curve to span the whole run.
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(params=model.parameters(), lr=1e-3)
scheduler = CosineAnnealingLR(optimizer=optimizer, T_max=10)

# Mixed-precision setup that works on both CUDA and CPU.
#
# The device-agnostic `torch.amp` API is used for every device:
#   * `torch.amp.autocast` accepts the `device_type=` keyword, whereas the
#     legacy `torch.cuda.amp.autocast` rejects it with a TypeError.
#   * `torch.amp.GradScaler` takes the device as `device` (there is no
#     `device_type` parameter on either GradScaler class).
from torch.amp import GradScaler, autocast

# Loss scaling is only enabled on CUDA. When disabled, the scaler's
# scale/step/update calls become plain pass-throughs, so the same training
# code runs unchanged on CPU without the "CUDA is not available" warning a
# default-constructed GradScaler would emit.
use_amp_scaling = device.type == "cuda"
scaler = GradScaler(device=device.type, enabled=use_amp_scaling)

# Training settings
num_epochs = 10
accumulation_steps = 4  # Micro-batches whose gradients are summed per optimizer step

# When the number of batches is not a multiple of `accumulation_steps`, the
# batches from `tail_start` onward form one final, shorter accumulation group.
# If it divides evenly, `tail_start == num_batches` and no batch is in the tail.
num_batches = len(train_loader)
tail_start = num_batches - num_batches % accumulation_steps

# Training loop with mixed precision and gradient accumulation
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    # Start the epoch with clean gradients. Gradients must NOT be cleared on
    # every batch: that would discard the accumulated micro-batch gradients
    # and reduce accumulation to "step on every 4th batch only".
    optimizer.zero_grad()

    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)

        # torch.autocast is used directly because it accepts `device_type`
        # for both "cuda" and "cpu" (torch.cuda.amp.autocast does not).
        with torch.autocast(device_type=device.type):
            outputs = model(images)
            loss = criterion(outputs, labels)

        # Divide by the size of the accumulation group this batch belongs to,
        # so the summed gradient is the mean over the group even for the
        # shorter group at the end of the epoch.
        group_size = accumulation_steps if i < tail_start else num_batches - tail_start
        scaler.scale(loss / group_size).backward()

        # Step at the end of every full group, and also on the last batch so
        # a trailing partial group is applied instead of leaking into the
        # next epoch (or being dropped after the final epoch).
        is_group_end = (i + 1) % accumulation_steps == 0
        is_last_batch = (i + 1) == num_batches
        if is_group_end or is_last_batch:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()  # Reset for the next accumulation group

        # Collect stats for monitoring progress (unscaled per-batch loss)
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    scheduler.step()  # One scheduler step per epoch
    epoch_loss = running_loss / num_batches
    epoch_accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")

print("Training completed!")