## Model Architecture and Training 

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt


In [None]:

# CIFAR-10 per-channel normalization statistics, shared by both pipelines so
# the training and evaluation inputs cannot drift apart.
cifar10_mean = (0.4914, 0.4822, 0.4465)
cifar10_std = (0.247, 0.243, 0.261)

# Training pipeline: random crop/flip on the image, tensor conversion and
# normalization, then random erasing applied to the normalized tensor.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(cifar10_mean, cifar10_std),
    transforms.RandomErasing(p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value='random'),
])

# Evaluation pipeline: deterministic, no augmentation.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar10_mean, cifar10_std),
])

# **Load CIFAR-10 dataset**
batch_size = 128
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
# Second view of the *training* images that uses the deterministic pipeline;
# it only exists to serve the validation split below.
valset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_test)

# **Training-Validation Split**
train_size = int(0.9 * len(trainset))  # 90% Train, 10% Validation
val_size = len(trainset) - train_size
train_subset, val_split = random_split(trainset, [train_size, val_size])
# random_split returns subsets that wrap `trainset`, so the held-out part would
# be randomly cropped/flipped/erased on every validation pass. Re-wrap the same
# held-out indices around the un-augmented view so validation loss (which
# drives early stopping and checkpointing) is measured on clean images.
val_subset = torch.utils.data.Subset(valset, val_split.indices)

trainloader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=2)
valloader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=2)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)


In [None]:
class AdvancedCNN(nn.Module):
    """Convolutional classifier producing 10 logits per input image.

    The network has three convolutional stages (64, 128 and 256 channels).
    Each stage is two 3x3 convolutions, each followed by batch norm and ReLU,
    and ends with a 2x2 max pool. A two-layer fully connected head follows.

    The first linear layer expects ``256 * 4 * 4`` features, which is what
    three 2x2 poolings leave from a 3x32x32 input.

    Args:
        dropout_rate: Dropout probability applied between the two linear
            layers.
    """

    def __init__(self, dropout_rate=0.2):
        super().__init__()

        # Stage 1: 3 -> 64 -> 64 channels, then halve the spatial size.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool1 = nn.MaxPool2d(2, 2)

        # Stage 2: 64 -> 128 -> 128 channels, then halve the spatial size.
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv3_extra = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.bn3_extra = nn.BatchNorm2d(128)
        self.pool2 = nn.MaxPool2d(2, 2)

        # Stage 3: 128 -> 256 -> 256 channels, then halve the spatial size.
        self.conv5 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(256)
        self.conv5_extra = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.bn5_extra = nn.BatchNorm2d(256)
        self.pool3 = nn.MaxPool2d(2, 2)

        # Classifier head: flattened features -> 512 -> 10 logits.
        self.fc1 = nn.Linear(256 * 4 * 4, 512)
        self.bn_fc1 = nn.BatchNorm1d(512)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return the 10 class logits for a batch of images ``x``."""
        # Stage 1
        out = self.conv1(x)
        out = F.relu(self.bn1(out))
        out = self.conv2(out)
        out = F.relu(self.bn2(out))
        out = self.pool1(out)

        # Stage 2
        out = self.conv3(out)
        out = F.relu(self.bn3(out))
        out = self.conv3_extra(out)
        out = F.relu(self.bn3_extra(out))
        out = self.pool2(out)

        # Stage 3
        out = self.conv5(out)
        out = F.relu(self.bn5(out))
        out = self.conv5_extra(out)
        out = F.relu(self.bn5_extra(out))
        out = self.pool3(out)

        # Flatten to rows of fc1's input width (256 * 4 * 4).
        out = out.view(-1, self.fc1.in_features)
        out = self.fc1(out)
        out = F.relu(self.bn_fc1(out))
        out = self.dropout(out)
        return self.fc2(out)


In [None]:


# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

model = AdvancedCNN()
model = model.to(device)
print(model)

# **Loss Function, Optimizer & Learning Rate Scheduler**
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    model.parameters(),
    lr=0.05,
    momentum=0.9,
    weight_decay=5e-4,
)
# T_max=75 equals train_model's default num_epochs, so the cosine schedule
# spans exactly one full training run.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=75)


# Training
def _run_epoch(model, loader, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    When ``optimizer`` is given the model is put in training mode and updated
    after every batch; otherwise the model is put in eval mode and gradients
    are disabled. Uses the module-level ``device`` and ``criterion``.
    """
    training = optimizer is not None
    model.train(training)
    loss_sum, correct, total = 0.0, 0, 0

    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            batch_count = labels.size(0)
            # `criterion` is expected to return the batch *mean* (the
            # nn.CrossEntropyLoss default). Scale it back to a sum so that a
            # short final batch does not count as much as a full one.
            loss_sum += loss.item() * batch_count
            _, predicted = torch.max(outputs, 1)
            total += batch_count
            correct += (predicted == labels).sum().item()

    return loss_sum / total, 100 * correct / total


def train_model(model, optimizer, scheduler, num_epochs=75, patience=5):
    """Train ``model`` with per-epoch validation, checkpointing and early stopping.

    Reads batches from the module-level ``trainloader`` and ``valloader``.
    After every epoch the scheduler is stepped once. Whenever the validation
    loss improves on the best seen so far, the model's ``state_dict`` is
    written to ``best_model.pth`` in the current working directory.

    Args:
        model: Network to train; updated in place.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per epoch.
        num_epochs: Maximum number of epochs to run.
        patience: Number of consecutive epochs without a validation-loss
            improvement after which training stops early.

    Returns:
        A tuple ``(train_losses, train_acc, val_losses, val_acc)`` of lists
        with one entry per completed epoch. Losses are per-sample means and
        accuracies are percentages.
    """
    best_val_loss = float("inf")
    counter = 0  # Epochs since the last validation-loss improvement
    train_losses, val_losses, train_acc, val_acc = [], [], [], []

    for epoch in range(num_epochs):
        epoch_train_loss, epoch_train_acc = _run_epoch(model, trainloader, optimizer)
        train_losses.append(epoch_train_loss)
        train_acc.append(epoch_train_acc)

        # **Validation Step**
        epoch_val_loss, epoch_val_acc = _run_epoch(model, valloader)
        val_losses.append(epoch_val_loss)
        val_acc.append(epoch_val_acc)
        scheduler.step()

        print(f"Epoch [{epoch+1}/{num_epochs}] - Train Loss: {train_losses[-1]:.4f}, Train Acc: {train_acc[-1]:.2f}%, Val Loss: {val_losses[-1]:.4f}, Val Acc: {val_acc[-1]:.2f}%")

        # **Early Stopping Logic**
        if val_losses[-1] < best_val_loss:
            best_val_loss = val_losses[-1]
            counter = 0
            torch.save(model.state_dict(), "best_model.pth")  # Save the best model
        else:
            counter += 1
            if counter >= patience:
                print(f" Early stopping triggered at epoch {epoch+1}.")
                break  # Stop training

    return train_losses, train_acc, val_losses, val_acc


# **Train the Model & Get Metrics**
train_losses, train_acc, val_losses, val_acc = train_model(model, optimizer, scheduler)

# **Load Best Model Before Testing**
# Evaluate the checkpoint with the lowest validation loss rather than whatever
# weights the final epoch left behind.
model.load_state_dict(torch.load("best_model.pth"))

# **Test Accuracy Evaluation**
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Index of the largest logit in each row is the predicted class.
        predicted = outputs.max(dim=1).indices
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

test_accuracy = 100 * correct / total
print(f" Final Test Accuracy: {test_accuracy:.2f}%")

# **Plot Training & Validation Curves**
def plot_metrics(train_losses, val_losses, train_acc, val_acc):
    """Show loss and accuracy curves side by side in one figure.

    Args:
        train_losses: Per-epoch training loss values.
        val_losses: Per-epoch validation loss values.
        train_acc: Per-epoch training accuracy values (plotted as percent).
        val_acc: Per-epoch validation accuracy values (plotted as percent).

    The x axis is the 1-based epoch number, derived from the length of
    ``train_losses``.
    """
    epochs = range(1, len(train_losses) + 1)

    # One row per panel:
    # (train series, train label, val series, val label, y label, title)
    panels = (
        (train_losses, "Train Loss", val_losses, "Validation Loss",
         "Loss", "Training & Validation Loss"),
        (train_acc, "Train Accuracy", val_acc, "Validation Accuracy",
         "Accuracy (%)", "Training & Validation Accuracy"),
    )

    plt.figure(figsize=(12, 5))
    for position, panel in enumerate(panels, start=1):
        train_series, train_label, val_series, val_label, y_label, title = panel
        plt.subplot(1, 2, position)
        plt.plot(epochs, train_series, label=train_label)
        plt.plot(epochs, val_series, label=val_label)
        plt.xlabel("Epochs")
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()

    plt.show()

# **Generate Plots**
# Keyword arguments make the pairing explicit: train_model returns the lists
# in a different order (losses/acc per split) than plot_metrics accepts them.
plot_metrics(
    train_losses=train_losses,
    val_losses=val_losses,
    train_acc=train_acc,
    val_acc=val_acc,
)


## Test: classify a DALL·E-generated image

In [None]:
import torch
import torchvision.transforms as transforms
from PIL import Image
import requests
from io import BytesIO
from dotenv import load_dotenv

# `os` was used below without ever being imported (NameError on os.getenv).
import os
import random

from openai import OpenAI

# Load variables from a local .env file into the process environment so the
# API key can be read with os.getenv below.
load_dotenv()

# Define CIFAR-10 classes (index order must match the model's output logits)
cifar10_classes = [
    "airplane", "automobile", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck"
]

model = AdvancedCNN()  # Instantiate the model
model.load_state_dict(torch.load("state_of_the_art.pth", map_location=torch.device('cpu')))  # Load weights
model.eval()  # Set to evaluation mode

# Generate an image using OpenAI's API (DALL·E)
openai_api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=openai_api_key)

# Choose a random class
chosen_class = random.choice(cifar10_classes)

# Generate an image using DALL·E.
# The chosen class is now part of the prompt. Previously the prompt was an
# f-string with no placeholder, so the "Generated Image Class" printed below
# had no relation to the image that was actually requested.
response = client.images.generate(
    model="dall-e-3",
    prompt=(
        "An abstract, surreal, and visually ambiguous 32x32 pixel image inspired by CIFAR-10 classes. "
        "The image should contain overlapping features from multiple CIFAR-10 categories, "
        "such as a bird perched on a truck, or a cat blending into an airplane silhouette. "
        "The style should be distorted, blurred, or in an unusual artistic style that makes classification non-trivial. "
        f"The primary subject of the image must be: {chosen_class}."
    ),
    size="1024x1024",
    quality="standard",
    n=1,
)

image_url = response.data[0].url
# Bound the download time and fail loudly on an HTTP error instead of handing
# an error page to PIL.
image_response = requests.get(image_url, timeout=30)
image_response.raise_for_status()
# Force three channels: the normalization below has exactly three mean/std
# entries and would fail on an RGBA or palette image.
image = Image.open(BytesIO(image_response.content)).convert("RGB")

# Preprocess the image (resize, normalize)
transform = transforms.Compose([
    transforms.Resize((32, 32)),  # Resize to CIFAR-10 size
    transforms.ToTensor(),  # Convert to tensor
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.247, 0.243, 0.261])  # CIFAR-10 normalization
])

input_tensor = transform(image).unsqueeze(0)  # Add batch dimension

# Predict using the model
with torch.no_grad():
    output = model(input_tensor)
    predicted_class_idx = output.argmax(dim=1).item()
    predicted_class = cifar10_classes[predicted_class_idx]

# Show Results
print(f"Generated Image Class: {chosen_class}")
print(f"Model Predicted Class: {predicted_class}")

# Display the generated image
image.show()


Generated Image Class: bird
Model Predicted Class: airplane
