# Understanding Pooling and Padding in CNN

# Pooling in CNN
# The notes in this section are emitted with print(), matching the AlexNet
# notes later in this file, so every line is a valid Python statement.
print("Pooling in CNN")
print("Pooling is used in CNN to reduce the spatial dimensions of the input volume, which decreases the number of parameters and computations in the network, and controls overfitting.")

# Difference between min pooling and max pooling
# NOTE(review): the explanation below contrasts max pooling with average
# pooling; min pooling (taking the minimum of each patch) is not described.
print("Max pooling takes the maximum value from the patch of the feature map covered by the filter, while average pooling takes the average value.")

# Padding in CNN
print("Padding in CNN")
print("Padding is used to control the spatial dimensions of the output volume. It helps preserve the spatial dimensions of the input after convolution, which is useful in deeper networks.")

# Zero-padding vs Valid-padding
print("Zero-padding adds zeros around the input volume, maintaining the input size after convolution. Valid-padding (no padding) reduces the input size after convolution.")

# Exploring LeNet

# LeNet-5 Overview
# Every note is printed; previously only the first list item was printed and
# the remaining lines were either discarded bare strings or syntax errors.
print("LeNet-5 Overview")
print("LeNet-5, developed by Yann LeCun, is a pioneering convolutional neural network architecture designed for handwritten digit recognition. It consists of 7 layers including convolutional layers, subsampling layers, and fully connected layers.")

# Key components of LeNet-5
print("Key components of LeNet-5")
print("1. Convolutional layers: Extract spatial features from the input image.")
print("2. Subsampling layers (average pooling): Reduce the spatial dimensions and the number of parameters.")
print("3. Fully connected layers: Perform classification based on extracted features.")
print("Advantages: Efficient for small images, early demonstration of CNN effectiveness.")
print("Limitations: Not suitable for large images, limited by hardware of its time.")

import tensorflow as tf
from tensorflow.keras import datasets, layers, models

# Load the MNIST dataset and rescale the pixel values by 1/255
(train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()
train_images, test_images = train_images / 255.0, test_images / 255.0

# Reshape data to add a single trailing channel axis: (N, 28, 28) -> (N, 28, 28, 1)
train_images = train_images.reshape((train_images.shape[0], 28, 28, 1))
test_images = test_images.reshape((test_images.shape[0], 28, 28, 1))

# Implement LeNet-5
# The classic LeNet-5 is defined for 32x32 inputs. With 28x28 MNIST images,
# padding='same' on the first convolution keeps its output at 28x28, so the
# feature maps run 28 -> 14 -> 10 -> 5 and Flatten yields 5*5*16 = 400
# features. Without the padding they shrink to 24 -> 12 -> 8 -> 4 (256 features).
# The input shape is declared with an explicit Input object rather than the
# deprecated `input_shape=` argument on the first layer.
lenet_model = models.Sequential([
    layers.Input(shape=(28, 28, 1)),
    layers.Conv2D(6, (5, 5), padding='same', activation='tanh'),
    layers.AveragePooling2D(),
    layers.Conv2D(16, (5, 5), activation='tanh'),
    layers.AveragePooling2D(),
    layers.Flatten(),
    layers.Dense(120, activation='tanh'),
    layers.Dense(84, activation='tanh'),
    layers.Dense(10, activation='softmax'),
])

# Compile and train the model
# Labels are integer class ids, hence the sparse categorical cross-entropy.
lenet_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# NOTE(review): the test split doubles as validation data here; hold out part
# of the training set instead if validation metrics are used to tune the model.
lenet_model.fit(train_images, train_labels, epochs=10, validation_data=(test_images, test_labels))

# Evaluate the model
test_loss, test_acc = lenet_model.evaluate(test_images, test_labels)
print("LeNet-5 Test Accuracy:", test_acc)

# Analyzing AlexNet

# AlexNet Overview
# Each note goes on its own output line (print joins its arguments with sep).
print(
    "AlexNet Overview",
    "AlexNet, developed by Alex Krizhevsky, won the ImageNet Large Scale Visual Recognition Challenge in 2012. It has 8 layers: 5 convolutional layers followed by 3 fully connected layers.",
    sep="\n",
)

# Innovations in AlexNet
print(
    "Innovations in AlexNet",
    "1. Use of ReLU activation function for faster training.",
    "2. Overlapping max pooling for better generalization.",
    "3. Dropout layers to reduce overfitting.",
    "4. Data augmentation for improved model robustness.",
    "Convolutional layers extract features from input images, pooling layers reduce spatial dimensions, and fully connected layers perform classification.",
    sep="\n",
)

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

# Define AlexNet architecture
class AlexNet(nn.Module):
    """AlexNet-style CNN: five convolutional layers and three linear layers.

    The convolutional stack ends in 256 channels. An adaptive average pool
    then fixes the spatial size at 6x6, so the classifier's first linear
    layer always receives 256 * 6 * 6 features. For 224x224 inputs the
    convolutional stack already produces 6x6 maps and the pool is an
    identity; for other sizes it prevents a shape mismatch in the classifier.
    The pool has no parameters, so state-dict keys are unaffected.

    Args:
        num_classes: Number of output logits produced by the last layer.
    """

    def __init__(self, num_classes: int = 10) -> None:
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Decouples the classifier's input width from the image resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits for a batch of 3-channel images.

        Args:
            x: Batched image tensor with 3 channels, i.e. (N, 3, H, W). The
                image must be large enough to survive the strided conv and
                the three stride-2 max pools (64x64 works, 32x32 does not).

        Returns:
            Tensor of shape (N, num_classes) holding unnormalized scores.
        """
        x = self.features(x)
        x = self.avgpool(x)
        # Keep the batch axis, collapse channels and spatial axes.
        x = torch.flatten(x, 1)
        return self.classifier(x)

# Preprocessing applied to every image: resize to 224, convert to a tensor,
# then normalize each channel with the mean/std below.
# NOTE(review): these look like the usual ImageNet channel statistics;
# confirm they are the intended values for CIFAR-10.
transform = transforms.Compose(
    [
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# CIFAR-10 train and test splits, downloaded into ./data when missing.
train_dataset, test_dataset = (
    datasets.CIFAR10(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Batches of 64; only the training loader shuffles.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
)

# Initialize and train AlexNet
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AlexNet(num_classes=10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Single source of truth for the epoch count: it drives both the loop bound
# and the progress message, so the two cannot drift apart.
num_epochs = 10

for epoch in range(num_epochs):
    model.train()  # training mode (the classifier contains Dropout layers)
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Gradients accumulate by default, so reset them before each step.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    # Report the mean per-batch loss for this epoch.
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss/len(train_loader)}")

# Evaluate AlexNet
model.eval()  # evaluation mode (the classifier contains Dropout layers)
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for scoring
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest logit per sample. argmax
        # replaces the legacy torch.max(outputs.data, 1) idiom; `.data` is
        # unnecessary inside no_grad.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Accuracy is reported as a percentage of correctly classified test samples.
print("AlexNet Test Accuracy:", 100 * correct / total)
