In [5]:
# 9.1 AlexNet

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

class AlexNetCIFAR10(nn.Module):
    def __init__(self, num_classes=10):
        super(AlexNetCIFAR10, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = AlexNetCIFAR10().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Accuracy of the model on the 10000 test images: {100 * correct / total} %')


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:36<00:00, 4701446.00it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Epoch [1/10], Loss: 1.7743


KeyboardInterrupt: 

The provided code defines and utilizes a PyTorch implementation of the AlexNet architecture tailored for classifying images from the CIFAR-10 dataset. Below is a detailed breakdown of the components and functionalities in the code.

 Imports and Setup

- torch: The main PyTorch library, providing support for tensors, dynamic computation graphs, and many utility functions for deep learning.
- torch.nn: A sub-library of PyTorch, providing a high-level API for building neural networks. It contains classes like `Module` (the base class for all neural network modules), and layers like `Conv2d`, `ReLU`, etc.
- torch.optim: This module includes optimization algorithms like SGD, Adam, etc., for training neural network models.
- torchvision: A package that provides access to popular datasets, model architectures, and image transformations for computer vision.
- DataLoader: A utility that provides an iterable over a dataset, with support for batching, sampling, shuffling, and multiprocess data loading.
- transforms: Provides common image transformations that can be composed together.

 AlexNetCIFAR10 Class

This class defines a modified version of the AlexNet model, adapted for the CIFAR-10 dataset. CIFAR-10 images are much smaller than the ImageNet images AlexNet was originally designed for.

- __init__ function: Initializes the model architecture.
  - The `features` block defines the convolutional base of the network, using `nn.Conv2d` for convolutional layers and `nn.ReLU` for activation functions, interspersed with `nn.MaxPool2d` layers for downsampling.
  - `self.avgpool` is an adaptive average pooling layer that resizes the output of the features block to a fixed size.
  - The `classifier` block consists of fully connected layers (`nn.Linear`), dropout layers (`nn.Dropout`) to reduce overfitting, and `ReLU` activations. The final layer outputs logits for the 10 CIFAR-10 classes.
- forward function: Defines the forward pass of the model. It sequentially applies the features block, adaptive pooling, and classifier block to the input tensor, and returns the output logits.

 Data Loading and Transformation

- transform: Composes several image transformations, including resizing the images to 224x224 pixels, converting them to tensors, and normalizing their pixel values.
- train_dataset and test_dataset: Load the CIFAR-10 training and testing datasets, applying the specified transformations.
- train_loader and test_loader: Wrap the datasets in DataLoader instances, batching and shuffling the training data.

 Training Setup

- device: Determines if a GPU is available and moves tensors to the GPU if so, for faster processing.
- model: An instance of the AlexNetCIFAR10 class, moved to the appropriate device.
- criterion: The loss function, `CrossEntropyLoss`, suitable for classification tasks.
- optimizer: The optimization algorithm, `Adam`, used for updating model weights.

 Training Loop

Iterates over epochs, where each epoch goes through the entire training dataset once. For each batch in the training data:
- Moves the images and labels to the appropriate device.
- Performs a forward pass, calculates the loss, performs a backward pass to compute gradients, and updates the model weights.

 Evaluation

- Sets the model to evaluation mode.
- Disables gradient computation to save memory and speed up computation.
- Iterates over the test dataset to calculate the total and correct predictions to compute the accuracy of the model on the test data.

 Output

- Prints the loss at the end of each training epoch.
- Prints the model's accuracy on the test dataset after training is complete.


In [3]:
# 9.2 VGG16
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam

# Load CIFAR-10 data
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

# Preprocess data: Normalize and Convert class vectors to binary class matrices.
train_images, test_images = train_images / 255.0, test_images / 255.0
train_labels = tf.keras.utils.to_categorical(train_labels, 10)
test_labels = tf.keras.utils.to_categorical(test_labels, 10)

# Because VGG16 expects input size of at least 224x224, we need to resize CIFAR-10 images.
# For a more efficient approach, consider designing a custom VGG-style model suited for 32x32 inputs.

def resize_images(images):
    return tf.image.resize(images, [224, 224])

train_images_resized = resize_images(train_images)
test_images_resized = resize_images(test_images)

# Load VGG16 model pre-trained on ImageNet without the top layer and freeze its layers.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# we know that the filters starts with random number and the model keep changing it to get to the best filters
# in transfer learning the model has the best filter, so we are setting this to false so the model doesn't change it.
base_model.trainable = False

# Create a new model on top
model = models.Sequential([
    base_model,
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax'),
])

model.compile(optimizer=Adam(lr=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Data augmentation
datagen = ImageDataGenerator(
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True)

# Fit the model
history = model.fit(datagen.flow(train_images_resized, train_labels, batch_size=64),
                    epochs=10,
                    validation_data=(test_images_resized, test_labels))

# Evaluate the model
test_loss, test_acc = model.evaluate(test_images_resized, test_labels, verbose=2)
print('\nTest accuracy:', test_acc)


ResourceExhaustedError: {{function_node __wrapped__ResizeBilinear_device_/job:localhost/replica:0/task:0/device:GPU:0}} OOM when allocating tensor with shape[50000,224,224,3] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:ResizeBilinear] name: 

In [None]:
# 9.3 resnet


def build_resnet_model(input_shape, num_classes=3):
    input_layer = Input(shape=input_shape)
    resnet_model = tf.keras.applications.ResNet50(input_shape=input_shape[:2] + (3,), include_top=False)(input_layer)
    x = GlobalAveragePooling2D()(resnet_model)
    output_layer = Dense(num_classes, activation='softmax')(x)
    return Model(inputs=input_layer, outputs=output_layer)


In [None]:
# 9.4 densnet
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, ReLU, MaxPooling2D, Flatten, Dense, GlobalAveragePooling2D, Multiply
from tensorflow.keras.models import Model
from tensorflow.keras import layers
def build_densnet(input_shape, num_classes,embed_dim=32):
    inputs = Input(input_shape)

    # DenseNet121 for Feature Extraction
    densenet = DenseNet121(include_top=False, input_tensor=inputs, weights='imagenet')
    densenet.trainable = False

    # Custom layers onto densenet
    x = densenet.output

    # Adding layers to process the output into a suitable form for classification
    x = GlobalAveragePooling2D()(x)
    x = Dense(embed_dim, activation='relu')(x)  # Embedding layer (optional, can adjust the size)
    x = Dense(num_classes, activation='softmax')(x)  # Output layer for classification

    model = Model(inputs, x)
    return model


In [None]:
# 9.5 Transfer Learning
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, ReLU, MaxPooling2D, Flatten, Dense, GlobalAveragePooling2D, Multiply
from tensorflow.keras.models import Model
from tensorflow.keras import layers
class EdgeDetectionLayer(Layer):
    def __init__(self):
        super(EdgeDetectionLayer, self).__init__()
        # Sobel filters
        sobel_x = tf.constant([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], dtype=tf.float32)
        sobel_y = tf.constant([[-1, -2, -1], [0, 0, 0], [1, 2, 1]], dtype=tf.float32)
        sobel_x = tf.reshape(sobel_x, [3, 3, 1, 1])
        sobel_y = tf.reshape(sobel_y, [3, 3, 1, 1])
        self.sobel_filters = tf.concat([sobel_x, sobel_y], axis=-1)

    def call(self, inputs):
        gray = tf.image.rgb_to_grayscale(inputs)
        filtered = tf.nn.conv2d(gray, self.sobel_filters, strides=[1, 1, 1, 1], padding='SAME')
        edges = tf.reduce_sum(tf.abs(filtered), axis=-1, keepdims=True)
        return edges

def build_cnn_with_attentionandEdgesRes(input_shape, num_classes, embed_dim=32, num_heads=2, num_transformer_layers=2, ff_dim=32):

    inputs = Input(input_shape)

    # Apply Edge Detection
    edge_detector = EdgeDetectionLayer()
    edges = edge_detector(inputs)

    # Initialize DenseNet121
    densenet = DenseNet121(include_top=False, input_tensor=inputs, weights='imagenet', pooling=None)
    densenet.trainable = False

    # Select an intermediate layer from DenseNet121
    intermediate_layer = densenet.get_layer('conv5_block16_concat').output
    intermediate_layer_resized = Resizing(inputs.shape[1], inputs.shape[2])(intermediate_layer)

    # Concatenate edge features with intermediate DenseNet121 features
    concatenated = Concatenate()([intermediate_layer_resized, edges])

    # Additional custom layers
    x = Conv2D(64, (3, 3), padding='same')(concatenated)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Flatten()(x)

    # Transformer blocks (assuming transformer_encoder is defined)
    x = Dense(embed_dim)(x)
    for _ in range(num_transformer_layers):
        x = transformer_encoder(x, embed_dim, num_heads, ff_dim)

    # Classifier head
    x = GlobalAveragePooling2D()(x)
    x = Dense(128, activation='relu')(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs, outputs)
    return model