# Implementation with Keras
Keras simplifies CNN implementation, enabling preprocessing and building convolutional layers efficiently.
Example Workflow:
Load and preprocess images.
Define convolutional layers using Conv2D.
Train the model to automatically learn optimal filters for specific tasks.

In [1]:
from sklearn.datasets import load_sample_images
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, CenterCrop, Rescaling

# Fetch the sample photos bundled with scikit-learn.
images = load_sample_images()["images"]

# Center-crop every photo to 70x120, then multiply pixel values by 1/255.
images = CenterCrop(height=70, width=120)(images)
images = Rescaling(scale=1 / 255)(images)

# 32 filters of size 3x3, stride 1; "same" padding keeps the 70x120 spatial
# size, and ReLU is applied to the resulting feature maps.
conv_layer = Conv2D(
    filters=32,
    kernel_size=(3, 3),
    strides=(1, 1),
    padding="same",
    activation="relu",
)

# Run the batch through the layer and report the shape of the feature maps.
output = conv_layer(images)
print("Output shape:", output.shape)


Output shape: (2, 70, 120, 32)


# Filters and Convolutional Layers
Filters, convolutional layers, and their implementation in Keras provide a strong foundation for understanding Convolutional Neural Networks (CNNs).
## Keras Implementation
Loading and Preprocessing Images:

In [2]:
from sklearn.datasets import load_sample_images
import tensorflow as tf

# Reload the sample photos, then apply the crop and rescale steps in sequence.
images = load_sample_images()["images"]
center_crop = tf.keras.layers.CenterCrop(height=70, width=120)
rescale = tf.keras.layers.Rescaling(scale=1/255)
images = rescale(center_crop(images))


Creating a Conv2D Layer:

In [3]:
# 32 filters of size 7x7. "valid" (the Conv2D default) adds no padding, so each
# spatial dimension shrinks by kernel_size - 1 = 6: 70x120 -> 64x114.
conv_layer = tf.keras.layers.Conv2D(
    filters=32,
    kernel_size=7,
    padding="valid",
)
fmaps = conv_layer(images)
print(fmaps.shape)  # (2, 64, 114, 32)


(2, 64, 114, 32)


With "same" Padding:

In [4]:
# Same 7x7 convolution, but "same" padding preserves the 70x120 spatial size.
conv_layer = tf.keras.layers.Conv2D(
    filters=32,
    kernel_size=7,
    padding="same",
)
fmaps = conv_layer(images)
print(fmaps.shape)  # (2, 70, 120, 32)


(2, 70, 120, 32)


# Weights and Biases:
Access weights and biases:

In [5]:
# get_weights() yields the layer's arrays: the kernels first, then the biases.
# Expected shapes for the layer built above:
#   kernels -> (7, 7, 3, 32)  kernel height, kernel width, input channels, filters
#   biases  -> (32,)          one bias per filter
kernels, biases = conv_layer.get_weights()
print(kernels.shape, biases.shape, sep="\n")


(7, 7, 3, 32)
(32,)


# Average Pooling

In [6]:
# Average pooling layer with a 2x2 pooling window
avg_pool = tf.keras.layers.AveragePooling2D(pool_size=(2, 2))


# Global Average Pooling

In [7]:
# Built-in layer: reduces every feature map to its mean value.
global_avg_pool = tf.keras.layers.GlobalAvgPool2D()

# Hand-written alternative: average over axes 1 and 2 (height and width for
# channels-last inputs). Note that this rebinds `global_avg_pool`.
global_avg_pool = tf.keras.layers.Lambda(
    lambda feature_maps: tf.reduce_mean(feature_maps, axis=[1, 2])
)


# Custom Depthwise Pooling

In [8]:
class DepthPool(tf.keras.layers.Layer):
    """Max pooling along the last (channel) axis instead of the spatial axes.

    Consecutive channels are gathered into groups of ``pool_size`` and only
    the maximum of each group is kept, so the output has
    ``channels // pool_size`` channels; all other dimensions are unchanged.

    Args:
        pool_size: Number of consecutive channels per group. The reshape in
            ``call`` only succeeds when the channel count is a multiple of
            this value.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, pool_size=2, **kwargs):
        super().__init__(**kwargs)
        self.pool_size = pool_size

    def call(self, inputs):
        """Return the per-group maximum over the last axis of ``inputs``."""
        # Use the dynamic shape so dimensions unknown at trace time still work.
        shape = tf.shape(inputs)
        groups = shape[-1] // self.pool_size
        # [..., channels] -> [..., groups, pool_size]
        new_shape = tf.concat([shape[:-1], [groups, self.pool_size]], axis=0)
        return tf.reduce_max(tf.reshape(inputs, new_shape), axis=-1)

    def get_config(self):
        """Return the layer config, including ``pool_size``.

        Without this override the constructor argument is not part of the
        config, so the layer cannot be reliably saved, cloned or reloaded.
        """
        config = super().get_config()
        config.update({"pool_size": self.pool_size})
        return config


# Basic CNN Example (Fashion MNIST)

In [9]:
from functools import partial
# Conv2D pre-configured with this model's defaults: 3x3 kernels, "same"
# padding, ReLU activation and He-normal initialization. Any of these can
# still be overridden per call (the first layer uses kernel_size=7).
DefaultConv2D = partial(tf.keras.layers.Conv2D, kernel_size=3, padding="same",
                        activation="relu", kernel_initializer="he_normal")

model = tf.keras.Sequential([
    # Declare the input shape with an explicit Input layer. Passing
    # `input_shape=` to the first layer is deprecated in recent Keras
    # releases and emits a UserWarning.
    tf.keras.layers.Input(shape=[28, 28, 1]),
    DefaultConv2D(filters=64, kernel_size=7),
    tf.keras.layers.MaxPool2D(),
    # The filter count doubles after each pooling step: 64 -> 128 -> 256.
    DefaultConv2D(filters=128),
    DefaultConv2D(filters=128),
    tf.keras.layers.MaxPool2D(),
    DefaultConv2D(filters=256),
    DefaultConv2D(filters=256),
    tf.keras.layers.MaxPool2D(),
    # Classifier head: flatten, two dense layers with dropout, softmax output.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation="relu", kernel_initializer="he_normal"),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(64, activation="relu", kernel_initializer="he_normal"),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(10, activation="softmax")
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


# Key Features:

Uses DefaultConv2D with pre-set hyperparameters for convenience.
The number of filters doubles after each pooling layer (64 → 128 → 256), so the network can learn more feature maps as the spatial resolution shrinks.
Flatten layer connects convolutional outputs to dense layers.
Dropout prevents overfitting.
Result: Achieves ~92% accuracy on Fashion MNIST.

# Advancements in CNN Architectures
1. LeNet-5 (1998)
Designed by Yann LeCun for digit recognition (MNIST).
Structure:
Convolutional and pooling layers followed by dense layers.
Uses tanh and RBF activation functions (modern networks use ReLU and softmax).

2. AlexNet (2012)
Achieved a 17% top-5 error rate in ILSVRC, outperforming the second-best (26%).
Innovations:
Stacked convolutional layers without pooling after every convolution.
Data augmentation: Random shifts, flips, and lighting changes.
Dropout: 50% dropout rate for regularization.
Local Response Normalization (LRN):
Inspired by biological neurons.
Encourages feature map specialization.


# Implementing a ResNet-34 Architecture
This section shows how to implement a ResNet-34 architecture from scratch using Keras and TensorFlow, as well as how to use pretrained models for image classification and transfer learning.
## Key Concepts
ResidualUnit Layer: A custom Keras layer that implements a residual block from the ResNet architecture, allowing for skip connections.
Building ResNet-34: A residual neural network with multiple residual units stacked together. The network is built using Keras' Sequential API.
Pretrained Models: The passage explains how to load and use pretrained models (like ResNet-50) from Keras’ tf.keras.applications package for image classification.
Transfer Learning: The technique of using a pretrained model on a new task, often with some layers frozen to retain the general features learned from large datasets like ImageNet, and adding custom layers for specific tasks.

# ResidualUnit Layer Implementation

In [10]:
import tensorflow as tf
from functools import partial

# Conv2D defaults used by the ResNet code: 3x3 kernels, stride 1, "same"
# padding, He-normal initialization and no bias term.
DefaultConv2D = partial(
    tf.keras.layers.Conv2D,
    kernel_size=3,
    strides=1,
    padding="same",
    kernel_initializer="he_normal",
    use_bias=False,
)

class ResidualUnit(tf.keras.layers.Layer):
    """Residual block: two 3x3 convolutions plus a skip connection.

    Main path: conv -> batch norm -> activation -> conv -> batch norm.
    Skip path: the inputs unchanged, or a strided 1x1 conv + batch norm when
    ``strides > 1``. The two paths are added and passed through the
    activation.

    Note: with ``strides == 1`` the skip path is the identity, so the inputs
    must already have ``filters`` channels for the addition to be valid.

    Args:
        filters: Number of filters of every convolution in the unit.
        strides: Stride of the first main-path convolution, and of the 1x1
            skip convolution when one is created.
        activation: Activation identifier, resolved with
            ``tf.keras.activations.get``.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, filters, strides=1, activation="relu", **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.filters = filters
        self.strides = strides
        self.activation = tf.keras.activations.get(activation)
        self.main_layers = [
            DefaultConv2D(filters, strides=strides),
            tf.keras.layers.BatchNormalization(),
            self.activation,
            DefaultConv2D(filters),
            tf.keras.layers.BatchNormalization()
        ]
        # Empty list means the skip path is the identity.
        self.skip_layers = []
        if strides > 1:
            # Downsample the skip path with the same stride as the main path.
            self.skip_layers = [
                DefaultConv2D(filters, kernel_size=1, strides=strides),
                tf.keras.layers.BatchNormalization()
            ]

    def call(self, inputs):
        """Apply both paths to ``inputs`` and return ``activation(main + skip)``."""
        Z = inputs
        for layer in self.main_layers:
            Z = layer(Z)
        skip_Z = inputs
        for layer in self.skip_layers:
            skip_Z = layer(skip_Z)
        return self.activation(Z + skip_Z)

    def get_config(self):
        """Return the layer config including the constructor arguments.

        Needed so that models containing this layer can be saved, cloned and
        reloaded with the same ``filters``, ``strides`` and ``activation``.
        """
        config = super().get_config()
        config.update({
            "filters": self.filters,
            "strides": self.strides,
            "activation": tf.keras.activations.serialize(self.activation),
        })
        return config


# Building ResNet-34 Using the ResidualUnit Layer

In [11]:
# Stem: 7x7/2 convolution, batch norm, ReLU, then 3x3/2 max pooling.
model = tf.keras.Sequential([
    # Declare the input shape with an explicit Input layer rather than the
    # deprecated `input_shape=` argument on the first layer.
    tf.keras.layers.Input(shape=[224, 224, 3]),
    DefaultConv2D(64, kernel_size=7, strides=2),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation("relu"),
    tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding="same"),
])

# 3 + 4 + 6 + 3 = 16 residual units. Whenever the filter count changes, the
# unit uses stride 2, which also makes it create its 1x1 skip convolution.
prev_filters = 64
for filters in [64] * 3 + [128] * 4 + [256] * 6 + [512] * 3:
    strides = 1 if filters == prev_filters else 2
    model.add(ResidualUnit(filters, strides=strides))
    prev_filters = filters

# Head: global average pooling followed by a 10-way softmax classifier.
model.add(tf.keras.layers.GlobalAvgPool2D())
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(10, activation="softmax"))


# Using Pretrained Models

In [None]:
# Load ResNet-50 with weights pretrained on ImageNet.
model = tf.keras.applications.resnet50.ResNet50(weights="imagenet")


In [None]:
# Start again from the raw sample photos. The `images` variable built earlier
# in this notebook was center-cropped to 70x120 and rescaled to [0, 1], whereas
# resnet50.preprocess_input expects pixel values in the 0-255 range.
raw_images = load_sample_images()["images"]

# Resize to the 224x224 input size, cropping rather than distorting the
# aspect ratio.
images_resized = tf.keras.layers.Resizing(height=224, width=224, crop_to_aspect_ratio=True)(raw_images)
inputs = tf.keras.applications.resnet50.preprocess_input(images_resized)
Y_proba = model.predict(inputs)

## Transfer Learning:
If you want to use a pretrained model like Xception for a different task (e.g., classifying flowers), you can freeze the base model’s layers and add your own classifier on top.

In [None]:
# Number of target classes for the new task. It must be defined before the
# output layer is built; 5 matches the tf_flowers dataset used as the running
# example — change it to fit your own data.
n_classes = 5

# Xception pretrained on ImageNet, without its original classification head.
base_model = tf.keras.applications.xception.Xception(weights="imagenet", include_top=False)
# New head: global average pooling followed by a softmax over n_classes.
avg = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)
output = tf.keras.layers.Dense(n_classes, activation="softmax")(avg)
model = tf.keras.Model(inputs=base_model.input, outputs=output)


# Freezing Layers:
Initially freeze the weights of the base model and train only the top layers

In [None]:
# Mark every layer of the pretrained base as non-trainable so that only the
# newly added layers are updated during training.
for base_layer in base_model.layers:
    base_layer.trainable = False


# Fine-tuning:
After initial training, you can unfreeze some of the base model’s top layers and continue training

In [None]:
# Make the base model's layers from index 56 onward trainable again; the
# layers before that index keep their current setting.
for base_layer in base_model.layers[56:]:
    base_layer.trainable = True


# Compile and Train:
After freezing/unfreezing, compile the model and train it.

In [None]:
# Sparse categorical cross-entropy with the Adam optimizer, tracking accuracy.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)
# NOTE(review): train_set and valid_set are not defined in the visible part of
# this notebook — they must be created before this cell runs.
history = model.fit(train_set, validation_data=valid_set, epochs=10)

# Object Detection: Classification and Localization

In [14]:
import tensorflow as tf

# Number of classes for the classification head
# (example value: change this to the number of classes in your dataset).
n_classes = 10

# Pretrained Xception without its original top (classification) layer.
base_model = tf.keras.applications.Xception(weights="imagenet", include_top=False)

# Shared features: global average pooling over the base model's output.
avg = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)

# Head 1 - classification: softmax over n_classes.
class_output = tf.keras.layers.Dense(n_classes, activation="softmax")(avg)

# Head 2 - localization: 4 unbounded values read as
# (center_x, center_y, width, height).
loc_output = tf.keras.layers.Dense(4)(avg)

# Two-output model; the order [class_output, loc_output] is the order used by
# the loss, loss_weights and metrics lists below.
model = tf.keras.Model(inputs=base_model.input, outputs=[class_output, loc_output])

model.compile(
    # Cross-entropy for the class head, mean squared error for the box head.
    loss=["sparse_categorical_crossentropy", "mse"],
    # Relative importance of classification vs. localization.
    loss_weights=[0.8, 0.2],
    optimizer=tf.keras.optimizers.Adam(),
    # One metric list per output. Accuracy is only meaningful for the class
    # head, so the box head is tracked with MSE instead. A flat ["accuracy"]
    # would either be applied to the regression output as well or be rejected
    # as a length mismatch, depending on the Keras version.
    metrics=[["accuracy"], ["mse"]],
)

# Print the layer-by-layer summary of the model.
model.summary()


## Preparing the Data
Since you are working with the flowers dataset, which doesn't include bounding boxes, you will need to add these annotations manually or through an annotation tool. Once you have the bounding boxes, you will need to format your dataset to return both the class labels and the bounding boxes as a tuple.

In [15]:
import numpy as np

# Example of a batch of images with corresponding class labels and bounding boxes
def prepare_data(images, class_labels, bounding_boxes):
    """Pack one batch into the ``(inputs, (labels, boxes))`` layout.

    Each argument is converted with ``np.array``; nothing is validated,
    re-encoded or rescaled here, so the caller is responsible for the format:

    * ``class_labels``: the model in this notebook is compiled with
      ``sparse_categorical_crossentropy``, which takes integer class ids
      rather than one-hot vectors.
    * ``bounding_boxes``: one ``[x_center, y_center, width, height]`` entry
      per image.

    Returns:
        A tuple ``(image_array, (label_array, box_array))``.
    """
    inputs = np.array(images)
    targets = tuple(np.array(part) for part in (class_labels, bounding_boxes))
    return inputs, targets

# Placeholder inputs — replace the empty lists with real images and their
# annotations (one class label and one bounding box per image).
images, class_labels, bounding_boxes = [], [], []

# Convert the lists into arrays arranged as (inputs, (labels, boxes)).
images, (class_labels, bounding_boxes) = prepare_data(
    images=images,
    class_labels=class_labels,
    bounding_boxes=bounding_boxes,
)


# Non-Maximum Suppression (NMS) Example

In [16]:
import tensorflow as tf

def non_maximum_suppression(boxes, scores, threshold=0.5, max_output_size=50):
    """Select which boxes to keep after non-maximum suppression.

    Thin wrapper around ``tf.image.non_max_suppression``; ``boxes`` and
    ``scores`` are passed through unchanged.

    Args:
        boxes: Candidate boxes. NOTE(review): TensorFlow documents corner
            coordinates ``(y1, x1, y2, x2)`` here, not the
            ``(center_x, center_y, width, height)`` format used by the
            localization head in this notebook — convert before calling.
        scores: One score per box.
        threshold: IoU threshold, forwarded as ``iou_threshold``.
        max_output_size: Maximum number of boxes to keep. Defaults to 50,
            the value that used to be hard-coded.

    Returns:
        The result of ``tf.image.non_max_suppression`` (documented as the
        indices of the selected boxes).
    """
    return tf.image.non_max_suppression(
        boxes,
        scores,
        max_output_size=max_output_size,
        iou_threshold=threshold,
    )
