In [1]:
import numpy as np

# Convolution function
def convolve2d(image, kernel, stride, padding):
    # Add zero padding to the input image
    image_padded = np.pad(image, [(padding, padding), (padding, padding), (0, 0)], mode='constant', constant_values=0)
    
    kernel_height, kernel_width = kernel.shape[:2]
    padded_height, padded_width = image_padded.shape[:2]

    # Determine the output dimensions
    output_height = (padded_height - kernel_height) // stride + 1
    output_width = (padded_width - kernel_width) // stride + 1

    # Create an empty image to store the output
    new_image = np.zeros((output_height, output_width, image.shape[-1]))
    
    # Perform the convolution
    for x in range(0, padded_height - kernel_height + 1, stride):
        for y in range(0, padded_width - kernel_width + 1, stride):
            new_image[x // stride, y // stride] = np.sum(
                image_padded[x:x + kernel_height, y:y + kernel_width] * kernel, axis=(0, 1)
            )
    return new_image

# Pooling function
def pool2d(image, pool_size, stride, pooling_type='max'):
    # Determine the output dimensions
    output_height = (image.shape[0] - pool_size) // stride + 1
    output_width = (image.shape[1] - pool_size) // stride + 1

    # Create an empty image to store the output
    new_image = np.zeros((output_height, output_width, image.shape[-1]))

    # Perform the pooling
    for x in range(0, image.shape[0] - pool_size + 1, stride):
        for y in range(0, image.shape[1] - pool_size + 1, stride):
            if pooling_type == 'max':
                new_image[x // stride, y // stride] = np.max(
                    image[x:x + pool_size, y:y + pool_size], axis=(0, 1)
                )
            elif pooling_type == 'average':
                new_image[x // stride, y // stride] = np.mean(
                    image[x:x + pool_size, y:y + pool_size], axis=(0, 1)
                )
    return new_image

# ReLU Activation function
def relu(x):
    return np.maximum(0, x)

# Softmax function
def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)

# MLP implementation
def mlp(input_layer, weights, biases):
    return softmax(np.dot(weights, input_layer) + biases)

# Example input (random image data and random filter for convolution)
input_image = np.random.rand(32, 32, 3)
kernel = np.random.rand(3, 3, 3)
stride = 1
padding = 0

# Convolution 3x3
convoluted_image = convolve2d(input_image, kernel, stride, padding)

# Pooling
pooled_image = pool2d(convoluted_image, 2, 2, 'max')

# Flatten the image
flattened = pooled_image.flatten()

# MLP (assuming some random weights and biases)
weights = np.random.rand(10, flattened.size)  # 10 classes for example
biases = np.random.rand(10)

# Output from MLP
output = mlp(flattened, weights, biases)

output


array([5.50590447e-24, 3.44861937e-27, 4.65997539e-24, 8.75116818e-26,
       2.54694442e-35, 1.07614287e-37, 1.76913590e-15, 6.62121363e-20,
       2.57847755e-10, 1.00000000e+00])

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import convolve2d
from sklearn.datasets import fetch_openml
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Fetching the MNIST dataset
mnist = fetch_openml('mnist_784', version=1)
# Normalizing the data
scaler = MinMaxScaler()
X = scaler.fit_transform(mnist.data)
y = mnist.target.astype(np.uint8)

# Reshaping the data to have a channel dimension, here 1 channel because MNIST is grayscale
X_reshaped = X.reshape((-1, 28, 28, 1))

# Defining the convolution operation
def convolve2d_manual(input_data, kernel, stride=1, padding=0):
    """
    Perform a 2D convolution operation manually without using built-in convolve functions.
    
    Parameters:
    - input_data: Input data or image (2D array)
    - kernel: Convolution kernel (2D array)
    - stride: Stride of the convolution operation
    - padding: Zero-padding added to the input
    
    Returns:
    - output: The result of the convolution operation
    """
    # Adding zero padding to the input data
    if padding > 0:
        input_data = np.pad(input_data, [(padding, padding), (padding, padding)], mode='constant', constant_values=0)
    
    # Calculating the dimensions of the output
    output_height = ((input_data.shape[0] - kernel.shape[0]) // stride) + 1
    output_width = ((input_data.shape[1] - kernel.shape[1]) // stride) + 1
    output = np.zeros((output_height, output_width))
    
    # Performing the convolution operation
    for y in range(0, output_height):
        for x in range(0, output_width):
            output[y, x] = np.sum(input_data[y*stride:y*stride+kernel.shape[0], x*stride:x*stride+kernel.shape[1]] * kernel)
    return output

# Defining pooling operation
def pooling_manual(input_data, size=2, stride=2, mode='max'):
    """
    Perform a pooling operation manually without using built-in pool functions.
    
    Parameters:
    - input_data: Input data or feature map (2D array)
    - size: The size of the window to take a max or average over
    - stride: The stride of the pooling operation
    - mode: The pooling mode - 'max' for max pooling or 'avg' for average pooling
    
    Returns:
    - output: The result of the pooling operation
    """
    # Calculating the dimensions of the output
    output_height = ((input_data.shape[0] - size) // stride) + 1
    output_width = ((input_data.shape[1] - size) // stride) + 1
    output = np.zeros((output_height, output_width))
    
    # Performing the pooling operation
    for y in range(0, output_height):
        for x in range(0, output_width):
            window = input_data[y*stride:y*stride+size, x*stride:x*stride+size]
            if mode == 'max':
                output[y, x] = np.max(window)
            elif mode == 'avg':
                output[y, x] = np.mean(window)
    return output

# Creating a simple 3x3 convolution kernel for demonstration
kernel = np.array([[1, 0, -1],
                   [1, 0, -1],
                   [1, 0, -1]])

# Convolving the first image in the dataset
convolved_image = convolve2d_manual(X_reshaped[0, :, :, 0], kernel)

# Pooling the convolved image
pooled_image = pooling_manual(convolved_image, mode='max')

# Displaying the original, convolved, and pooled images
fig, ax = plt.subplots(1, 3, figsize=(12, 5))
ax[0].imshow(X_reshaped[0, :, :, 0], cmap='gray')
ax[0].set_title('Original Image')
ax[0].axis('off')

ax[1].imshow(convolved_image, cmap='gray')
ax[1].set_title('Convolved Image')
ax[1].axis('off')

ax[2].imshow(pooled_image, cmap='gray')
ax[2].set_title('Pooled Image')
ax[2].axis('off')

plt.show()

# Splitting the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Creating a Multi
# Fetching the MNIST dataset again
mnist = fetch_openml('mnist_784', version=1)

# Normalizing the data again
scaler = MinMaxScaler()
X = scaler.fit_transform(mnist.data)
y = mnist.target.astype(np.uint8)

# Splitting the dataset into training and test sets again
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Defining the MLP classifier again
mlp = MLPClassifier(hidden_layer_sizes=(64,), max_iter=10, alpha=1e-4,
                    solver='sgd', verbose=10, random_state=1,
                    learning_rate_init=.1)

# Training the MLP classifier on the training set
mlp.fit(X_train, y_train)

# Evaluating the classifier on the test set
test_score = mlp.score(X_test, y_test)

# Print the test score
print(f"The test score of the MLP classifier is: {test_score:.4f}")

  warn(


In [1]:
import numpy as np

# Assuming the image preprocessing involves normalization and maybe some distortion, 
# we will just simulate this with a function that 'processes' images by a simple normalization for now.
def preprocess_images(images):
    # Normalize image data to 0-1
    images_normalized = images / 255.0
    return images_normalized

# A simple DNN structure for simulation purposes
class SimpleDNN:
    def __init__(self, input_shape, num_classes):
        # Simulate a simple neural network with random weights
        self.weights = np.random.rand(input_shape, num_classes)
    
    def predict(self, image):
        # Perform a simple matrix multiplication and softmax to simulate prediction
        logits = np.dot(image, self.weights)
        return np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)

# Function to create and train a Multi-Column DNN
def train_mcdnn(num_columns, input_shape, num_classes, images, labels):
    # Create a list of DNNs
    dnns = [SimpleDNN(input_shape, num_classes) for _ in range(num_columns)]
    
    # Simulate training and averaging predictions
    predictions = np.zeros((len(images), num_classes))
    for dnn in dnns:
        # There is no actual training going on; we simulate this by just predicting with untrained networks
        preds = dnn.predict(images)
        predictions += preds
        
    # Average the predictions from each column
    predictions /= num_columns
    return predictions

# Example usage
num_classes = 10  # Just as an example, say we have 10 classes
num_columns = 3   # Number of columns in the MCDNN
input_shape = 784 # Example input shape (28x28 images flattened)

# Simulate some data (e.g., MNIST)
np.random.seed(0) # For reproducibility
images = np.random.rand(100, input_shape) # 100 random images
labels = np.random.randint(0, num_classes, 100) # 100 random labels

# Preprocess the images
processed_images = preprocess_images(images)

# Train the MCDNN
predictions = train_mcdnn(num_columns, input_shape, num_classes, processed_images, labels)

# The predictions are now in `predictions` and we would typically calculate accuracy or other metrics here
# However, since we have simulated data and a non-trained model, the accuracy would not be meaningful
predictions[:5]  # Show the first 5 predictions for brevity


array([[0.09771437, 0.09976008, 0.09895422, 0.10102684, 0.10103402,
        0.10196753, 0.09813206, 0.1000662 , 0.10096947, 0.10037521],
       [0.09915239, 0.10057207, 0.10038062, 0.10033083, 0.1011093 ,
        0.10141746, 0.09710186, 0.0987585 , 0.10122078, 0.0999562 ],
       [0.09928585, 0.09839743, 0.10086476, 0.1014137 , 0.1012655 ,
        0.10100233, 0.09746659, 0.09891252, 0.10050226, 0.10088906],
       [0.09964296, 0.09998277, 0.09989744, 0.10047433, 0.10058792,
        0.10179031, 0.09823333, 0.09869494, 0.10066029, 0.10003572],
       [0.0986354 , 0.09988082, 0.10045291, 0.10099037, 0.10155249,
        0.10093839, 0.09781693, 0.09908382, 0.1006316 , 0.10001726]])

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

# Load and preprocess MNIST
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

class CNNColumn(nn.Module):
    def __init__(self):
        super(CNNColumn, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)  # Adding padding to maintain the size
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)  # Define conv2 with appropriate in/out channels
        # Adjust the linear layer to match the output of the conv/pool layers
        self.fc = nn.Linear(7 * 7 * 64, 10)  # Assuming the output is 7x7 after pooling

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Adjust the view to match the fc layer input
        x = x.view(-1, 7 * 7 * 64)  # Match the dimension with the fc layer
        x = self.fc(x)
        return x

class MCDNN(nn.Module):
    def __init__(self, num_columns):
        super(MCDNN, self).__init__()
        self.columns = nn.ModuleList([CNNColumn() for _ in range(num_columns)])

    def forward(self, x):
        column_outputs = [column(x) for column in self.columns]
        averaged_output = torch.mean(torch.stack(column_outputs), dim=0)
        return averaged_output

# Instantiation, Training, Evaluation (Example Structure)
num_columns = 5
model = MCDNN(num_columns)
optimizer = torch.optim.Adam(model.parameters())
loss_fn = nn.CrossEntropyLoss()

# Training and Evaluation
num_epochs = 10  # Adjust as needed
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Use GPU if possible

model.to(device)  # Move the model to the device

trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()  # Clear gradients
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

    # Evaluation
    model.eval()  # Set model to evaluation mode
    test_loss, test_acc = 0, 0
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            test_loss += loss_fn(outputs, labels).item()
            test_acc += (outputs.argmax(dim=1) == labels).float().sum().item()

    test_loss /= len(testloader)
    test_acc /= len(testloader.dataset)
    print(f"Epoch {epoch+1}, Test Loss: {test_loss:.3f}, Test Accuracy: {test_acc:.3f}")


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:37<00:00, 267686.10it/s] 


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 42758804.74it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:13<00:00, 124436.50it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 1217208.41it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



RuntimeError: shape '[-1, 4608]' is invalid for input of size 346112

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Define the CNN architecture (similar to AlexNet)
class AlexNet(nn.Module):
    def __init__(self, num_classes=10):  # CIFAR-10 has 10 classes
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

# CIFAR-10 loading and normalization
transform = transforms.Compose([
    transforms.Resize(224),  # Resize images to size used by AlexNet
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Initialize the network
net = AlexNet(num_classes=10)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Move the model to the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net.to(device)

# Training loop
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

# Save the trained model
PATH = './cifar_net.pth'
torch.save(net.state_dict(), PATH)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Define a simple neural network with dropout
class SimpleNetWithDropout(nn.Module):
    def __init__(self):
        super(SimpleNetWithDropout, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Linear(32 * 32 * 3, 512),
            nn.ReLU(),
            nn.Dropout(p=0.5)  # 50% probability of dropping out each neuron
        )
        self.layer2 = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(p=0.5)  # 50% probability of dropping out each neuron
        )
        self.layer3 = nn.Linear(256, 10)

    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten the image
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x

# Transform and load the CIFAR-10 dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

# Initialize the network
net = SimpleNetWithDropout()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Move the model to the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net.to(device)

# Training loop
for epoch in range(10):  # Loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()  # Zero the gradient buffers

        outputs = net(inputs)  # Forward pass
        loss = criterion(outputs, labels)  # Compute loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update weights

        running_loss += loss.item()
        if i % 200 == 199:  # Print every 200 mini-batches
            print(f'[{epoch + 1}, {i + 1}] loss: {running_loss / 200}')
            running_loss = 0.0

print('Finished Training')

# Save the trained model
PATH = './cifar_net_with_dropout.pth'
torch.save(net.state_dict(), PATH)


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms

# Define the NetworkInNetwork model
class NIN(nn.Module):
    def __init__(self, num_classes=10):
        super(NIN, self).__init__()
        self.conv1 = nn.Conv2d(1, 192, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(192, 160, kernel_size=1)
        self.conv3 = nn.Conv2d(160, 96, kernel_size=1)
        self.pool = nn.AvgPool2d(kernel_size=3, stride=2)
        self.fc1 = nn.Linear(96 * 7 * 7, 384)
        self.fc2 = nn.Linear(384, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


# Load the MNIST dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST('./data', train=False, transform=transform)

# Create train and test dataloaders
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = NIN().to(device)

for epoch in range(num_epochs):
    for images, labels in train_dataloader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Test accuracy
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum()

    print(f'Epoch {epoch+1}, Accuracy: {100*correct/total:.2f}%')

# Save the trained model
torch.save(model.state_dict(), 'nin_mnist.pth')

print('Model saved successfully!')


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Define the MLP convolutional block
class MlpConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(MlpConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.relu(x)
        return x

# Define the Network In Network model
class NIN(nn.Module):
    def __init__(self, num_classes=10):
        super(NIN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            MlpConv(192, 160),
            MlpConv(160, 96),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            nn.Dropout(p=0.5),
            
            nn.Conv2d(96, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            MlpConv(192, 192),
            MlpConv(192, 192),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            nn.Dropout(p=0.5),
            
            nn.Conv2d(192, 192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            MlpConv(192, 192),
            MlpConv(192, num_classes),
        )
        self.global_avg_pooling = nn.AdaptiveAvgPool2d((1, 1))

    def forward(self, x):
        x = self.features(x)
        x = self.global_avg_pooling(x)
        x = x.view(x.size(0), -1) # Flatten the tensor
        return x

# Load and normalize the CIFAR-10 dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False)

# Instantiate the NIN model
net = NIN(num_classes=10)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0005)

# Training the model
for epoch in range(30):  # Loop over the dataset
        running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.cuda(), labels.cuda()

        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if i % 100 == 99:    # Print every 100 mini-batches
            print(f'Epoch {epoch + 1}, Batch {i + 1}, Loss: {running_loss / 100}')
            running_loss = 0.0

# Testing the model
net.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.cuda(), labels.cuda()
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

# Save the model checkpoint
torch.save(net.state_dict(), 'nin_model.pth')


# Very Deep Convolutional Networks for Large-Scale Image Recognition

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, Dense, Flatten, MaxPooling2D, BatchNormalization, ReLU, Softmax
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import cifar10

# Load dataset
(input_train, target_train), (input_test, target_test) = cifar10.load_data()

# Normalize data
input_train = input_train.astype('float32') / 255.0
input_test = input_test.astype('float32') / 255.0

# Convert class vectors to binary class matrices
target_train = to_categorical(target_train, 10)
target_test = to_categorical(target_test, 10)

# Define the model
def build_model(input_shape, num_classes):
    inputs = Input(shape=input_shape)

    # First Conv Block
    x = Conv2D(64, (3, 3), padding='same', activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = Conv2D(64, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)

    # Second Conv Block
    x = Conv2D(128, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = Conv2D(128, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)

    # Third Conv Block
    x = Conv2D(256, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = Conv2D(256, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)

    # Fourth Conv Block
    x = Conv2D(512, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = Conv2D(512, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)

    # Fifth Conv Block
    x = Conv2D(512, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = Conv2D(512, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)

    # Fully connected layers
    x = Flatten()(x)
    x = Dense(4096, activation='relu')(x)
    x = Dense(4096, activation='relu')(x)
    x = Dense(num_classes, activation='softmax')(x)

    # Create the model
    model = Model(inputs=inputs, outputs=x)
    return model

# Build the model
model = build_model((32, 32, 3), 10)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Model summary
model.summary()


# Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import categorical_crossentropy

# Load and preprocess CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Define custom BatchNormalization layer (assuming image reflects element-wise BN)
class EltwiseBatchNormalization(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(EltwiseBatchNormalization, self).__init__(**kwargs)
        self.gamma = tf.Variable(tf.ones_initializer()(shape=(32,)))
        self.beta = tf.Variable(tf.zeros_initializer()(shape=(32,)))

    def call(self, inputs):
        mean, var = tf.nn.moments(inputs, -1, keepdims=True)
        std = tf.math.sqrt(var + 1e-7)
        normalized = (inputs - mean) / std * self.gamma + self.beta
        return normalized

# Create deep neural network architecture
inputs = Input(shape=(32, 32, 3))
x = Conv2D(32, kernel_size=(3, 3), activation='relu')(inputs)
x = EltwiseBatchNormalization()(x)  # Use custom BN layer
x = Conv2D(32, kernel_size=(3, 3), activation='relu')(x)
x = EltwiseBatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.25)(x)

x = Conv2D(64, kernel_size=(3, 3), activation='relu')(x)
x = EltwiseBatchNormalization()(x)
x = Conv2D(64, kernel_size=(3, 3), activation='relu')(x)
x = EltwiseBatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.25)(x)

x = Flatten()(x)
x = Dense(128, activation='relu')(x)
outputs = Dense(10, activation='softmax')(x)

# Create and train the model
model = Model(inputs=inputs, outputs=outputs)
model.compile(loss=categorical_crossentropy, optimizer=Adam(), metrics=['accuracy'])
model.fit(x_train, y_train, epochs=20, batch_size=32, validation_data=(x_test, y_test))
model.save('batchnorm_dnn.h5')

print('Model trained and saved successfully!')


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Define a neural network with batch normalization
class BatchNormNet(nn.Module):
    def __init__(self):
        super(BatchNormNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.bn1 = nn.BatchNorm2d(6)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.bn2 = nn.BatchNorm2d(16)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.bn3 = nn.BatchNorm1d(120)
        self.fc2 = nn.Linear(120, 84)
        self.bn4 = nn.BatchNorm1d(84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.bn1(self.conv1(x))
        x = F.relu(F.max_pool2d(x, 2))
        x = self.bn2(self.conv2(x))
        x = F.relu(F.max_pool2d(x, 2))
        x = x.view(-1, self.num_flat_features(x))
        x = self.bn3(self.fc1(x))
        x = F.relu(x)
        x = self.bn4(self.fc2(x))
        x = F.relu(x)
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

# Transform and load the CIFAR-10 dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

# Instantiate the network
net = BatchNormNet()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

# Move the model to the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net.to(device)

# Training loop
for epoch in range(10):
    net.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if i % 100 == 99:
            print(f'Epoch {epoch + 1}, Batch {i + 1}, Loss: {running_loss / 100}')
            running_loss = 0.0

# Testing loop
net.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')


# Delving Deep into Recti ers: Surpassing Human-Level Performance on ImageNet Classication

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Define a custom Parametric ReLU activation
class ParametricReLU(nn.Module):
    def __init__(self, num_parameters):
        super(ParametricReLU, self).__init__()
        self.num_parameters = num_parameters
        self.alpha = nn.Parameter(torch.zeros(num_parameters))

    def forward(self, x):
        return F.relu(x) - self.alpha * F.relu(-x)

# Define a neural network with parametric ReLUs
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.prelu1 = ParametricReLU(6)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.prelu2 = ParametricReLU(16)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.prelu3 = ParametricReLU(120)
        self.fc2 = nn.Linear(120, 84)
        self.prelu4 = ParametricReLU(84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.prelu1(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = self.prelu2(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = x.view(-1, self.num_flat_features(x))
        x = self.prelu3(self.fc1(x))
        x = self.prelu4(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # All dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

# Initialize weights function based on the paper's specifications
def weights_init(m):
    if isinstance(m, nn.Conv2d):
        nn.init.normal_(m.weight, mean=0, std=0.01)
        nn.init.constant_(m.bias, 0)

# CIFAR-10 dataset transformation
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load and normalize CIFAR-10 training set
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

# Load and normalize CIFAR-10 test set
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

# Network, loss function, and optimizer
net = Net()
net.apply(weights_init)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Train the network
for epoch in range(2):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 2000 == 1999:
            print(f'[Epoch: {epoch + 1}, Batch: {i + 1}] loss: {running_loss / 2000}')
            running_loss = 0.0

# Test the network on the test data
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct // total}%')


# Rethinking the Inception Architecture for Computer Vision

In [None]:
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model


def create_inceptionv3_model(num_classes):
  # Load the InceptionV3 model without the top layer.
  base_model = InceptionV3(weights="imagenet", include_top=False)

  # Add a global average pooling layer.
  x = base_model.output
  x = GlobalAveragePooling2D()(x)

  # Add a dropout layer.
  x = Dropout(0.5)(x)

  # Add a dense layer with the specified number of classes.
  x = Dense(num_classes, activation="softmax")(x)

  # Create the final model.
  model = Model(inputs=base_model.input, outputs=x)

  # Freeze the base layers of the model.
  for layer in base_model.layers:
    layer.trainable = False

  return model

# Compile the model.
model = create_inceptionv3_model(1000)
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Print a summary of the model.
model.summary()


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Input, Concatenate, GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.applications.inception_v3 import preprocess_input

# Define the inception module
def inception_module(x,
                     filters_1x1,
                     filters_3x3_reduce,
                     filters_3x3,
                     filters_5x5_reduce,
                     filters_5x5,
                     filters_pool_proj):
    path1 = Conv2D(filters_1x1, (1, 1), padding='same', activation='relu')(x)

    path2 = Conv2D(filters_3x3_reduce, (1, 1), padding='same', activation='relu')(x)
    path2 = Conv2D(filters_3x3, (3, 3), padding='same', activation='relu')(path2)

    path3 = Conv2D(filters_5x5_reduce, (1, 1), padding='same', activation='relu')(x)
    path3 = Conv2D(filters_5x5, (5, 5), padding='same', activation='relu')(path3)

    path4 = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(x)
    path4 = Conv2D(filters_pool_proj, (1, 1), padding='same', activation='relu')(path4)

    return Concatenate(axis=-1)([path1, path2, path3, path4])

# Build the model
input_layer = Input(shape=(299, 299, 3))

x = Conv2D(32, (3, 3), strides=(2, 2), padding='valid', activation='relu')(input_layer)
x = Conv2D(32, (3, 3), padding='valid', activation='relu')(x)
x = Conv2D(64, (3, 3), padding='same', activation='relu')(x)

x = MaxPooling2D((3, 3), strides=(2, 2))(x)

x = Conv2D(80, (1, 1), padding='valid', activation='relu')(x)
x = Conv2D(192, (3, 3), padding='valid', activation='relu')(x)
x = MaxPooling2D((3, 3), strides=(2, 2))(x)

# Inception modules
x = inception_module(x, 64, 96, 128, 16, 32, 32)
x = inception_module(x, 128, 128, 192, 32, 96, 64)

x = MaxPooling2D((3, 3), strides=(2, 2))(x)

x = inception_module(x, 192, 96, 208, 16, 48, 64)
x = inception_module(x, 160, 112, 224, 24, 64, 64)
x = inception_module(x, 128, 128, 256, 24, 64, 64)
x = inception_module(x, 112, 144, 288, 32, 64, 64)
x = inception_module(x, 256, 160, 320, 32, 128, 128)

x = MaxPooling2D((3, 3), strides=(2, 2))(x)

x = inception_module(x, 256, 160, 320, 32, 128, 128)
x = inception_module(x, 384, 192, 384, 48, 128, 128)

# Global Average Pooling
x = GlobalAveragePooling2D()(x)

# Dense layer
x = Dense(2048, activation='relu')(x)

# Classifier
output = Dense(1000, activation='softmax')(x)

model = Model(input_layer, output)

# Compile the model
model.compile(optimizer=SGD(lr=0.01, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])

# Load CIFAR-10 data
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = preprocess_input(x_train)
x_test = preprocess_input(x_test)
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# Resize images to match the input size of the model
x_train = tf.image.resize(x_train, (299, 299))
x_test = tf.image.resize(x_test, (299, 299))

# Train the model
model.fit(x_train, y_train, batch_size=32, epochs=10, validation_data=(x_test, y_test))

# Evaluate the model
model.evaluate(x_test, y_test)



# Deep Residual Learning for Image Recognition

In [None]:
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms


class BottleneckBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(BottleneckBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv3 = nn.Conv2d(out_channels, out_channels * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels * 4)
        self.downsample = downsample

    def forward(self, x):
        residual = x

        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = F.relu(out)

        return out


class ResNet(nn.Module):
    def __init__(self, block, num_classes=1000):
        super(ResNet, self).__init__()
        self.in_channels = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, 64)
        self.layer2 = self._make_layer(block, 128, 128, stride=2)
        self.layer3 = self._make_layer(block, 256, 256, stride=2)
        self.layer4 = self._make_layer(block, 512, 512, stride=2)

        self.avgpool = nn.AvgPool2d(7)
        self.fc = nn.Linear(512 * 4, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels * 4:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels * 4, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * 4))
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        for i in range(1, num_blocks):
            layers.append(block(out_channels * 4, out_channels))
        self.in_channels = out_channels * 4
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.maxpool(out)

        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)

        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)

        return out


if __name__ == "__main__":
    model = ResNet(BottleneckBlock)
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.


In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.datasets as datasets
import torchvision.transforms as transforms

# Define the Residual Block
class ResidualBlock(nn.Module):
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.shortcut = nn.Sequential()

        if stride != 1 or in_channels != self.expansion*out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, self.expansion*out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*out_channels)
            )

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = self.relu(out)
        return out

# Define the ResNet
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=1000):
        super(ResNet, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.maxpool(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

# Instantiate the model
def ResNet18():
    return ResNet(ResidualBlock, [2, 2, 2, 2])

# Load CIFAR-10 dataset
transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=128, shuffle=True)

# Initialize the model and loss function
model = ResNet18()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print loss
        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))


# Identity Mappings in Deep Residual Networks

In [None]:
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms


class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        residual = x

        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = F.relu(out)

        return out


class ResNet(nn.Module):
    def __init__(self, block, num_classes=1000):
        super(ResNet, self).__init__()
        self.in_channels = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, 2)  # Change number of blocks to 2 as per the image
        self.layer2 = self._make_layer(block, 128, 2, stride=2)
        self.layer3 = self._make_layer(block, 256, 2, stride=2)
        self.layer4 = self._make_layer(block, 512, 2, stride=2)

        self.avgpool = nn.AvgPool2d(7)
        self.fc = nn.Linear(512 * 4, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels * 4:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels * 4, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * 4))
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        for i in range(1, num_blocks):
            layers.append(block(out_channels * 4, out_channels))
        self.in_channels = out_channels * 4
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.maxpool(out)

        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)

        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)

        return out


if __name__ == "__main__":
    model = ResNet(ResidualBlock)
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    trainset = datasets.CIFAR10(root="~/torch_data", train=True, download=


In [None]:
```python
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, Activation, Add, GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam

# Define the identity block
def identity_block(X, f, filters):
    F1, F2, F3 = filters
    
    X_shortcut = X

    # First component
    X = Conv2D(filters=F1, kernel_size=1, strides=(1,1), padding='valid')(X)
    X = BatchNormalization(axis=3)(X)
    X = Activation('relu')(X)

    # Second component
    X = Conv2D(filters=F2, kernel_size=f, strides=(1,1), padding='same')(X)
    X = BatchNormalization(axis=3)(X)
    X = Activation('relu')(X)

    # Third component
    X = Conv2D(filters=F3, kernel_size=1, strides=(1,1), padding='valid')(X)
    X = BatchNormalization(axis=3)(X)

    # Final step: Add shortcut value to main path, and pass it through a RELU activation
    X = Add()([X, X_shortcut])
    X = Activation('relu')(X)
    
    return X

# Define the convolutional block
def convolutional_block(X, f, filters, s=2):
    F1, F2, F3 = filters
    
    X_shortcut = X

    # First component
    X = Conv2D(F1, (1, 1), strides=(s,s))(X)
    X = BatchNormalization(axis=3)(X)
    X = Activation('relu')(X)

    # Second component
    X = Conv2D(F2, (f, f), strides=(1,1), padding='same')(X)
    X = BatchNormalization(axis=3)(X)
    X = Activation('relu')(X)

    # Third component
    X = Conv2D(F3, (1, 1), strides=(1,1), padding='valid')(X)
    X = BatchNormalization(axis=3)(X)

    # Shortcut Path
    X_shortcut = Conv2D(F3, (1, 1), strides=(s,s), padding='valid')(X_shortcut)
    X_shortcut = BatchNormalization(axis=3)(X_shortcut)

    # Final step: Add shortcut value to main path, and pass it through a RELU activation
    X = Add()([X, X_shortcut])
    X = Activation('relu')(X)
    
    return X

# Define the ResNet model
def ResNet50(input_shape=(32, 32, 3), classes=10):
    X_input = Input(input_shape)

    X = Conv2D(64, (7, 7), strides=(2, 2))(X_input)
    X = BatchNormalization(axis=3)(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((3, 3), strides=(2, 2))(X)

    X = convolutional_block(X, f=3, filters=[64, 64, 256], s=1)
    X = identity_block(X, 3, [64, 64, 256])
    X = identity_block(X, 3, [64, 64, 256])

    X = convolutional_block(X, f=3, filters=[128, 128, 512], s=2)
    X = identity_block(X, 3, [128, 128, 512])
    X = identity_block(X, 3, [128, 128, 512])
    X = identity_block(X, 3, [128, 128, 512])

    X = convolutional_block(X, f=3, filters=[256, 256, 1024], s=2)
    X = identity_block(X, 3, [256, 256, 1024])
    X = identity_block(X, 3, [256, 256, 1024])
    X = identity_block(X, 3, [256, 256, 1024])
    X = identity_block(X, 3, [256, 256, 1024])
    X = identity_block(X, 3, [256, 256, 1024])

    X = convolutional_block(X, f=3, filters=[512, 512, 2048], s=2)
    X = identity_block(X, 3, [512, 512, 2048])
    X = identity_block(X, 3, [512, 512, 2048])

    X = GlobalAveragePooling2D()(X)
    # Output layer
    X = Dense(classes, activation='softmax')(X)
    
    # Create model
    model = Model(inputs=X_input, outputs=X, name='ResNet50')

    return model

# CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# Normalize image vectors
x_train /= 255
x_test /= 255

# Convert training and test labels to one hot matrices
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# Instantiate the model
model = ResNet50(input_shape=(32, 32, 3), classes=10)

# Compile the model
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(x_train, y_train, epochs=10, batch_size=32)

# Evaluate the model
preds = model.evaluate(x_test, y_test)
print('Loss = ' + str(preds[0]))
print('Test Accuracy = ' + str(preds[1]))


# Deep Networks with Stochastic Depth

In [None]:
import tensorflow as tf
from tensorflow.keras import layers


class StochasticDepth(layers.Wrapper):
    def __init__(self, layer, p, **kwargs):
        super(StochasticDepth, self).__init__(layer, **kwargs)
        self.p = p

    def call(self, inputs, training=None):
        if training:
            bernoulli = tf.random.uniform(tf.shape(inputs), 0, 1)
            mask = tf.cast(bernoulli >= self.p, tf.float32)
            return self.layer(inputs * mask)
        else:
            return self.layer(inputs)


def create_resnet_block(filters, stride=1, p=0.5):
    conv1 = layers.Conv2D(filters, kernel_size=3, strides=stride, padding="same")
    bn1 = layers.BatchNormalization()
    relu1 = layers.ReLU()
    conv2 = layers.Conv2D(filters, kernel_size=3, strides=1, padding="same")
    bn2 = layers.BatchNormalization()
    shortcut = layers.Conv2D(filters, kernel_size=1, strides=stride, padding="same")
    bn3 = layers.BatchNormalization()

    residual = StochasticDepth(relu1(bn1(conv1(inputs))))(inputs)
    residual = layers.add([residual, StochasticDepth(conv2(bn2(relu1(residual))))(residual)])
    output = StochasticDepth(layers.add([shortcut(bn3(inputs)), residual]))(inputs)
    return output


def create_resnet_model(num_classes, p=0.5):
    inputs = layers.Input(shape=(32, 32, 3))
    conv1 = layers.Conv2D(64, kernel_size=7, strides=2, padding="same")
    bn1 = layers.BatchNormalization()
    relu1 = layers.ReLU()
    maxpool1 = layers.MaxPooling2D(pool_size=(3, 3), strides=2, padding="same")

    block1 = create_resnet_block(64)
    block2 = create_resnet_block(64, stride=2)
    block3 = create_resnet_block(128, stride=2)
    block4 = create_resnet_block(256, stride=2)

    avgpool = layers.AvgPool2D()
    fc = layers.Dense(num_classes, activation="softmax")

    outputs = fc(avgpool(block4(block3(block2(block1(relu1(bn1(conv1(inputs))))))))))
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model


model = create_resnet_model(100, p=0.2)
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_test, y_test))


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10

# Define the Stochastic Depth Block
class StochasticDepthBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride, p):
        super(StochasticDepthBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.shortcut = nn.Sequential()
        
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
        
        self.p = p
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.stride = stride

    def forward(self, x):
        if self.training:
            if torch.rand(1).item() < self.p:
                return self.shortcut(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += self.shortcut(x)
        out = self.relu(out)
        return out

# Define the ResNet with Stochastic Depth
class ResNetStochastic(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNetStochastic, self).__init__()
        self.in_channels = 16
        self.conv = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1, p=1.0)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2, p=0.8)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2, p=0.6)
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride, p):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride, p))
            self.in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

# Model architecture
def ResNet110():
    return ResNetStochastic(StochasticDepthBlock, [18, 18, 18])

# Load CIFAR-10 dataset
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

train_dataset = CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform_test)

train_loader = DataLoader(dataset=train_dataset, batch_size=128, shuffle=True, num_workers=2)
test_loader = DataLoader(dataset=test_dataset, batch_size=100, shuffle=False, num_workers=2)

# Initialize the model
model = ResNet110().cuda()

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD()
model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

# Learning rate adjustment
def adjust_learning_rate(optimizer, epoch):
    lr = 0.1
    if epoch >= 81:
        lr /= 10
    if epoch >= 122:
        lr /= 10
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

# Training the model
def train(epoch):
    print(f'\nEpoch: {epoch}')
    model.train()
    adjust_learning_rate(optimizer, epoch)
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        
        print(f'Loss: {train_loss/(batch_idx+1):.3f} | Acc: {100.*correct/total:.3f}% ({correct}/{total})')

# Testing the model
def test(epoch):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            inputs, targets = inputs.cuda(), targets.cuda()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            
            print(f'Loss: {test_loss/(batch_idx+1):.3f} | Acc: {100.*correct/total:.3f}% ({correct}/{total})')

for epoch in range(0, 164):
    train(epoch)
    test(epoch)


# Wide Residual Networks

In [None]:
import tensorflow as tf
from tensorflow.keras import layers


class WideResBlock(layers.Layer):
    def __init__(self, in_channels, out_channels, stride=1, k=1, **kwargs):
        super(WideResBlock, self).__init__(**kwargs)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.stride = stride
        self.k = k

        self.conv1 = layers.Conv2D(in_channels * k, kernel_size=3, strides=stride, padding="same")
        self.bn1 = layers.BatchNormalization()
        self.relu1 = layers.ReLU()

        self.conv2 = layers.Conv2D(out_channels, kernel_size=3, strides=1, padding="same")
        self.bn2 = layers.BatchNormalization()

        if in_channels != out_channels or stride != 1:
            self.shortcut = layers.Conv2D(out_channels, kernel_size=1, strides=stride, padding="same")
            self.bn3 = layers.BatchNormalization()
        else:
            self.shortcut = layers.Lambda(lambda x: x)
            self.bn3 = None

    def call(self, inputs, training=None):
        out = self.conv1(inputs)
        out = self.bn1(out)
        out = self.relu1(out)

        out = self.conv2(out)
        out = self.bn2(out)

        shortcut = self.shortcut(inputs)
        if self.bn3 is not None:
            shortcut = self.bn3(shortcut)

        out += shortcut
        return out


def create_wrn_model(num_classes, depth=40, k=10, width=4):
    inputs = layers.Input(shape=(32, 32, 3))
    conv1 = layers.Conv2D(16, kernel_size=3, strides=1, padding="same")
    bn1 = layers.BatchNormalization()
    relu1 = layers.ReLU()

    # First Residual Block with 16 output channels
    out = WideResBlock(16, 16, k=k)(relu1(bn1(conv1(inputs))))

    # Residual Blocks based on the image
    for i in range(depth):
        out = WideResBlock(16 * width**i, 16 * width**(i+1), stride=2 if (i // 6) % 2 == 1 else 1, k=k)(out)

    avgpool = layers.AvgPool2D()
    fc = layers.Dense(num_classes, activation="softmax")(avgpool(out))

    model = tf.keras.Model(inputs=inputs, outputs=fc)
    return model


model = create_wrn_model(100, depth=40, k=10, width=4)  # As per the image
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_test, y_test))


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets

# Define the Wide Residual Block
class WideResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride, dropRate=0.0):
        super(WideResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.dropout = nn.Dropout(p=dropRate)
        self.equalInOut = (in_channels == out_channels)
        self.convShortcut = (not self.equalInOut) and nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, bias=False) or None

    def forward(self, x):
        if not self.equalInOut:
            x = self.relu(self.bn1(x))
        else:
            out = self.relu(self.bn1(x))
        out = self.relu(self.bn2(self.conv1(out if self.equalInOut else x)))
        if self.dropout is not None:
            out = self.dropout(out)
        out = self.conv2(out)
        return torch.add(x if self.equalInOut else self.convShortcut(x), out)

# Define the Wide ResNet
class WideResNet(nn.Module):
    def __init__(self, depth, widen_factor, dropRate, num_classes):
        super(WideResNet, self).__init__()
        nChannels = [16, 16*widen_factor, 32*widen_factor, 64*widen_factor]
        assert ((depth-4) % 6 == 0), 'Wide-resnet depth should be 6n+4'
        n = (depth-4) // 6
        block = WideResidualBlock
        # 1st conv before any network block
        self.conv1 = nn.Conv2d(3, nChannels[0], kernel_size=3, stride=1, padding=1, bias=False)
        # 1st block
        self.block1 = self._wide_layer(block, nChannels[0], nChannels[1], n, stride=1, dropRate=dropRate)
        # 2nd block
        self.block2 = self._wide_layer(block, nChannels[1], nChannels[2], n, stride=2, dropRate=dropRate)
        # 3rd block
        self.block3 = self._wide_layer(block, nChannels[2], nChannels[3], n, stride=2, dropRate=dropRate)
        # global average pooling and classifier
        self.bn1 = nn.BatchNorm2d(nChannels[3])
        self.relu = nn.ReLU(inplace=True)
        self.fc = nn.Linear(nChannels[3], num_classes)
        self.nChannels = nChannels[3]

    def _wide_layer(self, block, in_channels, out_channels, num_blocks, stride, dropRate):
        layers = []
        for i in range(num_blocks):
            stride = stride if i == 0 else 1
            layers.append(block(in_channels, out_channels, stride, dropRate))
            in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.block1(out)
        out = self.block2(out)
        out = self.block3(out)
        out = self.relu(self.bn1(out))
        out = torch.nn.functional.avg_pool2d(out, 8)
        out = out.view(-1, self.nChannels)
        return self.fc(out)

# Example instantiation for WRN-28-10 with dropout
model = WideResNet(depth=28, widen_factor=10, dropRate=0.3, num_classes=10)

# Data loading
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,


# Aggregated Residual Transformations for Deep Neural Networks

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms


class BottleneckBlock(nn.Module):
    def __init__(self, in_channels, out_channels, cardinality, strides=1, dilation=1):
        super(BottleneckBlock, self).__init__()
        group_width = out_channels // cardinality

        self.conv1 = nn.Conv2d(in_channels, group_width, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(group_width)
        self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, padding=dilation, stride=strides, bias=False)
        self.bn2 = nn.BatchNorm2d(group_width)
        self.conv3 = nn.Conv2d(group_width, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()
        if in_channels != out_channels or strides != 1:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, padding=0, stride=strides, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNeXt(nn.Module):
    def __init__(self, block, cardinality, base_width, num_classes=1000):
        super(ResNeXt, self).__init__()
        self.cardinality = cardinality
        self.base_width = base_width

        self.conv1 = nn.Conv2d(3, self.base_width, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.base_width)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, self.base_width, self.base_width, cardinality, 3)
        self.layer2 = self._make_layer(block, self.base_width * 2, self.base_width * 2, cardinality, 4, stride=2)
        self.layer3 = self._make_layer(block, self.base_width * 4, self.base_width * 4, cardinality, 6, stride=2)
        self.layer4 = self._make_layer(block, self.base_width * 8, self.base_width * 8, cardinality, 3, stride=2)

        self.avgpool = nn.AvgPool2d(7)
        self.fc = nn.Linear(self.base_width * 8, num_classes)

    def _make_layer(self, block, planes, in_planes, cardinality, num_blocks, stride=1):
        layers = []
        for i in range(num_blocks):
            if i == 0:
                layers.append(block(in_planes, planes, cardinality, stride))
            else:
                layers.append(block(planes, planes, cardinality))
            in_planes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.maxpool(out)

        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)

        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

# Define the Aggregated Residual Transformation Block
class AggregatedResidualTransformation(nn.Module):
    def __init__(self, in_channels, out_channels, stride, groups, reduction):
        super(AggregatedResidualTransformation, self).__init__()
        self.reduced_channels = out_channels // reduction
        self.conv1 = nn.Conv2d(in_channels, self.reduced_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(self.reduced_channels)
        self.conv2 = nn.Conv2d(self.reduced_channels, self.reduced_channels, kernel_size=3,
                               stride=stride, padding=1, groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(self.reduced_channels)
        self.conv3 = nn.Conv2d(self.reduced_channels, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
    
    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = self.relu(out)
        return out

# Define the ResNeXt Model
class ResNeXt(nn.Module):
    def __init__(self, block, num_blocks, cardinality, bottleneck_width, num_classes=10):
        super(ResNeXt, self).__init__()
        self.cardinality = cardinality
        self.bottleneck_width = bottleneck_width
        self.in_channels = 64
        
        self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 256, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 512, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 1024, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 2048, num_blocks[3], stride=2)
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(2048, num_classes)
        
    def _make_layer(self, block, out_channels, num_blocks, stride):
        layers = []
        for i in range(num_blocks):
            layers.append(block(self.in_channels, out_channels, stride if i == 0 else 1, self.cardinality, self.bottleneck_width))
            self.in_channels = out_channels
        return nn.Sequential(*layers)
    
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

# Define ResNeXt model parameters
def ResNeXt50_2x32d():
    return ResNeXt(AggregatedResidualTransformation, [3, 4, 6, 3], cardinality=32, bottleneck_width=2)

# Prepare the CIFAR10 dataset
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size


# Densely Connected Convolutional Networks

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms


class DenseBlock(nn.Module):
    def __init__(self, in_channels, growth_rate, num_convs):
        super(DenseBlock, self).__init__()
        layers = []
        for _ in range(num_convs):
            layers.append(nn.Conv2d(in_channels, growth_rate, kernel_size=3, padding=1, bias=False))
            layers.append(nn.BatchNorm2d(growth_rate))
            layers.append(nn.ReLU(inplace=True))
            in_channels += growth_rate
        self.denseblock = nn.Sequential(*layers)

    def forward(self, x):
        out = torch.cat([x, self.denseblock(x)], 1)
        return out


class TransitionDown(nn.Sequential):
    def __init__(self, in_channels, out_channels):
        super(TransitionDown, self).__init__()
        self.add_module('conv', nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False))
        self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))


class DenseNet(nn.Module):
    def __init__(self, growth_rate, block_config, num_classes=1000):
        super(DenseNet, self).__init__()
        self.growth_rate = growth_rate

        num_convs = [3, 3, 3, 3]  # As per the image
        num_filters = 2 * growth_rate  # As per the image

        self.conv1 = nn.Conv2d(3, num_filters, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(num_filters)
        self.relu = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.denseblocks = nn.ModuleList()
        for i in range(len(num_convs)):
            num_in_filters = num_filters if i == 0 else num_filters + growth_rate * i
            num_out_filters = num_filters + growth_rate * (sum(num_convs[:i + 1]) - 1)
            self.denseblocks.append(DenseBlock(num_in_filters, growth_rate, num_convs[i]))
            if i != len(num_convs) - 1:
                self.denseblocks.append(TransitionDown(num_out_filters, num_out_filters // 2))
                num_filters = num_out_filters // 2

        self.fc = nn.Linear(num_filters, num_classes)

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.pool1(out)

        for denseblock, transition in zip(self.denseblocks[:-2], self.denseblocks[2::2]):
            out = denseblock(out)
            out = transition(out)

        out = self.denseblocks[-1](out)
        out = F.avg_pool2d(out, kernel_size=4)
        out = torch. flatten(out, 1)
        out = self.fc(out)

        return out


model = DenseNet(growth_rate=12, block_config=[4, 4, 4, 3], num_classes=1000)  # As per the image
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

trainset = datasets.CIFAR10(root="~/torch_data", train=True, download=True, transform=transform)
trainloader = torch.utils.data.


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader

# Define the Dense Layer
class DenseLayer(nn.Module):
    def __init__(self, in_channels, growth_rate):
        super(DenseLayer, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.conv1 = nn.Conv2d(in_channels, 4 * growth_rate, kernel_size=1, stride=1, bias=False)
        self.bn2 = nn.BatchNorm2d(4 * growth_rate)
        self.conv2 = nn.Conv2d(4 * growth_rate, growth_rate, kernel_size=3, stride=1, padding=1, bias=False)

    def forward(self, x):
        out = self.conv1(F.relu(self.bn1(x)))
        out = self.conv2(F.relu(self.bn2(out)))
        out = torch.cat([x, out], 1)
        return out

# Define the Dense Block
class DenseBlock(nn.Module):
    def __init__(self, num_layers, in_channels, growth_rate):
        super(DenseBlock, self).__init__()
        layers = []
        for i in range(num_layers):
            layers.append(DenseLayer(in_channels + i * growth_rate, growth_rate))
        self.layer = nn.Sequential(*layers)
    
    def forward(self, x):
        return self.layer(x)

# Define the Transition Layer
class TransitionLayer(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(TransitionLayer, self).__init__()
        self.bn = nn.BatchNorm2d(in_channels)
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        out = self.conv(F.relu(self.bn(x)))
        out = self.pool(out)
        return out

# Define the DenseNet
class DenseNet(nn.Module):
    def __init__(self, growth_rate, block_config, num_classes=10):
        super(DenseNet, self).__init__()
        num_blocks = len(block_config)
        num_channels = 2 * growth_rate
        
        self.conv1 = nn.Conv2d(3, num_channels, kernel_size=3, stride=1, padding=1, bias=False)
        
        blocks = []
        for i, num_layers in enumerate(block_config):
            blocks.append(DenseBlock(num_layers=num_layers, in_channels=num_channels, growth_rate=growth_rate))
            num_channels += num_layers * growth_rate
            if i != num_blocks - 1:
                blocks.append(TransitionLayer(in_channels=num_channels, out_channels=num_channels // 2))
                num_channels = num_channels // 2
        
        self.blocks = nn.Sequential(*blocks)
        self.bn = nn.BatchNorm2d(num_channels)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(num_channels, num_classes)

    def forward(self, x):
        out = self.conv1(x)
        out = self.blocks(out)
        out = self.bn(out)
        out = F.relu(out)
        out = self.pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

# Model architecture
def DenseNet121():
    return DenseNet(growth_rate=32, block_config=[6, 12, 24, 16])

# Dataset and DataLoader
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

train_dataset = CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=100, shuffle=False)

# Instantiate the model, loss function, and optimizer
model = DenseNet121().cuda()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training the model
for epoch in range(10):
    model.train()
    running_loss = 0.0
    for i, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/10], Loss: {running_loss/len(train_loader)}')

# Testing the
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.cuda(), labels.cuda()
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Test Accuracy of the model on the 10000 test images: {accuracy}%')


# Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning

In [None]:
import tensorflow as tf

model = tf.keras.applications.InceptionResNetV2(weights='imagenet')


In [None]:
import torchvision.models as models

model = models.inception_v3(pretrained=True)


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms


class InceptionA(nn.Module):
    def __init__(self, in_channels):
        super(InceptionA, self).__init__()
        self.branch1x1 = nn.Conv2d(in_channels, 64, kernel_size=1, bias=False)
        self.bn1x1 = nn.BatchNorm2d(64)

        self.branch3x3_reduce = nn.Conv2d(in_channels, 96, kernel_size=1, bias=False)
        self.bn3x3_reduce = nn.BatchNorm2d(96)
        self.branch3x3 = nn.Conv2d(96, 128, kernel_size=3, padding=1, bias=False)
        self.bn3x3 = nn.BatchNorm2d(128)

        self.branch1x3_reduce = nn.Conv2d(in_channels, 16, kernel_size=1, bias=False)
        self.bn1x3_reduce = nn.BatchNorm2d(16)
        self.branch1x3 = nn.Conv2d(16, 32, kernel_size=3, padding=1, bias=False)
        self.bn1x3 = nn.BatchNorm2d(32)

        self.branch2x2_reduce = nn.Conv2d(in_channels, 32, kernel_size=1, bias=False)
        self.bn2x2_reduce = nn.BatchNorm2d(32)
        self.branch2x2 = nn.Conv2d(32, 32, kernel_size=3, padding=1, bias=False)
        self.bn2x2 = nn.BatchNorm2d(32)

    def forward(self, x):
        branch1x1 = self.bn1x1(self.branch1x1(x))

        branch3x3 = self.bn3x3(self.branch3x3(self.bn3x3_reduce(self.branch3x3_reduce(x))))

        branch1x3 = self.bn1x3(self.branch1x3(self.bn1x3_reduce(self.branch1x3_reduce(x))))

        branch2x2 = self.bn2x2(self.branch2x2(self.bn2x2_reduce(self.branch2x2_reduce(x))))

        outputs = [branch1x1, branch3x3, branch1x3, branch2x2]
        return F.concat(outputs, 1)


class InceptionC(nn.Module):
    def __init__(self, in_channels):
        super(InceptionC, self).__init__()
        self.branch1x1 = nn.Conv2d(in_channels, 192, kernel_size=1, bias=False)
        self.bn1x1 = nn.BatchNorm2d(192)

        self.branch3x3_reduce = nn.Conv2d(in_channels, 256, kernel_size=1, bias=False)
        self.bn3x3_reduce = nn.BatchNorm2d(256)
        self.branch3x3_0 = nn.Conv2d(256, 384, kernel_size=1, bias=False)
        self.bn3x3_0 = nn.BatchNorm2d(384)
        self.branch3x3_1 = nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False)
        self.bn3x3_1 = nn.BatchNorm2d(256)

        self.branch1x3_reduce = nn.Conv2d(in_channels, 384, kernel_size=1, bias=False)
        self.bn1x3_reduce = nn.BatchNorm2d(384)
        self.branch1x3_0 = nn.Conv2d(384, 256, kernel_size=1, bias=False)
        self.branch1x3_1 = nn.Conv2d(384, 256, kernel_size=3, padding=1, bias=False) 
        self.bn1x3_1 = nn.BatchNorm2d(256)

        self.branchpool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.branch3x3_pool = nn.Conv2d(in_channels, 256, kernel_size=1, bias=False)
        self.bn3x3_pool = nn.BatchNorm2d(256)


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPooling2D, Concatenate

def inception_stem(input_tensor):
    # Conv and Max Pooling layers as per Inception-v4 and Inception-ResNet-v1 & v2 stem structure
    x = Conv2D(32, (3, 3), strides=(2, 2), padding='valid')(input_tensor)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(32, (3, 3), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(64, (3, 3))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    x = Conv2D(80, (1, 1), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(192, (3, 3), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    # Assuming 'input_tensor' is of shape (299, 299, 3)
    # You would typically return 'x' to feed into the rest of the network
    return x

# Define the model
input_tensor = tf.keras.Input(shape=(299, 299, 3))
output_tensor = inception_stem(input_tensor)

model = tf.keras.Model(inputs=input_tensor, outputs=output_tensor)

import tensorflow as tf
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, AveragePooling2D, Concatenate

def inception_a(input_tensor):
    # Branch 1: 96 filters of 1x1 conv
    branch1 = Conv2D(96, (1, 1), padding='same', activation='relu')(input_tensor)
    
    # Branch 2: 64 filters of 1x1 conv, followed by 96 filters of 3x3 conv
    branch2 = Conv2D(64, (1, 1), padding='same', activation='relu')(input_tensor)
    branch2 = Conv2D(96, (3, 3), padding='same', activation='relu')(branch2)
    
    # Branch 3: 64 filters of 1x1 conv, followed by 96 filters of 3x3 conv, followed by 96 filters of 3x3 conv
    branch3 = Conv2D(64, (1, 1), padding='same', activation='relu')(input_tensor)
    branch3 = Conv2D(96, (3, 3), padding='same', activation='relu')(branch3)
    branch3 = Conv2D(96, (3, 3), padding='same', activation='relu')(branch3)
    
    # Branch 4: 3x3 average pooling, followed by 96 filters of 1x1 conv
    branch4 = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(input_tensor)
    branch4 = Conv2D(96, (1, 1), padding='same', activation='relu')(branch4)
    
    # Concatenating the branches
    x = Concatenate()([branch1, branch2, branch3, branch4])
    return x

# Example usage:
input_tensor = tf.keras.Input(shape=(299, 299, 3))
output_tensor = inception_a(input_tensor)
model = tf.keras.Model(inputs=input_tensor, outputs=output_tensor)
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPooling2D, AveragePooling2D, concatenate

def reduction_a(input_tensor, k=192, l=224, m=256, n=384):
    # Implementation of Reduction-A block for Inception-v4 and Inception-ResNet-v1 & v2
    # As per the diagram you've provided, the Reduction-A block is composed of several branches
    # that are concatenated together

    # Branch 1: 3x3 Max Pooling
    branch_pool = MaxPooling2D((3, 3), strides=(2, 2), padding='valid')(input_tensor)

    # Branch 2: 3x3 Conv
    branch3x3 = Conv2D(n, (3, 3), strides=(2, 2), padding='valid')(input_tensor)
    branch3x3 = BatchNormalization()(branch3x3)
    branch3x3 = Activation('relu')(branch3x3)

    # Branch 3: 1x1 Conv -> 3x3 Conv -> 3x3 Conv
    branch3x3dbl = Conv2D(k, (1, 1))(input_tensor)
    branch3x3dbl = BatchNormalization()(branch3x3dbl)
    branch3x3dbl = Activation('relu')(branch3x3dbl)

    branch3x3dbl = Conv2D(l, (3, 3))(branch3x3dbl)
    branch3x3dbl = BatchNormalization()(branch3x3dbl)
    branch3x3dbl = Activation('relu')(branch3x3dbl)

    branch3x3dbl = Conv2D(m, (3, 3), strides=(2, 2), padding='valid')(branch3x3dbl)
    branch3x3dbl = BatchNormalization()(branch3x3dbl)
    branch3x3dbl = Activation('relu')(branch3x3dbl)

    # Concatenate all the branches
    x = concatenate([branch_pool, branch3x3, branch3x3dbl], axis=3)

    return x

# Usage example
input_tensor = tf.keras.Input(shape=(299, 299, 3))
output_tensor = reduction_a(input_tensor)
model = tf.keras.Model(inputs=input_tensor, outputs=output_tensor)
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dropout, Dense
from tensorflow.keras.models import Model

def inception_v4(input_shape):
    input_tensor = Input(shape=input_shape)

    # Build the stem block
    x = stem(input_tensor)

    # Build Inception-A blocks
    # x = inception_a(x)

    # Build Reduction-A block
    # x = reduction_a(x)

    # Build Inception-B blocks
    # x = inception_b(x)

    # Build Reduction-B block
    # x = reduction_b(x)

    # Build Inception-C blocks
    # x = inception_c(x)

    # Final pooling and prediction layers
    x = GlobalAveragePooling2D(name='avg_pool')(x)
    x = Dropout(0.8)(x)
    x = Dense(1000, activation='softmax')(x)

    # Create model
    model = Model(input_tensor, x, name='inception_v4')
    return model
