In [49]:
import numpy as np

def max_pooling(input_matrix, kernel_size, padding, stride):
    """
    Apply 2-D max pooling to a single-channel or multi-channel array.

    Args:
    input_matrix (numpy.ndarray): Input with shape (height, width) or
        (height, width, channels). Each channel is pooled independently.
    kernel_size (tuple): Pooling window as (kernel_height, kernel_width).
    padding (str): 'same' pads the borders so the output has the same height
        and width as the input; any other value applies no padding.
    stride (int or tuple): Step between windows: a single int used for both
        axes, or a (stride_height, stride_width) pair.

    Returns:
    numpy.ndarray: Float array of window maxima with the same number of
        dimensions as the input.

    Raises:
    ValueError: If the input is not 2-D or 3-D, a stride is smaller than 1,
        or the kernel is larger than the (padded) input.

    Note:
    'same' keeps output size == input size for every stride. With stride > 1
    that needs so much padding that a window can cover padding only; such
    positions come out as -inf because they contain no input value.
    """
    data = np.asarray(input_matrix, dtype=float)
    if data.ndim not in (2, 3):
        raise ValueError(
            "input_matrix must have shape (height, width) or (height, width, channels)"
        )

    # Work on a 3-D view internally; a 2-D input gets its channel axis removed again on return.
    single_channel = data.ndim == 2
    if single_channel:
        data = data[:, :, np.newaxis]

    kernel_height, kernel_width = kernel_size

    # Accept both an int stride and a (rows, cols) pair.
    if isinstance(stride, (tuple, list)):
        stride_height, stride_width = stride
    else:
        stride_height = stride_width = stride
    if stride_height < 1 or stride_width < 1:
        raise ValueError("stride must be a positive integer (or a pair of them)")

    if padding == 'same':
        rows, cols = data.shape[:2]

        # Total padding needed so that the output has as many rows/columns as the input.
        padding_height = max(0, (rows - 1) * stride_height + kernel_height - rows)
        padding_width = max(0, (cols - 1) * stride_width + kernel_width - cols)

        # Split the padding between both sides; the odd unit goes to bottom/right.
        padding_top = padding_height // 2
        padding_bottom = padding_height - padding_top
        padding_left = padding_width // 2
        padding_right = padding_width - padding_left

        # Pad with -inf, not 0: a zero border would win the max against negative inputs.
        data = np.pad(
            data,
            pad_width=((padding_top, padding_bottom), (padding_left, padding_right), (0, 0)),
            mode='constant',
            constant_values=-np.inf,
        )

    input_height, input_width, num_channels = data.shape
    if kernel_height > input_height or kernel_width > input_width:
        raise ValueError("kernel_size is larger than the (padded) input")

    output_height = (input_height - kernel_height) // stride_height + 1
    output_width = (input_width - kernel_width) // stride_width + 1
    output_matrix = np.empty((output_height, output_width, num_channels))

    for out_row in range(output_height):
        top = out_row * stride_height
        for out_col in range(output_width):
            left = out_col * stride_width
            window = data[top:top + kernel_height, left:left + kernel_width, :]
            # Reduce over the two spatial axes only, keeping one maximum per channel.
            output_matrix[out_row, out_col, :] = window.max(axis=(0, 1))

    return output_matrix[:, :, 0] if single_channel else output_matrix

# Demo: pool a 5x5 single-channel grid holding the values 1..25 (row-major).
input_matrix = np.arange(1, 26).reshape(5, 5)

# 3x3 window, 'same' padding, unit stride.
kernel_size, padding, stride = (3, 3), 'same', 1

result = max_pooling(
    input_matrix,
    kernel_size,
    padding,
    stride,
)
print(result)


[[ 7.  8.  9. 10. 10.]
 [12. 13. 14. 15. 15.]
 [17. 18. 19. 20. 20.]
 [22. 23. 24. 25. 25.]
 [22. 23. 24. 25. 25.]]


In [50]:
import numpy as np

def convolution_layer_with_padding(input_data, kernel, stride, activation='relu'):
    """
    Apply one multi-channel kernel to the input (sliding-window multiply-and-sum,
    kernel not flipped), zero-padding the borders so that the output keeps the
    input's height and width, then optionally apply ReLU.

    Args:
    input_data (numpy.ndarray): Input data with shape (height, width, num_input_channels).
    kernel (numpy.ndarray): Convolutional kernel with shape (kernel_height, kernel_width, num_input_channels).
    stride (int): Stride for the convolution operation.
    activation (str): Activation function ('relu' or None).

    Returns:
    numpy.ndarray: Float array of shape (height, width, 1) holding the single
        output channel after convolution and activation.

    Raises:
    ValueError: If the kernel's channel count differs from the input's, or if
        `activation` is neither 'relu' nor None.
    """

    input_height, input_width, num_input_channels = input_data.shape
    kernel_height, kernel_width, kernel_channels = kernel.shape

    # A mismatch would otherwise either drop kernel channels silently or fail with an IndexError.
    if kernel_channels != num_input_channels:
        raise ValueError(
            f"kernel has {kernel_channels} channel(s) but input_data has {num_input_channels}"
        )
    if activation not in ('relu', None):
        raise ValueError(f"unsupported activation: {activation!r} (expected 'relu' or None)")

    # The output keeps the spatial size of the input.
    output_height = input_height
    output_width = input_width

    # Total padding required so that output_height x output_width windows fit at this stride.
    padding_height = (output_height - 1) * stride + kernel_height - input_height
    padding_width = (output_width - 1) * stride + kernel_width - input_width

    # Split between the two sides; the odd unit goes to bottom/right.
    padding_top = padding_height // 2
    padding_bottom = padding_height - padding_top
    padding_left = padding_width // 2
    padding_right = padding_width - padding_left

    # Zero-pad the spatial axes only; the channel axis is left untouched.
    padded_input_data = np.pad(
        input_data,
        ((padding_top, padding_bottom), (padding_left, padding_right), (0, 0)),
        mode='constant',
    )

    # Initialize the output feature map
    output = np.zeros((output_height, output_width, 1))  # One output channel

    for y in range(output_height):
        row_start = y * stride
        for x in range(output_width):
            col_start = x * stride
            input_region = padded_input_data[
                row_start:row_start + kernel_height,
                col_start:col_start + kernel_width,
                :,
            ]
            # Multiply-and-sum over the window across all channels at once.
            conv_result = np.sum(input_region * kernel)

            if activation == 'relu':
                conv_result = max(0, conv_result)

            output[y, x, 0] = conv_result

    return output

# # Example usage with a 5x5x3 input (the output keeps the input's height and width: 5x5x1):
# input_data = np.random.randint(0, 255, size=(5, 5, 3))

# # Single-channel kernel (3x3x3)
# kernel = np.array([[[1, 1, 1],
#                     [1, 1, 1],
#                     [1, 1, 1]],

#                    [[-1, -1, -1],
#                     [-1, -1, -1],
#                     [-1, -1, -1]],

#                    [[0, 0, 0],
#                     [0, 0, 0],
#                     [0, 0, 0]]])


# output = convolution_layer_with_padding(input_data, kernel, stride=1, activation='relu')
# # print(output.shape)  # Output shape will be (5, 5, 1)

# print("Input Shape:", input_data.shape)
# print("Output Shape:", output.shape)

# Example usage: a 5x5x1 input keeps its spatial size, giving a 5x5x1 output.
# A seeded generator keeps the example reproducible across kernel restarts.
input_data = np.random.default_rng(seed=0).integers(0, 255, size=(5, 5, 1))  # 5x5x1 input

# Alternative single-channel kernel (3x3x1); the reshape puts the channel axis last.
# kernel = np.array([[1, -1, 0],
#                    [1, -1, 0],
#                    [1, -1, 0]]).reshape(3, 3, 1)  # 3x3x1 kernel

kernel = np.array([[[1]]])  # 1x1x1 kernel

output = convolution_layer_with_padding(input_data, kernel, stride=1, activation='relu')

# The layer pads so that height and width are preserved and emits one channel.
assert output.shape == (5, 5, 1)

print("Input Shape:", input_data.shape)
print("Output Shape:", output.shape)

Input Shape: (5, 5, 1)
Output Shape: (5, 5, 1)


In [51]:
def inception_module(input_layer, filter1x1, filter3x3, filter5x5, name=None):
    """
    Run three convolution branches and a max-pooling branch over the same input
    and concatenate their outputs along the channel axis.

    Args:
    input_layer (numpy.ndarray): Input with shape (height, width, num_channels).
    filter1x1 (numpy.ndarray): Kernel for the first branch, shape
        (kernel_height, kernel_width, num_channels). Expected to be 1x1
        spatially (not enforced here).
    filter3x3 (numpy.ndarray): Kernel for the second branch (expected 3x3, not enforced).
    filter5x5 (numpy.ndarray): Kernel for the third branch (expected 5x5, not enforced).
    name (str): Currently unused; kept so existing callers need not change.

    Returns:
    numpy.ndarray: Array of shape (height, width, 3 + num_channels): one channel
        per convolution branch followed by the pooled input channels.
    """
    # 1x1 Convolution
    conv1x1 = convolution_layer_with_padding(input_layer, filter1x1, stride=1, activation='relu')

    # 3x3 Convolution
    conv3x3 = convolution_layer_with_padding(input_layer, filter3x3, stride=1, activation='relu')

    # 5x5 Convolution
    conv5x5 = convolution_layer_with_padding(input_layer, filter5x5, stride=1, activation='relu')

    # Max Pooling: pool every channel as its own 2-D map with an integer stride
    # (the call form max_pooling is exercised with in its example cell), then
    # stack the pooled maps back into a (height, width, num_channels) array.
    num_channels = input_layer.shape[2]
    max_pool = np.stack(
        [
            max_pooling(input_layer[:, :, channel], kernel_size=(3, 3), padding='same', stride=1)
            for channel in range(num_channels)
        ],
        axis=-1,
    )

    # Concatenate the outputs along the channel axis
    inception_output = np.concatenate([conv1x1, conv3x3, conv5x5, max_pool], axis=-1)

    return inception_output