In [3]:
def calculate_conv_output_size(input_size, filter_size, stride, padding):
    """
    Compute the spatial size produced by a convolutional layer.

    Each axis follows (n - f + 2 * p) // s + 1, where n is the input extent,
    f the filter extent, p the padding and s the stride.

    Parameters:
    input_size -- 3-element sequence (height, width, channel number); the
                  channel number is unpacked but does not affect the result
    filter_size -- 2-element sequence (height, width)
    stride -- Stride, shared by both axes
    padding -- Padding added to each side, shared by both axes

    Returns:
    Tuple (output_height, output_width)
    """
    in_h, in_w, _channels = input_size
    k_h, k_w = filter_size

    def _extent(n, f):
        # Floor division discards a trailing window that does not fully fit.
        return (n - f + 2 * padding) // stride + 1

    return _extent(in_h, k_h), _extent(in_w, k_w)

# AlexNet CONV1 layer parameters
input_size = (227, 227, 3)  # Input size as (height, width, channel number)
filter_size = (11, 11)      # Filter size as (height, width)
num_filters = 96            # Number of filters
stride = 4                  # Stride
padding = 0                 # Padding (none for this layer)

# Calculate CONV1 output size: (227 - 11 + 2 * 0) // 4 + 1 = 55 along each axis
output_height, output_width = calculate_conv_output_size(input_size, filter_size, stride, padding)
output_depth = num_filters  # Output depth equals the number of filters

# Report the input volume, the layer configuration and the resulting output volume
print("Input size: {}x{}x{}".format(*input_size))
print("CONV1 parameters: {} {}x{} filters, stride {}, padding {}".format(num_filters, *filter_size, stride, padding))
print("CONV1 output size: {}x{}x{}".format(output_height, output_width, output_depth))

Input size: 227x227x3
CONV1 parameters: 96 11x11 filters, stride 4, padding 0
CONV1 output size: 55x55x96


In [6]:
import numpy as np

def convolution_2d(feature_map, kernel, stride=1, padding=0):
    """
    Slide a 2D kernel over a 2D feature map, summing elementwise products.

    The kernel is applied exactly as given (it is not flipped), so each
    output value is the sum of window * kernel at that position.

    Parameters:
    feature_map -- Input feature map (2D array)
    kernel -- Convolution kernel (2D array)
    stride -- Step between window positions, shared by both axes
    padding -- Number of zero rows/columns added on every side; values
               that are not greater than 0 leave the input unpadded

    Returns:
    Output feature map, a float array with
    (padded_extent - kernel_extent) // stride + 1 entries per axis
    """
    # Unpacking the shapes also rejects inputs that are not two-dimensional
    _rows, _cols = feature_map.shape
    kernel_rows, kernel_cols = kernel.shape

    # Work on a zero-bordered array when padding is requested, a plain copy otherwise
    if padding > 0:
        border = ((padding, padding), (padding, padding))
        source = np.pad(feature_map, border, 'constant', constant_values=0)
    else:
        source = feature_map.copy()

    # Number of window positions that fit along each axis
    out_rows = (source.shape[0] - kernel_rows) // stride + 1
    out_cols = (source.shape[1] - kernel_cols) // stride + 1

    result = np.zeros((out_rows, out_cols))

    # Visit every output cell in row-major order
    for row, col in np.ndindex(out_rows, out_cols):
        top = row * stride
        left = col * stride
        patch = source[top:top + kernel_rows, left:left + kernel_cols]
        result[row, col] = np.sum(patch * kernel)

    return result

def pooling_2d(feature_map, pool_size=2, stride=2, mode='max'):
    """
    Perform 2D pooling operation

    Parameters:
    feature_map -- Input feature map (2D array)
    pool_size -- Side length of the square pooling window
    stride -- Step between consecutive windows, shared by both axes
    mode -- Pooling mode ('max' or 'avg')

    Returns:
    Output feature map, a float array with
    (extent - pool_size) // stride + 1 entries per axis

    Raises:
    ValueError -- If mode is neither 'max' nor 'avg'
    """
    # Resolve the reduction once, before doing any work. An unrecognised mode
    # previously matched neither branch and silently produced an all-zero map.
    if mode == 'max':
        reduce_window = np.max
    elif mode == 'avg':
        reduce_window = np.mean
    else:
        raise ValueError(
            "Unsupported pooling mode {!r}; expected 'max' or 'avg'".format(mode)
        )

    # Get the dimensions of the feature map (also rejects non-2D input)
    fm_height, fm_width = feature_map.shape

    # Calculate the dimensions of the output feature map
    output_height = (fm_height - pool_size) // stride + 1
    output_width = (fm_width - pool_size) // stride + 1

    # Initialize the output feature map
    output = np.zeros((output_height, output_width))

    # Perform the pooling operation
    for i in range(output_height):
        row_start = i * stride
        for j in range(output_width):
            col_start = j * stride
            # Extract the current window and reduce it to a single value
            window = feature_map[row_start:row_start + pool_size,
                                 col_start:col_start + pool_size]
            output[i, j] = reduce_window(window)

    return output

# Feature map (4x4) and kernel (3x3) for question (1)
feature_map_1 = np.array([
    [1, 2, 3, 0],
    [0, 1, 2, 3],
    [3, 0, 1, 2],
    [2, 3, 0, 1]
])

kernel = np.array([
    [2, 0, 1],
    [0, 1, 2],
    [1, 0, 2]
])

# (1) a) Convolution without padding: a 3x3 kernel on a 4x4 input at stride 1 gives a 2x2 output
output_no_padding = convolution_2d(feature_map_1, kernel, stride=1, padding=0)

# (1) b) Convolution with padding to keep the output size unchanged
# Half the kernel height (3 // 2 = 1) on every side restores the 4x4 size at stride 1;
# the same value is used for both axes, which matches this square, odd-sized kernel
padding_size = kernel.shape[0] // 2  # To keep the output size unchanged
output_with_padding = convolution_2d(feature_map_1, kernel, stride=1, padding=padding_size)

# Feature map (4x4) for question (2)
feature_map_2 = np.array([
    [1, 4, 2, 1],
    [5, 8, 3, 4],
    [7, 6, 4, 5],
    [1, 3, 1, 2]
])

# (2) Max pooling and average pooling over non-overlapping 2x2 windows (stride 2), giving 2x2 outputs
output_max_pooling = pooling_2d(feature_map_2, pool_size=2, stride=2, mode='max')
output_avg_pooling = pooling_2d(feature_map_2, pool_size=2, stride=2, mode='avg')

# Print the results for question (1)
print("Question (1):")
print("a) Convolution without padding result:")
print(output_no_padding)
print("\nb) Convolution with padding result:")
print(output_with_padding)

# Print the results for question (2)
print("\nQuestion (2):")
print("Max pooling result:")
print(output_max_pooling)
print("\nAverage pooling result:")
print(output_avg_pooling)

Question (1):
a) Convolution without padding result:
[[15. 16.]
 [ 6. 15.]]

b) Convolution with padding result:
[[ 7. 12. 10.  2.]
 [ 4. 15. 16. 10.]
 [10.  6. 15.  6.]
 [ 8. 10.  4.  3.]]

Question (2):
Max pooling result:
[[8. 4.]
 [7. 5.]]

Average pooling result:
[[4.5  2.5 ]
 [4.25 3.  ]]
