## ML - ML

In [1]:
import numpy as np
 
def conv2d(x, weights, bias=None, kernel_size=1, stride=1, padding=0, is_depthwise=False):
    """
    Performs 2D convolution (sliding-window multiply-and-sum, kernel not flipped)
    with configurable kernel size, stride and zero padding.

    Args:
        x: Input tensor of shape [batch_size, in_channels, height, width]
        weights: Weight tensor of shape [out_channels, in_channels, kernel_size, kernel_size];
            for a depthwise convolution, [in_channels, 1, kernel_size, kernel_size]
        bias: Optional bias tensor of shape [out_channels]
        kernel_size: Size of convolution kernel (1 or 3 supported)
        stride: Convolution stride (integer >= 1)
        padding: Number of zero rows/columns added on every side of height and width (>= 0)
        is_depthwise: Boolean indicating if this is a depthwise convolution
            (channel c of the input is filtered only by weights[c, 0])
    Returns:
        Output tensor of shape [batch_size, out_channels, out_height, out_width] with
        out_height = (height + 2 * padding - kernel_size) // stride + 1 (same for width).
        The 1x1 path keeps the dtype NumPy derives from the operands; the 3x3 path
        accumulates into a float64 array.
    Raises:
        ValueError: If kernel_size is not 1 or 3, stride < 1, or padding < 0.
    """
    # Validate arguments up front so failures are explicit rather than shape errors later
    if kernel_size not in [1, 3]:
        raise ValueError("Only kernel sizes 1 and 3 are currently supported")
    if stride < 1:
        raise ValueError("stride must be a positive integer")
    if padding < 0:
        raise ValueError("padding must be non-negative")

    # Extract dimensions
    batch_size, in_channels, height, width = x.shape
    out_channels = weights.shape[0]

    # Zero-pad the spatial dimensions only (applies to every kernel size)
    if padding > 0:
        pad_width = (
            (0, 0),              # No padding for batch dimension
            (0, 0),              # No padding for channel dimension
            (padding, padding),  # Padding for height
            (padding, padding)   # Padding for width
        )
        x_padded = np.pad(x, pad_width, mode='constant', constant_values=0)
    else:
        x_padded = x

    # Calculate output dimensions
    out_height = (height + 2 * padding - kernel_size) // stride + 1
    out_width = (width + 2 * padding - kernel_size) // stride + 1

    if kernel_size == 1:
        # A 1x1 kernel only ever looks at one pixel, so striding is plain subsampling.
        # The slice yields exactly out_height x out_width positions.
        x_strided = x_padded[:, :, ::stride, ::stride]

        if is_depthwise:
            # Depthwise 1x1: each channel is scaled by its own single weight
            result = x_strided * weights.reshape(1, out_channels, 1, 1)
        else:
            # Reshape input to [batch_size, in_channels, out_height * out_width]
            x_reshaped = x_strided.reshape(batch_size, in_channels, -1)

            # Reshape weights to [out_channels, in_channels]
            weights_reshaped = weights.reshape(out_channels, -1)

            # [batch_size, positions, in_channels] x [in_channels, out_channels]
            result = np.matmul(x_reshaped.transpose(0, 2, 1), weights_reshaped.T)

            # Reshape back to [batch_size, out_channels, out_height, out_width]
            result = result.reshape(batch_size, out_height, out_width, -1).transpose(0, 3, 1, 2)

    # Handle 3x3 convolution (traditional or depthwise)
    else:
        # Initialize output tensor with zeros
        result = np.zeros((batch_size, out_channels, out_height, out_width))

        if is_depthwise:
            # Perform depthwise convolution
            for b in range(batch_size):
                for c in range(in_channels):
                    for h in range(out_height):
                        for w in range(out_width):
                            h_start = h * stride
                            w_start = w * stride
                            # Extract the current patch [kernel_size, kernel_size]
                            patch = x_padded[b, c, h_start:h_start+kernel_size, w_start:w_start+kernel_size]
                            # Perform element-wise multiplication and sum
                            result[b, c, h, w] = np.sum(patch * weights[c, 0])
        else:
            # Perform traditional convolution
            for b in range(batch_size):
                for c_out in range(out_channels):
                    for h in range(out_height):
                        for w in range(out_width):
                            h_start = h * stride
                            w_start = w * stride
                            # Extract the current patch across all input channels [in_channels, kernel_size, kernel_size]
                            patch = x_padded[b, :, h_start:h_start+kernel_size, w_start:w_start+kernel_size]
                            # Perform element-wise multiplication and sum
                            result[b, c_out, h, w] = np.sum(patch * weights[c_out])

    # Add bias if provided
    if bias is not None:
        # Reshape bias to [1, out_channels, 1, 1] for broadcasting.
        # Out-of-place add: an in-place `+=` would raise when the result is an
        # integer array and the bias is floating point.
        result = result + bias.reshape(1, -1, 1, 1)

    return result
 
 
# Input tensor of shape [1, 2, 4, 4]: the integers 1..32 laid out row by row,
# channel 0 holding 1..16 and channel 1 holding 17..32.
x = np.arange(1, 33).reshape(1, 2, 4, 4)
print("Original Input Tensor (x):\n", x)
print("Shape of x:", x.shape)

Original Input Tensor (x):
 [[[[ 1  2  3  4]
   [ 5  6  7  8]
   [ 9 10 11 12]
   [13 14 15 16]]

  [[17 18 19 20]
   [21 22 23 24]
   [25 26 27 28]
   [29 30 31 32]]]]
Shape of x: (1, 2, 4, 4)


### 1x1 Conv

In [2]:
# 1x1 kernel bank of shape [2, 2, 1, 1].
# Row o, column i of the matrix below is the weight from input channel i to output channel o.
weights_1x1 = np.array([[1, 2],
                        [3, 4]]).reshape(2, 2, 1, 1)
# One bias value per output channel, shape [2]
bias_1x1 = np.array([1, 2])

# Pointwise (1x1) convolution: stride 1, no padding, regular (non-depthwise) mode
output_1x1 = conv2d(x, weights_1x1, bias_1x1,
                    kernel_size=1, stride=1, padding=0, is_depthwise=False)
print("\n1x1 Convolution Output:\n", output_1x1)
print("Shape:", output_1x1.shape)


1x1 Convolution Output:
 [[[[ 36  39  42  45]
   [ 48  51  54  57]
   [ 60  63  66  69]
   [ 72  75  78  81]]

  [[ 73  80  87  94]
   [101 108 115 122]
   [129 136 143 150]
   [157 164 171 178]]]]
Shape: (1, 2, 4, 4)


### 3x3 Conv

In [3]:
# Kernel applied to input channel 0: left column minus right column, with the centre tap set to 1
kernel_ch0 = [[1, 0, -1],
              [1, 1, -1],
              [1, 0, -1]]
# Kernel applied to input channel 1: 4-neighbour Laplacian
kernel_ch1 = [[0, 1, 0],
              [1, -4, 1],
              [0, 1, 0]]
# Single output channel over two input channels -> shape [1, 2, 3, 3]
weights_traditional = np.array([[kernel_ch0, kernel_ch1]])

# Bias for Traditional 3x3 convolution [1]
bias_traditional = np.array([0])

# Traditional 3x3 convolution with padding = 1, stride = 1.
# Shape walk-through:
#   x: 1,2,4,4 -> zero-padded to 1,2,6,6
#   w: 1,2,3,3
#   o: 1,1,4,4
# Larger example for comparison:
#   x: 1,3,224,224 (pad = 1, stride = 2)
#   w: 32,3,3,3
#   o: 1,32,112,112
output_traditional = conv2d(x, weights_traditional, bias_traditional,
                            kernel_size=3, stride=1, padding=1, is_depthwise=False)
print("\nTraditional 3x3 Convolution Output:\n", output_traditional)
print("Shape:", output_traditional.shape)
 



Traditional 3x3 Convolution Output:
 [[[[-36. -16. -16. -23.]
   [-33.   0.   1.   4.]
   [-45.   4.   5.  16.]
   [-72. -24. -24. -27.]]]]
Shape: (1, 1, 4, 4)


### 3x3 Conv with Padding = 0

In [4]:
 
# Perform Traditional 3x3 convolution with no padding (stride = 1).
# Shapes for this call:
#   x: 1,2,4,4 (padding = 0, so the input is used as-is)
#   w: 1,2,3,3
#   o: 1,1,2,2   because (4 + 2*0 - 3) // 1 + 1 = 2
# For comparison, a padded and strided case:
#   x: 1,3,224,224 (pad = 1, stride = 2)
#   w: 32,3,3,3
#   o: 1,32,112,112
output_traditional = conv2d(
    x,
    weights=weights_traditional,
    bias=bias_traditional,
    kernel_size=3,
    stride=1,
    padding=0,
    is_depthwise=False
)
print("\nTraditional 3x3 Convolution Output:\n", output_traditional)
print("Shape:", output_traditional.shape)


Traditional 3x3 Convolution Output:
 [[[[0. 1.]
   [4. 5.]]]]
Shape: (1, 1, 2, 2)


### Depthwise Conv

In [5]:
# Depthwise 3x3 weights, shape [2, 1, 3, 3]: one kernel per input channel.
# Channel 0 kernel: left column minus right column.
depthwise_kernel_ch0 = [[1, 0, -1],
                        [1, 0, -1],
                        [1, 0, -1]]
# Channel 1 kernel: 4-neighbour Laplacian.
depthwise_kernel_ch1 = [[0, 1, 0],
                        [1, -4, 1],
                        [0, 1, 0]]
weights_depthwise = np.array([[depthwise_kernel_ch0], [depthwise_kernel_ch1]])
# Bias for Depthwise 3x3 convolution [2]
bias_depthwise = np.array([0, 0])

# Depthwise 3x3 convolution: stride 1, padding 1, each channel filtered independently
output_depthwise = conv2d(x, weights_depthwise, bias_depthwise,
                          kernel_size=3, stride=1, padding=1, is_depthwise=True)
print("\nDepthwise 3x3 Convolution Output:\n", output_depthwise)
print("Shape:", output_depthwise.shape)


Depthwise 3x3 Convolution Output:
 [[[[ -8.  -4.  -4.  10.]
   [-18.  -6.  -6.  21.]
   [-30.  -6.  -6.  33.]
   [-24.  -4.  -4.  26.]]

  [[-29. -14. -15. -37.]
   [-20.   0.   0. -25.]
   [-24.   0.   0. -29.]
   [-61. -34. -35. -69.]]]]
Shape: (1, 2, 4, 4)


## Example from https://www.deep-ml.com/problems/41

In [6]:
import numpy as np

def simple_conv2d(input_matrix: np.ndarray, kernel: np.ndarray, padding=1, stride=2):
    """
    Slide a 2D kernel over a zero-padded 2D matrix and sum the element-wise products.

    Args:
        input_matrix: 2D array of shape [height, width]
        kernel: 2D array of shape [kernel_height, kernel_width] (applied without flipping)
        padding: Number of zero rows/columns added on every side of the input
        stride: Step between consecutive window positions, in both directions
    Returns:
        float64 array of shape [out_height, out_width], where
        out_height = (height + 2 * padding - kernel_height) // stride + 1
        and out_width is computed the same way from the widths.
    """
    rows, cols = input_matrix.shape
    k_rows, k_cols = kernel.shape

    # Same (before, after) zero padding on both axes
    padded = np.pad(input_matrix, [(padding, padding)] * 2, mode='constant', constant_values=0)

    n_out_rows = (rows + 2 * padding - k_rows) // stride + 1
    n_out_cols = (cols + 2 * padding - k_cols) // stride + 1
    output = np.zeros((n_out_rows, n_out_cols))

    # Visit every output cell; its window starts at (i * stride, j * stride) in the padded input
    for i, j in np.ndindex(n_out_rows, n_out_cols):
        top, left = i * stride, j * stride
        window = padded[top:top + k_rows, left:left + k_cols]
        output[i, j] = (window * kernel).sum()

    return output


import numpy as np

# 4x4 input holding 1..16 row by row
input_matrix = np.arange(1, 17).reshape(4, 4)

# 2x2 kernel
kernel = np.array([[1, 0],
                   [-1, 1]])

# One ring of zero padding, window moved two cells at a time
padding, stride = 1, 2

output = simple_conv2d(input_matrix, kernel, padding, stride)
print(output)


[[ 1.  1. -4.]
 [ 9.  7. -4.]
 [ 0. 14. 16.]]
