In [106]:
import numpy as np
import matplotlib.pyplot as plt
import math


<h1>Chapter 2: Logistic regression / MLPs </h1>

<h5>Logistic regression - Cross entropy loss </h5>
memory complexity of eqn : O(N * d^2)

In [107]:
def cross_entropy_loss(y_true, y_pred):
    """
    Compute the summed binary cross-entropy between labels and predictions.

    Inputs:
    - y_true: array-like of target labels (the example below uses 0/1 values)
    - y_pred: array-like of predicted probabilities, broadcastable with y_true

    Output: the loss summed over all elements (it is not averaged)
    """
    # Accept plain Python sequences as well as ndarrays; without this
    # conversion `1 - y_true` raises TypeError when y_true is a list.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # The small epsilon keeps both logarithms finite when y_pred is exactly 0 or 1.
    eps = 1e-15
    positive_term = y_true * np.log(y_pred + eps)
    negative_term = (1 - y_true) * np.log(1 - y_pred + eps)
    loss = -np.sum(positive_term + negative_term)
    return loss


# Worked example: three binary labels and the predicted probabilities for each.
y_true = np.array([0, 1, 0])
y_pred = np.array([0.01, 1, 0.1])

# Report the summed loss rounded to three decimals.
example_loss = cross_entropy_loss(y_true, y_pred)
print("cross_entropy_loss = ", np.round(example_loss, 3))





cross_entropy_loss =  0.115


<h5> Multi Layer Perceptron</h5>


<h3><span style="color:yellow;">Attention Calculator</span> </h3>

In [108]:

import numpy as np

def single_head_self_attention(x, wq, wk, wv, wo):
    """
    Single-head scaled dot-product self-attention followed by an output projection.

    Inputs:
    - x:  array-like, one row per sequence position
    - wq: query projection, applied as x @ wq
    - wk: key projection, applied as x @ wk
    - wv: value projection, applied as x @ wv
    - wo: output projection, applied as context @ wo

    Output: tuple (q, k, v, attention_scores, attention_weights,
            context_vector, output)
    """
    x, wq, wk, wv, wo = (np.asarray(a) for a in (x, wq, wk, wv, wo))

    q = np.dot(x, wq)  # Query
    k = np.dot(x, wk)  # Key
    v = np.dot(x, wv)  # Value

    # Scores have one row per query position and one column per key position;
    # they are scaled by the square root of the query/key width.
    scores = np.dot(q, k.T) / np.sqrt(wq.shape[1])

    # Softmax over each row. Subtracting the row maximum leaves the result
    # unchanged mathematically but prevents exp() from overflowing to inf
    # (which would turn the weights into NaN) when scores are large.
    shifted = scores - scores.max(axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    weights = exp_scores / exp_scores.sum(axis=1, keepdims=True)

    # Weighted sum of the values, then the output projection.
    context = np.dot(weights, v)
    projected = np.dot(context, wo)

    return q, k, v, scores, weights, context, projected


# Given values (example values)
wq = np.array([[0.0], [2.0]])
wk = np.array([[2.0], [0.0]])
wv = np.array([[0.5], [0.5]])
wo = np.array([[0.5, 0.5]])

x = np.array([[1, 1], [0, 0], [1, 1]])

# Calculating self-attention
(q, k, v,
 attention_scores, attention_weights,
 context_vector, output) = single_head_self_attention(x, wq, wk, wv, wo)

print("Input:")
print(x)
print("\nQuery:")
print(q)
print("\nKey:")
print(k)
print("\nValue:")
print(v)
print("\nAttention Scores:")
print(np.round(attention_scores,2))
print("\nAttention Weights:")
print( np.round(attention_weights,2))
print("\nContext Vector:")
print(np.round(context_vector,2))
print("\nOutput:")
print(np.round(output,2))


Input:
[[1 1]
 [0 0]
 [1 1]]

Query:
[[2.]
 [0.]
 [2.]]

Key:
[[2.]
 [0.]
 [2.]]

Value:
[[1.]
 [0.]
 [1.]]

Attention Scores:
[[4. 0. 4.]
 [0. 0. 0.]
 [4. 0. 4.]]

Attention Weights:
[[0.5  0.01 0.5 ]
 [0.33 0.33 0.33]
 [0.5  0.01 0.5 ]]

Context Vector:
[[0.99]
 [0.67]
 [0.99]]

Output:
[[0.5  0.5 ]
 [0.33 0.33]
 [0.5  0.5 ]]


<h1> Parameter Number Calculator </h1> 

<h3><span style="color:yellow;">MLP params</span> </h3>

In [109]:
def mlp_params(layer_sizes):
    """
    Count the trainable parameters of a fully connected network.

    Input: layer_sizes: sequence of layer widths in order
    Output: total of weights (fan_in * fan_out) plus biases (fan_out) over
            every pair of consecutive layers
    """
    return sum(
        fan_in * fan_out + fan_out
        for fan_in, fan_out in zip(layer_sizes, layer_sizes[1:])
    )

# Example: layer widths listed in order; each consecutive pair forms one dense layer.
layer_sizes = [5, 20, 10]
mlp_param_count = mlp_params(layer_sizes)
print("Number of trainable mlp parameteres :", mlp_param_count)

Number of trainable mlp parameteres : 330


<h3> <span style="color:yellow;">Batch Norm params</span></h3>

In [110]:
def batch_norm_params(input_dimension, network_type):
    """
    Function to calculate number of trainable parameters in batch normalization layer

    Inputs:
    - input_dimension: number of input channels for cnn, and number of neurons for FC
    - network_type: 'CNN' or 'FC'

    Output: number of trainable parameters in batch normalization layer

    Raises: ValueError if network_type is neither 'CNN' nor 'FC'
    """
    # Reject unknown types explicitly instead of silently returning None,
    # matching how rnn_params treats an invalid cell_type.
    if network_type not in ('CNN', 'FC'):
        raise ValueError("Invalid network_type. Choose from 'CNN' or 'FC'.")

    # Both network types use two learnable values (scale and shift) per
    # channel/neuron, so the count is the same.
    return 2 * input_dimension

# Example: batch norm on 30 input channels of a CNN
# (input_dimension would be the neuron count for an 'FC' layer).
network_type = 'CNN'
input_dimension = 30
bn_param_count = batch_norm_params(input_dimension, network_type)
print("Number of trainable parameters for", network_type, ":", bn_param_count)

Number of trainable parameters for CNN : 60


<h3> <span style="color:yellow;">Layer Norm params</span></h3>

In [111]:
def layer_norm_params(input_dimension, spatial_dimension=None, network_type='FC'):
    """
    Function to calculate number of trainable parameters in layer normalization layer

    Inputs:
    - input_dimension: number of input channels or neurons
    - spatial_dimension: tuple (height, width) for spatial dimensions in CNN. Not required for FC.
    - network_type: 'CNN' or 'FC'

    Output: number of trainable parameters in layer normalization layer

    Raises: ValueError if network_type is 'CNN' without a spatial_dimension,
            or if network_type is neither 'CNN' nor 'FC'
    """

    if network_type == 'CNN':
        if spatial_dimension is None:
            raise ValueError("For CNN, spatial_dimension (height, width) must be provided")
        height, width = spatial_dimension
        # Two parameters for every element of the channels x height x width map.
        return 2 * input_dimension * height * width
    elif network_type == 'FC':
        # Two parameters per neuron.
        return 2 * input_dimension
    else:
        # Previously an unknown type fell through and returned None.
        raise ValueError("Invalid network_type. Choose from 'CNN' or 'FC'.")

# Example: layer norm over a CNN feature map with 10 channels and a 3x3 spatial extent.
network_type = 'CNN'
input_dimension = 10  # Number of input channels to ln layer
spatial_dimension = (3, 3)  # (height, width)
ln_param_count = layer_norm_params(input_dimension, spatial_dimension, network_type)
print("Number of trainable parameters for", network_type, ":", ln_param_count)


Number of trainable parameters for CNN : 180


<h3> <span style="color:yellow;">Recurrent network params</span></h3>

In [112]:
def rnn_params(input_dim, hidden_dim, cell_type='RNN'):
    """
    Count the trainable parameters of a single recurrent cell.

    Inputs:
    - input_dim: Dimensionality of input data
    - hidden_dim: Dimensionality of hidden state
    - cell_type: 'RNN', 'LSTM', or 'GRU'

    Output: number of trainable parameters in the specified cell

    Raises: ValueError for any other cell_type
    """
    # Each entry pairs a cell type with how many copies of the basic
    # (input weights + recurrent weights + bias) set it is counted with.
    weight_sets_per_cell = (('RNN', 1), ('LSTM', 4), ('GRU', 3))

    for known_type, n_sets in weight_sets_per_cell:
        if cell_type == known_type:
            one_set = input_dim * hidden_dim + hidden_dim**2 + hidden_dim
            return n_sets * one_set

    raise ValueError("Invalid cell_type. Choose from 'RNN', 'LSTM', or 'GRU'.")

# Example: an LSTM cell with a 64-dimensional input and a 256-dimensional hidden state.
cell_type = 'LSTM'
input_dim = 64
hidden_dim = 256
recurrent_param_count = rnn_params(input_dim, hidden_dim, cell_type)
print("Number of trainable parameters for", cell_type, ":", recurrent_param_count)



Number of trainable parameters for LSTM : 328704


<h2> <span style="color:yellow;">2D Filter Replacement</span></h2>
<h4> Replace Filter A with Filter B, but maintain the same receptive field. </h4>
<h4> Compare the number of trainable parameters for each </h4>

In [113]:
def maintain_rf(filter_A_width, filter_A_height, stride_A, dilation_A, 
                filter_B_width, filter_B_height, stride_B, dilation_B):
    """
    Count how many stacked filter-B layers are needed for their combined
    receptive field to reach that of a single filter A in both width and height.

    Inputs:
    - filter_A_width, filter_A_height: kernel size of the original filter
    - stride_A: accepted for a symmetric signature; not used in the calculation
    - dilation_A: dilation of the original filter
    - filter_B_width, filter_B_height: kernel size of the replacing filter
    - stride_B, dilation_B: stride and dilation of every filter-B layer

    Output: number of filter-B layers (0 if filter A's receptive field is 1x1)

    Raises: ValueError if stacking filter B can never reach the target
            (for example a 1x1 filter B), which previously looped forever
    """
    # Receptive field of the single dilated filter A.
    RF_A_width = ((filter_A_width - 1) * dilation_A) + 1
    RF_A_height = ((filter_A_height - 1) * dilation_A) + 1

    # Extent covered by one dilated filter B along each axis.
    span_B_width = ((filter_B_width - 1) * dilation_B) + 1
    span_B_height = ((filter_B_height - 1) * dilation_B) + 1

    n = 0
    RF_B_width = 1
    RF_B_height = 1
    while RF_B_width < RF_A_width or RF_B_height < RF_A_height:
        next_width = span_B_width + (RF_B_width - 1) * stride_B
        next_height = span_B_height + (RF_B_height - 1) * stride_B

        # An axis that still falls short must grow with every added layer;
        # otherwise the loop would never terminate.
        width_stuck = RF_B_width < RF_A_width and next_width <= RF_B_width
        height_stuck = RF_B_height < RF_A_height and next_height <= RF_B_height
        if width_stuck or height_stuck:
            raise ValueError(
                "Filter B cannot reach the receptive field of filter A: "
                "its receptive field does not grow when layers are stacked."
            )

        RF_B_width, RF_B_height = next_width, next_height
        n += 1
    return n

def compare_parameters(input_channels, output_channels, 
                       filter_A_width, filter_A_height, 
                       filter_B_width, filter_B_height, n_B):
    """
    Rough weight counts for one filter-A layer versus n_B stacked filter-B layers.

    Every layer is counted as input_channels * output_channels * kernel area;
    biases are not included.

    Output: tuple (params_A, params_B)
    """
    channel_pairs = input_channels * output_channels
    return (
        channel_pairs * filter_A_width * filter_A_height,
        channel_pairs * filter_B_width * filter_B_height * n_B,
    )

# Example: replace a single 7x7 filter with a stack of dilated 2x2 filters.
input_channels, output_channels = 3, 64

# Filter A: the original filter
filter_A_width, filter_A_height = 7, 7
a_stride, a_dilation = 1, 1

# Filter B: the replacing filter
filter_B_width, filter_B_height = 2, 2
b_stride, b_dilation = 1, 2

n_B = maintain_rf(
    filter_A_width, filter_A_height, a_stride, a_dilation,
    filter_B_width, filter_B_height, b_stride, b_dilation,
)
print("Number of %d x %d filters to replace %d x %d filters to have the same receptive field: %d " % (
    filter_B_width, filter_B_height, filter_A_width, filter_A_height, n_B))

param_counts_2d = compare_parameters(
    input_channels, output_channels,
    filter_A_width, filter_A_height,
    filter_B_width, filter_B_height, n_B,
)
print("Rough comparison of parameters for filter A and B:", param_counts_2d)


Number of 2 x 2 filters to replace 7 x 7 filters to have the same receptive field: 3 
Rough comparison of parameters for filter A and B: (9408, 2304)


<h2> <span style="color:yellow;">3D Filter Replacement</span></h2>
<h4> Replace Filter A with Filter B, but maintain the same receptive field. </h4>
<h4> Compare the number of trainable parameters for each </h4>

In [114]:
def maintain_rf_3d(filter_A_depth, filter_A_height, filter_A_width, 
                   stride_A, dilation_A, 
                   filter_B_depth, filter_B_height, filter_B_width, 
                   stride_B, dilation_B):
    """
    Count how many stacked 3D filter-B layers are needed for their combined
    receptive field to reach that of a single 3D filter A along depth,
    height and width.

    Inputs:
    - filter_A_depth, filter_A_height, filter_A_width: kernel size of the original filter
    - stride_A: accepted for a symmetric signature; not used in the calculation
    - dilation_A: dilation of the original filter
    - filter_B_depth, filter_B_height, filter_B_width: kernel size of the replacing filter
    - stride_B, dilation_B: stride and dilation of every filter-B layer

    Output: number of filter-B layers (0 if filter A's receptive field is 1x1x1)

    Raises: ValueError if stacking filter B can never reach the target along
            some axis (for example a kernel size of 1 on that axis), which
            previously looped forever
    """
    # Per-axis targets (filter A) and per-layer extents (filter B),
    # ordered as depth, height, width.
    targets = [
        ((size - 1) * dilation_A) + 1
        for size in (filter_A_depth, filter_A_height, filter_A_width)
    ]
    spans = [
        ((size - 1) * dilation_B) + 1
        for size in (filter_B_depth, filter_B_height, filter_B_width)
    ]

    n = 0
    current = [1, 1, 1]
    while any(rf < target for rf, target in zip(current, targets)):
        grown = [span + (rf - 1) * stride_B for span, rf in zip(spans, current)]

        # An axis that still falls short must grow with every added layer;
        # otherwise the loop would never terminate.
        for rf, new_rf, target in zip(current, grown, targets):
            if rf < target and new_rf <= rf:
                raise ValueError(
                    "Filter B cannot reach the receptive field of filter A: "
                    "its receptive field does not grow when layers are stacked."
                )

        current = grown
        n += 1
    return n

def compare_parameters_3d(input_channels, output_channels, 
                          filter_A_depth, filter_A_height, filter_A_width, 
                          filter_B_depth, filter_B_height, filter_B_width, 
                          n_B):
    """
    Rough weight counts for one 3D filter-A layer versus n_B stacked 3D filter-B layers.

    Every layer is counted as input_channels * output_channels * kernel volume;
    biases are not included.

    Output: tuple (params_A, params_B)
    """
    channel_pairs = input_channels * output_channels
    weights_A = channel_pairs * filter_A_depth * filter_A_height * filter_A_width
    weights_B = channel_pairs * filter_B_depth * filter_B_height * filter_B_width * n_B
    return weights_A, weights_B

# Example: replace a dilated 5x5x5 filter with a stack of dilated 4x4x3 filters.
input_channels, output_channels = 3, 64

# Filter A: the original filter
filter_A_depth, filter_A_height, filter_A_width = 5, 5, 5
a_stride, a_dilation = 2, 4

# Filter B: the filter that will replace the original
filter_B_depth, filter_B_height, filter_B_width = 4, 4, 3
b_stride, b_dilation = 1, 2

n_B = maintain_rf_3d(
    filter_A_depth, filter_A_height, filter_A_width,
    a_stride, a_dilation,
    filter_B_depth, filter_B_height, filter_B_width,
    b_stride, b_dilation,
)
print("Number of %d x %d x %d filters to replace %d x %d x %d filter to have the same receptive field: %d" % (
    filter_B_depth, filter_B_height, filter_B_width,
    filter_A_depth, filter_A_height, filter_A_width, n_B))

param_counts_3d = compare_parameters_3d(
    input_channels, output_channels,
    filter_A_depth, filter_A_height, filter_A_width,
    filter_B_depth, filter_B_height, filter_B_width,
    n_B,
)
print("Rough comparison of parameters: %d vs %d" % param_counts_3d)


Number of 4 x 4 x 3 filters to replace 5 x 5 x 5 filter to have the same receptive field: 4
Rough comparison of parameters: 24000 vs 36864


<h2> <span style="color:yellow;">Output Dimensions and Number of multiplications for CNN </span></h2>
<h4>2D CNN </h4>



In [115]:
def cnn_multiplications_2d(H_in, W_in, C_in, C_out, K, padding, stride, dilation):
    """
    Output size and multiplication count of a 2D convolution with a square K x K kernel.

    Inputs:
    - H_in, W_in: input height and width
    - C_in, C_out: number of input channels and number of filters
    - K: kernel size (same for height and width)
    - padding, stride, dilation: applied identically along both axes

    Output: tuple (number of multiplications, H_out, W_out)

    Raises: ValueError if the dilated kernel does not fit in the padded input.
            Previously such inputs produced non-positive sizes, and two negative
            sizes multiplied into a plausible-looking positive count.
    """
    # Extent covered by the dilated kernel along one axis.
    kernel_span = dilation * (K - 1) + 1

    H_out = (H_in + 2 * padding - kernel_span) // stride + 1
    W_out = (W_in + 2 * padding - kernel_span) // stride + 1

    if H_out <= 0 or W_out <= 0:
        raise ValueError(
            "Kernel does not fit the padded input: output size would be "
            "{} x {}".format(H_out, W_out)
        )

    # K * K * C_in multiplications per output element, for every element of
    # every output channel.
    return H_out * W_out * K * K * C_in * C_out , H_out, W_out

# Example input: 3 channels, 32 x 32
C_in = 3
W_in = 32
H_in = 32

# Filter properties: K is the filter size, C_out the number of filters
K, C_out = 3, 64
padding, stride, dilation = 1, 1, 1

num_mults, H_out, W_out = cnn_multiplications_2d(
    H_in, W_in, C_in, C_out, K, padding, stride, dilation
)
print("Number of multiplications: {:,}".format(num_mults))
print("Output size: {} x {} x {}".format(C_out, H_out, W_out))


Number of multiplications: 1,769,472
Output size: 64 x 32 x 32


<h4>3D CNN </h4>

In [116]:
def cnn3d_multiplications(D_in, H_in, W_in, C_in, D_k, H_k, W_k, C_out, padding, stride, dilation):
    """
    Output size and multiplication count of a 3D convolution.

    Inputs:
    - D_in, H_in, W_in: input depth, height and width
    - C_in, C_out: number of input channels and number of filters
    - D_k, H_k, W_k: kernel size along depth, height and width
    - padding, stride, dilation: applied identically along all three axes

    Output: tuple (total multiplications, D_out, H_out, W_out)

    Raises: ValueError if the dilated kernel does not fit in the padded input
            along some axis. Previously such inputs produced non-positive
            sizes and a meaningless multiplication count.
    """
    D_out = (D_in + 2 * padding - dilation * (D_k - 1) - 1) // stride + 1
    H_out = (H_in + 2 * padding - dilation * (H_k - 1) - 1) // stride + 1
    W_out = (W_in + 2 * padding - dilation * (W_k - 1) - 1) // stride + 1

    if D_out <= 0 or H_out <= 0 or W_out <= 0:
        raise ValueError(
            "Kernel does not fit the padded input: output size would be "
            "{} x {} x {}".format(D_out, H_out, W_out)
        )

    # Multiplications per output element
    mults_per_element = D_k * H_k * W_k * C_in

    # Total multiplications
    total_mults = mults_per_element * D_out * H_out * W_out * C_out

    return total_mults , D_out, H_out, W_out

# Example: a 3-channel volume of size 120 x 36 x 36 (depth, height, width)
# convolved with eight 3 x 3 x 3 filters.
D_in = 120
H_in = 36
W_in = 36
C_in = 3  # Number of input channels (e.g., RGB)
D_k = H_k = W_k = 3  # 3D kernel size
C_out = 8  # Number of output channels (filters)
padding, stride, dilation = 1, 1, 1

total_mults, D_out, H_out, W_out = cnn3d_multiplications(
    D_in, H_in, W_in, C_in, D_k, H_k, W_k, C_out, padding, stride, dilation
)
print("Total multiplications: ", total_mults)
print("Size of the 3d convoluted output Channels x Depth x Height x Width: {} x {} x {} x {}".format(C_out, D_out, H_out, W_out))

Total multiplications:  100776960
Size of the 3d convoluted output Channels x Depth x Height x Width: 8 x 120 x 36 x 36


<h2> <span style="color:yellow;">Padding ( Valid and Same) </span></h2>


In [117]:
def compute_padding(input_size, kernel_size, stride, dilation):
    """
    Padding amounts for the 'SAME' and 'VALID' padding modes along one axis.

    Parameters:
    - input_size (int): The size of the input feature map (width or height).
    - kernel_size (int): The size of the kernel.
    - stride (int): The stride of the convolution.
    - dilation (int): The dilation rate of the kernel.

    Returns:
    - tuple: (pad_same, pad_valid); pad_valid is always 0.
    """
    # Dilation spreads the kernel taps apart, enlarging the span it covers.
    dilated_kernel = kernel_size + (dilation - 1) * (kernel_size - 1)

    # When the stride divides the input evenly, the stride itself is
    # subtracted from the kernel span; otherwise the remainder is.
    leftover = input_size % stride
    covered = stride if leftover == 0 else leftover

    # Never report negative padding.
    same_padding = max(dilated_kernel - covered, 0)
    valid_padding = 0

    return (same_padding, valid_padding)

# Example: a 32-wide (square) input with a size-4 kernel, stride 1 and dilation 3.
input_size, kernel_size = 32, 4
stride, dilation = 1, 3

pad_same, pad_valid = compute_padding(input_size, kernel_size, stride, dilation)
for padding_report in ("Padding for SAME: {}".format(pad_same),
                       "Padding for VALID: {}".format(pad_valid)):
    print(padding_report)

Padding for SAME: 9
Padding for VALID: 0


<h2> <span style="color:yellow;">Whether an Input can pass through a given CNN  </span></h2>


In [118]:


def can_pass_network(H_in, W_in, C_in, layers, final_c, final_h, final_w):
    """
    Trace an input's shape through a list of layers and check the final shape.

    Inputs:
    - H_in, W_in, C_in: height, width and channel count of the input
    - layers: list of dicts. 'conv' layers provide 'K', 'padding', 'stride',
      'dilation' and 'C_out'; 'maxpool' layers provide 'K' and 'stride'.
      Layers of any other 'type' leave the shape unchanged.
    - final_c, final_h, final_w: expected channels, height and width at the end

    Output: False as soon as a conv/maxpool layer yields a non-positive height
            or width; otherwise whether the final shape equals the expected one
    """
    height, width, channels = H_in, W_in, C_in

    for spec in layers:
        kind = spec['type']

        if kind == 'conv':
            def after_conv(size):
                # Output size along one axis of a padded, dilated, strided convolution.
                return (size + 2 * spec['padding'] - spec['dilation'] * (spec['K'] - 1) - 1) // spec['stride'] + 1

            height, width = after_conv(height), after_conv(width)
            channels = spec['C_out']
        elif kind == 'maxpool':
            def after_pool(size):
                # Output size along one axis of an unpadded pooling window.
                return (size - spec['K']) // spec['stride'] + 1

            height, width = after_pool(height), after_pool(width)
        else:
            # Unrecognised layer types do not alter the tracked shape.
            continue

        # The feature map vanished: the input is too small for this layer.
        if height <= 0 or width <= 0:
            return False

    # Checking if final dimensions match with expected dimensions
    return channels == final_c and height == final_h and width == final_w
######################## Input ########################

# Example usage: input height, width and channel count
H_in = 28
W_in = 28
C_in = 3

# Expected shape (channels, height, width) after the last layer
final_c, final_h, final_w = 16, 5, 5

# Two conv + maxpool stages
layers = [
    {'type': 'conv', 'K': 5, 'padding': 0, 'stride': 1, 'dilation': 1, 'C_out': 6},
    {'type': 'maxpool', 'K': 2, 'stride': 2},
    {'type': 'conv', 'K': 5, 'padding': 0, 'stride': 1, 'dilation': 1, 'C_out': 16},
    {'type': 'maxpool', 'K': 2, 'stride': 2},
]

######################## Output ########################
# The check also requires the final shape to equal the expected one,
# not only that every layer produces a positive size.
shape_report = (
    " Current image of size {}x{}x{} can pass through the network"
    if can_pass_network(H_in, W_in, C_in, layers, final_c, final_h, final_w)
    else " Current image of size {}x{}x{} cannot pass through the network"
)
print(shape_report.format( C_in,H_in, W_in))


 Current image of size 3x28x28 cannot pass through the network
