In [11]:
import torch
import torch.nn.functional as F

def transposed_conv_output(input_tensor, kernel, stride=1, padding=0, dilation=1, output_padding=0):
    """Run a single-channel 2-D transposed convolution and report its output size.

    Args:
        input_tensor: 2-D array-like (nested lists or tensor) of shape (H_in, W_in).
        kernel: 2-D array-like of shape (K_h, K_w).
        stride, padding, dilation, output_padding: integers forwarded to
            ``F.conv_transpose2d`` and used in the size formula below.

    Returns:
        ``(output_list, (h_out, w_out))`` where ``output_list`` is always a
        nested list with ``h_out`` rows of ``w_out`` floats, and the tuple is
        the size computed from the closed-form formula.

    Side effects:
        Prints the computed output shape as ``BxCxHxW``.
    """
    # as_tensor accepts lists and existing tensors alike; the two unsqueeze
    # calls add the leading batch and channel dimensions conv_transpose2d expects.
    input_tensor = torch.as_tensor(input_tensor, dtype=torch.float32).unsqueeze(0).unsqueeze(0)
    kernel = torch.as_tensor(kernel, dtype=torch.float32).unsqueeze(0).unsqueeze(0)

    batch_size, in_channels, h_in, w_in = input_tensor.shape
    out_channels, _, k_h, k_w = kernel.shape

    # Closed-form output size of a transposed convolution (integer arguments)
    h_out = (h_in - 1) * stride - 2 * padding + dilation * (k_h - 1) + output_padding + 1
    w_out = (w_in - 1) * stride - 2 * padding + dilation * (k_w - 1) + output_padding + 1

    print(f"Output shape: {batch_size}x{out_channels}x{h_out}x{w_out}")

    output = F.conv_transpose2d(
        input_tensor,
        kernel,
        stride=stride,
        padding=padding,
        dilation=dilation,
        output_padding=output_padding,
    )

    # Drop only the batch and channel axes. A bare squeeze() would also remove
    # spatial axes of size 1 and turn a 1xN or 1x1 result into a flat list/scalar.
    output_list = output[0, 0].tolist()

    return output_list, (h_out, w_out)

# Example: 3x3 input and 3x3 kernel with the default stride/padding/dilation.
input_tensor = [[6, 1, 2], [3, 3, 0], [6, 1, 4]]
kernel = [[5, 9, 2], [6, 0, 5], [8, 2, 9]]

output, (height, width) = transposed_conv_output(input_tensor=input_tensor, kernel=kernel)

# Show the result one row per line, followed by the entry at row 3, column 3.
print("Output tensor:")
for row in output:
    print(row)
print(output[3][3])

Output shape: 1x1x5x5
Output tensor:
[30.0, 59.0, 31.0, 20.0, 4.0]
[51.0, 48.0, 75.0, 11.0, 10.0]
[96.0, 97.0, 128.0, 66.0, 26.0]
[60.0, 36.0, 87.0, 32.0, 20.0]
[48.0, 20.0, 88.0, 17.0, 36.0]
32.0


In [26]:
import sympy as sp

def symbolic_convolution(x_len, w_len, padding, stride, dilation):
    """Write out a 1-D convolution (cross-correlation) term by term as strings.

    Inputs are named ``x1..x{x_len}`` and kernel weights ``w1..w{w_len}``.
    Every kernel tap contributes exactly one term to each output element:
    ``wj*xk`` when the tap lands on a real input sample, or ``wj*0`` when it
    lands on a zero-padded position (for any padding width or dilation).

    Args:
        x_len: number of input samples.
        w_len: number of kernel weights.
        padding: number of zeros added on each side of the input.
        stride: step between consecutive output positions.
        dilation: spacing between kernel taps.

    Returns:
        A list of strings, one per output element, e.g.
        ``"w1*0 + w2*x1 + w3*x2"``. An element with no terms is ``"0"``.
        The list is empty when the kernel does not fit in the padded input.
    """
    # Span covered by the kernel once dilation gaps are included
    effective_kernel_size = (w_len - 1) * dilation + 1

    out_len = (x_len + 2 * padding - effective_kernel_size) // stride + 1

    y = []
    for i in range(out_len):
        # 0-based position of the first tap; negative values lie in the left padding
        start = i * stride - padding
        terms = []
        for j in range(w_len):
            idx = start + j * dilation
            if 0 <= idx < x_len:
                # Names are 1-based, hence the +1 offsets
                terms.append(f"w{j + 1}*x{idx + 1}")
            else:
                # Any index outside the input is a padded zero
                terms.append(f"w{j + 1}*0")

        y.append(' + '.join(terms) if terms else '0')

    return y

# Example: length-5 input, length-4 kernel, one zero of padding per side.
x_length, w_length = 5, 4  # lengths of x and w
pad, s, d = 1, 1, 1  # padding, stride, dilation

symbolic_output = symbolic_convolution(
    x_len=x_length, w_len=w_length, padding=pad, stride=s, dilation=d
)
# One line per output element, indexed from 0.
for i, elem in enumerate(symbolic_output):
    print(f"y[{i}] = {elem}")



# w2_new = w_n - (0.5 * dL/dw_n + 0.4 * sign(w_n))


y[0] = w1*0 + w2*x1 + w3*x2 + w4*x3
y[1] = w1*x1 + w2*x2 + w3*x3 + w4*x4
y[2] = w1*x2 + w2*x3 + w3*x4 + w4*x5
y[3] = w1*x3 + w2*x4 + w3*x5 + w4*0


In [30]:
import math

def calculate_flops(co, ho, wo, ck, hk, wk, groups=1):
    """Return log2 of the multiply count of a convolution layer.

    The count is ``co * ho * wo * ck * hk * wk``; when ``groups > 1`` it is
    additionally floor-divided by ``groups``.

    Args:
        co: number of output channels.
        ho, wo: output height and width.
        ck: number of channels in the kernel.
        hk, wk: kernel height and width.
        groups: number of groups (1 means an ordinary convolution).

    Returns:
        ``math.log2`` of the count (a float), not the count itself.

    Raises:
        ValueError: from ``math.log2`` when the count is not positive.

    NOTE(review): if callers already pass ``ck = input_channels // groups``,
    dividing by ``groups`` here applies the group reduction twice. Confirm
    which convention is intended.
    """
    total = 1
    for factor in (co, ho, wo, ck, hk, wk):
        total *= factor

    # Group convolution: scale the count down by the number of groups
    total = total // groups if groups > 1 else total

    return math.log2(total)

# Settings shared by both scenarios
co = 4  # output channels (kept equal so the two scenarios are comparable)
ho = wo = 32  # output height and width
hk = wk = 2  # kernel height and width

# Kernel channel counts that distinguish the scenarios
ck_scenario_1 = 128  # group convolution
ck_scenario_2 = 512  # ordinary convolution

# Scenario 1 uses 4 groups; scenario 2 leaves groups at its default of 1.
flops_scenario_1 = calculate_flops(co, ho, wo, ck_scenario_1, hk, wk, groups=4)
flops_scenario_2 = calculate_flops(co, ho, wo, ck_scenario_2, hk, wk)

# calculate_flops returns log2 of the count; int() truncates it to the printed exponent.
print(f"Enter the number of FLOPs for scenario 1 (Group Convolution) (write as a power of 2): 2^{int(flops_scenario_1)}")
print(f"Enter the number of FLOPs for scenario 2 (Ordinary Convolution) (write as a power of 2): 2^{int(flops_scenario_2)}")

Enter the number of FLOPs for scenario 1 (Group Convolution) (write as a power of 2): 2^19
Enter the number of FLOPs for scenario 2 (Ordinary Convolution) (write as a power of 2): 2^23


In [32]:
import numpy as np

def convolve2d(image, kernel, stride=1, padding=0, dilation=1):
    """Single-channel 2-D cross-correlation with stride, zero padding and dilation.

    The kernel is applied without flipping, as in deep-learning "convolution".

    Args:
        image: 2-D array-like of shape (H, W).
        kernel: 2-D array-like of shape (K_h, K_w).
        stride: step between consecutive window positions along both axes.
        padding: number of zeros added on every side of the image.
        dilation: spacing between kernel taps; 1 means a dense kernel.

    Returns:
        A float ``np.ndarray`` of shape (h_out, w_out), where
        ``h_out = (H + 2*padding - dilation*(K_h - 1) - 1) // stride + 1``
        and likewise for the width.
    """
    image = np.asarray(image)
    kernel = np.asarray(kernel)

    h_img, w_img = image.shape
    h_kern, w_kern = kernel.shape

    # Extent of the kernel footprint once (dilation - 1) gaps separate its taps
    h_span = dilation * (h_kern - 1) + 1
    w_span = dilation * (w_kern - 1) + 1

    h_out = (h_img + 2 * padding - h_span) // stride + 1
    w_out = (w_img + 2 * padding - w_span) // stride + 1

    output = np.zeros((h_out, w_out))

    if padding > 0:
        image = np.pad(image, padding, mode='constant')

    for i in range(h_out):
        top = i * stride
        for j in range(w_out):
            left = j * stride
            # Step through the footprint by `dilation` to pick exactly
            # h_kern x w_kern samples
            window = image[top:top + h_span:dilation, left:left + w_span:dilation]
            output[i, j] = np.sum(window * kernel)

    return output

# Two 5x5 input channels, built directly as arrays
input_feature1 = np.array([
    [-1, 5, 2, 0, 2],
    [7, -9, 7, -9, 1],
    [-8, 8, -3, -8, 7],
    [-7, 3, 8, 8, -3],
    [-9, 4, -8, -9, -5],
])
input_feature2 = np.array([
    [-9, 2, -2, -4, 6],
    [3, -9, -3, 9, -6],
    [-6, 1, 6, 2, -6],
    [5, -2, -9, 1, -6],
    [6, -4, -8, 4, 0],
])

# One 3x3 kernel per channel; every row of a kernel is the same
kernel1 = np.array([[0, -1, 2]] * 3)
kernel2 = np.array([[-2, 0, 1]] * 3)

# Convolve each channel with its own kernel, then add the per-channel responses
output1 = convolve2d(input_feature1, kernel1)
output2 = convolve2d(input_feature2, kernel2)
final_output = output1 + output2

# Report inputs, kernels and results in order, each under its heading
for heading, value in (
    ("Input Feature 1:", input_feature1),
    ("\nInput Feature 2:", input_feature2),
    ("\nKernel 1:", kernel1),
    ("\nKernel 2:", kernel2),
    ("\nOutput 1:", output1),
    ("\nOutput 2:", output2),
    ("\nFinal Output:", final_output),
):
    print(heading)
    print(value)


Input Feature 1:
[[-1  5  2  0  2]
 [ 7 -9  7 -9  1]
 [-8  8 -3 -8  7]
 [-7  3  8  8 -3]
 [-9  4 -8 -9 -5]]

Input Feature 2:
[[-9  2 -2 -4  6]
 [ 3 -9 -3  9 -6]
 [-6  1  6  2 -6]
 [ 5 -2 -9  1 -6]
 [ 6 -4 -8  4  0]]

Kernel 1:
[[ 0 -1  2]
 [ 0 -1  2]
 [ 0 -1  2]]

Kernel 2:
[[-2  0  1]
 [-2  0  1]
 [-2  0  1]]

Output 1:
[[  8. -40.  37.]
 [ 22. -30.  19.]
 [-21. -15.   7.]]

Output 2:
[[ 25.  19.  -8.]
 [-10.  32.  -6.]
 [-21.  17.  10.]]

Final Output:
[[ 33. -21.  29.]
 [ 12.   2.  13.]
 [-42.   2.  17.]]


In [33]:
import numpy as np

def softmax(x):
    """Softmax along the last axis.

    The maximum along the last axis is subtracted before exponentiating, so
    the largest exponent is 0; this leaves the result unchanged.

    Args:
        x: array-like with at least one dimension.

    Returns:
        Array of the same shape whose entries along the last axis sum to 1.
    """
    shifted = x - np.max(x, axis=-1, keepdims=True)
    numerators = np.exp(shifted)
    denominators = np.sum(numerators, axis=-1, keepdims=True)
    return numerators / denominators

# Input: two rows (tokens) with three features each
X = np.array([[2, -1, -2], [-1, 1, -1]])

# Projection matrices for queries, keys and values
Wq = np.array([[-2, 0, 0], [2, -1, 1], [0, -1, 0]])
Wk = np.array([[-1, -2, 1], [0, 0, 0], [-2, -2, -2]])
Wv = np.array([[1, 1, 1], [2, 0, -1], [1, 2, 2]])

# Project the input into query, key and value spaces
Q = X @ Wq
K = X @ Wk
V = X @ Wv

# Raw dot-product scores (no scaling is applied), normalised row-wise by softmax
attention_scores = Q @ K.T
attention_weights = softmax(attention_scores)

# Output: attention-weighted combination of the value rows
Z = attention_weights @ V

# Print every intermediate quantity under its heading
for heading, value in (
    ("Input X:", X),
    ("\nWeight matrix Wq:", Wq),
    ("\nWeight matrix Wk:", Wk),
    ("\nWeight matrix Wv:", Wv),
    ("\nQuery matrix Q:", Q),
    ("\nKey matrix K:", K),
    ("\nValue matrix V:", V),
    ("\nAttention scores:", attention_scores),
    ("\nAttention weights (after softmax):", attention_weights),
    ("\nOutput Z:", Z),
):
    print(heading)
    print(value)

Input X:
[[ 2 -1 -2]
 [-1  1 -1]]

Weight matrix Wq:
[[-2  0  0]
 [ 2 -1  1]
 [ 0 -1  0]]

Weight matrix Wk:
[[-1 -2  1]
 [ 0  0  0]
 [-2 -2 -2]]

Weight matrix Wv:
[[ 1  1  1]
 [ 2  0 -1]
 [ 1  2  2]]

Query matrix Q:
[[-6  3 -1]
 [ 4  0  1]]

Key matrix K:
[[2 0 6]
 [3 4 1]]

Value matrix V:
[[-2 -2 -1]
 [ 0 -3 -4]]

Attention scores:
[[-18  -7]
 [ 14  13]]

Attention weights (after softmax):
[[1.67014218e-05 9.99983299e-01]
 [7.31058579e-01 2.68941421e-01]]

Output Z:
[[-3.34028437e-05 -2.99998330e+00 -3.99994990e+00]
 [-1.46211716e+00 -2.26894142e+00 -1.80682426e+00]]


In [36]:
import numpy as np

# 3x3 input image
x = np.array([
    [-1.0, 2.0, -2.0],
    [-1.0, -2.0, 0.0],
    [2.0, 2.0, 1.0],
])

# 2x2 convolution weights
w = np.array([
    [2.0, -2.0],
    [-1.0, 1.0],
])

# Upstream gradient of the loss with respect to the 2x2 output
dL_dy = np.array([
    [-3.5, 5.0],
    [1.0, -3.0],
])

# Learning rate
eta = 0.1

# Weight w[a, b] multiplies input pixel x[i + a, j + b] in output y[i, j], so
# its gradient sums dL_dy[i, j] * x[i + a, j + b] over all four output positions.
dL_dw = np.zeros((2, 2))
for a in range(2):
    for b in range(2):
        dL_dw[a, b] = np.sum(dL_dy * x[a:a + 2, b:b + 2])

print("Gradient of the loss with respect to weights (dL/dw):")
print(dL_dw)

# One gradient-descent step, rounded to two decimals
w_new = np.round(w - eta * dL_dw, 2)

print("Updated weights (w_new):")
print(w_new)


Gradient of the loss with respect to weights (dL/dw):
[[ 18.5 -19. ]
 [-10.5   6. ]]
Updated weights (w_new):
[[ 0.15 -0.1 ]
 [ 0.05  0.4 ]]
