In [12]:
import numpy as np

def scaled_dot_product_attention(Q, K, V):
    """Compute scaled dot-product attention: softmax(Q K^T / sqrt(d_k)) V.

    The last two axes of every input are interpreted as
    ``(sequence, feature)``. Any leading axes are treated as batch axes and
    follow NumPy broadcasting rules, so plain 2-D matrices as well as
    batched / multi-head stacks are supported.

    Args:
        Q: Queries, array-like of shape ``(..., n_queries, d_k)``.
        K: Keys, array-like of shape ``(..., n_keys, d_k)``.
        V: Values, array-like of shape ``(..., n_keys, d_v)``.

    Returns:
        A tuple ``(attn_weights, context)`` where ``attn_weights`` has shape
        ``(..., n_queries, n_keys)`` (each row sums to 1) and ``context`` has
        shape ``(..., n_queries, d_v)``.

    Raises:
        ValueError: If any input has fewer than two dimensions.
    """
    Q, K, V = np.asarray(Q), np.asarray(K), np.asarray(V)
    if min(Q.ndim, K.ndim, V.ndim) < 2:
        raise ValueError(
            "Q, K and V must each have at least 2 dimensions "
            "(..., sequence, feature)"
        )

    d_k = K.shape[-1]

    # Transpose only the last two axes: `K.T` would reverse *all* axes and
    # `np.dot` does not batch, which breaks any input with leading batch dims.
    scores = np.matmul(Q, np.swapaxes(K, -1, -2))
    scaled_scores = scores / np.sqrt(d_k)

    # Subtract the row-wise max before exponentiating so the softmax cannot
    # overflow; the shift cancels out in the normalisation.
    exp_scores = np.exp(
        scaled_scores - np.max(scaled_scores, axis=-1, keepdims=True)
    )
    attn_weights = exp_scores / np.sum(exp_scores, axis=-1, keepdims=True)

    context = np.matmul(attn_weights, V)
    return attn_weights, context


# ------------------------------
# Example Inputs for Testing Q1
# ------------------------------
# Two 3-dimensional queries attend over two key/value pairs.
Q = np.array([
    [1.0, 0.0, 1.0],
    [0.0, 1.0, 1.0],
])

K = np.array([
    [1.0, 0.0, 1.0],
    [1.0, 1.0, 0.0],
])

V = np.array([
    [1.0, 2.0],
    [3.0, 4.0],
])

# Run the attention function once and report both of its outputs.
attn_weights, context = scaled_dot_product_attention(Q=Q, K=K, V=V)

print("Attention Weights:\n", attn_weights)
print("\nContext Vector:\n", context)


# Q2: Simple Transformer encoder block (PyTorch)
import torch
import torch.nn as nn

class SimpleEncoderBlock(nn.Module):
    """Transformer encoder layer: self-attention followed by a feed-forward net.

    Each sub-layer is wrapped in a residual connection, and LayerNorm is
    applied to the sum (normalisation comes after the residual add).

    Args:
        d_model: Feature width of the input and output tokens.
        num_heads: Number of attention heads passed to
            ``nn.MultiheadAttention``.
    """

    def __init__(self, d_model=128, num_heads=8):
        super().__init__()

        # batch_first=True -> batched inputs are (batch, seq_len, d_model).
        self.self_attn = nn.MultiheadAttention(
            embed_dim=d_model,
            num_heads=num_heads,
            batch_first=True,
        )

        # Position-wise feed-forward network with a 4x wider hidden layer.
        hidden_dim = 4 * d_model
        self.ffn = nn.Sequential(
            nn.Linear(d_model, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, d_model),
        )

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)

    def forward(self, x):
        """Apply the encoder block to ``x`` and return a tensor of the same shape."""
        # Self-attention: x serves as query, key and value. The attention
        # weights (second return value) are not needed here.
        attended = self.self_attn(x, x, x)[0]
        hidden = self.norm1(x + attended)

        # Feed-forward sub-layer with its own residual + normalisation.
        return self.norm2(hidden + self.ffn(hidden))


# ------------------------------
# Testing Q2 output shape
# ------------------------------
BATCH_SIZE, NUM_TOKENS, D_MODEL = 32, 10, 128

encoder = SimpleEncoderBlock(d_model=D_MODEL, num_heads=8)

# Random input batch of shape (batch_size, tokens, d_model).
x = torch.randn(BATCH_SIZE, NUM_TOKENS, D_MODEL)

# The block should return a tensor with the same shape as its input.
output = encoder(x)

print("Output Shape:", output.shape)



Attention Weights:
 [[0.64045748 0.35954252]
 [0.5        0.5       ]]

Context Vector:
 [[1.71908505 2.71908505]
 [2.         3.        ]]
Output Shape: torch.Size([32, 10, 128])
