In [83]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CustomNetwork(nn.Module):
    """Two-branch convolutional block whose branch outputs are summed.

    Both branches map a 3-channel input to a 3-channel output and use no
    padding:

    * ``main_branch`` - three 3x3 convolutions (strides 1, 1, 2; channels
      3 -> 32 -> 32 -> 3), each followed by BatchNorm2d and ReLU.
    * ``skip_connection`` - one 7x7 convolution with stride 2 (3 -> 3
      channels), followed by BatchNorm2d and ReLU.

    For the 64x64 example input used in this notebook both branches produce
    29x29 feature maps, so they can be added element-wise.
    """

    @staticmethod
    def _conv_bn_relu(in_channels, out_channels, kernel_size, stride):
        """Return ``[Conv2d, BatchNorm2d, ReLU]`` for one unpadded conv stage."""
        return [
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=kernel_size, stride=stride, padding=0),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]

    def __init__(self):
        super().__init__()

        # Main branch: (in_channels, out_channels, kernel_size, stride) per stage.
        main_stages = [(3, 32, 3, 1), (32, 32, 3, 1), (32, 3, 3, 2)]
        main_layers = []
        for stage in main_stages:
            main_layers.extend(self._conv_bn_relu(*stage))
        self.main_branch = nn.Sequential(*main_layers)

        # Skip-connection branch: a single wide-kernel, strided stage.
        self.skip_connection = nn.Sequential(*self._conv_bn_relu(3, 3, 7, 2))

    def forward(self, x):
        """Run both branches on ``x`` and return their element-wise sum."""
        main_out = self.main_branch(x)
        skip_out = self.skip_connection(x)
        return main_out + skip_out


# Smoke test: build the network and print its layer structure.
model = CustomNetwork()
print(model)

# Random input batch: one 3-channel 64x64 sample.
input_shape = (1, 3, 64, 64)
input_tensor = torch.randn(*input_shape)

# Forward pass through both branches; report the resulting shape.
output = model(input_tensor)
print("Output shape:", output.shape)

CustomNetwork(
  (main_branch): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
    (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Conv2d(32, 3, kernel_size=(3, 3), stride=(2, 2))
    (7): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
  )
  (skip_connection): Sequential(
    (0): Conv2d(3, 3, kernel_size=(7, 7), stride=(2, 2))
    (1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
)
Output shape: torch.Size([1, 3, 29, 29])


In [84]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Define the RNN with hardcoded weights
class CustomRNN(nn.Module):
    """Single-layer tanh RNN with fixed (non-trainable) weights.

    Per time step ``forward`` evaluates, starting from ``h_0 = 0``::

        h_t = tanh(h_{t-1} @ Whh^T + x_t @ Wxh^T + bxh)
        y_t = h_t @ Who^T + bho

    The weights are registered as non-persistent buffers: they follow the
    module through ``.to()`` / ``.double()`` but, as before, do not appear in
    ``state_dict()`` or ``parameters()``.

    NOTE(review): ``bhh`` is defined but never added in ``forward``; only
    ``bxh`` is used. The recorded notebook output corresponds to this
    behaviour, so it is kept. Confirm whether ``bhh`` was meant to be included.
    """

    def __init__(self):
        super().__init__()

        # Hardcoded weights for hidden state update
        self.register_buffer(
            "Whh",
            torch.tensor([[0.0, -1.0],
                          [0.0, -1.0]], dtype=torch.float32),  # Shape: (2, 2)
            persistent=False)
        self.register_buffer(
            "bhh",
            torch.tensor([1.0, 1.0], dtype=torch.float32),     # Shape: (2,)
            persistent=False)

        # Hardcoded weights for input to hidden state
        self.register_buffer(
            "Wxh",
            torch.tensor([[-1.0, 0.0, 0.5],
                          [1.0, 0.0, -0.5]], dtype=torch.float32),  # Shape: (2, 3)
            persistent=False)
        self.register_buffer(
            "bxh",
            torch.tensor([1.0, 1.0], dtype=torch.float32),     # Shape: (2,)
            persistent=False)

        # Hardcoded weights for hidden state to output
        self.register_buffer(
            "Who",
            torch.tensor([[0.0, 1.0]], dtype=torch.float32),   # Shape: (1, 2)
            persistent=False)
        self.register_buffer(
            "bho",
            torch.tensor([0.0], dtype=torch.float32),          # Shape: (1,)
            persistent=False)

    def forward(self, x):
        """
        Run the recurrence over a batch of sequences.

        Args:
            x: Input sequence of shape (batch_size, seq_len, input_size).
               ``input_size`` must match the second dimension of ``Wxh`` (3)
               and ``x`` must share the dtype/device of the module's weights.

        Returns:
            Tensor of shape (batch_size, seq_len) with ``y_t`` for every time
            step; an empty (batch_size, 0) tensor when ``seq_len`` is 0.
        """
        batch_size, seq_len, input_size = x.shape

        # Initial hidden state h_0 = 0, sized from Whh and created on the
        # input's dtype/device so the matmuls below never mix devices.
        hidden_size = self.Whh.shape[0]
        h_t = torch.zeros(batch_size, hidden_size,
                          dtype=x.dtype, device=x.device)
        outputs = []

        for t in range(seq_len):
            x_t = x[:, t, :]  # Current input, shape (batch_size, input_size)

            # h_t = tanh(h_{t-1} @ Whh^T + x_t @ Wxh^T + bxh)
            h_t = torch.tanh(
                torch.matmul(h_t, self.Whh.T) +  # Hidden to hidden
                torch.matmul(x_t, self.Wxh.T) +  # Input to hidden
                self.bxh                         # Input-to-hidden bias only
            )

            # y_t = h_t @ Who^T + bho, shape (batch_size, 1)
            y_t = torch.matmul(h_t, self.Who.T) + self.bho
            outputs.append(y_t)

        # torch.cat rejects an empty list, so handle seq_len == 0 explicitly.
        if not outputs:
            return x.new_zeros(batch_size, 0)

        # Concatenate outputs along the sequence dimension
        return torch.cat(outputs, dim=1)  # Shape: (batch_size, seq_len)


# Instantiate the fixed-weight RNN.
model = CustomRNN()

# One batch holding a 3-step sequence of 3-dimensional inputs
# (batch_size=1, seq_len=3, input_size=3).
sequence_rows = [[1.0, 2.0, 3.0],
                 [4.0, 5.0, 6.0],
                 [7.0, 8.0, 9.0]]
input_sequence = torch.tensor([sequence_rows], dtype=torch.float32)

# Run the recurrence and show the per-step outputs.
output = model(input_sequence)
print("Output:\n", output)

Output:
 tensor([[0.4621, 0.9118, 0.9888]])


In [85]:
import torch
import torch.nn as nn

# Define the RNN with hardcoded weights
class CustomRNN(nn.Module):
    """Single-layer tanh RNN with fixed (non-trainable) weights.

    Per time step ``forward`` evaluates, starting from ``h_0 = 0``::

        h_t = tanh(h_{t-1} @ Whh^T + x_t @ Wxh^T + bxh)
        y_t = h_t @ Who^T + bho

    The weights are registered as non-persistent buffers: they follow the
    module through ``.to()`` / ``.double()`` but, as before, do not appear in
    ``state_dict()`` or ``parameters()``.

    NOTE(review): ``bhh`` is defined but never added in ``forward``; only
    ``bxh`` is used. The recorded notebook output corresponds to this
    behaviour, so it is kept. Confirm whether ``bhh`` was meant to be included.
    """

    def __init__(self):
        super().__init__()

        # Hardcoded weights for hidden state update
        self.register_buffer(
            "Whh",
            torch.tensor([[0.0, -1.0],
                          [0.0, -1.0]], dtype=torch.float32),  # Shape: (2, 2)
            persistent=False)
        self.register_buffer(
            "bhh",
            torch.tensor([1.0, 1.0], dtype=torch.float32),     # Shape: (2,)
            persistent=False)

        # Hardcoded weights for input to hidden state
        self.register_buffer(
            "Wxh",
            torch.tensor([[-1.0, 0.0, 0.5],
                          [1.0, 0.0, -0.5]], dtype=torch.float32),  # Shape: (2, 3)
            persistent=False)
        self.register_buffer(
            "bxh",
            torch.tensor([1.0, 1.0], dtype=torch.float32),     # Shape: (2,)
            persistent=False)

        # Fully connected layer (Who) mapping the hidden state to the output
        self.register_buffer(
            "Who",
            torch.tensor([[0.0, 1.0]], dtype=torch.float32),   # Shape: (1, 2)
            persistent=False)
        self.register_buffer(
            "bho",
            torch.tensor([0.0], dtype=torch.float32),          # Shape: (1,)
            persistent=False)

    def forward(self, x):
        """
        Run the recurrence over a batch of sequences.

        Args:
            x: Input sequence of shape (batch_size, seq_len, input_size).
               ``input_size`` must match the second dimension of ``Wxh`` (3)
               and ``x`` must share the dtype/device of the module's weights.

        Returns:
            Tensor of shape (batch_size, seq_len) with ``y_t`` for every time
            step; an empty (batch_size, 0) tensor when ``seq_len`` is 0.
        """
        batch_size, seq_len, input_size = x.shape

        # Initial hidden state h_0 = 0, sized from Whh and created on the
        # input's dtype/device so the matmuls below never mix devices.
        hidden_size = self.Whh.shape[0]
        h_t = torch.zeros(batch_size, hidden_size,
                          dtype=x.dtype, device=x.device)
        outputs = []

        for t in range(seq_len):
            x_t = x[:, t, :]  # Input at time step t

            # h_t = tanh(h_{t-1} @ Whh^T + x_t @ Wxh^T + bxh)
            h_t = torch.tanh(
                torch.matmul(h_t, self.Whh.T) +  # Hidden to hidden
                torch.matmul(x_t, self.Wxh.T) +  # Input to hidden
                self.bxh                         # Input-to-hidden bias only
            )

            # y_t = h_t @ Who^T + bho, shape (batch_size, 1)
            y_t = torch.matmul(h_t, self.Who.T) + self.bho
            outputs.append(y_t)

        # torch.cat rejects an empty list, so handle seq_len == 0 explicitly.
        if not outputs:
            return x.new_zeros(batch_size, 0)

        # Concatenate outputs along the sequence dimension
        return torch.cat(outputs, dim=1)  # Shape: (batch_size, seq_len)


# Instantiate the fixed-weight RNN.
model = CustomRNN()

# One batch holding a 3-step sequence of 3-dimensional inputs
# (batch_size=1, seq_len=3, input_size=3).
sequence_rows = [[1.0, 2.0, 3.0],
                 [4.0, 5.0, 6.0],
                 [7.0, 8.0, 9.0]]
input_sequence = torch.tensor([sequence_rows], dtype=torch.float32)

# Run the recurrence; each step's hidden state is projected by Who/bho.
output = model(input_sequence)
print("Output (after Fully Connected Layer):\n", output)

Output (after Fully Connected Layer):
 tensor([[0.4621, 0.9118, 0.9888]])


In [86]:
import torch
import math

# Hàm tính Positional Encoding
def positional_encoding(seq_len, d_model):
    """
    Compute sinusoidal positional encodings for an input sequence.

    Even columns hold ``sin(pos * w_i)`` and odd columns hold
    ``cos(pos * w_i)`` with ``w_i = 10000 ** (-2i / d_model)``.

    Args:
        seq_len (int): Sequence length (number of positions).
        d_model (int): Embedding size. May be even or odd.

    Returns:
        torch.Tensor: Positional-encoding tensor of shape (seq_len, d_model).
    """
    # Output matrix, shape (seq_len, d_model)
    PE = torch.zeros(seq_len, d_model)
    position = torch.arange(0, seq_len, dtype=torch.float32).unsqueeze(
        1)  # Shape: (seq_len, 1)
    div_term = torch.exp(torch.arange(0, d_model, 2).float(
    ) * (-math.log(10000.0) / d_model))  # Shape: (ceil(d_model / 2),)

    # Shared sin/cos arguments, shape (seq_len, ceil(d_model / 2))
    angles = position * div_term

    # Sine on the even column indices
    PE[:, 0::2] = torch.sin(angles)
    # Cosine on the odd column indices. For odd d_model there is one fewer odd
    # column than even columns, so drop the surplus frequency to keep the
    # shapes aligned (previously this raised a shape-mismatch error).
    PE[:, 1::2] = torch.cos(angles[:, : d_model // 2])

    return PE


# Word embeddings: one row per token (3 tokens, embedding size 4).
word_embedding = torch.tensor([
    [1.5, 0.5, 0.5, 1.5],
    [1.5, 1.0, 1.0, 1.5],
    [1.0, 0.75, 0.75, 1.0]
], dtype=torch.float32)

# Positional encoding with the same (seq_len, d_model) shape as the embeddings.
seq_len, d_model = word_embedding.shape
positional_embedding = positional_encoding(seq_len, d_model)

# Final embedding = word embedding + positional encoding, rounded to the
# nearest first decimal place.
summed_embedding = word_embedding + positional_embedding
final_embedding = torch.round(summed_embedding * 10) / 10

# Show the three tensors.
for label, shown in (
    ("Word Embedding:\n", word_embedding),
    ("\nPositional Encoding:\n", positional_embedding),
    ("\nFinal Embedding (Word + Positional):\n", final_embedding),
):
    print(label, shown)

Word Embedding:
 tensor([[1.5000, 0.5000, 0.5000, 1.5000],
        [1.5000, 1.0000, 1.0000, 1.5000],
        [1.0000, 0.7500, 0.7500, 1.0000]])

Positional Encoding:
 tensor([[ 0.0000,  1.0000,  0.0000,  1.0000],
        [ 0.8415,  0.5403,  0.0100,  0.9999],
        [ 0.9093, -0.4161,  0.0200,  0.9998]])

Final Embedding (Word + Positional):
 tensor([[1.5000, 1.5000, 0.5000, 2.5000],
        [2.3000, 1.5000, 1.0000, 2.5000],
        [1.9000, 0.3000, 0.8000, 2.0000]])


In [87]:
import torch

def _project_and_round(inputs, weight, bias):
    """Return ``inputs @ weight + bias`` rounded to the nearest first decimal."""
    projected = torch.matmul(inputs, weight) + bias
    return torch.round(projected * 10) / 10


# Every projection below is applied to the final (word + positional) embedding.
input_matrix = final_embedding

# --- Query: weight, bias (one identical row per token) and Q matrix ---------
query_weight = torch.tensor([
    [0.1, 0.2, 0.6, 0.4],
    [0.2, 0.3, 0.7, 0.3],
    [0.3, 0.4, 0.8, 0.2],
    [0.4, 0.5, 0.9, 0.1]
], dtype=torch.float32)

query_bias = torch.tensor([
    [0.1, 0.3, 0.2, 0.4],
    [0.1, 0.3, 0.2, 0.4],
    [0.1, 0.3, 0.2, 0.4]
], dtype=torch.float32)

query_matrix = _project_and_round(input_matrix, query_weight, query_bias)
print("Query Matrix (Q):\n", query_matrix)

# --- Key: weight, bias (one identical row per token) and K matrix -----------
key_weight = torch.tensor([
    [0.2, 0.1, 0.3, 0.0],
    [0.4, 0.3, 0.6, 0.2],
    [0.8, 0.5, 0.9, 0.4],
    [1.0, 0.7, 0.2, 0.6]
], dtype=torch.float32)

key_bias = torch.tensor([
    [0.8, 0.6, 0.4, 0.2],
    [0.8, 0.6, 0.4, 0.2],
    [0.8, 0.6, 0.4, 0.2]
], dtype=torch.float32)

key_matrix = _project_and_round(input_matrix, key_weight, key_bias)
print("\nKey Matrix (K):\n", key_matrix)

# --- Value: weight, bias (one identical row per token) and V matrix ---------
value_weight = torch.tensor([
    [0.05, 0.15, 0.25, 0.35],
    [0.35, 0.25, 0.15, 0.05],
    [0.05, 0.15, 0.25, 0.35],
    [0.35, 0.25, 0.15, 0.05]
], dtype=torch.float32)

value_bias = torch.tensor([
    [0.25, 0.5, 0.75, 1.0],
    [0.25, 0.5, 0.75, 1.0],
    [0.25, 0.5, 0.75, 1.0]
], dtype=torch.float32)

value_matrix = _project_and_round(input_matrix, value_weight, value_bias)
print("\nValue Matrix (V):\n", value_matrix)

Query Matrix (Q):
 tensor([[1.7000, 2.5000, 4.8000, 1.8000],
        [1.9000, 2.9000, 5.7000, 2.2000],
        [1.4000, 2.1000, 4.0000, 1.6000]])

Key Matrix (K):
 tensor([[4.6000, 3.2000, 2.7000, 2.2000],
        [5.2000, 3.5000, 3.4000, 2.4000],
        [3.9000, 2.7000, 2.3000, 1.8000]])

Value Matrix (V):
 tensor([[1.8000, 1.8000, 1.8000, 1.9000],
        [1.8000, 2.0000, 2.2000, 2.4000],
        [1.2000, 1.5000, 1.8000, 2.1000]])


In [88]:
# Scaled dot-product scores: (Q @ K^T) / sqrt(d_k), rounded to the nearest
# first decimal place. d_k is the number of columns of the key matrix.
dk = key_matrix.shape[1]
scale = torch.sqrt(torch.tensor(dk, dtype=torch.float32))
raw_scores = torch.matmul(query_matrix, key_matrix.T)
attention_scores = torch.round((raw_scores / scale) * 10) / 10
attention_scores

tensor([[16.4000, 19.1000, 13.8000],
        [19.1000, 22.3000, 16.2000],
        [13.7000, 16.0000, 11.6000]])

In [89]:
# Build the 3x3 additive mask: 0 on and below the diagonal, -inf above it,
# so each row keeps only its own column and the columns before it.
mask_size = 3
neg_inf = -float('inf')
mask = torch.tensor(
    [[0 if col <= row else neg_inf for col in range(mask_size)]
     for row in range(mask_size)],
    dtype=torch.float32)

# Show the result.
print("Mask Matrix:\n", mask)

Mask Matrix:
 tensor([[0., -inf, -inf],
        [0., 0., -inf],
        [0., 0., 0.]])


In [90]:
# Display the value matrix V again for reference (no new computation).
value_matrix

tensor([[1.8000, 1.8000, 1.8000, 1.9000],
        [1.8000, 2.0000, 2.2000, 2.4000],
        [1.2000, 1.5000, 1.8000, 2.1000]])

In [101]:
# Masked scores: the mask adds -inf above the diagonal and 0 elsewhere.
# Computed in this cell so that displaying it does not depend on a later cell
# having been executed first (on a fresh Restart & Run All the bare name
# raised NameError here, because it was only assigned further down).
scores_with_mask = attention_scores + mask
scores_with_mask

tensor([[16.4000,    -inf,    -inf],
        [19.1000, 22.3000,    -inf],
        [13.7000, 16.0000, 11.6000]])

In [93]:
# Apply the additive mask, then softmax over each row: positions holding
# -inf receive zero weight.
scores_with_mask = attention_scores + mask
attention_weights = F.softmax(scores_with_mask, dim=-1)

# Weighted sum of the value rows, rounded to the nearest first decimal place.
attention_output = torch.matmul(attention_weights, value_matrix)
attention_scores_mask = torch.round(attention_output * 10) / 10
attention_scores_mask

tensor([[1.8000, 1.8000, 1.8000, 1.9000],
        [1.8000, 2.0000, 2.2000, 2.4000],
        [1.8000, 2.0000, 2.2000, 2.4000]])

In [98]:
# Hard-coded masked-attention output. These literals are the same rounded
# values that the masked-attention cell displays; the last two rows are equal.
first_row = [1.8000, 1.8000, 1.8000, 1.9000]
later_row = [1.8000, 2.0000, 2.2000, 2.4000]
attention_scores_mask = torch.tensor([first_row, later_row, later_row])

In [100]:
# Add the attention output to the input embedding (element-wise sum).
combined_input = attention_scores_mask + final_embedding

# Layer normalisation over the feature dimension (size of dim 1).
# eps=0 means no stabilising constant is added to the variance.
feature_dim = combined_input.size(1)
layer_norm = nn.LayerNorm(normalized_shape=feature_dim, eps=0)

# Explicitly set gamma = 1 and beta = 0 without recording gradients.
with torch.no_grad():
    for affine_param, fill_value in ((layer_norm.weight, 1.0),
                                     (layer_norm.bias, 0.0)):
        affine_param.fill_(fill_value)

output = layer_norm(combined_input)
output

tensor([[-0.0337, -0.0337, -1.3798,  1.4471],
        [ 0.2694, -0.6543, -1.1162,  1.5011],
        [ 0.4472, -1.3416, -0.4472,  1.3416]],
       grad_fn=<NativeLayerNormBackward0>)