In [10]:
import torch
import torch.nn as nn

class MyConv1DModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, out_channels=1, kernel_size=2, fc_out_dim=2):
        super(MyConv1DModel, self).__init__()

        # (A) Embedding layer
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size, embedding_dim=embedding_dim)

        custom_weights = torch.tensor([
            [0.26, -1.31],  # 0  <unk>
            [0.72, 0.43],  # 1  <pad>
            [-0.67, 0.61],  # 2  more
            [0.50, 0.50],  # 3  you
            [-0.26, -0.10],  # 4  come
            [1.29, 1.25],  # 5  get
            [1.95, 1.18],  # 6  low
            [-1.44, -1.89],  # 7  lucky
            [-0.20, 0.88],  # 8  score
            [-0.39, 1.07],  # 9  study
            [0.32, -0.05],  # 10 the 
            [0.59, -0.98],  # 11 to
        ], dtype=torch.float)


        # Gán cho embedding layer:
        with torch.no_grad():
            self.embedding.weight.copy_(custom_weights)

        # (B) Conv1d: in_channels=2, out_channels=1, kernel_size=2
        self.conv1d = nn.Conv1d(
            in_channels=embedding_dim,
            out_channels=out_channels,
            kernel_size=kernel_size
        )

        # (C) FC (6 -> 2) vì sau conv1d ta có (batch, 1, 6) => flatten => 6
        self.fc = nn.Linear(5, fc_out_dim)

        # --------------------------------------------------------
        #  GÁN TRỌNG SỐ THỦ CÔNG THEO HÌNH VẼ
        # --------------------------------------------------------
        with torch.no_grad():
            # 1) Conv1d weight & bias
            # shape = (1, 2, 2)
            custom_conv_weight = torch.tensor([[[0.33, -0.26],
                                                [0.38, -0.46]]],
                                              dtype=torch.float)
            custom_conv_bias = torch.tensor([-0.30], dtype=torch.float)

            self.conv1d.weight.copy_(custom_conv_weight)
            self.conv1d.bias.copy_(custom_conv_bias)

            # 2) FC weight & bias
            # shape(fc.weight) = (2, 6), shape(fc.bias) = (2,)
            custom_fc_weight = torch.tensor([
                [0.40, -0.06, -0.44, -0.10, 0.43,],  # node 0
                [-0.19, 0.28, -0.04, -0.01, 0.23,],  # node 1
            ], dtype=torch.float)
            custom_fc_bias = torch.tensor([0.18, 0.18], dtype=torch.float)

            self.fc.weight.copy_(custom_fc_weight)
            self.fc.bias.copy_(custom_fc_bias)
        # --------------------------------------------------------

    def forward(self, x):
        # x.shape = (batch_size, seq_len)
        # (1) Embedding => (batch_size, seq_len, embedding_dim)
        x = self.embedding(x)

        # (2) Permute => (batch_size, embedding_dim, seq_len)
        x = x.permute(0, 2, 1)

        # (3) Conv1d => (batch_size, out_channels=1, new_length=6)
        x = self.conv1d(x)
        
        x = torch.tensor([[[0.5900, -1.6224, 0.5405, -1.2632, 0.4391]],
                         [[-0.68, -0.13, 0.95, -1.38, -0.13]]])

        # (4) Flatten => (batch_size, 6)
        x = x.view(x.size(0), -1)

        # (5) FC => (batch_size, fc_out_dim=2)
        x = self.fc(x)

        return x


# =====================================
# Chuẩn bị dữ liệu
# =====================================
input_vector_1 = torch.tensor([3, 0, 5, 10, 6, 8], dtype=torch.long)
input_vector_2 = torch.tensor([2, 9, 2, 7, 4, 1], dtype=torch.long)
X = torch.stack([input_vector_1, input_vector_2], dim=0)

print("Input shape:", X.shape)  # (2, 7)

# =====================================
# Khởi tạo và chạy thử mô hình
# =====================================
vocab_size = 12
embedding_dim = 2

model = MyConv1DModel(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    out_channels=1,
    kernel_size=2,
    fc_out_dim=2
)

output = model(X)
print("Output shape:", output.shape)  # (2, 2)
print("Output:", output)

Input shape: torch.Size([2, 6])
Output shape: torch.Size([2, 2])
Output: tensor([[ 0.5907, -0.2944],
        [-0.4201,  0.2187]], grad_fn=<AddmmBackward0>)


In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class POSTaggingModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, num_classes=4):
        super(POSTaggingModel, self).__init__()

        # Embedding Layer (Chuyển token -> vector)
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size, embedding_dim=embedding_dim)
        
        custom_weights = torch.tensor([
            [0.26, -1.31],  # 0  <unk>
            [0.72, 0.43],  # 1  <pad>
            [-0.67, 0.61],  # 2  more
            [0.50, 0.50],  # 3  you
            [-0.26, -0.10],  # 4  come
            [1.29, 1.25],  # 5  get
            [1.95, 1.18],  # 6  low
            [-1.44, -1.89],  # 7  lucky
            [-0.20, 0.88],  # 8  score
            [-0.39, 1.07],  # 9  study
            [0.32, -0.05],  # 10 the
            [0.59, -0.98],  # 11 to
        ], dtype=torch.float)
        # Gán cho embedding layer:
        with torch.no_grad():
            self.embedding.weight.copy_(custom_weights)
        # Fully Connected Layer (Nhận embedding, xuất ra 4 class)
        self.fc = nn.Linear(embedding_dim, num_classes)

        # --------------------------------------------------------
        #  GÁN TRỌNG SỐ THỦ CÔNG THEO HÌNH VẼ
        # --------------------------------------------------------
        with torch.no_grad():
            # (A) Embedding trọng số (tạo ngẫu nhiên hoặc load từ file pretrained nếu có)
            # Ở đây mình để random, bạn có thể thay bằng pretrained weights nếu cần
            custom_embedding_weights = torch.rand(vocab_size, embedding_dim)
            self.embedding.weight.copy_(custom_embedding_weights)

            # (B) Gán weight FC theo hình
            custom_fc_weight = torch.tensor([
                [0.3792, 0.4146],  # w0
                [0.4638, -0.0273],  # w1
                [-0.2622, 0.2486],  # w2
                [0.5454, -0.3664],  # w3
            ], dtype=torch.float)
            self.fc.weight.copy_(custom_fc_weight)

            # (C) Gán bias FC theo hình
            custom_fc_bias = torch.tensor(
                [-0.62, 0.37, 0.57, -0.48], dtype=torch.float)
            self.fc.bias.copy_(custom_fc_bias)
        # --------------------------------------------------------

    def forward(self, x):
        # 1. Lấy embedding cho từng token
        x = self.embedding(x)  # (batch_size, seq_len, embedding_dim)

        # 2. Fully Connected Layer
        x = self.fc(x)  # (batch_size, seq_len, num_classes)

        # 3. Softmax để chuẩn hóa xác suất
        x = F.log_softmax(x, dim=-1)  # (batch_size, seq_len, num_classes)

        return x


# =====================================
# 🔹 Khởi tạo dữ liệu với **6 tokens**
# =====================================
vocab_size = 12   # Giả sử vocab có 12 từ (giống ảnh)
embedding_dim = 2  # Embedding có 2 chiều
num_classes = 4   # 4 class POS tagging

# Giả lập batch gồm 2 câu, mỗi câu có 6 token
input_vector_1 = torch.tensor([3, 0, 5, 10, 6, 8], dtype=torch.long)
input_vector_2 = torch.tensor([2, 9, 2, 7, 4, 1], dtype=torch.long)
X = torch.stack([input_vector_1, input_vector_2], dim=0)

# Nhãn thực tế (Target) cho 6 tokens
target_labels = torch.tensor([
    [0, 1, 2, 3, 0, 2],  # Nhãn cho câu 1
    [2, 3, 1, 0, 1, 3]   # Nhãn cho câu 2
], dtype=torch.long)

# =====================================
# 🔹 Khởi tạo và chạy thử mô hình
# =====================================
model = POSTaggingModel(vocab_size=vocab_size,
                        embedding_dim=embedding_dim, num_classes=num_classes)

# Forward Pass
output = model(X)

# Lấy nhãn dự đoán (vị trí có giá trị lớn nhất trên mỗi token)
predicted_labels = torch.argmax(output, dim=-1)  # (batch_size, seq_len)

# In kết quả
print("Output shape:", output.shape)  # (batch_size=2, seq_len=6, num_classes)
print("Output logits:\n", output)
print("Predicted POS labels:\n", predicted_labels)


# =====================================
# 🔹 Tính Loss
# =====================================
criterion = nn.CrossEntropyLoss()
loss = criterion(output.view(-1, num_classes), target_labels.view(-1))
print("Loss:", loss.item())

Output shape: torch.Size([2, 6, 4])
Output logits:
 tensor([[[-1.7674, -1.1379, -0.9054, -2.2598],
         [-1.6684, -1.0048, -1.1336, -2.0920],
         [-1.9867, -1.0435, -1.0086, -1.9246],
         [-1.9444, -1.0939, -0.9340, -2.0478],
         [-1.7223, -1.0941, -0.9748, -2.2142],
         [-1.7620, -1.0440, -1.0412, -2.0937]],

        [[-1.7695, -1.1969, -0.8359, -2.3648],
         [-1.8412, -1.0429, -1.0275, -2.0324],
         [-1.7695, -1.1969, -0.8359, -2.3648],
         [-2.0036, -0.9812, -1.1247, -1.7984],
         [-2.0313, -1.1137, -0.8974, -2.0185],
         [-1.8774, -1.0375, -1.0308, -1.9955]]], grad_fn=<LogSoftmaxBackward0>)
Predicted POS labels:
 tensor([[2, 1, 2, 2, 2, 2],
        [2, 2, 2, 1, 2, 2]])
Loss: 1.480851650238037


In [21]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class RNNPOSModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_size=3, num_classes=6):
        super(RNNPOSModel, self).__init__()

        # Embedding Layer
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size, embedding_dim=embedding_dim)

        # RNN Cell
        self.rnn = nn.RNNCell(input_size=embedding_dim,
                              hidden_size=hidden_size)

        # Fully Connected Layer (Hidden state → POS class)
        self.fc = nn.Linear(hidden_size, num_classes)

        # ----------------------------------------
        # 🔹 GÁN TRỌNG SỐ THỦ CÔNG THEO HÌNH VẼ
        # ----------------------------------------
        with torch.no_grad():
            # (A) Gán trọng số Embedding (random)
            custom_weights = torch.tensor([
                [0.26, -1.31],  # 0  <unk>
                [0.72, 0.43],  # 1  <pad>
                [-0.67, 0.61],  # 2  at
                [0.50, 0.50],  # 3  based
                [-0.26, -0.10],  # 4  deepmind
                [1.29, 1.25],  # 5  demis
                [1.95, 1.18],  # 6  hassabis
                [-1.44, -1.89],  # 7  in
                [-0.20, 0.88],  # 8  is
                [-0.39, 1.07],  # 9  nadella
                [0.32, -0.05],  # 10 satya
                [0.59, -0.98],  # 11 washington
            ], dtype=torch.float)
            self.embedding.weight.copy_(custom_weights)

            # (B) Gán trọng số RNN Cell
            # W_in (3,2) - (hidden_size, embedding_dim)
            custom_W_in = torch.tensor([
                [-0.07, -0.31],
                [-0.28, -0.19],
                [-0.23, -0.15]
            ], dtype=torch.float)
            self.rnn.weight_ih.copy_(custom_W_in)

            # b_in (3,)
            custom_b_in = torch.tensor([-0.47, -0.47, 0.50], dtype=torch.float)
            self.rnn.bias_ih.copy_(custom_b_in)

            # W_hh (3,3) - (hidden_size, hidden_size)
            custom_W_hh = torch.tensor([
                [0.04, 0.37, 0.32],
                [0.46, 0.54, -0.54],
                [0.25, -0.02, 0.05]
            ], dtype=torch.float)
            self.rnn.weight_hh.copy_(custom_W_hh)

            # b_hh (3,)
            custom_b_hh = torch.tensor([0.42, -0.50, 0.41], dtype=torch.float)
            self.rnn.bias_hh.copy_(custom_b_hh)

            # (C) Trọng số Fully Connected (FC) - 6 class
            # W_fc (6,3) - (num_classes, hidden_size)
            custom_W_fc = torch.tensor([
                [0.10, 0.53, 0.23],
                [0.34, 0.32, -0.36],
                [0.24, -0.35, 0.29],
                [-0.28, 0.10, -0.18],
                [0.39, 0.15, 0.49],
                [-0.57, 0.35, 0.54]
            ], dtype=torch.float)
            self.fc.weight.copy_(custom_W_fc)

            # b_fc (6,)
            custom_b_fc = torch.tensor(
                [-0.13, 0.20, 0.13, 0.42, -0.22, 0.37], dtype=torch.float)
            self.fc.bias.copy_(custom_b_fc)
        # ----------------------------------------

    def forward(self, x):
        batch_size, seq_len = x.shape

        # 1. Embedding
        x = self.embedding(x)  # (batch_size, seq_len, embedding_dim)

        # 2. RNN Cell xử lý từng token
        # Hidden state ban đầu
        h_t = torch.zeros(batch_size, self.rnn.hidden_size)
        hidden_states = []

        for t in range(seq_len):
            h_t = self.rnn(x[:, t, :], h_t)  # (batch_size, hidden_dim)
            # (batch_size, 1, hidden_dim)
            hidden_states.append(h_t.unsqueeze(1))

        # 3. Gộp tất cả hidden state lại
        # (batch_size, seq_len, hidden_dim)
        hidden_states = torch.cat(hidden_states, dim=1)

        # 4. Fully Connected (FC) cho từng token
        out = self.fc(hidden_states)  # (batch_size, seq_len, num_classes)

        # 5. Softmax
        # out = F.log_softmax(out, dim=-1)  # (batch_size, seq_len, num_classes)

        return out


# =====================================
# 🔹 Khởi tạo dữ liệu với **6 tokens**
# =====================================
vocab_size = 12   # Giả sử vocab có 12 từ
embedding_dim = 2  # Embedding có 2 chiều
hidden_size = 3    # Hidden state của RNN
num_classes = 6    # 🔹 6 class POS tagging

# Giả lập batch gồm 2 câu, mỗi câu có 6 token
input_vector_1 = torch.tensor([10, 9, 8, 3, 7, 11], dtype=torch.long)
input_vector_2 = torch.tensor([5, 6, 0, 2, 4, 1], dtype=torch.long)
X = torch.stack([input_vector_1, input_vector_2],
                dim=0)  # (batch_size=2, seq_len=6)

# Nhãn thực tế (Target) có 6 nhãn cho mỗi token
target_labels = torch.tensor([
    [0, 1, 2, 3, 0, 2],  # Nhãn cho câu 1
    [2, 3, 1, 0, 5, 4]   # Nhãn cho câu 2
], dtype=torch.long)  # (batch_size=2, seq_len=6)

# =====================================
# 🔹 Khởi tạo và chạy thử mô hình
# =====================================
model = RNNPOSModel(vocab_size=vocab_size, embedding_dim=embedding_dim,
                    hidden_size=hidden_size, num_classes=num_classes)

# Forward Pass
output = model(X)  # (batch_size=2, seq_len=6, num_classes)
print("Output shape:", output.shape)
print("Output logits:\n", output)

# =====================================
# 🔹 Lấy nhãn dự đoán
# =====================================
predicted_labels = torch.argmax(output, dim=-1)  # (batch_size, seq_len)
print("Predicted POS labels:\n", predicted_labels)

# =====================================
# 🔹 Tính Loss
# =====================================
criterion = nn.CrossEntropyLoss()
# (batch_size * seq_len, num_classes)
loss = criterion(output.view(-1, num_classes), target_labels.view(-1))
print("Loss:", loss.item())

Output shape: torch.Size([2, 6, 6])
Output logits:
 tensor([[[-0.3919, -0.3171,  0.5895,  0.2339, -0.0224,  0.5002],
         [-0.5143, -0.4956,  0.5719,  0.3103, -0.1750,  0.6450],
         [-0.5387, -0.4904,  0.5579,  0.3242, -0.2123,  0.6228],
         [-0.5538, -0.4547,  0.5486,  0.3258, -0.2327,  0.5626],
         [-0.3275, -0.2517,  0.7864,  0.0566,  0.2581,  0.3234],
         [-0.4223, -0.3168,  0.7369,  0.1264,  0.1089,  0.3569]],

        [[-0.5719, -0.4025,  0.4513,  0.3914, -0.3493,  0.5418],
         [-0.6746, -0.4111,  0.3775,  0.4682, -0.5260,  0.5037],
         [-0.4611, -0.3578,  0.6707,  0.1934, -0.0034,  0.4261],
         [-0.4833, -0.4829,  0.6137,  0.2698, -0.0993,  0.6272],
         [-0.4782, -0.4153,  0.6553,  0.2215, -0.0423,  0.5031],
         [-0.5417, -0.4476,  0.5670,  0.3078, -0.2006,  0.5509]]],
       grad_fn=<ViewBackward0>)
Predicted POS labels:
 tensor([[2, 5, 5, 5, 2, 2],
        [5, 5, 2, 5, 2, 2]])
Loss: 1.879526138305664


In [25]:
import torch

# 1️⃣ Bộ từ vựng (Vocabulary)
vocab = {
    "<unk>": 0, "<pad>": 1, "<sep>": 2,
    "aioers": 3, "are": 4, "how": 5, "friendly": 6,
    "smart": 7
}

# 2️⃣ Dữ liệu đầu vào
question = ["how", "are", "aioers"]  # Câu hỏi
context = ["aioers", "are", "friendly"]  # Ngữ cảnh
answer = ["very", "smart"]  # Câu trả lời

# 3️⃣ Ghép lại thành 1 câu hoàn chỉnh: <question> + sep + <context>
input_tokens = question + ["<sep>"] + context
print("Merged Tokens:", input_tokens)

# 4️⃣ Chuyển thành chỉ số (tokenize & vectorize)
# Nếu token không có trong vocab, thay bằng "<unk>" (0)
input_ids = [vocab.get(token, vocab["<unk>"]) for token in input_tokens]
print("Input IDs:", input_ids)

# 5️⃣ Padding (đảm bảo max_sequence_length = 9)
max_sequence_length = 9
input_ids += [vocab["<pad>"]] * (max_sequence_length - len(input_ids))

# 6️⃣ Cập nhật Start & End (tìm vị trí "very smart" trong câu mới)
# Nếu từ không có trong danh sách thì start/end sẽ là -1
try:
    start_idx = input_tokens.index(answer[0])  # Vị trí mới của "very"
except ValueError:
    start_idx = -1  # Nếu không tìm thấy, gán -1

try:
    end_idx = input_tokens.index(answer[1])  # Vị trí mới của "smart"
except ValueError:
    end_idx = -1  # Nếu không tìm thấy, gán -1

print("\nFinal Input IDs:", input_ids)
print("New Start:", start_idx)
print("New End:", end_idx)

Merged Tokens: ['how', 'are', 'aioers', '<sep>', 'aioers', 'are', 'friendly']
Input IDs: [5, 4, 3, 2, 3, 4, 6]

Final Input IDs: [5, 4, 3, 2, 3, 4, 6, 1, 1]
New Start: -1
New End: -1


In [34]:
import torch
import torch.nn as nn

# 🔹 Encoder
class Encoder(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_size):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_size, batch_first=True)

        # Gán trọng số Encoder RNN & Embedding
        with torch.no_grad():
            self.embedding.weight.copy_(torch.tensor([
                [1.9, 0.0],  # <eos>  (Index 0)
                [-2.1, 0.5],  # I      (Index 1)
                [-0.8, -1.0],  # love   (Index 2)
                [0.2, 0.6]   # you    (Index 3)
            ], dtype=torch.float))

            self.rnn.weight_ih_l0.copy_(torch.tensor(
                [[0.5, 0.1], [-0.1, -0.2], [0.6, 0.2]]))
            self.rnn.bias_ih_l0.copy_(torch.tensor([0.8, -0.3, 0.1]))

            self.rnn.weight_hh_l0.copy_(torch.tensor(
                [[0.1, 0.2, 0.2], [0.2, 0.4, -0.5], [-0.1, -0.5, 0.0]]))
            self.rnn.bias_hh_l0.copy_(torch.tensor([-0.1, -0.5, 0.0]))

    def forward(self, x):
        embedded = self.embedding(x)
        outputs, hidden = self.rnn(embedded)
        return outputs, hidden

# 🔹 Decoder
class Decoder(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_size):
        super(Decoder, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

        # Gán trọng số Decoder RNN & Embedding
        with torch.no_grad():
            self.embedding.weight.copy_(torch.tensor([
                [-2.7, 1.4],  # <sos>  (Index 0)
                [0.1, 0.0],  # <eos>  (Index 1)
                [1.5, 0.2],  # tôi    (Index 2)
                [2.2, -2.8],  # yêu    (Index 3)
                [-1.3, 0.1],  # bạn    (Index 4)
                [1.0, -0.5],  # anh    (Index 5)
                [0.0, 0.0]   # em     (Index 6)
            ], dtype=torch.float))

            self.rnn.weight_ih_l0.copy_(torch.tensor(
                [[-0.7, 0.4], [-0.3, 0.9], [0.6, -0.9]]))
            self.rnn.bias_ih_l0.copy_(torch.tensor([0.0, 1.0, 1.1]))

            self.rnn.weight_hh_l0.copy_(torch.tensor(
                [[-0.1, -0.5, 0.8], [0.6, 0.0, -0.3], [-1.2, 0.0, 0.0]]))
            self.rnn.bias_hh_l0.copy_(torch.tensor([-0.1, 0.5, 0.6]))

            # Gán trọng số Fully Connected
            self.fc.weight.copy_(torch.tensor([
                [0.2, -0.9, -0.4], [0.7, 0.7, 0.9], [0.7, 0.9, -1.4],
                [-1.3, 1.2, 0.7], [-1.2, -1.3, 1.0], [0.4, -1.1, -0.7],
                [-0.3, -0.6, -1.0]
            ]))

            self.fc.bias.copy_(torch.tensor(
                [-1.1, 0.6, 0.5, 0.4, 0.4, -1.0, 0.0]))

    def forward(self, x, hidden):
        embedded = self.embedding(x).unsqueeze(1)
        output, hidden = self.rnn(embedded, hidden)
        prediction = self.fc(output.squeeze(1))
        return prediction, hidden

# 🔹 Seq2Seq Model
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        batch_size, trg_len = trg.shape
        vocab_size = self.decoder.fc.out_features
        outputs = torch.zeros(batch_size, trg_len, vocab_size)

        _, hidden = self.encoder(src)
        print(_)
        dec_input = trg[:, 0]
        
        for t in range(1, trg_len):
            output, hidden = self.decoder(dec_input, hidden)
            outputs[:, t] = output
            dec_input = trg[:, t] if torch.rand(
                1).item() < teacher_forcing_ratio else output.argmax(1)

        return outputs


# =====================================
# 🔹 Khởi tạo mô hình & Dữ liệu
# =====================================
vocab_size_en = 4  # Số từ trong vocab tiếng Anh
vocab_size_vi = 7  # Số từ trong vocab tiếng Việt
embedding_dim = 2
hidden_size = 3

encoder = Encoder(vocab_size_en, embedding_dim, hidden_size)
decoder = Decoder(vocab_size_vi, embedding_dim, hidden_size)
model = Seq2Seq(encoder, decoder)

# 🔹 Câu tiếng Anh: "I love you <eos>"
input_en = torch.tensor([[1, 2, 3, 0]], dtype=torch.long)  # Batch size = 1
# 🔹 Câu tiếng Việt: "<sos> tôi yêu bạn"
input_vi = torch.tensor([[0, 2, 3, 4]], dtype=torch.long)  # Batch size = 1

# Forward pass
outputs = model(input_en, input_vi)
print("Output shape:", outputs.shape)  # (batch_size, trg_len, vocab_size_vi)

tensor([[[-0.2913, -0.5980, -0.7857],
         [-0.1055, -0.4008, -0.2467],
         [ 0.6169, -0.7608,  0.5012],
         [ 0.9302, -0.8899,  0.9152]]], grad_fn=<TransposeBackward1>)
Output shape: torch.Size([1, 4, 7])


In [35]:
import torch
import torch.nn as nn

# 🔹 Encoder
class Encoder(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_size):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_size, batch_first=True)

        # Gán trọng số Encoder RNN & Embedding
        with torch.no_grad():
            self.embedding.weight.copy_(torch.tensor([
                [1.9, 0.0],  # <eos>  (Index 0)
                [-2.1, 0.5],  # I      (Index 1)
                [-0.8, -1.0],  # love   (Index 2)
                [0.2, 0.6]   # you    (Index 3)
            ], dtype=torch.float))

            self.rnn.weight_ih_l0.copy_(torch.tensor(
                [[0.5, 0.1], [-0.1, -0.2], [0.6, 0.2]]))
            self.rnn.bias_ih_l0.copy_(torch.tensor([0.8, -0.3, 0.1]))

            self.rnn.weight_hh_l0.copy_(torch.tensor(
                [[0.1, 0.2, 0.2], [0.2, 0.4, -0.5], [-0.1, -0.5, 0.0]]))
            self.rnn.bias_hh_l0.copy_(torch.tensor([-0.1, -0.5, 0.0]))

    def forward(self, x):
        embedded = self.embedding(x)
        outputs, hidden = self.rnn(embedded)
        return outputs, hidden  # `hidden` là hidden state cuối cùng


# 🔹 Decoder
class Decoder(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_size):
        super(Decoder, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)
        self.softmax = nn.Softmax(dim=-1)  # Softmax để lấy xác suất từ logits

        # Gán trọng số Decoder RNN & Embedding
        with torch.no_grad():
            self.embedding.weight.copy_(torch.tensor([
                [-2.7, 1.4],  # <sos>  (Index 0)
                [0.1, 0.0],  # <eos>  (Index 1)
                [1.5, 0.2],  # tôi    (Index 2)
                [2.2, -2.8],  # yêu    (Index 3)
                [-1.3, 0.1],  # bạn    (Index 4)
                [1.0, -0.5],  # anh    (Index 5)
                [0.0, 0.0]   # em     (Index 6)
            ], dtype=torch.float))

            self.rnn.weight_ih_l0.copy_(torch.tensor(
                [[-0.7, 0.4], [-0.3, 0.9], [0.6, -0.9]]))
            self.rnn.bias_ih_l0.copy_(torch.tensor([0.0, 1.0, 1.1]))

            self.rnn.weight_hh_l0.copy_(torch.tensor(
                [[-0.1, -0.5, 0.8], [0.6, 0.0, -0.3], [-1.2, 0.0, 0.0]]))
            self.rnn.bias_hh_l0.copy_(torch.tensor([-0.1, 0.5, 0.6]))

            # Gán trọng số Fully Connected
            self.fc.weight.copy_(torch.tensor([
                [0.2, -0.9, -0.4], [0.7, 0.7, 0.9], [0.7, 0.9, -1.4],
                [-1.3, 1.2, 0.7], [-1.2, -1.3, 1.0], [0.4, -1.1, -0.7],
                [-0.3, -0.6, -1.0]
            ]))

            self.fc.bias.copy_(torch.tensor(
                [-1.1, 0.6, 0.5, 0.4, 0.4, -1.0, 0.0]))

    def forward(self, x, hidden, max_len=10):
        outputs = []
        dec_input = x

        for _ in range(max_len):
            embedded = self.embedding(dec_input).unsqueeze(
                1)  # (batch, 1, embedding_dim)
            # (batch, 1, hidden_size)
            output, hidden = self.rnn(embedded, hidden)

            logits = self.fc(output.squeeze(1))  # (batch, vocab_size)
            probs = self.softmax(logits)  # Xác suất của từng token
            # Chọn token có xác suất cao nhất
            predicted_token = probs.argmax(1)

            outputs.append(predicted_token.item())  # Lưu kết quả
            dec_input = predicted_token  # Token tiếp theo làm input

            if predicted_token.item() == 1:  # Gặp <eos>, dừng lại
                break

        return outputs, hidden


# 🔹 Seq2Seq Model
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, src, trg):
        _, hidden = self.encoder(src)  # Nhận hidden state cuối từ encoder
        dec_input = trg[:, 0]  # Token <sos>
        # Dùng hidden để khởi tạo decoder
        outputs, _ = self.decoder(dec_input, hidden)
        return outputs


# =====================================
# 🔹 Khởi tạo mô hình & Dữ liệu
# =====================================
vocab_size_en = 4  # Số từ trong vocab tiếng Anh
vocab_size_vi = 7  # Số từ trong vocab tiếng Việt
embedding_dim = 2
hidden_size = 3

encoder = Encoder(vocab_size_en, embedding_dim, hidden_size)
decoder = Decoder(vocab_size_vi, embedding_dim, hidden_size)
model = Seq2Seq(encoder, decoder)

# 🔹 Câu tiếng Anh: "I love you <eos>"
input_en = torch.tensor([[1, 2, 3, 0]], dtype=torch.long)  # Batch size = 1
# 🔹 Câu tiếng Việt: "<sos>"
# Batch size = 1, bắt đầu với <sos>
input_vi = torch.tensor([[0]], dtype=torch.long)

# Forward pass
output_tokens = model(input_en, input_vi)
print("\nPredicted sequence:", output_tokens)


Predicted sequence: [2, 3, 4, 1]


In [38]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleUNet(nn.Module):
    def __init__(self):
        super(SimpleUNet, self).__init__()

        # 🔹 Encoder
        self.conv1 = nn.Conv2d(1, 1, kernel_size=3,
                               padding=1, stride=1, bias=False)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(1, 1, kernel_size=3,
                               padding=1, stride=1, bias=False)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # 🔹 Bottleneck
        self.bottleneck = nn.Conv2d(
            1, 1, kernel_size=3, padding=1, stride=1, bias=False)

        # 🔹 Decoder
        self.upsample1 = nn.Upsample(scale_factor=2, mode='nearest')
        self.conv3 = nn.Conv2d(1, 1, kernel_size=3,
                               padding=1, stride=1, bias=False)

        self.upsample2 = nn.Upsample(scale_factor=2, mode='nearest')
        self.conv4 = nn.Conv2d(1, 1, kernel_size=3,
                               padding=1, stride=1, bias=False)

        # 🔹 Khởi tạo trọng số = 1 (theo yêu cầu)
        with torch.no_grad():
            for layer in [self.conv1, self.conv2, self.bottleneck, self.conv3, self.conv4]:
                layer.weight.fill_(1)

    def forward(self, x):
        # 🔹 Encoder
        x1 = self.conv1(x)  # Conv1
        x1 = self.pool1(x1)  # MaxPooling1 (4x4 → 2x2)

        x2 = self.conv2(x1)  # Conv2
        x2 = self.pool2(x2)  # MaxPooling2 (2x2 → 1x1)

        # 🔹 Bottleneck
        x_b = self.bottleneck(x2)  # (1x1 → 1x1)
        print(x_b)
        # 🔹 Decoder
        x3 = self.upsample1(x_b)  # Upsampling1 (1x1 → 2x2)
        x3 = self.conv3(x3)

        # ✅ Cần điều chỉnh x2 để cùng kích thước với x3
        x3 = x3 + F.interpolate(x2, scale_factor=2,
                                mode='nearest')  # Skip Connection 1

        x4 = self.upsample2(x3)  # Upsampling2 (2x2 → 4x4)
        x4 = self.conv4(x4)

        # ✅ Cần điều chỉnh x1 để cùng kích thước với x4
        x4 = x4 + F.interpolate(x1, scale_factor=2,
                                mode='nearest')  # Skip Connection 2

        return x4


# 🔹 Kiểm thử mô hình
model = SimpleUNet()
input_tensor = torch.tensor([[[[1, 0, 0, 0],
                               [0, 0, 0, 0],
                               [0, 0, 0, 0],
                               [0, 0, 0, 1]]]], dtype=torch.float)

output = model(input_tensor)

print("\nInput Shape:", input_tensor.shape)
print("Output Shape:", output.shape)  # (1, 1, 4, 4)
print("Output Tensor:\n", output)

tensor([[[[2.]]]], grad_fn=<ConvolutionBackward0>)

Input Shape: torch.Size([1, 1, 4, 4])
Output Shape: torch.Size([1, 1, 4, 4])
Output Tensor:
 tensor([[[[41., 61., 60., 40.],
          [61., 91., 90., 60.],
          [60., 90., 91., 61.],
          [40., 60., 61., 41.]]]], grad_fn=<AddBackward0>)
