In [25]:
import torch.nn as nn

class Seq2Seq(nn.Module):
    def __init__(self, n_hidden, n_class, n_layers=1, dropout=0.5):
        super(Seq2Seq, self).__init__()
        self.n_hidden = n_hidden
        self.n_class = n_class
        self.n_layers = n_layers
        
        # 编码器
        self.encoder = nn.LSTM(
            input_size=n_class,
            hidden_size=n_hidden,
            num_layers=n_layers,
            dropout=dropout if n_layers > 1 else 0  # 只有多层时才使用dropout
        )
        
        # 解码器
        self.decoder = nn.LSTM(
            input_size=n_class,
            hidden_size=n_hidden,
            num_layers=n_layers,
            dropout=dropout if n_layers > 1 else 0
        )
        
        # 线性层
        self.linear = nn.Linear(n_hidden, n_class)
    
    def forward(self, enc_input, enc_hidden, dec_input):
        # 编码器
        enc_input = enc_input.permute(1, 0, 2)  # 调整维度为 (seq_len, batch_size, input_size)
        enc_output, enc_hidden = self.encoder(enc_input, enc_hidden)
        
        # 解码器
        dec_input = dec_input.permute(1, 0, 2)  # 调整维度为 (seq_len, batch_size, input_size)
        dec_output, dec_hidden = self.decoder(dec_input, enc_hidden)
        
        # 线性层
        output = self.linear(dec_output)
        
        return output

In [26]:
import numpy as np
import torch
import torch.nn as nn

class Seq2Seq(nn.Module):
    def __init__(self, n_hidden, n_class):
        super(Seq2Seq, self).__init__()
        self.encoder = nn.LSTM(input_size=n_class, hidden_size=n_hidden, dropout=0.5)
        self.decoder = nn.LSTM(input_size=n_class, hidden_size=n_hidden, dropout=0.5)
        self.linear = nn.Linear(n_hidden, n_class)
        
    def forward(self, enc_input, enc_hidden, dec_input):
        enc_input = enc_input.permute(1, 0, 2)
        dec_input = dec_input.permute(1, 0, 2)
        _, enc_hidden = self.encoder(enc_input, enc_hidden)
        output, _ = self.decoder(dec_input, enc_hidden)
        output = self.linear(output)
        return output

def make_batch(seq_data, num_dic, n_step, n_class):
    input_batch, output_batch, target_batch = [], [], []

    for seq in seq_data:
        for i in range(2):
            seq[i] = seq[i] + "P" * (n_step - len(seq[i]))

        input = [num_dic[n] for n in seq[0]]
        output = [num_dic[n] for n in ("S" + seq[1])]
        target = [num_dic[n] for n in (seq[1] + "E")]

        input_batch.append(np.eye(n_class)[input])
        output_batch.append(np.eye(n_class)[output])
        target_batch.append(target)

    return (
        torch.FloatTensor(input_batch),
        torch.FloatTensor(output_batch),
        torch.LongTensor(target_batch),
    )
    
def make_testbatch(input_word, num_dic, n_step, n_class):
    input_batch, output_batch = [], []

    input_w = input_word + 'P' * (n_step - len(input_word))
    input = [num_dic[n] for n in input_w]
    output = [num_dic[n] for n in 'S' + 'P' * n_step]

    input_batch = np.eye(n_class)[input]
    output_batch = np.eye(n_class)[output]

    return torch.FloatTensor(input_batch).unsqueeze(0), torch.FloatTensor(output_batch).unsqueeze(0)

if __name__ == '__main__':
    n_step = 5
    n_hidden = 128

    char_arr = [c for c in 'SEPabcdefghijklmnopqrstuvwxyz']
    num_dic = {n: i for i, n in enumerate(char_arr)}
    seq_data = [['man', 'women'], ['black', 'white'], ['king', 'queen'], ['girl', 'boy'], ['up', 'down'], ['high', 'low']]

    n_class = len(num_dic)
    batch_size = len(seq_data)

    model = Seq2Seq(n_hidden, n_class)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    input_batch, output_batch, target_batch = make_batch(seq_data, num_dic, n_step, n_class)

    for epoch in range(5000):
        # 初始化隐藏状态和细胞状态
        hidden = (torch.zeros(1, batch_size, n_hidden), torch.zeros(1, batch_size, n_hidden))

        optimizer.zero_grad()
        output = model(input_batch, hidden, output_batch)
        output = output.transpose(0, 1)
        loss = criterion(output.view(-1, n_class), target_batch.view(-1))
        
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))
        loss.backward()
        optimizer.step()

    # Test
    def translate(word):
        input_batch, output_batch = make_testbatch(word, num_dic, n_step, n_class)

        # 初始化隐藏状态和细胞状态
        hidden = (torch.zeros(1, 1, n_hidden), torch.zeros(1, 1, n_hidden))
        output = model(input_batch, hidden, output_batch)
        predict = output.data.max(2, keepdim=True)[1]
        decoded = [char_arr[i] for i in predict]
        end = decoded.index('E')
        translated = ''.join(decoded[:end])

        return translated.replace('P', '')

    print('test')
    print('man ->', translate('man'))
    print('mans ->', translate('mans'))
    print('king ->', translate('king'))
    print('black ->', translate('black'))
    print('upp ->', translate('upp'))

RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.

In [None]:
# %%
# code by Tae Hwan Jung @graykode
import numpy as np
import torch
import torch.nn as nn

# S: Symbol that shows starting of decoding input
# E: Symbol that shows starting of decoding output
# P: Symbol that will fill in blank sequence if current batch data size is short than time steps

def make_batch():
    input_batch, output_batch, target_batch = [], [], []

    for seq in seq_data:
        for i in range(2):
            seq[i] = seq[i] + 'P' * (n_step - len(seq[i]))

        input = [num_dic[n] for n in seq[0]]
        output = [num_dic[n] for n in ('S' + seq[1])]
        target = [num_dic[n] for n in (seq[1] + 'E')]

        input_batch.append(np.eye(n_class)[input])
        output_batch.append(np.eye(n_class)[output])
        target_batch.append(target) # not one-hot

    # make tensor
    return torch.FloatTensor(input_batch), torch.FloatTensor(output_batch), torch.LongTensor(target_batch)

# make test batch
def make_testbatch(input_word):
    input_batch, output_batch = [], []

    input_w = input_word + 'P' * (n_step - len(input_word))
    input = [num_dic[n] for n in input_w]
    output = [num_dic[n] for n in 'S' + 'P' * n_step]

    input_batch = np.eye(n_class)[input]
    output_batch = np.eye(n_class)[output]

    return torch.FloatTensor(input_batch).unsqueeze(0), torch.FloatTensor(output_batch).unsqueeze(0)

# Model
class Seq2Seq(nn.Module):
    def __init__(self):
        super(Seq2Seq, self).__init__()

        self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)
        self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)
        self.fc = nn.Linear(n_hidden, n_class)

    def forward(self, enc_input, enc_hidden, dec_input):
        enc_input = enc_input.transpose(0, 1) # enc_input: [max_len(=n_step, time step), batch_size, n_class]
        dec_input = dec_input.transpose(0, 1) # dec_input: [max_len(=n_step, time step), batch_size, n_class]

        # enc_states : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        _, enc_states = self.enc_cell(enc_input, enc_hidden)
        # outputs : [max_len+1(=6), batch_size, num_directions(=1) * n_hidden(=128)]
        outputs, _ = self.dec_cell(dec_input, enc_states)

        model = self.fc(outputs) # model : [max_len+1(=6), batch_size, n_class]
        return model

if __name__ == '__main__':
    n_step = 5
    n_hidden = 128

    char_arr = [c for c in 'SEPabcdefghijklmnopqrstuvwxyz']
    num_dic = {n: i for i, n in enumerate(char_arr)}
    seq_data = [['man', 'women'], ['black', 'white'], ['king', 'queen'], ['girl', 'boy'], ['up', 'down'], ['high', 'low']]

    n_class = len(num_dic)
    batch_size = len(seq_data)

    model = Seq2Seq()

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    input_batch, output_batch, target_batch = make_batch()

    for epoch in range(5000):
        # make hidden shape [num_layers * num_directions, batch_size, n_hidden]
        hidden = torch.zeros(1, batch_size, n_hidden)

        optimizer.zero_grad()
        # input_batch : [batch_size, max_len(=n_step, time step), n_class]
        # output_batch : [batch_size, max_len+1(=n_step, time step) (becase of 'S' or 'E'), n_class]
        # target_batch : [batch_size, max_len+1(=n_step, time step)], not one-hot
        output = model(input_batch, hidden, output_batch)
        # output : [max_len+1, batch_size, n_class]
        output = output.transpose(0, 1) # [batch_size, max_len+1(=6), n_class]
        loss = 0
        for i in range(0, len(target_batch)):
            # output[i] : [max_len+1, n_class, target_batch[i] : max_len+1]
            loss += criterion(output[i], target_batch[i])
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
        loss.backward()
        optimizer.step()

    # Test
    def translate(word):
        input_batch, output_batch = make_testbatch(word)

        # make hidden shape [num_layers * num_directions, batch_size, n_hidden]
        hidden = torch.zeros(1, 1, n_hidden)
        output = model(input_batch, hidden, output_batch)
        # output : [max_len+1(=6), batch_size(=1), n_class]

        predict = output.data.max(2, keepdim=True)[1] # select n_class dimension
        decoded = [char_arr[i] for i in predict]
        end = decoded.index('E')
        translated = ''.join(decoded[:end])

        return translated.replace('P', '')

    print('test')
    print('man ->', translate('man'))
    print('mans ->', translate('mans'))
    print('king ->', translate('king'))
    print('black ->', translate('black'))
    print('upp ->', translate('upp'))

Epoch: 1000 cost = 0.003528
Epoch: 2000 cost = 0.000962
Epoch: 3000 cost = 0.000410
Epoch: 4000 cost = 0.000206
Epoch: 5000 cost = 0.000111
test
man -> women
mans -> women
king -> queen
black -> white
upp -> down
