# Set Parameter
- Attention = Luong
- Teacher Forcing Ratio = 0.5
- Layer = 1
- Batch size = 32
- Drop out = 0.2
- Hidden unit = 50
- Epochs = 50
- N = 50
- Data Length = 100K
- Data = [Palindrome, Double]

# Import packages

import useful packages for experiments

In [1]:
import os
import argparse
import logging

import torch
from torch.optim.lr_scheduler import StepLR
import torchtext

from trainer.supervised_trainer import SupervisedTrainer
from trainer.supervised_trainer_unmatching import SupervisedTrainer_unmatching
from models.encoderRNN import EncoderRNN
from models.decoderRNN import DecoderRNN
from models.seq2seq import Seq2seq
from loss.loss import Perplexity
from optim.optim import Optimizer
from dataset import fields
from evaluator.predictor import Predictor

import matplotlib.pyplot as plt



# Log format

In [2]:
log_level = 'info'
LOG_FORMAT = '%(asctime)s %(levelname)-6s %(message)s'
logging.basicConfig(format=LOG_FORMAT, level=getattr(logging, log_level.upper()))

In [3]:
data_name = ["Palindrome_double", "Palindrome_double"]
data_path = ["palindrome_data", "palindrome_data_double"]
character_accuracy = []
sentance_accuracy = []

# Prepare dataset

In [None]:
for i, j in zip(data_name, data_path):
    print("data : %s" % i)
    train_path = j + "/data_train.txt"
    dev_path = j + "/data_test2.txt"

    src = fields.SourceField()
    tgt = fields.TargetField()
    max_len = 104
    def len_filter(example):
        return len(example.src) <= max_len and len(example.tgt) <= max_len
    train = torchtext.data.TabularDataset(
        path=train_path, format='tsv',
        fields=[('src', src), ('tgt', tgt)],
        filter_pred=len_filter
    )
    dev = torchtext.data.TabularDataset(
        path=dev_path, format='tsv',
        fields=[('src', src), ('tgt', tgt)],
        filter_pred=len_filter
    )
    src.build_vocab(train)
    tgt.build_vocab(train)
    input_vocab = src.vocab
    output_vocab = tgt.vocab

    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)
    if torch.cuda.is_available():
        loss.cuda()
    
    optimizer = "Adam"
    hidden_size = 50
    bidirectional = True

    seq2seq = None
    encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                         bidirectional=bidirectional, variable_lengths=True)
    decoder = DecoderRNN(len(tgt.vocab), max_len, hidden_size * 2 if bidirectional else hidden_size,
                         dropout_p=0.2, use_attention="Luong", bidirectional=bidirectional,
                         eos_id=tgt.eos_id, sos_id=tgt.sos_id)
    seq2seq = Seq2seq(encoder, decoder)
    if torch.cuda.is_available():
        seq2seq.cuda()

    for param in seq2seq.parameters():
        param.data.uniform_(-0.08, 0.08)

    # train
    t = SupervisedTrainer(loss=loss, batch_size=32,
                          checkpoint_every=50,
                          print_every=100,
                          hidden_size=hidden_size,
                          fig_path="log/plot/" + i)

    seq2seq, ave_loss, character_accuracy_list, sentance_accuracy_list = t.train(seq2seq, train,
                                                                             num_epochs=50, dev_data=dev,
                                                                             optimizer=optimizer,
                                                                             teacher_forcing_ratio=0.5)

    character_accuracy.append(character_accuracy_list)
    sentance_accuracy.append(sentance_accuracy_list)

data : Palindrome_double


  "num_layers={}".format(dropout, num_layers))
2019-02-26 07:50:49,358 INFO   Optimizer: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.001
    weight_decay: 0
), Scheduler: None


In [None]:
epochs = list(range(1, 51, 1))
plt.figure(figsize=(15,10))
plt.grid(True)
plt.plot(epochs[1::4], character_accuracy[0][1::4], color="b", LineWidth=4, label="5 Character Types")
plt.plot(epochs[1::4], character_accuracy[1][1::4], color="g", LineWidth=4, label="10 Character Types")
plt.legend(loc="best", fontsize=18)
plt.xlabel('Epoch', fontsize=24)
plt.ylabel('Character Accuracy', fontsize=24)
plt.title('Palindrome', fontsize=35, fontweight=560)
plt.savefig('log/plot/Palindrome_double_to_character_accuracy.png')

plt.figure(figsize=(15,10))
plt.grid(True)
plt.plot(epochs[1::4], sentance_accuracy[0][::5], color="b", LineWidth=4, label="5 Character Types")
plt.plot(epochs[1::4], sentance_accuracy[1][1::4], color="g", LineWidth=4, label="10 Character Types")
plt.legend(loc="best", fontsize=18)
plt.xlabel('Epoch', fontsize=24)
plt.ylabel('Sentance Accuracy', fontsize=24)
plt.title('Palindrome', fontsize=35, fontweight=560)
plt.savefig('log/plot/Palindrome_double_to_sentance_accuracy.png')