# Set Parameter
- Attention = [None, Luong]
- Teacher Forcing Ratio = 0.5
- Layer = 1
- Batch size = 32
- Drop out = 0.2
- Hidden unit = 50
- Epochs = 100
- N = 100
- Data Length = 100K
- Single
- Ctype = 2
- Deduplication

# Import packages

Import the packages used in the experiments.

In [1]:
import os
import argparse
import logging
import sys

import torch
from torch.optim.lr_scheduler import StepLR
import torchtext

# Extend the module search path before the project-local imports that follow.
# '__file__' is a quoted string literal here (not the __file__ variable), so the
# innermost dirname() returns '' and abspath('') resolves to the current working
# directory; the three remaining dirname() calls then climb three levels up.
# NOTE(review): presumably that directory is the repository root -- confirm.
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(os.path.abspath(os.path.dirname(os.path.abspath(os.path.dirname('__file__'))))))))

from trainer.supervised_trainer import SupervisedTrainer
from models.encoderRNN import EncoderRNN
from models.decoderRNN import DecoderRNN
from models.seq2seq import Seq2seq
from loss.loss import Perplexity
from optim.optim import Optimizer
from dataset import fields
from evaluator.predictor import Predictor

import matplotlib.pyplot as plt



# Log format

In [2]:
# Log line layout: timestamp, level name left-aligned in 6 columns, message.
LOG_FORMAT = '%(asctime)s %(levelname)-6s %(message)s'
log_level = 'info'
# Look up the numeric level from its upper-cased name on the logging module.
logging.basicConfig(level=getattr(logging, log_level.upper()), format=LOG_FORMAT)

In [3]:
# Result collectors: each training cell appends its accuracy list here, so the
# entries end up in training order (the plots read index 0 as "Luong Att" and
# index 1 as "None Att").
character_accuracy, sentence_accuracy = [], []

In [4]:
# ---- Experiment configuration -------------------------------------------
train_path = "../../../data/palindrome_dedup/K100_single_Ctype2/data_train.txt"
dev_path = "../../../data/palindrome_dedup/K100_single_Ctype2/data_test.txt"
max_len = 104
optimizer = "Adam"
hidden_size = 50
bidirectional = True

# ---- Fields shared by both splits ---------------------------------------
src = fields.SourceField()
tgt = fields.TargetField()


def len_filter(example):
    """Keep an example only if neither its src nor its tgt is longer than ``max_len``."""
    return not (len(example.src) > max_len or len(example.tgt) > max_len)


def _load_split(path):
    """Load one tab-separated file as a (src, tgt) dataset filtered by ``len_filter``."""
    return torchtext.data.TabularDataset(
        path=path,
        format='tsv',
        fields=[('src', src), ('tgt', tgt)],
        filter_pred=len_filter,
    )


train = _load_split(train_path)
dev = _load_split(dev_path)

# Both vocabularies are built from the training split only.
src.build_vocab(train)
tgt.build_vocab(train)
input_vocab, output_vocab = src.vocab, tgt.vocab

# ---- Loss ---------------------------------------------------------------
# A weight of one per target-vocabulary entry, plus the index of the pad
# token, are handed to the Perplexity loss.
pad = tgt.vocab.stoi[tgt.pad_token]
weight = torch.ones(len(tgt.vocab))
loss = Perplexity(weight, pad)
if torch.cuda.is_available():
    loss.cuda()



# Prepare dataset

In [5]:
print("Luong Att")

# Destination of the final weights. The directory is created before training
# starts so that a missing folder cannot make torch.save fail after the run.
luong_model_path = '../../../log/pth/palindrome_dedup_K100_single_Ctype2_parameter_Luong_att_model_save.pth'
os.makedirs(os.path.dirname(luong_model_path), exist_ok=True)

# Drop the reference to any model left over from a previous cell before
# building a new one.
seq2seq = None
encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                    bidirectional=bidirectional, variable_lengths=True)
# The decoder gets twice the hidden size when the encoder is bidirectional.
decoder = DecoderRNN(len(tgt.vocab), max_len, hidden_size * 2 if bidirectional else hidden_size,
                     dropout_p=0.2, use_attention="Luong", bidirectional=bidirectional,
                     eos_id=tgt.eos_id, sos_id=tgt.sos_id)
seq2seq = Seq2seq(encoder, decoder)
if torch.cuda.is_available():
    seq2seq.cuda()

# Re-initialise every parameter uniformly in [-0.08, 0.08].
for param in seq2seq.parameters():
    param.data.uniform_(-0.08, 0.08)

# train
t = SupervisedTrainer(loss=loss, batch_size=32,
                      checkpoint_every=50,
                      print_every=100,
                      hidden_size=hidden_size,
                      path="palindrome_dedup_K100_single_Ctype2_parameter/Luong_att")

seq2seq, ave_loss, character_accuracy_list, sentence_accuracy_list = t.train(seq2seq, train,
                                                                             num_epochs=100, dev_data=dev,
                                                                             optimizer=optimizer,
                                                                             teacher_forcing_ratio=0.5)

# Record this run's accuracy lists for the comparison plots.
character_accuracy.append(character_accuracy_list)
sentence_accuracy.append(sentence_accuracy_list)

torch.save(seq2seq.state_dict(), luong_model_path)

  "num_layers={}".format(dropout, num_layers))
2019-04-04 16:27:16,179 INFO   Optimizer: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.001
    weight_decay: 0
), Scheduler: None


Luong Att


2019-04-04 16:30:26,984 INFO   Finished epoch 1: Train loss: 14.2942, Dev loss: 14.2104, Accuracy(Character): 0.4304, Accuracy(Word): 0.0161
2019-04-04 16:33:38,162 INFO   Finished epoch 2: Train loss: 5.3854, Dev loss: 6.4352, Accuracy(Character): 0.5824, Accuracy(Word): 0.0091
2019-04-04 16:36:53,694 INFO   Finished epoch 3: Train loss: 4.6129, Dev loss: 5.8984, Accuracy(Character): 0.5305, Accuracy(Word): 0.0003
2019-04-04 16:40:04,254 INFO   Finished epoch 4: Train loss: 4.6873, Dev loss: 9.4771, Accuracy(Character): 0.5301, Accuracy(Word): 0.0026
2019-04-04 16:43:13,481 INFO   Finished epoch 5: Train loss: 5.8628, Dev loss: 22.9286, Accuracy(Character): 0.4892, Accuracy(Word): 0.0094
2019-04-04 16:46:25,648 INFO   Finished epoch 6: Train loss: 6.7609, Dev loss: 17.8463, Accuracy(Character): 0.4773, Accuracy(Word): 0.0013
2019-04-04 16:49:35,206 INFO   Finished epoch 7: Train loss: 8.4099, Dev loss: 31.2310, Accuracy(Character): 0.4766, Accuracy(Word): 0.0091
2019-04-04 16:53:12,51

KeyboardInterrupt: 

In [None]:
print("None Att")

# Destination of the final weights. The directory is created before training
# starts so that a missing folder cannot make torch.save fail after the run.
none_model_path = '../../../log/pth/palindrome_dedup_K100_single_Ctype2_parameter_no_att_model_save.pth'
os.makedirs(os.path.dirname(none_model_path), exist_ok=True)

# Drop the reference to any model left over from a previous cell before
# building a new one.
seq2seq = None
encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                    bidirectional=bidirectional, variable_lengths=True)
# The decoder gets twice the hidden size when the encoder is bidirectional.
decoder = DecoderRNN(len(tgt.vocab), max_len, hidden_size * 2 if bidirectional else hidden_size,
                     dropout_p=0.2, use_attention=None, bidirectional=bidirectional,
                     eos_id=tgt.eos_id, sos_id=tgt.sos_id)
seq2seq = Seq2seq(encoder, decoder)
if torch.cuda.is_available():
    seq2seq.cuda()

# Re-initialise every parameter uniformly in [-0.08, 0.08].
for param in seq2seq.parameters():
    param.data.uniform_(-0.08, 0.08)

# train
t = SupervisedTrainer(loss=loss, batch_size=32,
                      checkpoint_every=50,
                      print_every=100,
                      hidden_size=hidden_size,
                      path="palindrome_dedup_K100_single_Ctype2_parameter/None_att")

seq2seq, ave_loss, character_accuracy_list, sentence_accuracy_list = t.train(seq2seq, train,
                                                                             num_epochs=100, dev_data=dev,
                                                                             optimizer=optimizer,
                                                                             teacher_forcing_ratio=0.5)

# Record this run's accuracy lists for the comparison plots.
character_accuracy.append(character_accuracy_list)
sentence_accuracy.append(sentence_accuracy_list)

torch.save(seq2seq.state_dict(), none_model_path)

In [None]:
# Folder that receives both figures; created if missing so savefig cannot
# fail on a non-existent directory.
plot_dir = '../../../log/plot/palindrome_dedup_K100_single_Ctype2_parameter'
os.makedirs(plot_dir, exist_ok=True)

epochs = list(range(1, 101, 1))

# One figure per metric: (per-model accuracy lists, y-axis label, file name).
accuracy_plots = [
    (character_accuracy, 'Character Accuracy', 'epoch_to_character_accuracy.png'),
    (sentence_accuracy, 'Sentence Accuracy', 'epoch_to_sentence_accuracy.png'),
]

for accuracy_runs, y_label, file_name in accuracy_plots:
    plt.figure(figsize=(15,10))
    # Only every third epoch is drawn. Index 0 is the Luong-attention run and
    # index 1 the run without attention (the order the training cells append).
    # `linewidth` is spelled in lower case: the mixed-case `LineWidth` keyword
    # is not accepted by current matplotlib releases.
    plt.plot(epochs[::3], accuracy_runs[0][::3], '--', linewidth=3, label="Luong Att")
    plt.plot(epochs[::3], accuracy_runs[1][::3], '-o', linewidth=3, label="None Att")
    plt.legend(loc="best", fontsize=12)
    plt.xlabel('Epoch', fontsize=24)
    plt.ylabel(y_label, fontsize=24)
    plt.ylim([0, 1])
    plt.title('palindrome Ctype2', fontsize=35, fontweight=560)
    plt.savefig(plot_dir + '/' + file_name)

In [None]:
# Dump the character-accuracy list of the first and then the second training run.
for run_idx in (0, 1):
    print(character_accuracy[run_idx])

In [None]:
# Dump the sentence-accuracy list of the first and then the second training run.
for run_idx in (0, 1):
    print(sentence_accuracy[run_idx])