# Set Parameter
- Attention = Luong
- Teacher Forcing Ratio = 0.5
- Layer = 1
- Batch size = 32
- Drop out = 0.2
- Hidden unit = 200 (the value of `hidden_size` used in the code below)
- Depth
- Ctype
- Epochs = 100
- N = 100
- Data = 100K
- Deduplication

# Import packages

Import the packages needed for the experiments.

In [1]:
import os
import argparse
import logging
import sys

import torch
from torch.optim.lr_scheduler import StepLR
import torchtext

sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(os.path.abspath(os.path.dirname('__file__'))))))

from models.encoderRNN import EncoderRNN
from models.decoderRNN import DecoderRNN
from models.seq2seq import Seq2seq
from loss.loss import Perplexity
from dataset import fields
from evaluator.evaluator_unmatching import Evaluator

import matplotlib
import matplotlib.pyplot as plt



# Log format

In [2]:
# Root-logger configuration: timestamp, level name padded to six columns,
# then the message.
LOG_FORMAT = '%(asctime)s %(levelname)-6s %(message)s'
# Level is given by name and resolved to the matching `logging` constant below.
log_level = 'info'
logging.basicConfig(
    level=getattr(logging, log_level.upper()),
    format=LOG_FORMAT,
)

# Variable definition

In [3]:
# Character-type dataset variants to evaluate; each name is spliced into the
# data, checkpoint and log paths used by the evaluation loop.
ctypes = [
    "single_Ctype4",
    "last_separator_Ctype4",
    "single_Ctype2_concat",
    "separator_Ctype4",
]

# Model shape settings passed to the encoder/decoder constructors.
hidden_size = 200
bidirectional = False

# Filled by the evaluation loop: one list of F1 scores per entry of `ctypes`.
f1_score_lists = []

print(ctypes)

['single_Ctype4', 'last_separator_Ctype4', 'single_Ctype2_concat', 'separator_Ctype4']


In [None]:
# Evaluate the saved model of every character type on dev sets of increasing
# sequence length, collecting one F1-vs-length curve per ctype in
# `f1_score_lists` (same order as `ctypes`).

# Length cap shared by the encoder, the decoder and the dataset filter.
max_len = 104
# Dev-set lengths to evaluate; the plotting cell reads this as its x-axis.
lengths = list(range(4, 101, 2))


def len_filter(example):
    """Keep only examples whose source and target both fit within max_len."""
    return len(example.src) <= max_len and len(example.tgt) <= max_len


for ctype in ctypes:
    f1_score_list = []
    # Data load
    train_path = "../../data/palindrome_rand/correction_" + ctype + "/data_train.txt"

    # Prepare dataset. In this cell the training set is used only to build the
    # source/target vocabularies (presumably so token ids line up with the
    # saved checkpoint -- confirm against the training notebook).
    src = fields.SourceField()
    tgt = fields.TargetField()
    train = torchtext.data.TabularDataset(
        path=train_path, format='tsv',
        fields=[('src', src), ('tgt', tgt)],
        filter_pred=len_filter
    )
    src.build_vocab(train)
    tgt.build_vocab(train)
    input_vocab = src.vocab
    output_vocab = tgt.vocab

    # Prepare loss
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)
    if torch.cuda.is_available():
        loss.cuda()

    # Model
    evaluator = Evaluator(loss=loss, batch_size=32)
    print("Character type is : %s" % ctype)
    encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                         bidirectional=bidirectional, variable_lengths=True)
    decoder = DecoderRNN(len(tgt.vocab), max_len, hidden_size * 2 if bidirectional else hidden_size,
                         dropout_p=0.2, use_attention="Luong", bidirectional=bidirectional,
                         eos_id=tgt.eos_id, sos_id=tgt.sos_id)
    seq2seq = Seq2seq(encoder, decoder)
    if torch.cuda.is_available():
        seq2seq.cuda()

    # Load the trained weights once per ctype: the checkpoint path does not
    # depend on the evaluated length, so reloading inside the length loop was
    # redundant work. No random initialisation is done beforehand because the
    # (strict) load_state_dict overwrites every parameter anyway.
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only host.
    log_path = "../../log/pth/train_palindrome_rand_correction_ctype4_" + ctype + "_model_save.pth"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    seq2seq.load_state_dict(torch.load(log_path, map_location=device))
    seq2seq.eval()

    # Per-ctype results file, opened in append mode for each length so partial
    # results survive an interrupted run. Create its directory if missing.
    check_path = "../../log/check_point/palindrome_rand_correction_length_to_f1_score/" + ctype
    os.makedirs(os.path.dirname(check_path), exist_ok=True)

    print(lengths)
    for length in lengths:
        dev_path = "../../data/palindrome_rand/correction_" + ctype + "/dev_length/data_test_length_" + str(length) + ".txt"
        dev = torchtext.data.TabularDataset(
              path=dev_path, format='tsv',
              fields=[('src', src), ('tgt', tgt)],
              filter_pred=len_filter)
        dev_loss, _, _, f1_score = evaluator.evaluate(seq2seq, dev)

        # Same line goes to stdout and to the results file.
        report = ("Length:%d, Dev Loss:%0.4f, F1 Score:%0.4f\n"
                  % (length, dev_loss, f1_score))
        print(report)
        with open(check_path, 'a') as f:
            f.write(report)

        f1_score_list.append(f1_score)

    f1_score_lists.append(f1_score_list)



Character type is : single_Ctype4
[4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100]
Length:4, Dev Loss:1.0001, F1 Score:1.0000

Length:6, Dev Loss:1.0000, F1 Score:1.0000



  "num_layers={}".format(dropout, num_layers))


Length:8, Dev Loss:1.0000, F1 Score:1.0000

Length:10, Dev Loss:1.0000, F1 Score:1.0000

Length:12, Dev Loss:1.0000, F1 Score:1.0000

Length:14, Dev Loss:1.0014, F1 Score:0.9983

Length:16, Dev Loss:1.0031, F1 Score:0.9936

Length:18, Dev Loss:1.0082, F1 Score:0.9837

Length:20, Dev Loss:1.0117, F1 Score:0.9760

Length:22, Dev Loss:1.0133, F1 Score:0.9552

Length:24, Dev Loss:1.0101, F1 Score:0.9532

Length:26, Dev Loss:1.0153, F1 Score:0.9249

Length:28, Dev Loss:1.0159, F1 Score:0.9031

Length:30, Dev Loss:1.0168, F1 Score:0.8793

Length:32, Dev Loss:1.0243, F1 Score:0.8197

Length:34, Dev Loss:1.0167, F1 Score:0.8480

Length:36, Dev Loss:1.0208, F1 Score:0.8074

Length:38, Dev Loss:1.0179, F1 Score:0.8192

Length:40, Dev Loss:1.0188, F1 Score:0.7882

Length:42, Dev Loss:1.0194, F1 Score:0.7631

Length:44, Dev Loss:1.0211, F1 Score:0.7242

Length:46, Dev Loss:1.0224, F1 Score:0.7279

Length:48, Dev Loss:1.0261, F1 Score:0.6567

Length:50, Dev Loss:1.0263, F1 Score:0.6707

Length:52, 

Length:48, Dev Loss:1.0455, F1 Score:0.7081

Length:50, Dev Loss:1.0453, F1 Score:0.7115

Length:52, Dev Loss:1.0418, F1 Score:0.7204

Length:54, Dev Loss:1.0437, F1 Score:0.7076

Length:56, Dev Loss:1.0436, F1 Score:0.7194

Length:58, Dev Loss:1.0531, F1 Score:0.6652

Length:60, Dev Loss:1.0466, F1 Score:0.6658

Length:62, Dev Loss:1.0448, F1 Score:0.6794

Length:64, Dev Loss:1.0461, F1 Score:0.6784



In [None]:
# Plot F1 score against sequence length, one curve per character type.
# Each entry is (format string, legend label), in the same order as the
# curves were appended to `f1_score_lists`.
plot_styles = [
    ('-', "Single"),
    ('-o', "Multiple(Last Separator)"),
    ('-s', "Multiple(Single Concat)"),
    ('-x', "Multiple(Separator)"),
]

plt.figure(figsize=(15,7))
for curve_index, (line_format, curve_label) in enumerate(plot_styles):
    # `linewidth` must be lowercase: current Matplotlib no longer normalises
    # mixed-case property names such as `LineWidth` and raises on them.
    plt.plot(lengths, f1_score_lists[curve_index], line_format,
             linewidth=3, label=curve_label)
plt.legend(loc="best", fontsize=12)
plt.xlabel('Length', fontsize=24)
plt.ylabel('F1 Score', fontsize=24)

# Make sure the output directory exists before writing the figure.
plot_path = '../../log/plot/palindrome_rand_correction_length_to_f1_score/palindrome_rand_correction_length_to_f1_score.png'
os.makedirs(os.path.dirname(plot_path), exist_ok=True)
plt.savefig(plot_path)

In [None]:
# Dump the raw F1 curves, one list per character type, in the order they
# were appended to `f1_score_lists`.
for curve_index in range(4):
    print(f1_score_lists[curve_index])