In [1]:
from model import *
from utils import *
from Data import *
from train import *
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [2]:
# Read train.txt and pass it through the project helpers (split_data and
# y_train_make come from the star imports in the first cell).
train_data = np.squeeze(pd.read_csv('train.txt', header=None))
train_data = split_data(train_data)
y_train = y_train_make(len(train_data))

# Derive the CUDA flag from `device` instead of hard-coding True, so the
# project classes agree with the CPU fallback selected in the first cell.
train_loader = DataTransformer(train_data, y_train, use_cuda=(device.type == "cuda"))

# Encoder: vocabulary size comes from the loader; 256-d embedding/output and a
# 32-d latent space.
encoder = Encoder(vocab_size=train_loader.vocab_size,
                  embedding_size=256,
                  output_size=256,
                  lat_dim=32).to(device)

# Decoder mirrors the encoder sizes (hidden 256, latent 32).
# teacher_forcing_ratio=1. is passed as-is from the original setup.
decoder = Decoder(hidden_size=256,
                  output_size=train_loader.vocab_size,
                  lat_dim=32,
                  max_length=train_loader.max_length,
                  teacher_forcing_ratio=1.,
                  sos_id=train_loader.SOS_ID,
                  use_cuda=(device.type == "cuda")).to(device)

# Both sub-modules are already on `device`; Seq2Seq only ties them together.
seq2seq = Seq2Seq(encoder=encoder,
                  decoder=decoder)

In [3]:
# Read test.txt and split it into source / target words with the project
# helpers (split_data, src_trg_split come from the star imports).
test_data = np.squeeze(pd.read_csv('test.txt', header=None))
test_data = split_data(test_data)
test_data = np.array(test_data)
src, trg = src_trg_split(test_data)

# Encode every word with the *training* vocabulary, appending the EOS marker.
test_src = [train_loader.vocab.sequence_to_indices(word, add_eos=True) for word in src]
test_trg = [train_loader.vocab.sequence_to_indices(word, add_eos=True) for word in trg]

# Tense conversion (source -> target) for each of the 10 test pairs, in order:
#   sp -> p
#   sp -> pg
#   sp -> tp
#   sp -> tp
#   p  -> tp
#   sp -> pg
#   p  -> sp
#   pg -> sp
#   pg -> p
#   pg -> tp
# NOTE(review): sp/tp/pg/p presumably stand for simple present / third person /
# progressive / past -- confirm against the dataset description.
sp = 0
tp = 1
pg = 2
p = 3

# Column vectors of tense ids, one row per test pair.
src_tense_ids = np.array([sp, sp, sp, sp, p, sp, p, pg, pg, pg]).reshape(-1, 1)
trg_tense_ids = np.array([p, pg, tp, tp, tp, pg, sp, sp, p, tp]).reshape(-1, 1)

# The deprecated `Variable` wrapper is dropped: since PyTorch 0.4 (required by
# the torch.device / .to(device) calls already used here) it just returns the
# tensor unchanged.
test_c_src = torch.LongTensor(to_one_hot(src_tense_ids)).to(device)
test_c_trg = torch.LongTensor(to_one_hot(trg_tense_ids)).to(device)

In [24]:
# Restore the first demo checkpoint onto whichever device was selected.
# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
seq2seq.load_state_dict(torch.load("Lab4_seq2seq_vae_lstm_KL_cond_demo1.pt", map_location=device))

# Keep the Trainer's CUDA flag in sync with `device` instead of hard-coding
# True, matching the map_location used for the checkpoint above.
trainer = Trainer(seq2seq, train_loader, y_train, learning_rate=0.001, use_cuda=(device.type == "cuda"))

# Score each test pair with BLEU and report the mean over the test set.
total_score = 0.0
for i in range(len(test_src)):
    # Convert the stored indices back to strings for evaluate() and for printing.
    word = train_loader.vocab.indices_to_sequence(test_src[i])
    trg_true = train_loader.vocab.indices_to_sequence(test_trg[i])
    # Conditions are passed as (1, n) rows; only the first returned prediction is used.
    results = trainer.evaluate(word, test_c_src[i].view(1, -1), test_c_trg[i].view(1, -1))[0]
    score = trainer.compute_bleu(results, trg_true)
    print("Src_true: {:>12}".format(word), "\tTrg_true:{:>12}".format(trg_true), "\tPredict: {:>12}".format(results), "\tScore: {:>8.5f}".format(score))
    total_score += score
total_score /= len(test_src)
print("Total score:", total_score)

Src_true:      abandon 	Trg_true:   abandoned 	Predict:      abandon 	Score:  0.75148
Src_true:         abet 	Trg_true:    abetting 	Predict:      abeting 	Score:  0.68940
Src_true:        begin 	Trg_true:      begins 	Predict:       begins 	Score:  1.00000
Src_true:       expend 	Trg_true:     expends 	Predict:      expends 	Score:  1.00000
Src_true:         sent 	Trg_true:       sends 	Predict:        sents 	Score:  0.28574
Src_true:        split 	Trg_true:   splitting 	Predict:     spitting 	Score:  0.67529
Src_true:       flared 	Trg_true:       flare 	Predict:       flared 	Score:  0.75984
Src_true:  functioning 	Trg_true:    function 	Predict:     function 	Score:  1.00000
Src_true:  functioning 	Trg_true:  functioned 	Predict:      fundint 	Score:  0.13162
Src_true:      healing 	Trg_true:       heals 	Predict:        heals 	Score:  1.00000
Total score: 0.7293375734205133


In [5]:
# Restore the second demo checkpoint onto whichever device was selected.
# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
seq2seq.load_state_dict(torch.load("Lab4_seq2seq_vae_lstm_KL_cond_demo2.pt", map_location=device))
# Keep the Trainer's CUDA flag in sync with `device` instead of hard-coding True.
trainer = Trainer(seq2seq, train_loader, y_train, learning_rate=0.001, use_cuda=(device.type == "cuda"))
def reparaterization_trick(mean, logv):
    """Sample ``mean + eps * std`` with ``eps`` drawn from a standard normal.

    Args:
        mean: tensor holding the mean of the distribution.
        logv: tensor holding the log-variance; ``std = exp(0.5 * logv)``.

    Returns:
        A tensor computed as ``mean + eps * std`` where ``eps`` has the same
        shape, dtype and device as ``std``.
    """
    sigma = torch.exp(logv * 0.5)
    noise = torch.randn_like(sigma)
    sample = mean + noise * sigma
    return sample
    
def Gaussian_score(words, train_path='./train.txt'):
    """Average number of training rows matched per generated word group.

    Each whitespace-separated line of ``train_path`` is one row. A generated
    group scores one point for every training row it equals, so a row that is
    repeated in the file counts once per repetition, as in the original
    nested-loop version.

    Args:
        words: sequence of word groups, each an ordered sequence of strings
            (for example one list of 4 tense forms per sampled latent).
        train_path: path of the training file. Defaults to ``'./train.txt'``,
            the location the original code hard-coded.

    Returns:
        ``matches / len(words)`` as a float, or ``0.0`` when ``words`` is empty
        (the original raised ZeroDivisionError).

    Raises:
        OSError: if ``train_path`` cannot be opened.
    """
    from collections import Counter

    if not words:
        return 0.0

    # Build a row -> occurrence-count table once instead of rescanning the whole
    # training list for every generated group. line.split() also tolerates blank
    # lines, trailing whitespace and '\r\n' endings, which made the original
    # `word[3]` access raise IndexError or silently never match.
    with open(train_path, 'r') as fp:
        known_rows = Counter(tuple(line.split()) for line in fp if line.strip())

    score = sum(known_rows[tuple(group)] for group in words)
    return score / len(words)

    
# One-hot condition rows, one per tense label (4 in total).
label = torch.LongTensor([[1, 0, 0, 0],
                          [0, 1, 0, 0],
                          [0, 0, 1, 0],
                          [0, 0, 0, 1]]).to(device)

words = []
# Pure inference: no_grad avoids building autograd graphs for the 100 x 4 decodes.
with torch.no_grad():
    # The outer index is unused; the original reused `i` for both loops, so the
    # inner loop silently shadowed the outer one.
    for _ in range(100):
        # Random mean / log-variance for the hidden and cell latents (32-d each).
        # NOTE(review): both mean and logv are themselves random here, so the
        # latent is not a plain N(0, I) draw -- confirm this is intended.
        hidden_mean = torch.randn([1, 1, 32]).to(device)
        hidden_logv = torch.randn([1, 1, 32]).to(device)
        cell_mean = torch.randn([1, 1, 32]).to(device)
        cell_logv = torch.randn([1, 1, 32]).to(device)

        # Sample the latents and project them with the decoder's latent2hidden layer.
        # NOTE(review): the cell latent also goes through latent2hidden -- confirm
        # the decoder has no dedicated projection for the cell state.
        encoder_hidden = reparaterization_trick(hidden_mean, hidden_logv)
        encoder_hidden = decoder.latent2hidden(encoder_hidden)
        encoder_cell = reparaterization_trick(cell_mean, cell_logv)
        encoder_cell = decoder.latent2hidden(encoder_cell)

        # Decode the same latent once per tense condition.
        tmp = []
        for tense in range(4):
            condition = label[tense].view(1, 1, 4)
            hidden = torch.cat([encoder_hidden, condition], dim=2)
            cell = torch.cat([encoder_cell, condition], dim=2)
            decoded_indices = decoder.evaluate(context_vector=hidden, decoder_cell=cell)
            results = [train_loader.vocab.indices_to_sequence(indices)
                       for indices in decoded_indices]
            # Only the first decoded sequence is kept for each condition.
            tmp.append(results[0])
        words.append(tmp)
print(words)
print("Total Gaussian score:", Gaussian_score(words))

[['soften', 'suoveys', 'suoving', 'suoved'], ['servey', 'seffers', 'seffying', 'serveyed'], ['compate', 'compates', 'compeating', 'compeated'], ['remain', 'remains', 'remailing', 'remained'], ['soften', 'softens', 'surving', 'softened'], ['senge', 'sends', 'senting', 'sebled'], ['sunfor', 'sunfors', 'sundering', 'sunfored'], ['comber', 'combers', 'compating', 'combered'], ['compane', 'companes', 'companing', 'siffened'], ['meate', 'meates', 'meating', 'meated'], ['stack', 'suvers', 'suvering', 'suvered'], ['siffen', 'suvides', 'suviding', 'suvided'], ['compate', 'compates', 'compating', 'compated'], ['suver', 'suvers', 'suvering', 'suver'], ['meare', 'meares', 'meating', 'meased'], ['comber', 'combers', 'compating', 'combered'], ['compress', 'compresses', 'compressing', 'compressed'], ['compete', 'competes', 'comperting', 'compented'], ['subvise', 'subles', 'subles', 'subled'], ['meature', 'meacks', 'meating', 'meatured'], ['compate', 'compates', 'compating', 'compated'], ['compete', '