In [1]:
from fastNLP import DataSet
import string
import torch
import pickle
import numpy as np



In [2]:
def nli_preprocessing(path):
    """Read a tab-separated NLI file and derive the fields used for training.

    Steps, in order:
      1. `sentence1` / `sentence2` are lower-cased, every character in
         `string.punctuation` is replaced by a space, and the result is
         stripped; stored as `premise` / `hypothesis`.
      2. Instances whose `gold_label` is exactly '-' are dropped.
      3. `gold_label` (stripped) is mapped to an integer `label` target:
         entailment -> 0, neutral -> 1, contradiction -> 2, anything else -> -1.
      4. `premise` / `hypothesis` are whitespace-split into
         `premise_words` / `hypothesis_words`.
      5. The raw sentence, parse, id and per-annotator label columns are removed.

    Args:
        path: location of the tab-separated file handed to `DataSet.read_csv`.

    Returns:
        The `DataSet` produced by `DataSet.read_csv`, modified as described.
    """
    # Punctuation becomes a space (rather than being deleted) so that tokens
    # joined only by punctuation are separated by the later split().
    punct_to_space = str.maketrans(dict.fromkeys(string.punctuation, ' '))
    label_ids = {'entailment': 0, 'neutral': 1, 'contradiction': 2}
    unused_fields = (
        'sentence1_binary_parse', 'sentence2_binary_parse',
        'sentence1_parse', 'sentence2_parse',
        'sentence1', 'sentence2',
        'promptID', 'pairID',
        'label1', 'label2', 'label3', 'label4', 'label5',
    )

    def normalise(field):
        # Build the per-instance cleaner for one raw sentence column.
        def _clean(ins):
            return ins[field].lower().translate(punct_to_space).strip()
        return _clean

    def tokenise(field):
        # Build the per-instance whitespace tokenizer for one cleaned column.
        def _split(ins):
            return ins[field].split()
        return _split

    def labeling(ins):
        # Unknown label strings fall back to -1.
        return label_ids.get(ins['gold_label'].strip(), -1)

    ds = DataSet.read_csv(path, sep='\t')
    ds.apply(normalise('sentence1'), new_field_name='premise')
    ds.apply(normalise('sentence2'), new_field_name='hypothesis')
    ds.drop(lambda ins: ins['gold_label'] == '-')
    ds.apply(labeling, new_field_name='label', is_target=True)
    ds.apply(tokenise('premise'), new_field_name='premise_words')
    ds.apply(tokenise('hypothesis'), new_field_name='hypothesis_words')
    for field_name in unused_fields:
        ds.delete_field(field_name)
    return ds

In [3]:
# Preprocess the MultiNLI training file and cache the resulting DataSet on disk.
# NOTE(review): the path below is an absolute, machine-specific location and
# has to be edited before this cell can run anywhere else.
train_path = '/home/lyhe/Fudan-Deep_Learning/ESIM/test/multi_data/multinli_1.0/multinli_1.0_train.txt'
ds_train = nli_preprocessing(train_path)
with open('multinli_ds_train.pkl', 'wb') as file:
    pickle.dump(ds_train, file)
# Uncomment to reload the cached DataSet instead of re-running the preprocessing:
# with open('multinli_ds_train.pkl', 'rb') as file:
#     ds_train = pickle.load(file)

In [4]:
# Preprocess the MultiNLI matched dev file and cache the resulting DataSet on disk.
# NOTE(review): absolute, machine-specific path; edit before running elsewhere.
dev_path = '/home/lyhe/Fudan-Deep_Learning/ESIM/test/multi_data/multinli_1.0/multinli_1.0_dev_matched.txt'
ds_dev = nli_preprocessing(dev_path)
with open('multinli_ds_dev.pkl', 'wb') as file:
    pickle.dump(ds_dev, file)
# Uncomment to reload the cached DataSet instead of re-running the preprocessing:
# with open('multinli_ds_dev.pkl', 'rb') as file:
#     ds_dev = pickle.load(file)

In [22]:
# Preprocess the MultiNLI mismatched dev file and cache the resulting DataSet on disk.
# NOTE(review): absolute, machine-specific path; edit before running elsewhere.
dev_mis_path = '/home/lyhe/Fudan-Deep_Learning/ESIM/test/multi_data/multinli_1.0/multinli_1.0_dev_mismatched.txt'
ds_dev_mis = nli_preprocessing(dev_mis_path)
with open('multinli_ds_dev_mis.pkl', 'wb') as file:
    pickle.dump(ds_dev_mis, file)

In [5]:
from fastNLP import Vocabulary

In [6]:
# Build the vocabulary from the premise and hypothesis tokens of the training
# set only; the dev sets do not contribute words.
vocab = Vocabulary(min_freq=1)
# apply() is used here purely for its side effect (vocab.add); no new field is
# created and the per-instance lists it produces are discarded.
ds_train.apply(lambda x: [vocab.add(word) for word in x['premise_words']+x['hypothesis_words']])
print(len(vocab))
# Cache the vocabulary so later sessions can skip rebuilding it.
with open('multinli_vocab.pkl', 'wb') as file:
     pickle.dump(vocab, file)
# Uncomment to reload the cached vocabulary instead:
# with open('multinli_vocab.pkl', 'rb') as file:
#     vocab = pickle.load(file)

70965


In [8]:
# Load the embedding matrix from a pickle file. The cells shown in this
# notebook do not create 'emb_matrix.pkl'; it must already exist.
# NOTE(review): presumably its rows are aligned with `vocab` indices — confirm
# against whatever produced the file.
with open('emb_matrix.pkl', 'rb') as file:
    emb_matrix= pickle.load(file)

In [None]:
def _index_words(words_field):
    """Return a per-instance function that maps the tokens stored in
    `words_field` to their indices in the notebook-level `vocab`."""
    def _to_indices(ins):
        return [vocab.to_index(token) for token in ins[words_field]]
    return _to_indices


# Add the index sequences (`p_seq` for the premise, `h_seq` for the hypothesis)
# as input fields on the training set and on both dev sets, in that order.
for dataset in (ds_train, ds_dev, ds_dev_mis):
    for words_field, seq_field in (('premise_words', 'p_seq'),
                                   ('hypothesis_words', 'h_seq')):
        dataset.apply(_index_words(words_field),
                      new_field_name=seq_field, is_input=True)

In [10]:
# Cache the three datasets under new file names, separate from the caches
# written right after preprocessing.
with open('multinli_ds_train_new.pkl', 'wb') as file:
    pickle.dump(ds_train, file)
with open('multinli_ds_dev_new.pkl', 'wb') as file:
    pickle.dump(ds_dev, file)
with open('multinli_ds_dev_mis_new.pkl', 'wb') as file:
    # Fixed: this file used to be written from `ds_dev`, so the "mismatched"
    # cache silently contained a second copy of the matched dev set.
    pickle.dump(ds_dev_mis, file)
# Uncomment to reload the cached datasets instead of rebuilding them:
# with open('multinli_ds_train_new.pkl', 'rb') as file:
#     ds_train = pickle.load(file)
# with open('multinli_ds_dev_new.pkl', 'rb') as file:
#     ds_dev = pickle.load(file)

In [11]:
import model

In [12]:
# ESIM model from the local `model` module, with num_word set to the size of
# the vocabulary.
multisnli_model = model.ESIM(hidden_size = 300, embeds_dim = 300, linear_size = 300, num_word = len(vocab))
# NOTE(review): load_pretrained_glove is defined in the `model` module, which
# is not visible here; presumably it initialises the embedding layer from
# emb_matrix — confirm.
multisnli_model.load_pretrained_glove(emb_matrix)

In [28]:
# Second ESIM instance with the same constructor arguments; load_pretrained_glove
# is not called on it, so it serves as the comparison run without emb_matrix.
multisnli_model_no_emb = model.ESIM(hidden_size = 300, embeds_dim = 300, linear_size = 300, num_word = len(vocab))

In [13]:
from fastNLP import CrossEntropyLoss
from fastNLP import AccuracyMetric
from fastNLP import Trainer
from fastNLP.core.optimizer import Adam

In [35]:
# Train multisnli_model for 10 epochs on ds_train, using ds_dev (matched dev
# file) with AccuracyMetric for evaluation.
# The loss is configured with pred="pred" and target="label"; "label" is the
# target field created in nli_preprocessing, "pred" is presumably the key of
# the model's output — confirm against model.ESIM.
# NOTE(review): the saved output of this cell ends with a RuntimeError raised
# while loading a state_dict for ESIM (missing *.running_mean / *.running_var
# keys), after training steps for epoch 10 had been logged.
trainer = Trainer(model=multisnli_model,
                  train_data=ds_train,
                  dev_data=ds_dev,
                  loss=CrossEntropyLoss(pred="pred", target="label"),
                  metrics=AccuracyMetric(),
                  optimizer=Adam(lr=0.0001, weight_decay=0), 
                  n_epochs=10,
                  use_cuda=True,
                  use_tqdm=False)
trainer.train()

input fields after batch(if batch size is 2):
	p_seq: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 62]) 
	h_seq: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 12]) 
target fields after batch(if batch size is 2):
	label: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) 

training epochs started 2019-01-17 03-08-16
[epoch:   1 step:   49] train loss: 32.4344 time: 0:00:05
[epoch:   1 step:   99] train loss: 33.1824 time: 0:00:10
[epoch:   1 step:  149] train loss: 32.1184 time: 0:00:15
[epoch:   1 step:  199] train loss: 32.1006 time: 0:00:20
[epoch:   1 step:  249] train loss: 32.6341 time: 0:00:25
[epoch:   1 step:  299] train loss: 33.0862 time: 0:00:30
[epoch:   1 step:  349] train loss: 32.2381 time: 0:00:35
[epoch:   1 step:  399] train loss: 32.3593 time: 0:00:40
[epoch:   1 step:  449] train loss: 33.3149 time: 0:00:46
[epoch:   1 step:  499] train loss: 31.3887 time: 0:00:51
[epoch:   1 step:  549] train loss: 32.2165

[epoch:   1 step: 7699] train loss: 32.703 time: 0:13:56
[epoch:   1 step: 7749] train loss: 32.4989 time: 0:14:01
[epoch:   1 step: 7799] train loss: 32.6829 time: 0:14:06
[epoch:   1 step: 7849] train loss: 32.2902 time: 0:14:12
[epoch:   1 step: 7899] train loss: 33.0365 time: 0:14:18
[epoch:   1 step: 7949] train loss: 32.9664 time: 0:14:23
[epoch:   1 step: 7999] train loss: 32.5759 time: 0:14:29
[epoch:   1 step: 8049] train loss: 32.2735 time: 0:14:34
[epoch:   1 step: 8099] train loss: 32.5294 time: 0:14:40
[epoch:   1 step: 8149] train loss: 33.3235 time: 0:14:46
[epoch:   1 step: 8199] train loss: 32.9331 time: 0:14:51
[epoch:   1 step: 8249] train loss: 32.5227 time: 0:14:57
[epoch:   1 step: 8299] train loss: 32.4178 time: 0:15:02
[epoch:   1 step: 8349] train loss: 33.4999 time: 0:15:08
[epoch:   1 step: 8399] train loss: 32.4291 time: 0:15:14
[epoch:   1 step: 8449] train loss: 33.0608 time: 0:15:19
[epoch:   1 step: 8499] train loss: 32.8927 time: 0:15:25
[epoch:   1 ste

[epoch:   2 step: 14649] train loss: 32.4546 time: 0:26:55
[epoch:   2 step: 14699] train loss: 32.2487 time: 0:27:00
[epoch:   2 step: 14749] train loss: 32.1457 time: 0:27:05
[epoch:   2 step: 14799] train loss: 31.9668 time: 0:27:10
[epoch:   2 step: 14849] train loss: 32.1305 time: 0:27:16
[epoch:   2 step: 14899] train loss: 32.3353 time: 0:27:21
[epoch:   2 step: 14949] train loss: 32.5325 time: 0:27:26
[epoch:   2 step: 14999] train loss: 32.5564 time: 0:27:32
[epoch:   2 step: 15049] train loss: 33.0037 time: 0:27:36
[epoch:   2 step: 15099] train loss: 32.637 time: 0:27:42
[epoch:   2 step: 15149] train loss: 31.5779 time: 0:27:47
[epoch:   2 step: 15199] train loss: 32.5032 time: 0:27:52
[epoch:   2 step: 15249] train loss: 32.7869 time: 0:27:57
[epoch:   2 step: 15299] train loss: 31.7581 time: 0:28:02
[epoch:   2 step: 15349] train loss: 32.1811 time: 0:28:07
[epoch:   2 step: 15399] train loss: 32.711 time: 0:28:12
[epoch:   2 step: 15449] train loss: 32.61 time: 0:28:17
[

[epoch:   2 step: 21649] train loss: 33.1502 time: 0:39:15
[epoch:   2 step: 21699] train loss: 32.521 time: 0:39:20
[epoch:   2 step: 21749] train loss: 32.5349 time: 0:39:26
[epoch:   2 step: 21799] train loss: 32.62 time: 0:39:31
[epoch:   2 step: 21849] train loss: 32.9576 time: 0:39:37
[epoch:   2 step: 21899] train loss: 32.5024 time: 0:39:43
[epoch:   2 step: 21949] train loss: 32.5066 time: 0:39:48
[epoch:   2 step: 21999] train loss: 32.5226 time: 0:39:53
[epoch:   2 step: 22049] train loss: 33.291 time: 0:39:59
[epoch:   2 step: 22099] train loss: 32.8653 time: 0:40:05
[epoch:   2 step: 22149] train loss: 32.3627 time: 0:40:11
[epoch:   2 step: 22199] train loss: 32.7769 time: 0:40:16
[epoch:   2 step: 22249] train loss: 32.7135 time: 0:40:22
[epoch:   2 step: 22299] train loss: 33.1222 time: 0:40:28
[epoch:   2 step: 22349] train loss: 33.0001 time: 0:40:33
[epoch:   2 step: 22399] train loss: 32.9912 time: 0:40:39
[epoch:   2 step: 22449] train loss: 32.3438 time: 0:40:44
[

[epoch:   3 step: 28549] train loss: 32.9715 time: 0:52:10
[epoch:   3 step: 28599] train loss: 32.5836 time: 0:52:15
[epoch:   3 step: 28649] train loss: 32.5328 time: 0:52:21
[epoch:   3 step: 28699] train loss: 32.3409 time: 0:52:26
[epoch:   3 step: 28749] train loss: 33.0848 time: 0:52:32
[epoch:   3 step: 28799] train loss: 32.472 time: 0:52:37
[epoch:   3 step: 28849] train loss: 32.4217 time: 0:52:43
[epoch:   3 step: 28899] train loss: 33.0413 time: 0:52:48
[epoch:   3 step: 28949] train loss: 32.2383 time: 0:52:54
[epoch:   3 step: 28999] train loss: 32.131 time: 0:53:00
[epoch:   3 step: 29049] train loss: 32.4604 time: 0:53:05
[epoch:   3 step: 29099] train loss: 32.8362 time: 0:53:11
[epoch:   3 step: 29149] train loss: 32.021 time: 0:53:16
[epoch:   3 step: 29199] train loss: 32.5453 time: 0:53:22
[epoch:   3 step: 29249] train loss: 32.4375 time: 0:53:27
[epoch:   3 step: 29299] train loss: 32.3254 time: 0:53:33
[epoch:   3 step: 29349] train loss: 32.0145 time: 0:53:38


[epoch:   3 step: 35549] train loss: 32.8522 time: 1:04:33
[epoch:   3 step: 35599] train loss: 32.3255 time: 1:04:38
[epoch:   3 step: 35649] train loss: 32.3198 time: 1:04:43
[epoch:   3 step: 35699] train loss: 32.8923 time: 1:04:48
[epoch:   3 step: 35749] train loss: 32.5537 time: 1:04:53
[epoch:   3 step: 35799] train loss: 32.6339 time: 1:04:59
[epoch:   3 step: 35849] train loss: 32.659 time: 1:05:04
[epoch:   3 step: 35899] train loss: 32.3858 time: 1:05:09
[epoch:   3 step: 35949] train loss: 32.3999 time: 1:05:14
[epoch:   3 step: 35999] train loss: 33.1354 time: 1:05:19
[epoch:   3 step: 36049] train loss: 32.323 time: 1:05:24
[epoch:   3 step: 36099] train loss: 33.2735 time: 1:05:29
[epoch:   3 step: 36149] train loss: 32.4558 time: 1:05:34
[epoch:   3 step: 36199] train loss: 32.5483 time: 1:05:39
[epoch:   3 step: 36249] train loss: 32.6204 time: 1:05:43
[epoch:   3 step: 36299] train loss: 32.6894 time: 1:05:48
[epoch:   3 step: 36349] train loss: 32.2238 time: 1:05:54

[epoch:   4 step: 42449] train loss: 32.8823 time: 1:17:22
[epoch:   4 step: 42499] train loss: 32.9307 time: 1:17:27
[epoch:   4 step: 42549] train loss: 32.2791 time: 1:17:32
[epoch:   4 step: 42599] train loss: 32.5873 time: 1:17:38
[epoch:   4 step: 42649] train loss: 32.8329 time: 1:17:43
[epoch:   4 step: 42699] train loss: 31.9489 time: 1:17:49
[epoch:   4 step: 42749] train loss: 31.9542 time: 1:17:54
[epoch:   4 step: 42799] train loss: 32.6582 time: 1:18:00
[epoch:   4 step: 42849] train loss: 32.0632 time: 1:18:05
[epoch:   4 step: 42899] train loss: 32.5245 time: 1:18:10
[epoch:   4 step: 42949] train loss: 32.5909 time: 1:18:16
[epoch:   4 step: 42999] train loss: 33.1537 time: 1:18:22
[epoch:   4 step: 43049] train loss: 32.3267 time: 1:18:27
[epoch:   4 step: 43099] train loss: 32.6004 time: 1:18:32
[epoch:   4 step: 43149] train loss: 32.3303 time: 1:18:38
[epoch:   4 step: 43199] train loss: 32.4717 time: 1:18:44
[epoch:   4 step: 43249] train loss: 32.9075 time: 1:18:

[epoch:   5 step: 49349] train loss: 32.0132 time: 1:30:19
[epoch:   5 step: 49399] train loss: 31.9186 time: 1:30:24
[epoch:   5 step: 49449] train loss: 32.8381 time: 1:30:30
[epoch:   5 step: 49499] train loss: 32.3078 time: 1:30:35
[epoch:   5 step: 49549] train loss: 31.7748 time: 1:30:40
[epoch:   5 step: 49599] train loss: 31.9522 time: 1:30:46
[epoch:   5 step: 49649] train loss: 32.4604 time: 1:30:51
[epoch:   5 step: 49699] train loss: 32.1676 time: 1:30:55
[epoch:   5 step: 49749] train loss: 32.104 time: 1:31:00
[epoch:   5 step: 49799] train loss: 32.4079 time: 1:31:05
[epoch:   5 step: 49849] train loss: 33.0637 time: 1:31:10
[epoch:   5 step: 49899] train loss: 32.4427 time: 1:31:15
[epoch:   5 step: 49949] train loss: 32.8993 time: 1:31:20
[epoch:   5 step: 49999] train loss: 31.7224 time: 1:31:26
[epoch:   5 step: 50049] train loss: 32.6218 time: 1:31:31
[epoch:   5 step: 50099] train loss: 32.4451 time: 1:31:36
[epoch:   5 step: 50149] train loss: 32.5234 time: 1:31:4

[epoch:   5 step: 56349] train loss: 32.8805 time: 1:42:36
[epoch:   5 step: 56399] train loss: 32.3064 time: 1:42:42
[epoch:   5 step: 56449] train loss: 31.7854 time: 1:42:47
[epoch:   5 step: 56499] train loss: 32.1806 time: 1:42:53
[epoch:   5 step: 56549] train loss: 32.5836 time: 1:42:59
[epoch:   5 step: 56599] train loss: 32.126 time: 1:43:04
[epoch:   5 step: 56649] train loss: 32.2738 time: 1:43:09
[epoch:   5 step: 56699] train loss: 31.726 time: 1:43:15
[epoch:   5 step: 56749] train loss: 32.4898 time: 1:43:20
[epoch:   5 step: 56799] train loss: 31.8793 time: 1:43:26
[epoch:   5 step: 56849] train loss: 32.3953 time: 1:43:31
[epoch:   5 step: 56899] train loss: 32.6831 time: 1:43:37
[epoch:   5 step: 56949] train loss: 32.5667 time: 1:43:42
[epoch:   5 step: 56999] train loss: 32.5522 time: 1:43:48
[epoch:   5 step: 57049] train loss: 32.2958 time: 1:43:53
[epoch:   5 step: 57099] train loss: 32.6873 time: 1:43:59
[epoch:   5 step: 57149] train loss: 32.3076 time: 1:44:05

[epoch:   6 step: 63249] train loss: 31.9837 time: 1:55:32
[epoch:   6 step: 63299] train loss: 32.0175 time: 1:55:38
[epoch:   6 step: 63349] train loss: 32.1792 time: 1:55:43
[epoch:   6 step: 63399] train loss: 31.9529 time: 1:55:49
[epoch:   6 step: 63449] train loss: 32.638 time: 1:55:54
[epoch:   6 step: 63499] train loss: 31.9917 time: 1:55:59
[epoch:   6 step: 63549] train loss: 32.6083 time: 1:56:05
[epoch:   6 step: 63599] train loss: 32.1019 time: 1:56:10
[epoch:   6 step: 63649] train loss: 32.7298 time: 1:56:15
[epoch:   6 step: 63699] train loss: 31.9395 time: 1:56:21
[epoch:   6 step: 63749] train loss: 31.8264 time: 1:56:26
[epoch:   6 step: 63799] train loss: 32.4777 time: 1:56:33
[epoch:   6 step: 63849] train loss: 32.8056 time: 1:56:38
[epoch:   6 step: 63899] train loss: 32.5339 time: 1:56:44
[epoch:   6 step: 63949] train loss: 32.5727 time: 1:56:49
[epoch:   6 step: 63999] train loss: 31.9923 time: 1:56:55
[epoch:   6 step: 64049] train loss: 33.1525 time: 1:57:0

[epoch:   6 step: 70249] train loss: 32.0788 time: 2:07:58
[epoch:   6 step: 70299] train loss: 32.7064 time: 2:08:03
[epoch:   6 step: 70349] train loss: 32.5606 time: 2:08:07
[epoch:   6 step: 70399] train loss: 32.6705 time: 2:08:12
[epoch:   6 step: 70449] train loss: 32.4351 time: 2:08:17
[epoch:   6 step: 70499] train loss: 31.913 time: 2:08:22
[epoch:   6 step: 70549] train loss: 32.1736 time: 2:08:26
[epoch:   6 step: 70599] train loss: 32.2161 time: 2:08:32
[epoch:   6 step: 70649] train loss: 32.6818 time: 2:08:37
[epoch:   6 step: 70699] train loss: 33.068 time: 2:08:42
[epoch:   6 step: 70749] train loss: 32.3976 time: 2:08:47
[epoch:   6 step: 70799] train loss: 32.5128 time: 2:08:53
[epoch:   6 step: 70849] train loss: 32.2289 time: 2:08:57
[epoch:   6 step: 70899] train loss: 32.1063 time: 2:09:03
[epoch:   6 step: 70949] train loss: 32.0384 time: 2:09:08
[epoch:   6 step: 70999] train loss: 32.2967 time: 2:09:13
[epoch:   6 step: 71049] train loss: 32.798 time: 2:09:18


[epoch:   7 step: 77149] train loss: 32.3321 time: 2:18:43
[epoch:   7 step: 77199] train loss: 32.1223 time: 2:18:47
[epoch:   7 step: 77249] train loss: 32.6001 time: 2:18:51
[epoch:   7 step: 77299] train loss: 31.3315 time: 2:18:54
[epoch:   7 step: 77349] train loss: 32.8071 time: 2:18:58
[epoch:   7 step: 77399] train loss: 32.1226 time: 2:19:01
[epoch:   7 step: 77449] train loss: 32.8857 time: 2:19:05
[epoch:   7 step: 77499] train loss: 32.7244 time: 2:19:09
[epoch:   7 step: 77549] train loss: 31.3897 time: 2:19:13
[epoch:   7 step: 77599] train loss: 32.1658 time: 2:19:17
[epoch:   7 step: 77649] train loss: 32.2093 time: 2:19:21
[epoch:   7 step: 77699] train loss: 32.0672 time: 2:19:24
[epoch:   7 step: 77749] train loss: 31.8781 time: 2:19:28
[epoch:   7 step: 77799] train loss: 32.2554 time: 2:19:32
[epoch:   7 step: 77849] train loss: 32.0529 time: 2:19:36
[epoch:   7 step: 77899] train loss: 31.8817 time: 2:19:40
[epoch:   7 step: 77949] train loss: 31.9402 time: 2:19:

[epoch:   7 step: 84149] train loss: 32.1332 time: 2:27:30
[epoch:   7 step: 84199] train loss: 32.3995 time: 2:27:33
[epoch:   7 step: 84249] train loss: 31.8115 time: 2:27:37
[epoch:   7 step: 84299] train loss: 32.0891 time: 2:27:41
[epoch:   7 step: 84349] train loss: 32.823 time: 2:27:45
[epoch:   7 step: 84399] train loss: 32.3295 time: 2:27:48
[epoch:   7 step: 84449] train loss: 31.8923 time: 2:27:52
[epoch:   7 step: 84499] train loss: 32.365 time: 2:27:56
[epoch:   7 step: 84549] train loss: 32.6742 time: 2:27:59
[epoch:   7 step: 84599] train loss: 32.4155 time: 2:28:03
[epoch:   7 step: 84649] train loss: 32.6628 time: 2:28:07
[epoch:   7 step: 84699] train loss: 31.803 time: 2:28:11
[epoch:   7 step: 84749] train loss: 32.4763 time: 2:28:14
[epoch:   7 step: 84799] train loss: 31.7575 time: 2:28:18
[epoch:   7 step: 84849] train loss: 32.0284 time: 2:28:22
[epoch:   7 step: 84899] train loss: 32.6007 time: 2:28:25
[epoch:   7 step: 84949] train loss: 32.5985 time: 2:28:29


[epoch:   8 step: 91049] train loss: 31.663 time: 2:35:24
[epoch:   8 step: 91099] train loss: 32.2112 time: 2:35:28
[epoch:   8 step: 91149] train loss: 32.3134 time: 2:35:32
[epoch:   8 step: 91199] train loss: 32.7264 time: 2:35:35
[epoch:   8 step: 91249] train loss: 32.486 time: 2:35:39
[epoch:   8 step: 91299] train loss: 32.612 time: 2:35:43
[epoch:   8 step: 91349] train loss: 32.3403 time: 2:35:47
[epoch:   8 step: 91399] train loss: 32.2114 time: 2:35:51
[epoch:   8 step: 91449] train loss: 32.2064 time: 2:35:54
[epoch:   8 step: 91499] train loss: 32.6971 time: 2:35:59
[epoch:   8 step: 91549] train loss: 32.4419 time: 2:36:02
[epoch:   8 step: 91599] train loss: 32.874 time: 2:36:06
[epoch:   8 step: 91649] train loss: 32.0437 time: 2:36:10
[epoch:   8 step: 91699] train loss: 32.3839 time: 2:36:14
[epoch:   8 step: 91749] train loss: 31.8771 time: 2:36:18
[epoch:   8 step: 91799] train loss: 32.2848 time: 2:36:22
[epoch:   8 step: 91849] train loss: 32.4908 time: 2:36:25
[

[epoch:   8 step: 98049] train loss: 32.8747 time: 2:44:11
[epoch:   8 step: 98099] train loss: 31.7245 time: 2:44:15
[epoch:   8 step: 98149] train loss: 32.3999 time: 2:44:19
Evaluation at Epoch 8/10. Step:98176/122720. AccuracyMetric: acc=0.684972
[epoch:   9 step: 98199] train loss: 32.2345 time: 2:44:30
[epoch:   9 step: 98249] train loss: 32.5278 time: 2:44:34
[epoch:   9 step: 98299] train loss: 32.3252 time: 2:44:38
[epoch:   9 step: 98349] train loss: 32.4359 time: 2:44:41
[epoch:   9 step: 98399] train loss: 32.0761 time: 2:44:45
[epoch:   9 step: 98449] train loss: 32.4211 time: 2:44:49
[epoch:   9 step: 98499] train loss: 32.3702 time: 2:44:52
[epoch:   9 step: 98549] train loss: 32.6345 time: 2:44:56
[epoch:   9 step: 98599] train loss: 31.8807 time: 2:45:00
[epoch:   9 step: 98649] train loss: 31.6801 time: 2:45:04
[epoch:   9 step: 98699] train loss: 32.5682 time: 2:45:08
[epoch:   9 step: 98749] train loss: 31.4328 time: 2:45:11
[epoch:   9 step: 98799] train loss: 32.1

[epoch:   9 step: 104899] train loss: 32.1509 time: 2:52:44
[epoch:   9 step: 104949] train loss: 32.7579 time: 2:52:47
[epoch:   9 step: 104999] train loss: 32.247 time: 2:52:51
[epoch:   9 step: 105049] train loss: 32.9224 time: 2:52:54
[epoch:   9 step: 105099] train loss: 32.4574 time: 2:52:57
[epoch:   9 step: 105149] train loss: 31.8976 time: 2:53:01
[epoch:   9 step: 105199] train loss: 32.6032 time: 2:53:04
[epoch:   9 step: 105249] train loss: 32.1426 time: 2:53:07
[epoch:   9 step: 105299] train loss: 32.0947 time: 2:53:10
[epoch:   9 step: 105349] train loss: 32.4205 time: 2:53:13
[epoch:   9 step: 105399] train loss: 31.8424 time: 2:53:17
[epoch:   9 step: 105449] train loss: 32.2406 time: 2:53:20
[epoch:   9 step: 105499] train loss: 32.7265 time: 2:53:23
[epoch:   9 step: 105549] train loss: 32.3896 time: 2:53:26
[epoch:   9 step: 105599] train loss: 31.9918 time: 2:53:29
[epoch:   9 step: 105649] train loss: 32.2053 time: 2:53:32
[epoch:   9 step: 105699] train loss: 32.

[epoch:  10 step: 111699] train loss: 31.9978 time: 3:00:43
[epoch:  10 step: 111749] train loss: 32.2665 time: 3:00:46
[epoch:  10 step: 111799] train loss: 32.4677 time: 3:00:50
[epoch:  10 step: 111849] train loss: 32.1056 time: 3:00:54
[epoch:  10 step: 111899] train loss: 32.1562 time: 3:00:58
[epoch:  10 step: 111949] train loss: 32.3231 time: 3:01:02
[epoch:  10 step: 111999] train loss: 32.0855 time: 3:01:05
[epoch:  10 step: 112049] train loss: 32.0729 time: 3:01:09
[epoch:  10 step: 112099] train loss: 32.2899 time: 3:01:13
[epoch:  10 step: 112149] train loss: 32.0684 time: 3:01:17
[epoch:  10 step: 112199] train loss: 31.5986 time: 3:01:20
[epoch:  10 step: 112249] train loss: 31.9636 time: 3:01:24
[epoch:  10 step: 112299] train loss: 31.9409 time: 3:01:28
[epoch:  10 step: 112349] train loss: 31.9675 time: 3:01:32
[epoch:  10 step: 112399] train loss: 31.7324 time: 3:01:36
[epoch:  10 step: 112449] train loss: 32.6697 time: 3:01:39
[epoch:  10 step: 112499] train loss: 32

[epoch:  10 step: 118549] train loss: 32.1583 time: 3:09:16
[epoch:  10 step: 118599] train loss: 32.2222 time: 3:09:20
[epoch:  10 step: 118649] train loss: 32.6165 time: 3:09:23
[epoch:  10 step: 118699] train loss: 32.4967 time: 3:09:27
[epoch:  10 step: 118749] train loss: 32.4347 time: 3:09:31
[epoch:  10 step: 118799] train loss: 32.4674 time: 3:09:35
[epoch:  10 step: 118849] train loss: 31.669 time: 3:09:38
[epoch:  10 step: 118899] train loss: 32.4449 time: 3:09:42
[epoch:  10 step: 118949] train loss: 32.8065 time: 3:09:46
[epoch:  10 step: 118999] train loss: 31.8555 time: 3:09:50
[epoch:  10 step: 119049] train loss: 32.0136 time: 3:09:53
[epoch:  10 step: 119099] train loss: 32.3032 time: 3:09:57
[epoch:  10 step: 119149] train loss: 32.3094 time: 3:10:01
[epoch:  10 step: 119199] train loss: 32.5394 time: 3:10:04
[epoch:  10 step: 119249] train loss: 32.2024 time: 3:10:08
[epoch:  10 step: 119299] train loss: 32.3247 time: 3:10:12
[epoch:  10 step: 119349] train loss: 32.

RuntimeError: Error(s) in loading state_dict for ESIM:
	Missing key(s) in state_dict: "bn_embeds.running_mean", "bn_embeds.running_var", "fc.0.running_mean", "fc.0.running_var", "fc.3.running_mean", "fc.3.running_var", "fc.7.running_mean", "fc.7.running_var". 

In [31]:
# Same training setup as for multisnli_model, but applied to
# multisnli_model_no_emb and configured for 64 epochs.
# NOTE(review): the saved output of this cell ends with a KeyboardInterrupt
# during epoch 6, so the run did not reach the configured 64 epochs.
newtrainer = Trainer(model=multisnli_model_no_emb,
                  train_data=ds_train,
                  dev_data=ds_dev,
                  loss=CrossEntropyLoss(pred="pred", target="label"),
                  metrics=AccuracyMetric(),
                  optimizer=Adam(lr=0.0001, weight_decay=0), 
                  n_epochs=64,
                  use_cuda=True,
                  use_tqdm=False)
newtrainer.train()

input fields after batch(if batch size is 2):
	p_seq: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 62]) 
	h_seq: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 12]) 
target fields after batch(if batch size is 2):
	label: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) 

training epochs started 2019-01-16 19-36-11
[epoch:   1 step:   49] train loss: 54.2324 time: 0:00:05
[epoch:   1 step:   99] train loss: 54.1121 time: 0:00:10
[epoch:   1 step:  149] train loss: 53.3246 time: 0:00:15
[epoch:   1 step:  199] train loss: 52.4784 time: 0:00:20
[epoch:   1 step:  249] train loss: 52.0618 time: 0:00:26
[epoch:   1 step:  299] train loss: 51.2121 time: 0:00:31
[epoch:   1 step:  349] train loss: 50.877 time: 0:00:36
[epoch:   1 step:  399] train loss: 49.7973 time: 0:00:41
[epoch:   1 step:  449] train loss: 49.8618 time: 0:00:46
[epoch:   1 step:  499] train loss: 49.548 time: 0:00:52
[epoch:   1 step:  549] train loss: 49.7396 t

[epoch:   1 step: 6799] train loss: 47.1444 time: 0:11:36
[epoch:   1 step: 6849] train loss: 45.2539 time: 0:11:42
[epoch:   1 step: 6899] train loss: 46.4565 time: 0:11:47
[epoch:   1 step: 6949] train loss: 45.1785 time: 0:11:52
[epoch:   1 step: 6999] train loss: 45.6723 time: 0:11:58
[epoch:   1 step: 7049] train loss: 44.6153 time: 0:12:04
[epoch:   1 step: 7099] train loss: 45.9192 time: 0:12:10
[epoch:   1 step: 7149] train loss: 44.6215 time: 0:12:16
[epoch:   1 step: 7199] train loss: 46.5246 time: 0:12:23
[epoch:   1 step: 7249] train loss: 46.466 time: 0:12:29
[epoch:   1 step: 7299] train loss: 46.4506 time: 0:12:35
[epoch:   1 step: 7349] train loss: 45.675 time: 0:12:41
[epoch:   1 step: 7399] train loss: 45.3096 time: 0:12:47
[epoch:   1 step: 7449] train loss: 46.2109 time: 0:12:54
[epoch:   1 step: 7499] train loss: 46.3164 time: 0:13:00
[epoch:   1 step: 7549] train loss: 45.2008 time: 0:13:06
[epoch:   1 step: 7599] train loss: 45.6012 time: 0:13:12
[epoch:   1 step

[epoch:   2 step: 13749] train loss: 43.7676 time: 0:26:05
[epoch:   2 step: 13799] train loss: 45.3805 time: 0:26:11
[epoch:   2 step: 13849] train loss: 44.2152 time: 0:26:17
[epoch:   2 step: 13899] train loss: 44.4942 time: 0:26:23
[epoch:   2 step: 13949] train loss: 43.4752 time: 0:26:29
[epoch:   2 step: 13999] train loss: 44.5762 time: 0:26:35
[epoch:   2 step: 14049] train loss: 44.802 time: 0:26:42
[epoch:   2 step: 14099] train loss: 44.6258 time: 0:26:48
[epoch:   2 step: 14149] train loss: 43.5564 time: 0:26:55
[epoch:   2 step: 14199] train loss: 45.2967 time: 0:27:01
[epoch:   2 step: 14249] train loss: 44.506 time: 0:27:07
[epoch:   2 step: 14299] train loss: 44.2137 time: 0:27:13
[epoch:   2 step: 14349] train loss: 43.5379 time: 0:27:19
[epoch:   2 step: 14399] train loss: 43.9998 time: 0:27:25
[epoch:   2 step: 14449] train loss: 45.1337 time: 0:27:31
[epoch:   2 step: 14499] train loss: 43.5821 time: 0:27:37
[epoch:   2 step: 14549] train loss: 44.688 time: 0:27:44


[epoch:   2 step: 20749] train loss: 44.7215 time: 0:38:58
[epoch:   2 step: 20799] train loss: 43.5532 time: 0:39:03
[epoch:   2 step: 20849] train loss: 44.163 time: 0:39:08
[epoch:   2 step: 20899] train loss: 43.4941 time: 0:39:13
[epoch:   2 step: 20949] train loss: 44.3623 time: 0:39:18
[epoch:   2 step: 20999] train loss: 45.1332 time: 0:39:23
[epoch:   2 step: 21049] train loss: 43.8074 time: 0:39:29
[epoch:   2 step: 21099] train loss: 44.0493 time: 0:39:34
[epoch:   2 step: 21149] train loss: 44.3574 time: 0:39:39
[epoch:   2 step: 21199] train loss: 43.7496 time: 0:39:44
[epoch:   2 step: 21249] train loss: 43.0583 time: 0:39:49
[epoch:   2 step: 21299] train loss: 44.2276 time: 0:39:55
[epoch:   2 step: 21349] train loss: 44.1169 time: 0:40:00
[epoch:   2 step: 21399] train loss: 43.2779 time: 0:40:05
[epoch:   2 step: 21449] train loss: 44.0078 time: 0:40:10
[epoch:   2 step: 21499] train loss: 44.5131 time: 0:40:15
[epoch:   2 step: 21549] train loss: 44.3279 time: 0:40:2

[epoch:   3 step: 27649] train loss: 43.4587 time: 0:49:00
[epoch:   3 step: 27699] train loss: 42.9534 time: 0:49:04
[epoch:   3 step: 27749] train loss: 42.7492 time: 0:49:07
[epoch:   3 step: 27799] train loss: 42.7888 time: 0:49:11
[epoch:   3 step: 27849] train loss: 43.4506 time: 0:49:15
[epoch:   3 step: 27899] train loss: 43.0298 time: 0:49:19
[epoch:   3 step: 27949] train loss: 42.739 time: 0:49:23
[epoch:   3 step: 27999] train loss: 42.7146 time: 0:49:26
[epoch:   3 step: 28049] train loss: 42.5091 time: 0:49:30
[epoch:   3 step: 28099] train loss: 43.8672 time: 0:49:34
[epoch:   3 step: 28149] train loss: 42.7908 time: 0:49:38
[epoch:   3 step: 28199] train loss: 43.6399 time: 0:49:41
[epoch:   3 step: 28249] train loss: 43.5863 time: 0:49:45
[epoch:   3 step: 28299] train loss: 42.3738 time: 0:49:49
[epoch:   3 step: 28349] train loss: 42.9907 time: 0:49:53
[epoch:   3 step: 28399] train loss: 42.9984 time: 0:49:57
[epoch:   3 step: 28449] train loss: 43.4054 time: 0:50:0

[epoch:   3 step: 34649] train loss: 43.161 time: 0:57:25
[epoch:   3 step: 34699] train loss: 43.3685 time: 0:57:28
[epoch:   3 step: 34749] train loss: 43.4536 time: 0:57:31
[epoch:   3 step: 34799] train loss: 41.8087 time: 0:57:34
[epoch:   3 step: 34849] train loss: 42.4115 time: 0:57:37
[epoch:   3 step: 34899] train loss: 42.7107 time: 0:57:41
[epoch:   3 step: 34949] train loss: 43.4296 time: 0:57:44
[epoch:   3 step: 34999] train loss: 43.4833 time: 0:57:47
[epoch:   3 step: 35049] train loss: 43.3964 time: 0:57:50
[epoch:   3 step: 35099] train loss: 44.2837 time: 0:57:54
[epoch:   3 step: 35149] train loss: 42.52 time: 0:57:57
[epoch:   3 step: 35199] train loss: 43.5595 time: 0:58:00
[epoch:   3 step: 35249] train loss: 44.1521 time: 0:58:03
[epoch:   3 step: 35299] train loss: 43.191 time: 0:58:06
[epoch:   3 step: 35349] train loss: 42.3602 time: 0:58:10
[epoch:   3 step: 35399] train loss: 43.4716 time: 0:58:14
[epoch:   3 step: 35449] train loss: 43.1986 time: 0:58:18
[

[epoch:   4 step: 41549] train loss: 41.9479 time: 1:10:41
[epoch:   4 step: 41599] train loss: 41.9062 time: 1:10:48
[epoch:   4 step: 41649] train loss: 41.7097 time: 1:10:54
[epoch:   4 step: 41699] train loss: 40.9704 time: 1:11:00
[epoch:   4 step: 41749] train loss: 42.9993 time: 1:11:06
[epoch:   4 step: 41799] train loss: 41.4466 time: 1:11:13
[epoch:   4 step: 41849] train loss: 41.8312 time: 1:11:19
[epoch:   4 step: 41899] train loss: 41.8153 time: 1:11:25
[epoch:   4 step: 41949] train loss: 42.2707 time: 1:11:31
[epoch:   4 step: 41999] train loss: 42.7148 time: 1:11:37
[epoch:   4 step: 42049] train loss: 42.7272 time: 1:11:43
[epoch:   4 step: 42099] train loss: 42.1449 time: 1:11:50
[epoch:   4 step: 42149] train loss: 42.8755 time: 1:11:56
[epoch:   4 step: 42199] train loss: 41.8924 time: 1:12:02
[epoch:   4 step: 42249] train loss: 41.8395 time: 1:12:08
[epoch:   4 step: 42299] train loss: 42.3706 time: 1:12:14
[epoch:   4 step: 42349] train loss: 41.7313 time: 1:12:

[epoch:   4 step: 48549] train loss: 42.1821 time: 1:23:47
[epoch:   4 step: 48599] train loss: 41.7799 time: 1:23:52
[epoch:   4 step: 48649] train loss: 42.0976 time: 1:23:58
[epoch:   4 step: 48699] train loss: 41.3939 time: 1:24:04
[epoch:   4 step: 48749] train loss: 41.8705 time: 1:24:09
[epoch:   4 step: 48799] train loss: 41.8191 time: 1:24:14
[epoch:   4 step: 48849] train loss: 42.2099 time: 1:24:20
[epoch:   4 step: 48899] train loss: 42.8247 time: 1:24:25
[epoch:   4 step: 48949] train loss: 41.8795 time: 1:24:31
[epoch:   4 step: 48999] train loss: 42.7211 time: 1:24:36
[epoch:   4 step: 49049] train loss: 42.988 time: 1:24:41
Evaluation at Epoch 4/64. Step:49088/785408. AccuracyMetric: acc=0.670301
[epoch:   5 step: 49099] train loss: 40.9355 time: 1:24:57
[epoch:   5 step: 49149] train loss: 41.0141 time: 1:25:02
[epoch:   5 step: 49199] train loss: 41.6076 time: 1:25:08
[epoch:   5 step: 49249] train loss: 40.8652 time: 1:25:13
[epoch:   5 step: 49299] train loss: 41.41

[epoch:   5 step: 55449] train loss: 40.9835 time: 1:35:22
[epoch:   5 step: 55499] train loss: 41.4322 time: 1:35:27
[epoch:   5 step: 55549] train loss: 40.8784 time: 1:35:33
[epoch:   5 step: 55599] train loss: 40.3211 time: 1:35:38
[epoch:   5 step: 55649] train loss: 41.1604 time: 1:35:44
[epoch:   5 step: 55699] train loss: 41.2086 time: 1:35:49
[epoch:   5 step: 55749] train loss: 40.511 time: 1:35:55
[epoch:   5 step: 55799] train loss: 40.8843 time: 1:36:00
[epoch:   5 step: 55849] train loss: 41.4322 time: 1:36:05
[epoch:   5 step: 55899] train loss: 40.8291 time: 1:36:11
[epoch:   5 step: 55949] train loss: 41.6531 time: 1:36:16
[epoch:   5 step: 55999] train loss: 40.4629 time: 1:36:22
[epoch:   5 step: 56049] train loss: 41.1039 time: 1:36:27
[epoch:   5 step: 56099] train loss: 41.3359 time: 1:36:33
[epoch:   5 step: 56149] train loss: 40.8365 time: 1:36:39
[epoch:   5 step: 56199] train loss: 41.9176 time: 1:36:44
[epoch:   5 step: 56249] train loss: 40.955 time: 1:36:49

[epoch:   6 step: 62349] train loss: 39.7076 time: 1:48:21
[epoch:   6 step: 62399] train loss: 39.6989 time: 1:48:27
[epoch:   6 step: 62449] train loss: 39.9126 time: 1:48:33
[epoch:   6 step: 62499] train loss: 39.2383 time: 1:48:38
[epoch:   6 step: 62549] train loss: 39.4263 time: 1:48:44
[epoch:   6 step: 62599] train loss: 39.8942 time: 1:48:49
[epoch:   6 step: 62649] train loss: 40.0938 time: 1:48:54
[epoch:   6 step: 62699] train loss: 39.7363 time: 1:49:00
[epoch:   6 step: 62749] train loss: 40.134 time: 1:49:05
[epoch:   6 step: 62799] train loss: 40.2432 time: 1:49:10
[epoch:   6 step: 62849] train loss: 40.2212 time: 1:49:16
[epoch:   6 step: 62899] train loss: 40.198 time: 1:49:21
[epoch:   6 step: 62949] train loss: 38.9524 time: 1:49:27
[epoch:   6 step: 62999] train loss: 40.3656 time: 1:49:32
[epoch:   6 step: 63049] train loss: 39.1759 time: 1:49:38
[epoch:   6 step: 63099] train loss: 40.4111 time: 1:49:44
[epoch:   6 step: 63149] train loss: 39.8672 time: 1:49:49

[epoch:   6 step: 69349] train loss: 40.2543 time: 2:00:54
[epoch:   6 step: 69399] train loss: 40.2037 time: 2:00:59
[epoch:   6 step: 69449] train loss: 39.8355 time: 2:01:04
[epoch:   6 step: 69499] train loss: 39.8587 time: 2:01:08
[epoch:   6 step: 69549] train loss: 40.3599 time: 2:01:13
[epoch:   6 step: 69599] train loss: 39.9755 time: 2:01:17
[epoch:   6 step: 69649] train loss: 40.0458 time: 2:01:22
[epoch:   6 step: 69699] train loss: 40.2031 time: 2:01:26
[epoch:   6 step: 69749] train loss: 40.2856 time: 2:01:31
[epoch:   6 step: 69799] train loss: 40.2214 time: 2:01:36
[epoch:   6 step: 69849] train loss: 40.0371 time: 2:01:40
[epoch:   6 step: 69899] train loss: 39.2914 time: 2:01:45
[epoch:   6 step: 69949] train loss: 40.8524 time: 2:01:49
[epoch:   6 step: 69999] train loss: 39.7954 time: 2:01:54
[epoch:   6 step: 70049] train loss: 40.1001 time: 2:01:59
[epoch:   6 step: 70099] train loss: 39.9248 time: 2:02:03
[epoch:   6 step: 70149] train loss: 40.5301 time: 2:02:

KeyboardInterrupt: 

In [20]:
from fastNLP import Tester
# Evaluate multisnli_model on the matched dev set (ds_dev) with AccuracyMetric.
tester = Tester(data=ds_dev, model=multisnli_model, metrics=AccuracyMetric())
acc = tester.test()

[tester] 
AccuracyMetric: acc=0.687825


In [26]:
# Evaluate multisnli_model on the mismatched dev set (ds_dev_mis) with AccuracyMetric.
tester_mis = Tester(data=ds_dev_mis, model=multisnli_model, metrics=AccuracyMetric())
acc_mis = tester_mis.test()

[tester] 
AccuracyMetric: acc=0.687856


In [32]:
# Evaluate multisnli_model_no_emb on the matched dev set (ds_dev).
# NOTE(review): this rebinds `acc`, the same name the multisnli_model
# evaluation cell assigns, so the earlier result is overwritten.
tester_no_emb = Tester(data=ds_dev, model=multisnli_model_no_emb, metrics=AccuracyMetric())
acc = tester_no_emb.test()

[tester] 
AccuracyMetric: acc=0.667244


In [33]:
# Evaluate multisnli_model_no_emb on the mismatched dev set (ds_dev_mis).
# NOTE(review): this rebinds `acc_mis`, the same name the multisnli_model
# mismatched evaluation cell assigns, so the earlier result is overwritten.
tester_mis_no_emb = Tester(data=ds_dev_mis, model=multisnli_model_no_emb, metrics=AccuracyMetric())
acc_mis = tester_mis_no_emb.test()

[tester] 
AccuracyMetric: acc=0.678804


In [21]:
# Serialise the whole model object (not only its state_dict) to disk.
# NOTE(review): a model saved this way is pickled by reference to its class,
# so loading it later presumably needs the `model` module that defines ESIM to
# be importable — confirm before relying on this file elsewhere.
torch.save(multisnli_model, 'multinli_model.pkl')