In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

# import spacy

import random
import math
import time

import collections
import pickle
import os

from nltk import bigrams, trigrams
import tensorflow as tf
import jieba

In [2]:
data_dir = './data/'
proj_dir = './'

In [3]:

class Vocabulary(object):

    def __init__(self):
        self.word2idx = {'<SOS>': 0, '<EOS>': 1, '<PAD>': 2, '<UNK>': 3}
        self.idx2word = {0: '<SOS>', 1: '<EOS>', 2: '<PAD>', 3: '<UNK>'}
        self.num_words = 4
        self.OOV_list = []
        self.OOV=0
        for i in range(10):
            word = str(i)
            self.word2idx[word] = self.num_words
            self.idx2word[self.num_words] = word
            self.num_words += 1           

    def build_vocab(self, data_path):
        """Construct the relation between words and indices"""
        i=0
        with open(data_path, 'r', encoding='utf-8') as dataset:
            for words in dataset:
                words = list(words.strip('\n').strip())
                if i<5:
                    print(words)
                i+=1
                for word in words:
                    if word not in self.word2idx:
                        self.word2idx[word] = self.num_words
                        self.idx2word[self.num_words] = word
                        self.num_words += 1

    def sequence_to_indices(self, sequence, add_eos=False, add_sos=False):
        """Transform a char sequence to index sequence
            :param sequence: a string composed with chars
            :param add_eos: if true, add the <EOS> tag at the end of given sentence
            :param add_sos: if true, add the <SOS> tag at the beginning of given sentence
        """
        index_sequence = [self.word2idx['<SOS>']] if add_sos else []

        for word in sequence:
            if word not in self.word2idx:
                self.OOV+=1
                self.OOV_list.append(word)
                index_sequence.append((self.word2idx['<UNK>']))
            else:
                index_sequence.append(self.word2idx[word])

        if add_eos:
            index_sequence.append(self.word2idx['<EOS>'])

        return index_sequence

    def indices_to_sequence(self, indices):
        """Transform a list of indices
            :param indices: a list
        """
        sequence = []
        for idx in indices:
            word = self.idx2word[idx]
            if word == "<EOS>":
                sequence.append(word)
                break
            else:
                sequence.append(word)
        return sequence

    def __str__(self):
        str = "Vocab information:\n"
        for idx, word in self.idx2word.items():
            str += "word: %s Index: %d\n" % (word, idx)
        return str

In [4]:
from torch.utils.data import DataLoader, Dataset, SequentialSampler, RandomSampler
import torch.nn.utils.rnn as rnn_utils
class TestData(Dataset):
    def __init__(self, path,max_length,corpus_path=None,vocab=None):
        self.src_indices_seq = []
        if vocab == None:
            self.vocab = Vocabulary()
            self.vocab.build_vocab(corpus_path)
        else:
            self.vocab = vocab
        self.PAD_ID = self.vocab.word2idx["<PAD>"]
        self.SOS_ID = self.vocab.word2idx["<SOS>"]
        self.vocab_size = self.vocab.num_words
        self.max_length = max_length
        self.raw_data = []
        self.raw_trg_word = []
        self.trg_pos = []
#         self.max_length = self.vocab.max_length
        file=open(path, 'r', encoding='utf-8')
        for words in file:
            parts = words.strip('\n').split('<EOS>')
            assert len(parts) == 2,'ERROR!'
            words = words.strip('\n').split()
            control_signal = parts[1].strip().split()
            control_cnt = len(control_signal) // 2
            trg_pos_t = []; trg_word_t = []
            for j in range(control_cnt):
                position, word = control_signal[j*2: j*2+2]
#                 print(position, word)
                trg_pos_t.append(int(position)); trg_word_t.append(word)
            self.trg_pos.append(trg_pos_t)
            self.raw_trg_word.append(trg_word_t)
            self.raw_data.append(words)
        assert len(self.raw_data)==len(self.trg_pos) and len(self.trg_pos)==len(self.raw_trg_word),'Error 2!'
        for i,sent in enumerate(self.raw_data):
            indices_seq = self.vocab.sequence_to_indices(sent, add_eos=False)
            self.src_indices_seq.append(torch.tensor(indices_seq))
        self.src_indices_seq = rnn_utils.pad_sequence(self.src_indices_seq, batch_first=True, padding_value=self.PAD_ID)
        print("## J: Total examples: %d, unique words:%d, Max seq length: %d"%(len(self.src_indices_seq),self.vocab_size,self.max_length))
    # def collate_fn(data):
    #     data = rnn_utils.pad_sequence(data, batch_first=True, padding_value=0)
    #     return data
    def __len__(self):
        return len(self.src_indices_seq)

    def __getitem__(self, idx):
        return torch.tensor(self.src_indices_seq[idx]),torch.tensor(self.src_indices_seq[idx])

In [5]:
class MyData(Dataset):
    def __init__(self, path,corpus_path=None,vocab=None):
        self.src_indices_seq = []
        self.trg_indices_seq = []
        if vocab == None:
            self.vocab = Vocabulary()
            self.vocab.build_vocab(corpus_path)
        else:
            self.vocab = vocab
        self.PAD_ID = self.vocab.word2idx["<PAD>"]
        self.SOS_ID = self.vocab.word2idx["<SOS>"]
        self.vocab_size = self.vocab.num_words
        self.max_length = -1
        self.raw_src_sent_data = [] #src sentence
        self.raw_trg_sent_data = [] #target sentence
        self.raw_trg_word = []
        self.trg_pos = []
#         self.max_length = self.vocab.max_length
        c=0
        file=open(path, 'r', encoding='utf-8')
        for line in file:
            pparts = line.strip('\n').split(',')
            assert len(pparts) == 2, 'Error!!'
            words,trg_sentence = pparts
            trg_sentence = trg_sentence.split()
            self.raw_trg_sent_data.append(trg_sentence)
            if trg_sentence[0]!='<SOS>':
                print('trg:',trg_sentence)
            if self.max_length < len(trg_sentence):
                self.max_length = len(trg_sentence)
            
            parts = words.strip('\n').split('<EOS>')
            assert len(parts) == 2,'ERROR!'
            src_sent = words.strip('\n').split()
            control_signal = parts[1].strip().split()
            control_cnt = len(control_signal) // 2
            trg_pos_t = []; trg_word_t = []
#             try:
            for j in range(control_cnt):
                position, word = control_signal[j*2: j*2+2]
#                 print(position, word)
                trg_pos_t.append(int(position)); trg_word_t.append(word)
#             except:
#                 print(src_sent,control_signal,position,word,trg_sentence)
            self.trg_pos.append(trg_pos_t)
            self.raw_trg_word.append(trg_word_t)
            self.raw_src_sent_data.append(src_sent)
            if src_sent[0]!='<SOS>':
                print('src:',src_sent)
            if self.max_length < len(src_sent):
                self.max_length = len(src_sent)
#             if self.max_length > 100:
#                 print(c,trg_sentence,src_sent)
#             c+=1
        assert len(self.raw_trg_sent_data)==len(self.raw_src_sent_data) and len(self.trg_pos)==len(self.raw_trg_word),'Error 2!'
        for i,trg_sent in enumerate(self.raw_trg_sent_data):
            indices_seq = self.vocab.sequence_to_indices(self.raw_src_sent_data[i], add_eos=False)
            self.src_indices_seq.append(torch.tensor(indices_seq))
            indices_seq = self.vocab.sequence_to_indices(trg_sent, add_eos=False)
            self.trg_indices_seq.append(torch.tensor(indices_seq))
        self.src_indices_seq = rnn_utils.pad_sequence(self.src_indices_seq, batch_first=True, padding_value=self.PAD_ID)
#         self.trg_pos = rnn_utils.pad_sequence(self.trg_pos, batch_first=True, padding_value=0)
        self.trg_indices_seq = rnn_utils.pad_sequence(self.trg_indices_seq, batch_first=True, padding_value=self.SOS_ID)
        print("## J: Total examples: %d, unique words:%d, Max seq length: %d"%(len(self.src_indices_seq),self.vocab_size,self.max_length))
    # def collate_fn(data):
    #     data = rnn_utils.pad_sequence(data, batch_first=True, padding_value=0)
    #     return data
    def __len__(self):
        return len(self.src_indices_seq)

    def __getitem__(self, idx):
        return torch.tensor(self.src_indices_seq[idx]), torch.tensor(self.trg_indices_seq[idx])


In [7]:
train_set = MyData(data_dir+'training_2w.txt',corpus_path=data_dir+'hw2.1_corpus.txt')
test_set = TestData(data_dir+'hw2.1-2_sample_testing_data.txt',max_length=train_set.max_length,vocab=train_set.vocab)

['心', '疼', '你', '还', '没', '挣', '脱', '思', '念', '的', '囚', '禁']
['他', '在', '你', '一', '段', '难', '忘', '远', '行', '最', '后', '却', '离', '去']
['你', '无', '力', '依', '靠', '在', '我', '这', '里']
['隔', '着', '刚', '被', '雨', '淋', '湿', '的', '玻', '璃']
['你', '问', '了', '我', '到', '底', '爱', '在', '哪', '里']
## J: Total examples: 719521, unique words:6573, Max seq length: 26
## J: Total examples: 100, unique words:6573, Max seq length: 26


In [10]:
dataset=test_set
for i in range(len(dataset.src_indices_seq)):
    print('----',i,'-----')
    print('data:',dataset.vocab.indices_to_sequence(dataset.src_indices_seq[i].numpy()))
    print('pos:',dataset.trg_pos[i],' word:',dataset.raw_trg_word[i])
#     print('trg:',dataset.vocab.indices_to_sequence(dataset.trg_indices_seq[i].numpy()))
    if i>100:
        break

---- 0 -----
data: ['<SOS>', '心', '疼', '你', '还', '没', '挣', '脱', '思', '念', '的', '囚', '禁', '<EOS>']
pos: [4]  word: ['一']
---- 1 -----
data: ['<SOS>', '他', '在', '你', '一', '段', '难', '忘', '远', '行', '最', '后', '却', '离', '去', '<EOS>']
pos: [8]  word: ['这']
---- 2 -----
data: ['<SOS>', '你', '无', '力', '依', '靠', '在', '我', '这', '里', '<EOS>']
pos: [4, 9]  word: ['被', '玻']
---- 3 -----
data: ['<SOS>', '隔', '着', '刚', '被', '雨', '淋', '湿', '的', '玻', '璃', '<EOS>']
pos: [7]  word: ['爱']
---- 4 -----
data: ['<SOS>', '你', '问', '了', '我', '到', '底', '爱', '在', '哪', '里', '<EOS>']
pos: [7]  word: ['的']
---- 5 -----
data: ['<SOS>', '你', '最', '想', '去', '的', '目', '的', '地', '剩', '多', '少', '公', '里', '<EOS>']
pos: [8, 10]  word: ['可', '暂']
---- 6 -----
data: ['<SOS>', '累', '了', '也', '没', '关', '系', '我', '可', '以', '暂', '停', '<EOS>']
pos: [2, 6]  word: ['中', '勇']
---- 7 -----
data: ['<SOS>', '途', '中', '先', '加', '满', '勇', '气', '<EOS>']
pos: [5]  word: ['问']
---- 8 -----
data: ['<SOS>', '能', '不', '能', '别', '问', '目', '的', '

In [7]:
# data_loader = DataLoader(train_set, batch_size=4, shuffle=True)
# batch_x = iter(data_loader).next()

In [8]:
batch_x[0].shape
# train_set.src_indices_seq.size()

NameError: name 'batch_x' is not defined

In [11]:
SEED = 1234

random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [12]:
device = torch.device('cuda')

In [13]:
class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        
        self.embedding = nn.Embedding(input_dim, emb_dim)
        
        # self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout = dropout)
        self.gru = nn.GRU(emb_dim, hid_dim, n_layers, dropout = dropout,batch_first=True)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, src):
        
        #src = [src sent len, batch size]
        
        embedded = self.dropout(self.embedding(src))
#         embedded = self.dropout(src)
        
        #embedded = [src sent len, batch size, emb dim]
        
        # outputs, (hidden, cell) = self.rnn(embedded)
        outputs, hidden = self.gru(embedded)
        
        #outputs = [src sent len, batch size, hid dim * n directions]
        #hidden = [n layers * n directions, batch size, hid dim]
        #cell = [n layers * n directions, batch size, hid dim]
        
        #outputs are always from the top hidden layer
        
        return hidden

In [14]:
class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        
        self.output_dim = output_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        
        self.embedding = nn.Embedding(output_dim, emb_dim)
        
        # self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout = dropout)
        self.gru = nn.GRU(emb_dim, hid_dim, n_layers, dropout = dropout,batch_first=True)
        
        self.out = nn.Linear(hid_dim, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, input, hidden):
        
        #input = [batch size]
        #hidden = [n layers * n directions, batch size, hid dim]
        #cell = [n layers * n directions, batch size, hid dim]
        
        #n directions in the decoder will both always be 1, therefore:
        #hidden = [n layers, batch size, hid dim]
        #context = [n layers, batch size, hid dim]
        
        input = input.unsqueeze(1)
        
#         #input = [1, batch size]
        
        embedded = self.dropout(self.embedding(input))
#         embedded = self.dropout(input)
        
        #embedded = [1, batch size, emb dim]
                
        # output, (hidden, cell) = self.rnn(embedded, (hidden, cell))
        output, hidden = self.gru(embedded,hidden)
        
        #output = [sent len, batch size, hid dim * n directions]
        #hidden = [n layers * n directions, batch size, hid dim]
        #cell = [n layers * n directions, batch size, hid dim]
        
        #sent len and n directions will always be 1 in the decoder, therefore:
        #output = [1, batch size, hid dim]
        #hidden = [n layers, batch size, hid dim]
        #cell = [n layers, batch size, hid dim]
        
        prediction = self.out(output.squeeze(1))
        
        #prediction = [batch size, output dim]
        
        return prediction, hidden
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
#         self.embedding = nn.Embedding(output_dim, emb_dim)
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        
        assert encoder.hid_dim == decoder.hid_dim, \
            "Hidden dimensions of encoder and decoder must be equal!"
        assert encoder.n_layers == decoder.n_layers, \
            "Encoder and decoder must have equal number of layers!"
        
    def forward(self, src, trg, teacher_forcing_ratio = 0.5):
        
        #src = [src sent len, batch size]
        #trg = [trg sent len, batch size]
        #teacher_forcing_ratio is probability to use teacher forcing
        #e.g. if teacher_forcing_ratio is 0.75 we use ground-truth inputs 75% of the time
        
        batch_size = trg.shape[0]
        max_len = trg.shape[1]
        trg_vocab_size = self.decoder.output_dim
        
        #tensor to store decoder outputs
#         outputs = torch.zeros(max_len, batch_size, trg_vocab_size).to(self.device)
        outputs = torch.zeros(batch_size, max_len, trg_vocab_size).to(self.device)
        outputs_idx = torch.zeros(batch_size, max_len-1)

        #last hidden state of the encoder is used as the initial hidden state of the decoder
        hidden = self.encoder(src)
#         embedded = self.embedding(src)
#         hidden = self.encoder(embedded)
        
        #first input to the decoder is the <sos> tokens
        input = trg[:,0]
        
        for t in range(1, max_len):
            
            #insert input token embedding, previous hidden and previous cell states
            #receive output tensor (predictions) and new hidden and cell states
            output, hidden= self.decoder(input, hidden)
#             input = input.unsqueeze(1)
#             embedded = self.embedding(input)
#             output, hidden= self.decoder(embedded, hidden)
            
            #place predictions in a tensor holding predictions for each token
            outputs[:,t,:] = output
            
            #decide if we are going to use teacher forcing or not
            teacher_force = random.random() < teacher_forcing_ratio
            
            #get the highest predicted token from our predictions
            top1 = output.argmax(1)
            outputs_idx[:,t-1] = top1
            
            #if teacher forcing, use actual next token as next input
            #if not, use predicted token
            input = trg[:,t] if teacher_force else top1
        
        return outputs,outputs_idx

In [15]:
INPUT_DIM = train_set.vocab_size
OUTPUT_DIM = train_set.vocab_size
ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
HID_DIM = 512
N_LAYERS = 1
ENC_DROPOUT = 0
DEC_DROPOUT = 0

enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)

model = Seq2Seq(enc, dec, device).to(device)

In [16]:
def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)
        
model.apply(init_weights)

Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(6573, 256)
    (gru): GRU(256, 512, batch_first=True)
    (dropout): Dropout(p=0, inplace=False)
  )
  (decoder): Decoder(
    (embedding): Embedding(6573, 256)
    (gru): GRU(256, 512, batch_first=True)
    (out): Linear(in_features=512, out_features=6573, bias=True)
    (dropout): Dropout(p=0, inplace=False)
  )
)

In [17]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 9,102,765 trainable parameters


In [18]:
optimizer = optim.Adam(model.parameters())

In [19]:
criterion = nn.CrossEntropyLoss(ignore_index = train_set.PAD_ID)

In [20]:
from tqdm import tqdm, trange
def train(model, optimizer, criterion, clip):
    model.train()
    
    epoch_loss = 0
    batch_size = 128
    data_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    trange = tqdm(enumerate(data_loader), total=len(data_loader),desc='Train')
    for step,batch in trange:
        # print(step)
        src = batch[0]
        trg = batch[1]
        src = src.to(device)
        trg = trg.to(device)       
        optimizer.zero_grad()
        # print(trg.size())
        output,output_idx = model(src, trg)
        
        #trg = [trg sent len, batch size]
        #output = [trg sent len, batch size, output dim]
        
        output = output[:,1:].reshape(-1, output.shape[-1])
        trg = trg[:,1:].reshape(-1)
        
        #trg = [(trg sent len - 1) * batch size]
        #output = [(trg sent len - 1) * batch size, output dim]
        
        loss = criterion(output, trg)
        
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        
        optimizer.step()
        
        epoch_loss += loss.item()
    return epoch_loss / len(data_loader)



In [21]:
def decode_string(output,dataset):
    results = []
    trg = []
    all_assign_cnt = 0
    correct_cnt = 0
    for i,seq in enumerate(output):
        result = ['<SOS>']+dataset.vocab.indices_to_sequence(seq)
        positions = dataset.trg_pos[i]
        trg_word = dataset.raw_trg_word[i]
#         positions = positions[positions!=0]
        assert len(positions) == len(trg_word),'Error!!!'
        all_assign_cnt += len(positions)
        pos_word = []
        for i,position in enumerate(positions):
            position = int(position)
            word = trg_word[i]
            pos_word.append(str(position)+word)
            if position < len(result) and result[position] == word:
                correct_cnt += 1
        results.append(' '.join(result))
        trg.append(pos_word)
        
    print('-----exm-----')
    print(results[:10])
    print(trg[:10])
    return results, correct_cnt / all_assign_cnt

def evaluate(model,criterion,dataset):
    prediction = []
    model.eval()
    
    epoch_loss = 0
    data_loader = DataLoader(dataset, batch_size=128, shuffle=False)
    trange = tqdm(enumerate(data_loader), total=len(data_loader),desc='valid')
    for step, batch in trange:
        src = batch[0]
        trg = batch[1]
        src = src.to(device)
        trg = trg.to(device)
        with torch.no_grad():
            output,output_idx = model(src, trg, 0) #turn off teacher forcing
            prediction.append(output_idx.to('cpu'))

            #trg = [trg sent len, batch size]
            #output = [trg sent len, batch size, output dim]

            output = output[:,1:].reshape(-1, output.shape[-1])
            trg = trg[:,1:].reshape(-1)

            #trg = [(trg sent len - 1) * batch size]
            #output = [(trg sent len - 1) * batch size, output dim]

            loss = criterion(output, trg)

            epoch_loss += loss.item()
    prediction = torch.cat(prediction).detach().numpy().astype(int)
    prediction,acc = decode_string(prediction,dataset)
    return epoch_loss / len(data_loader),prediction,acc

In [22]:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [23]:
def dd():
    return collections.defaultdict(int)
def _read_words(filename):
    with tf.gfile.GFile(filename, "r") as f:
        return f.read().replace("\n", " <eos> ").split() 
class Lm_utility():
    def __init__(self,tri_LM_path):
        f = open(tri_LM_path+'trigram_model.pkl', 'rb')
        self.model = pickle.load(f)
        f.close()

        f = open(tri_LM_path+'w2i.pkl', 'rb')
        self.word_to_id = pickle.load(f)
        f.close()
   
    def calculate_Lm_score(self,text):
        sentence_pool = []
        cnt_s = 0
        prob_all = 0
        for line in text:
            prob_s = 0
            cnt = 0
            # rule base filter
            line = line.replace(' ', '').strip()
            if len(line) == 0:
                prob_all += 0
                cnt_s += 1
                sentence_pool.append(set('\n'))
                continue
            counter = collections.Counter(line)
            split_line = list(jieba.cut(line))
            sentence_pool.append(set(split_line))
            common_cnt = 0
            last = None
            for w in split_line:
                if w == last:
                    common_cnt += 1
                last = w
            if common_cnt >= 2:
                prob_all += 0
                cnt_s += 1
                continue

            # trigram LM
            splitted_line = ['<sos>', '<sos>'] + split_line + ['<eos>', '<eos>']
            for w1, w2, w3 in zip(splitted_line, splitted_line[1:], splitted_line[2:]):
                if (w1, w2) not in self.model:
                    continue
                else:
                    prob_s += max(self.model[(w1, w2)][w3], self.model[(w1, w2)]['BACKGROUND'])
                cnt += 1
            if prob_s == 0:
                prob_all += 0
            else:
                prob_s /= max(cnt, 1)
                prob_all += 10 ** prob_s
            cnt_s += 1

        # jaccard distance of randomly chosen sentences pairs
        sample = 0
        jaccard = 0
        while sample < 3000:
            r1 = random.randint(0, cnt_s-1)
            r2 = random.randint(0, cnt_s-1)
            if r1 == r2:
                continue
            s1 = sentence_pool[r1]
            s2 = sentence_pool[r2]
            jaccard += (len(s1 & s2)/len(s1 | s2))
            sample += 1
        LM_score = prob_all/cnt_s * 1000
        print ('LM score: ',LM_score)
        print ('Jaccard Distance score: ' + str(jaccard/sample))
        return LM_score

In [None]:
N_EPOCHS = 10
CLIP = 1

best_valid_loss = float('inf')
best_valid_acc = 0
train_lm_score = -1
history = {'train':[],'val':[]}
lm_utility = Lm_utility(data_dir+'trigram_model/')
for epoch in range(N_EPOCHS):
    
    start_time = time.time()
    
    train_loss = train(model, optimizer, criterion, CLIP)
    train_loss_2,predictions,acc = evaluate(model, criterion,train_set)
#     train_lm_score = lm_utility.calculate_Lm_score(predictions)
    history['train'].append({'loss':train_loss,'acc':acc,'lm':train_lm_score})
    valid_loss,valid_predict,valid_acc = evaluate(model, criterion,test_set)
    valid_lm_score = lm_utility.calculate_Lm_score(valid_predict)
    end_time = time.time()
    
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model, './output/Task2_1_2_model.pkl') 
        torch.save(model.state_dict(), './output/Task2_1_2_model.pt')
    history['val'].append({'loss':valid_loss,'acc':valid_acc,'lm':valid_lm_score})
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f} | Train acc: {acc:.3f}| val LM: {train_lm_score:.3f}')
#     print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f} | val acc: {valid_acc:.3f} | val LM: {valid_lm_score:.3f}')

Train: 100%|██████████| 5622/5622 [09:50<00:00,  9.52it/s]
valid: 100%|██████████| 5622/5622 [04:39<00:00, 20.11it/s]
valid: 100%|██████████| 1/1 [00:00<00:00, 26.66it/s]
Building prefix dict from the default dictionary ...
Loading model from cache /tmp/jieba.cache


-----exm-----
['<SOS> 我 是 我 的 我 难 过 的 你 最 后 <EOS>', '<SOS> 爱 无 力 <EOS>', '<SOS> 我 你 的 我 的 我 的 的 玻 璃 <EOS>', '<SOS> 我 在 我 的 的 在 我 的 里 里 <EOS>', '<SOS> 你 是 我 的 的 目 光 <EOS>', '<SOS> 我 是 我 没 有 什 么 可 以 <EOS>', '<SOS> 我 是 我 的 的 勇 气 <EOS>', '<SOS> 我 不 能 够 到 你 的 <EOS>', '<SOS> 让 我 们 起 去 <EOS>', '<SOS> 让 我 们 一 起 接 近 <EOS>']
[['6难', '11后'], ['3力', '2无'], ['9玻'], ['10里'], ['6目'], ['8可', '4没'], ['7气'], ['7的', '3能'], ['1让', '5去'], ['7近']]
-----exm-----
['<SOS> 我 是 我 一 个 <EOS>', '<SOS> 这 样 的 是 你 <EOS>', '<SOS> 我 是 你 被 你 的 的 的 玻 璃 <EOS>', '<SOS> 我 是 我 的 我 的 爱 <EOS>', '<SOS> 我 是 我 我 我 你 的 爱 <EOS>', '<SOS> 你 是 我 的 我 不 可 可 以 暂 时 <EOS>', '<SOS> 心 中 的 有 多 勇 敢 <EOS>', '<SOS> 我 不 我 不 问 <EOS>', '<SOS> 我 不 带 你 的 <EOS>', '<SOS> 我 我 的 心 里 很 近 <EOS>']
[['4一'], ['8这'], ['4被', '9玻'], ['7爱'], ['7的'], ['8可', '10暂'], ['2中', '6勇'], ['5问'], ['3带', '4你'], ['6很', '7近']]


Loading model cost 0.619 seconds.
Prefix dict has been built succesfully.
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
Train:   0%|          | 0/5622 [00:00<?, ?it/s]

LM score:  64.64719290928852
Jaccard Distance score: 0.4710754134754149
Epoch: 01 | Time: 14m 38s
	Train Loss: 2.155 | Train PPL:   8.630 | Train acc: 0.937| val LM: -1.000
	 Val. Loss: 9.649 |  Val. PPL: 15510.418 | val acc: 0.938 | val LM: 64.647


Train:  53%|█████▎    | 2961/5622 [05:08<04:43,  9.39it/s]

In [25]:
import pickle
pickle.dump(history,open('./output/task2_1_2_history0.pkl','wb'))
history

{'train': [{'loss': 2.1552482512827575, 'acc': 0.936892617723683, 'lm': -1},
  {'loss': 1.8443456195461838, 'acc': 0.9452894455887118, 'lm': -1},
  {'loss': 1.7802427597675234, 'acc': 0.9441132294776784, 'lm': -1},
  {'loss': 1.7328689797244348, 'acc': 0.9452781525241303, 'lm': -1},
  {'loss': 1.6977374463563117, 'acc': 0.9479268105171442, 'lm': -1},
  {'loss': 1.6687043663107683, 'acc': 0.9448898448419796, 'lm': -1},
  {'loss': 1.642506844820988, 'acc': 0.9440037736209648, 'lm': -1},
  {'loss': 1.6282475776453538, 'acc': 0.9469755870004665, 'lm': -1},
  {'loss': 1.6083944619062405, 'acc': 0.9469608191467829, 'lm': -1},
  {'loss': 1.5926768426070728, 'acc': 0.9442061800861574, 'lm': -1}],
 'val': [{'loss': 9.649267196655273, 'acc': 0.9375, 'lm': 64.64719290928852},
  {'loss': 10.220865249633789, 'acc': 0.9375, 'lm': 44.500095963576975},
  {'loss': 10.506912231445312,
   'acc': 0.9236111111111112,
   'lm': 49.67632675711894},
  {'loss': 10.517874717712402, 'acc': 0.9375, 'lm': 34.025929

In [16]:

valid_loss,valid_predict,valid_acc = evaluate(model, criterion,test_set)
valid_lm_score = lm_utility.calculate_Lm_score(valid_predict)

array([[[1, 1],
        [2, 1],
        [3, 1]],

       [[4, 1],
        [5, 1],
        [6, 1]],

       [[7, 1],
        [8, 1],
        [9, 1]]])

array([[[2, 1],
        [3, 1]],

       [[5, 1],
        [6, 1]],

       [[8, 1],
        [9, 1]]])

In [50]:
a[:,2]

array([3, 6, 9])

In [66]:
m = nn.Dropout(p=0.2)
input = torch.randn(10,1)
output = m(input)
output

tensor([[-1.1133],
        [ 0.0000],
        [ 1.9238],
        [ 1.8364],
        [ 0.4400],
        [-1.7066],
        [-2.8024],
        [-2.0669],
        [-0.0000],
        [ 0.8218]])

In [9]:
outputs = torch.zeros(3, 2, 4)
a = torch.ones(3,4)*2

In [29]:
outputs[:,0,:] = a

In [15]:
if 2.10 in a[0]:
    print('Y')

In [18]:
a=torch.tensor(a)
a

tensor([[[1, 1],
         [2, 1],
         [3, 1]],

        [[4, 1],
         [5, 1],
         [6, 1]],

        [[7, 1],
         [8, 1],
         [9, 1]]])

In [38]:
b=np.array([[1,2],[0,1],[2,0]])
a[[range(3)],b[:,1],:]

tensor([[[3, 1],
         [5, 1],
         [7, 1]]])

In [70]:
b=np.array([1,2,0,0])
b[b!=0]

array([1, 2])

In [71]:
a=[1,'a']
a

[1, 'a']

In [122]:
class a():
    def __init__(self):
        print('ok')
    def b(self,i):
        print('b')

In [123]:
aa =a()
aa.b(1)

ok
b


In [111]:
type(lm_utility)

__main__.Lm_utility

In [25]:
import numpy as np
a=np.array([[1]])
a = a[a!=0]

In [26]:
a

array([1])