In [None]:
import json
import torch.optim as optim
import numpy as np
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import random
import torch
from io import open
import traceback
import time
import argparse
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import os

class Args:
    """Plain container for the notebook's hyper-parameters and data paths.

    Every value is a hard-coded default set in ``__init__``; later cells read
    them through the module-level ``args`` instance.
    """

    def __init__(self):
        # --- input representation ---
        self.ebd_size = 100          # word-embedding width (used as the LSTM input size)
        self.max_wds = 100           # sentences are padded/truncated to this many words

        # --- layer sizes ---
        # k_size / out_cs are not referenced in the visible cells;
        # presumably CNN kernel size / output channels -- TODO confirm.
        self.k_size = 2
        self.out_cs = 300
        self.l1_out = 200            # output width of the first fully connected layer
        self.rnn_h = 200             # LSTM hidden size
        self.rnn_l = 1               # number of stacked LSTM layers
        self.bidirectional = True
        self.attn_h = 100            # not referenced in the visible cells -- TODO confirm use

        # --- optimisation schedule ---
        self.batch_size = 8
        self.epoches = 100

        # --- data locations (relative paths) ---
        self.dir_ebd = 'data/sogou_100_nobinary'
        self.dir_train = 'data/nlpcc-train-data-par'
        self.dir_test = 'data/nlpcc-iccpol-2016.dbqa.testing-labeled-data-par'
        self.dir_dev = 'data/nlpcc-dev-data-part'

        # --- misc ---
        self.model = 'LSTM'          # compared against 'CNN' when shaping sentence embeddings
        self.print_per_steps = 1000  # logging interval, in steps
# Module-level configuration instance; functions below read it as a global.
args = Args()
def log(log_inf):
    """Write ``log_inf`` to standard output (thin wrapper around ``print``)."""
    print(log_inf)
# Seed PyTorch's random number generator.
torch.manual_seed(1)
# True when PyTorch reports a usable CUDA device; passed around below to decide on .cuda() transfers.
cuda_able = torch.cuda.is_available()

In [None]:
def load_ebd(dir_ebd, embedding_size):
    """Load space-separated word vectors from a UTF-8 text file.

    Each line is expected to look like ``word v1 v2 ... vN`` with exactly
    ``embedding_size`` numeric values after the word.

    Args:
        dir_ebd: path of the embedding text file.
        embedding_size: number of float components expected per word.

    Returns:
        dict mapping each word to a 1-D ``numpy`` array of floats.

    Blank lines are skipped. On the first malformed line (wrong field count
    or a non-numeric component) the offending line is printed and loading
    stops; the entries read so far are returned.
    """
    embedding = {}
    # `with` guarantees the handle is closed even if reading fails part-way.
    with open(dir_ebd, "r", encoding='utf-8') as f:
        for line in f:
            stripped = line.strip(' \n')
            if not stripped:
                continue
            content = stripped.split(' ')
            try:
                if len(content) != embedding_size + 1:
                    raise ValueError('unexpected number of fields')
                vector = np.array([float(i) for i in content[1:]])
            except ValueError:
                # `content` is a list, so it must be joined before it can be
                # concatenated onto the message.
                print('loading embedding error!\n' + ' '.join(content))
                break
            embedding[content[0]] = vector
    return embedding

In [None]:
# Sentence embedding helper: look up word vectors and pad/truncate to a fixed length
def get_sentence_embedding(s, out_size, embedding, embedding_size):
    """Turn a tokenised sentence into a fixed-length list of word vectors.

    Args:
        s: iterable of word tokens.
        out_size: number of vectors in the result.
        embedding: mapping from word to its vector.
        embedding_size: length of the zero vector used for padding.

    Returns:
        A list of ``out_size`` vectors. Words missing from ``embedding`` are
        dropped; shorter sentences are zero-padded at the end and longer ones
        are truncated. When the global ``args.model`` is ``'CNN'`` the list is
        wrapped in one extra outer list.
    """
    vectors = [embedding[token] for token in s if token in embedding]

    shortfall = out_size - len(vectors)
    if shortfall > 0:
        # Zero padding: the same zero-vector object is appended repeatedly.
        zero_vector = [0.0] * embedding_size
        vectors.extend(zero_vector for _ in range(shortfall))
    elif shortfall < 0:
        vectors = vectors[:out_size]

    return [vectors] if args.model == 'CNN' else vectors

In [None]:
class NewLSTM(nn.Module):
    """Question/answer matcher built on one shared LSTM encoder.

    Both sentences are run through the same ``nn.LSTM``. Each output sequence
    is max-pooled by ``self.pool`` (kernel ``(max_words, 1)``), passed through
    ``tanh`` and flattened. The two feature vectors are concatenated and
    mapped by two linear layers plus a sigmoid to one score per pair.

    Args:
        batch_size: default batch size used by ``init_hidden()``.
        ebd_size: word-embedding width (LSTM input size).
        rnn_hidden: LSTM hidden size.
        rnn_layers: number of stacked LSTM layers.
        l1_out: output width of the first linear layer.
        bidirectional: whether the LSTM is bidirectional.
        max_words: pooling window along the sequence axis; inputs are expected
            to have exactly this many time steps so that the pooled features
            match the input width of ``fc1``.
        cuda_able: when true, ``init_hidden()`` without a reference tensor
            allocates the state on the GPU.
    """

    def __init__(self, batch_size, ebd_size, rnn_hidden, rnn_layers, l1_out, bidirectional, max_words, cuda_able):
        super(NewLSTM, self).__init__()
        self.batch_size = batch_size
        self.cuda_able = cuda_able
        self.rnn_layers = rnn_layers
        self.rnn_hidden = rnn_hidden
        self.num_directions = 2 if bidirectional else 1
        self.lstm = nn.LSTM(input_size=ebd_size, hidden_size=rnn_hidden, num_layers=self.rnn_layers,
                            batch_first=True, bidirectional=bidirectional)
        self.hidden = self.init_hidden()
        # Question and answer features are concatenated, hence the factor 2.
        self.fc1 = nn.Linear(self.num_directions * rnn_hidden * 2, l1_out)
        self.fc2 = nn.Linear(l1_out, 1)
        self.pool = nn.MaxPool2d(kernel_size=(max_words, 1))

    def init_hidden(self, batch_size=None, like=None):
        """Return a fresh all-zero ``(h_0, c_0)`` pair for the LSTM.

        Args:
            batch_size: batch dimension of the state; defaults to
                ``self.batch_size``.
            like: optional reference tensor. When given, the state is created
                with its dtype and on its device, so it always matches the
                data fed to the LSTM. When omitted, the ``cuda_able`` flag
                decides between CPU and GPU.
        """
        if batch_size is None:
            batch_size = self.batch_size
        shape = (self.num_directions * self.rnn_layers, batch_size, self.rnn_hidden)
        if like is not None:
            return like.new_zeros(shape), like.new_zeros(shape)
        h_0 = torch.zeros(shape)
        c_0 = torch.zeros(shape)
        if self.cuda_able:
            h_0, c_0 = h_0.cuda(), c_0.cuda()
        return h_0, c_0

    def _encode(self, seq):
        """Encode one batch of sentences into flat pooled feature vectors."""
        # Start from a zero state sized and placed to match this batch.
        self.hidden = self.init_hidden(seq.size(0), like=seq)
        # out: batch_size x seq_len x (rnn_hidden * num_directions)
        out, self.hidden = self.lstm(seq, self.hidden)
        pooled = torch.tanh(self.pool(out))
        return pooled.view(pooled.size(0), -1)

    def forward(self, e_q, e_a, batch_size=None):
        """Score each question/answer pair.

        Args:
            e_q: question embeddings, batch-first (the LSTM uses ``batch_first=True``).
            e_a: answer embeddings, same layout as ``e_q``.
            batch_size: kept for backward compatibility; the batch size is now
                read from the tensors themselves, so this value is not needed.

        Returns:
            1-D tensor with one sigmoid score per pair.
        """
        self.batch_size = e_q.size(0)
        h_q = self._encode(e_q)
        h_a = self._encode(e_a)
        cat_q_a = torch.cat((h_q, h_a), 1)
        cat_q_a = self.fc2(self.fc1(cat_q_a))
        p = torch.sigmoid(cat_q_a.view(cat_q_a.size(0)))
        return p

In [103]:
def testNetACC(net, dir_test, max_words, embedding, embedding_size, cuda_able):
    """Measure classification accuracy of ``net`` on a labelled file.

    Each line of ``dir_test`` must be ``question<TAB>answer<TAB>label`` where
    question and answer are space-separated tokens and label is an integer.
    A pair counts as correct when the score is ``>= 0.5`` and the label is 1,
    or the score is ``< 0.5`` and the label is 0.

    Args:
        net: model called as ``net(e_q, e_a, 1)`` on one pair at a time.
        dir_test: path of the file to evaluate on.
        max_words: sentence length passed to ``get_sentence_embedding``.
        embedding: word -> vector mapping.
        embedding_size: width of the padding vectors.
        cuda_able: move the inputs to the GPU before calling ``net``.

    Returns:
        Fraction of correctly classified lines (``0.0`` for an empty file).

    Raises:
        AssertionError: a line does not have exactly three tab-separated fields.
    """
    log('start test on ' + dir_test + '...')
    # Read the file that was actually requested, not a globally configured one.
    with open(dir_test, 'r', encoding='utf-8') as f:
        data = [line for line in f]

    right_count = 0
    # One pass only: evaluation needs no gradients and no repetition.
    with torch.no_grad():
        for count_step, line in enumerate(data, start=1):
            arr = line.strip('\n').split('\t')
            assert(len(arr) == 3)
            question = arr[0]
            answer = arr[1]
            target = int(arr[2])
            question_ebd = get_sentence_embedding(question.split(' '), max_words, embedding, embedding_size)
            answer_ebd = get_sentence_embedding(answer.split(' '), max_words, embedding, embedding_size)
            # Wrap each sentence in a batch of size one.
            e_q, e_a = torch.Tensor([question_ebd]), torch.Tensor([answer_ebd])
            if cuda_able:
                e_q, e_a = e_q.cuda(), e_a.cuda()
            p = net(e_q, e_a, 1)
            predicted = 1 if p[0].item() >= 0.5 else 0
            if predicted == target:
                right_count += 1

            if count_step % args.print_per_steps == 0:
                # Running accuracy over the lines seen so far.
                log('[%5d] acc: %f' % (count_step, right_count / count_step))

    ACC = right_count / len(data) if data else 0.0
    log('Finish tresting, Final ACC:%f' % (ACC))
    return ACC

In [None]:
def train(net, args, embedding, loss_func, cuda_able, optimizer):
    """Train ``net`` on ``args.dir_train`` and evaluate after every epoch.

    Each training line must be ``question<TAB>answer<TAB>label`` with
    space-separated tokens and an integer label. Lines are grouped, in file
    order, into batches of ``args.batch_size``; a trailing incomplete batch
    at the end of an epoch is discarded. After each epoch ``testNetACC`` is
    called with ``args.dir_dev`` and the best accuracy and its epoch are
    remembered for the final log line.

    Args:
        net: model called as ``net(e_q, e_a, args.batch_size)``.
        args: configuration object (reads ``dir_train``, ``dir_dev``,
            ``epoches``, ``batch_size``, ``max_wds``, ``ebd_size``,
            ``print_per_steps``).
        embedding: word -> vector mapping.
        loss_func: callable ``loss_func(scores, targets)`` returning a scalar tensor.
        cuda_able: move each batch to the GPU before the forward pass.
        optimizer: optimizer over ``net``'s parameters.

    Raises:
        AssertionError: a line does not have exactly three tab-separated fields.
    """
    log('training on '+args.dir_train)
    with open(args.dir_train, 'r', encoding='utf-8') as f:
        data = [line for line in f]

    count_step = 0
    max_ACC = 0
    max_epoch = -1  # stays -1 (logged as epoch 0) if no epoch ever beats max_ACC
    running_loss = 0.0
    for epoch in range(args.epoches):  # loop over the dataset multiple times
        batch_q, batch_a, batch_targets = [], [], []
        for line in data:
            arr = line.strip('\n').split('\t')
            assert(len(arr) == 3)
            question = arr[0]
            answer = arr[1]
            target = int(arr[2])
            batch_q.append(get_sentence_embedding(question.split(' '), args.max_wds, embedding, args.ebd_size))
            batch_a.append(get_sentence_embedding(answer.split(' '), args.max_wds, embedding, args.ebd_size))
            batch_targets.append(target)

            if len(batch_q) == args.batch_size:
                e_q, e_a, targets = torch.Tensor(batch_q), torch.Tensor(batch_a), torch.Tensor(batch_targets)
                if cuda_able:
                    e_q, e_a, targets = e_q.cuda(), e_a.cuda(), targets.cuda()

                # zero the parameter gradients, then forward + backward + optimize
                optimizer.zero_grad()
                p = net(e_q, e_a, args.batch_size)
                loss = loss_func(p, targets)
                loss.backward()
                optimizer.step()

                count_step += 1
                # .item() extracts the Python float from the 0-dim loss tensor.
                running_loss += loss.item()
                if count_step % args.print_per_steps == 0:
                    # Mean loss over exactly the last print_per_steps steps.
                    log('[%d, %5d] loss: %f' %
                        (epoch + 1, count_step, running_loss / args.print_per_steps))
                    running_loss = 0.0

                # start the next batch
                batch_q, batch_a, batch_targets = [], [], []

        ACC = testNetACC(net, args.dir_dev, args.max_wds, embedding, args.ebd_size, cuda_able)
        if ACC > max_ACC:
            max_ACC = ACC
            max_epoch = epoch
    log('Finish training, max ACC:%f,and its epoch:%d' % (max_ACC, max_epoch+1))

In [None]:
# Seed PyTorch's RNG and probe for CUDA (repeats the two statements from the first cell).
torch.manual_seed(1)
cuda_able = torch.cuda.is_available()
# Binary cross-entropy loss, used as the training criterion.
loss_func = nn.BCELoss()
# Word -> vector table read from the embedding file configured in args.
embedding = load_ebd(args.dir_ebd, args.ebd_size)

In [None]:
# Build the model defined above (the class is named NewLSTM).
net = NewLSTM(args.batch_size,args.ebd_size,args.rnn_h,args.rnn_l,args.l1_out,args.bidirectional,args.max_wds,cuda_able)
# Training moves every batch to the GPU when cuda_able is set, so the
# parameters must live there too. Do this before the optimizer is created.
if cuda_able:
    net = net.cuda()
optimizer = optim.Adamax(net.parameters())
train(net, args, embedding, loss_func, cuda_able, optimizer)

In [104]:
# Run the accuracy evaluation, passing the dev-set path (args.dir_dev); the returned value is the cell's output.
testNetACC(net, args.dir_dev, args.max_wds, embedding, args.ebd_size, cuda_able)

start test on data/nlpcc-dev-data-part...
right
right
right
right
right
wrong
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
right
wrong
right
right
right
right
right
right
right
right
right


KeyboardInterrupt: 

In [None]:
# Shape smoke test: push random CPU tensors through a freshly initialised model.
# NOTE: this rebinds `net` and `loss_func`; any previously trained `net` is replaced.
e_q = Variable(torch.randn(64,100,100))
e_a = Variable(torch.randn(64,100,100))
# The inputs stay on the CPU, so the model is built with cuda_able=False.
net = NewLSTM(args.batch_size,args.ebd_size,args.rnn_h,args.rnn_l,args.l1_out,args.bidirectional,args.max_wds,False)
# The batch size handed to forward must match the tensors (64 here, not args.batch_size).
net(e_q,e_a,e_q.size(0))
loss_func = nn.BCELoss()

In [None]:
# Scratch experiment: BCELoss constructed with a 5-element `weight` tensor.
weight=torch.Tensor([0.2,0.8,0.3,0.5,0.4])
print(weight.size())
loss = nn.BCELoss(weight=weight)
# Five random values squashed by sigmoid. NOTE: `input` shadows the Python builtin of the same name.
input = Variable(torch.sigmoid(torch.randn(5)), requires_grad=True)
print(input)
# Five random targets drawn by random_(2).
target = Variable(torch.Tensor(5).random_(2))
print(target)
output = loss(input, target)
# Back-propagate from the loss value.
output.backward()

In [None]:
# Scratch expression; evaluates to True.
True != False

In [None]:
# Scratch check: apply torch.sigmoid to a random 64x1 tensor and display the result.
a=torch.randn(64,1)
torch.sigmoid(a)