In [1]:
import sys
import random
import torch
import importlib
from tensorboardX import SummaryWriter
import torch.nn.utils.rnn as rnn_utils
import pickle
import tqdm
import os
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from collections import deque
sys.path.append(".")
from dataUtilsV0 import *
import json
from RDM_Model import *
from CM_Model import *
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

load glove finished


In [2]:

class LayerNormLSTMCell(nn.LSTMCell):
    """LSTM cell with optional layer normalization and dropout on the recurrent weight.

    When ``use_layer_norm`` is true, a separate ``nn.LayerNorm`` is applied to
    the input-to-hidden and to the hidden-to-hidden pre-activations (each of
    width ``4 * hidden_size``) before they are summed, and a third one is
    applied to ``tanh`` of the new cell state before the output gate.

    Args:
        input_size: Number of features of each input row.
        hidden_size: Number of features of the hidden and cell states.
        dropout: Probability passed to ``F.dropout`` on ``weight_hh`` while the
            module is in training mode.
        bias: Forwarded to ``nn.LSTMCell``.
        use_layer_norm: Enables the three ``nn.LayerNorm`` layers.
    """

    def __init__(self, input_size, hidden_size, dropout=0.0, bias=True, use_layer_norm=True):
        super().__init__(input_size, hidden_size, bias)
        self.use_layer_norm = use_layer_norm
        if self.use_layer_norm:
            self.ln_ih = nn.LayerNorm(4 * hidden_size)
            self.ln_hh = nn.LayerNorm(4 * hidden_size)
            self.ln_ho = nn.LayerNorm(hidden_size)
        # DropConnect on the recurrent hidden to hidden weight
        self.dropout = dropout

    def _check_input(self, input):
        """Raise RuntimeError when the feature width of ``input`` is not ``input_size``."""
        if input.size(1) != self.input_size:
            raise RuntimeError(
                "input has inconsistent input_size: got {}, expected {}".format(
                    input.size(1), self.input_size))

    def _check_hidden(self, input, hx, hidden_label=''):
        """Raise RuntimeError when a state tensor disagrees with the batch or hidden size."""
        if input.size(0) != hx.size(0):
            raise RuntimeError(
                "Input batch size {} doesn't match hidden{} batch size {}".format(
                    input.size(0), hidden_label, hx.size(0)))
        if hx.size(1) != self.hidden_size:
            raise RuntimeError(
                "hidden{} has inconsistent hidden_size: got {}, expected {}".format(
                    hidden_label, hx.size(1), self.hidden_size))

    def forward(self, input, hidden=None):
        """Run one time step.

        Args:
            input: Tensor of shape ``[batch, input_size]``.
            hidden: Optional ``(hx, cx)`` pair, each ``[batch, hidden_size]``;
                zeros are used when omitted.

        Returns:
            ``(hy, cy)``: the new hidden and cell states, each
            ``[batch, hidden_size]``.

        Raises:
            RuntimeError: If the input or state shapes are inconsistent with
                the cell's ``input_size`` / ``hidden_size``.
        """
        # The validation helpers that older PyTorch exposed on RNNCellBase
        # (check_forward_input / check_forward_hidden) are gone in current
        # releases, so the same checks are performed locally.
        self._check_input(input)
        if hidden is None:
            hx = input.new_zeros(input.size(0), self.hidden_size, requires_grad=False)
            cx = input.new_zeros(input.size(0), self.hidden_size, requires_grad=False)
        else:
            hx, cx = hidden
        self._check_hidden(input, hx, '[0]')
        self._check_hidden(input, cx, '[1]')

        # Dropout is applied to the recurrent weight matrix itself, not to activations.
        weight_hh = nn.functional.dropout(self.weight_hh, p=self.dropout, training=self.training)
        if self.use_layer_norm:
            gates = self.ln_ih(F.linear(input, self.weight_ih, self.bias_ih)) \
                     + self.ln_hh(F.linear(hx, weight_hh, self.bias_hh))
        else:
            gates = F.linear(input, self.weight_ih, self.bias_ih) \
                    + F.linear(hx, weight_hh, self.bias_hh)

        # Split the 4*hidden_size pre-activations into input, forget, candidate, output.
        i, f, c, o = gates.chunk(4, 1)
        i_ = torch.sigmoid(i)
        f_ = torch.sigmoid(f)
        c_ = torch.tanh(c)
        o_ = torch.sigmoid(o)
        cy = (f_ * cx) + (i_ * c_)
        if self.use_layer_norm:
            hy = o_ * self.ln_ho(torch.tanh(cy))
        else:
            hy = o_ * torch.tanh(cy)
        return hy, cy

class LayerNormLSTM(nn.Module):
    """Multi-layer, optionally bidirectional LSTM unrolled step by step from ``LayerNormLSTMCell``.

    Args:
        input_size: Feature width of the input sequence.
        hidden_size: Feature width of each direction's hidden state.
        num_layers: Number of stacked layers.
        dropout: Probability of the per-layer output dropout mask. The mask is
            sampled once per forward call and reused at every time step.
        weight_dropout: Forwarded to each cell as its recurrent-weight dropout.
        bias: Forwarded to each cell.
        bidirectional: Adds a second stack of cells (``hidden1``) that reads
            the sequence in reverse.
        use_layer_norm: Forwarded to each cell.
    """

    def __init__(self,
                 input_size,
                 hidden_size,
                 num_layers=1,
                 dropout=0.0,
                 weight_dropout=0.0,
                 bias=True,
                 bidirectional=False,
                 use_layer_norm=True):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # using variational dropout
        self.dropout = dropout
        self.bidirectional = bidirectional

        num_directions = 2 if bidirectional else 1
        # Forward-direction cells; layers above the first consume the
        # concatenated outputs of every direction of the layer below.
        self.hidden0 = nn.ModuleList([
            LayerNormLSTMCell(input_size=(input_size if layer == 0 else hidden_size * num_directions),
                              hidden_size=hidden_size, dropout=weight_dropout, bias=bias, use_layer_norm=use_layer_norm)
            for layer in range(num_layers)
        ])

        if self.bidirectional:
            # Reverse-direction cells.
            self.hidden1 = nn.ModuleList([
                LayerNormLSTMCell(input_size=(input_size if layer == 0 else hidden_size * num_directions),
                                  hidden_size=hidden_size, dropout=weight_dropout, bias=bias, use_layer_norm=use_layer_norm)
                for layer in range(num_layers)
            ])

    def copy_parameters(self, rnn_old):
        """Re-register the parameters of ``rnn_old`` on this module's cells.

        Parameter names are split on ``"_"``: the first two tokens form the
        cell-level name, the third token (``l<k>``) selects the layer, and the
        presence of a fourth token routes the parameter to ``hidden1``.
        Presumably ``rnn_old`` is an ``nn.LSTM`` (names such as
        ``weight_ih_l0`` / ``weight_ih_l0_reverse``) -- confirm at the call site.
        The very same ``Parameter`` objects are registered, not copies.
        """
        for param in rnn_old.named_parameters():
            name_ = param[0].split("_")
            layer = int(name_[2].replace("l", ""))
            sub_name = "_".join(name_[:2])
            if len(name_) > 3:
                self.hidden1[layer].register_parameter(sub_name, param[1])
            else:
                self.hidden0[layer].register_parameter(sub_name, param[1])

    def forward(self, input, hidden=None, seq_lens=None):
        """Run the stack over a time-major sequence.

        Args:
            input: Tensor of shape ``[seq_len, batch, input_size]``.
            hidden: Optional ``(hx, cx)`` pair, each
                ``[num_layers * num_directions, batch, hidden_size]``; zeros
                when omitted.
            seq_lens: Optional per-sample valid lengths (each expected to be
                at least 1). Steps at or beyond a sample's length have their
                hidden and cell states zeroed. When omitted, every sample is
                treated as full length.

        Returns:
            ``(y, (hy, cy))``. ``y`` holds the (dropout-masked) outputs of the
            top layer for every step. ``hy`` / ``cy`` hold, per layer and
            direction, the state at each sample's last valid step (step 0 for
            the reverse direction).
        """
        seq_len, batch_size, _ = input.size()
        num_directions = 2 if self.bidirectional else 1
        if hidden is None:
            hx = input.new_zeros(self.num_layers * num_directions, batch_size, self.hidden_size, requires_grad=False)
            cx = input.new_zeros(self.num_layers * num_directions, batch_size, self.hidden_size, requires_grad=False)
        else:
            hx, cx = hidden

        # ht[t][k] / ct[t][k]: hidden / cell state at step t for layer-direction slot k.
        ht = [[None] * (self.num_layers * num_directions) for _ in range(seq_len)]
        ct = [[None] * (self.num_layers * num_directions) for _ in range(seq_len)]

        seq_len_mask = input.new_ones(batch_size, seq_len, self.hidden_size, requires_grad=False)
        if seq_lens is None:
            # No lengths supplied: nothing is masked and the last step is seq_len - 1.
            last_steps = torch.full((batch_size,), seq_len - 1, dtype=torch.long, device=input.device)
        else:
            for i, l in enumerate(seq_lens):
                seq_len_mask[i, l:, :] = 0
            # Built on the input's device so the gather below works on CPU and CUDA alike.
            last_steps = torch.as_tensor(seq_lens, dtype=torch.long, device=input.device) - 1
        seq_len_mask = seq_len_mask.transpose(0, 1)

        # [1, 1, batch, hidden]: time index of each sample's last valid step.
        last_index = last_steps.unsqueeze(1).unsqueeze(0).unsqueeze(0).repeat(
            [1, 1, 1, self.hidden_size])

        if self.bidirectional:
            # The reverse direction finishes at time step 0.
            first_index = torch.zeros_like(last_index)
            indices = torch.cat((last_index, first_index), dim=1)
            hy_layers = []
            cy_layers = []
            xs = input
            # Variational Dropout
            if not self.training or self.dropout == 0:
                dropout_mask = input.new_ones(self.num_layers, 2, batch_size, self.hidden_size)
            else:
                dropout_mask = input.new_empty(self.num_layers, 2, batch_size, self.hidden_size).bernoulli_(1 - self.dropout)
                dropout_mask = dropout_mask / (1 - self.dropout)

            for l, (layer0, layer1) in enumerate(zip(self.hidden0, self.hidden1)):
                l0, l1 = 2 * l, 2 * l + 1
                h0, c0, h1, c1 = hx[l0], cx[l0], hx[l1], cx[l1]
                for t, (x0, x1) in enumerate(zip(xs, reversed(xs))):
                    # Forward direction writes slot l0 at step t.
                    ht_, ct_ = layer0(x0, (h0, c0))
                    ht[t][l0] = ht_ * seq_len_mask[t]
                    ct[t][l0] = ct_ * seq_len_mask[t]
                    h0, c0 = ht[t][l0], ct[t][l0]
                    # Reverse direction writes slot l1 at the mirrored step.
                    t_rev = seq_len - 1 - t
                    ht_, ct_ = layer1(x1, (h1, c1))
                    ht[t_rev][l1] = ht_ * seq_len_mask[t_rev]
                    ct[t_rev][l1] = ct_ * seq_len_mask[t_rev]
                    h1, c1 = ht[t_rev][l1], ct[t_rev][l1]

                # Input of the next layer: both directions concatenated, with this layer's mask.
                xs = [torch.cat((h[l0]*dropout_mask[l][0], h[l1]*dropout_mask[l][1]), dim=1) for h in ht]
                ht_temp = torch.stack([torch.stack([h[l0], h[l1]]) for h in ht])
                ct_temp = torch.stack([torch.stack([c[l0], c[l1]]) for c in ct])
                hy_layers.append(ht_temp.gather(dim=0, index=indices).squeeze(0))
                cy_layers.append(ct_temp.gather(dim=0, index=indices).squeeze(0))
            hy = torch.cat(hy_layers, dim=0)
            cy = torch.cat(cy_layers, dim=0)
            y  = torch.stack(xs)
        else:
            # [1, num_layers, batch, hidden]
            indices = last_index.repeat([1, self.num_layers, 1, 1])
            h, c = hx, cx
            # Variational Dropout
            if not self.training or self.dropout == 0:
                dropout_mask = input.new_ones(self.num_layers, batch_size, self.hidden_size)
            else:
                dropout_mask = input.new_empty(self.num_layers, batch_size, self.hidden_size).bernoulli_(1 - self.dropout)
                dropout_mask = dropout_mask / (1 - self.dropout)

            for t, x in enumerate(input):
                for l, layer in enumerate(self.hidden0):
                    ht_, ct_ = layer(x, (h[l], c[l]))
                    ht[t][l] = ht_ * seq_len_mask[t]
                    ct[t][l] = ct_ * seq_len_mask[t]
                    x = ht[t][l] * dropout_mask[l]
                ht[t] = torch.stack(ht[t])
                ct[t] = torch.stack(ct[t])
                h, c = ht[t], ct[t]
            y = torch.stack([h[-1]*dropout_mask[-1] for h in ht])
            hy = torch.stack(ht).gather(dim=0, index=indices).squeeze(0)
            cy = torch.stack(ct).gather(dim=0, index=indices).squeeze(0)

        return y, (hy, cy)


# ### Model training and testing
class pooling_layer(nn.Module):
    """Linear projection followed by a max over the first axis of every sentence tensor.

    Args:
        input_dim: Feature width fed to the linear layer.
        output_dim: Feature width produced by the linear layer.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)
        self.input_dim = input_dim
        self.output_dim = output_dim

    def forward(self, inputs, cuda=True):
        """Pool each sentence to one vector and pad the sequences to a common length.

        Args:
            inputs: Iterable of sequences; each sequence is an iterable of
                sentence tensors whose last axis has ``input_dim`` features.
            cuda: When true, every sentence tensor is moved to the GPU first.

        Returns:
            Zero-padded tensor ``[len(inputs), longest sequence, output_dim]``.
        """
        pooled_sequences = []
        for seq in inputs:
            sentence_rows = []
            for sent_tensor in seq:
                if cuda:
                    sent_tensor = sent_tensor.cuda()
                projected = self.linear(sent_tensor)
                # Element-wise maximum over axis 0, kept as a single row.
                sentence_rows.append(projected.max(dim=0)[0].unsqueeze(0))
            pooled_sequences.append(torch.cat(sentence_rows))
        return torch.nn.utils.rnn.pad_sequence(pooled_sequences, batch_first=True)

class RDM_Model(nn.Module):
    """Single-layer, batch-first GRU over a sequence of embeddings.

    Args:
        word_embedding_dim: Feature width of the GRU input.
        sent_embedding_dim: Stored as ``embedding_dim``; not used by ``forward``.
        hidden_dim: GRU hidden size.
        dropout_prob: Passed to ``nn.GRU`` and to ``DropLayer``.
    """

    def __init__(self, word_embedding_dim, sent_embedding_dim, hidden_dim, dropout_prob):
        super(RDM_Model, self).__init__()
        self.embedding_dim = sent_embedding_dim
        self.hidden_dim = hidden_dim
        # NOTE(review): nn.GRU applies `dropout` only between stacked layers, so with the
        # default single layer it has no effect and PyTorch emits a UserWarning.
        self.gru_model = nn.GRU(word_embedding_dim,
                                self.hidden_dim,
                                batch_first=True,
                                dropout=dropout_prob
                            )
        # Kept for state/attribute compatibility; forward() does not call it.
        self.DropLayer = nn.Dropout(dropout_prob)

    def forward(self, input_x):
        """
        input_x: [batchsize, max_seq_len, embedding_dim]

        Returns the GRU output for every step, [batchsize, max_seq_len, hidden_dim].
        The initial hidden state is zeros of shape [1, batchsize, hidden_dim],
        created on the same device and with the same dtype as ``input_x``.
        """
        batchsize, max_seq_len, emb_dim = input_x.shape
        init_states = torch.zeros([1, batchsize, self.hidden_dim],
                                  dtype=input_x.dtype, device=input_x.device)
        try:
            df_outputs, df_last_state = self.gru_model(input_x, init_states)
        except Exception:
            # Report the offending shapes, then let the original error propagate.
            print("Error:", input_x.shape, init_states.shape)
            raise
        return df_outputs

class RDM_Model_V1(nn.Module):
    """Variant of the recurrent encoder that uses a single-layer ``LayerNormLSTM``.

    Args:
        word_embedding_dim: Feature width of the LSTM input.
        sent_embedding_dim: Stored as ``embedding_dim``; not used by ``forward``.
        hidden_dim: LSTM hidden size.
        dropout_prob: Passed as the LSTM's ``dropout``.
    """

    def __init__(self, word_embedding_dim, sent_embedding_dim, hidden_dim, dropout_prob):
        super(RDM_Model_V1, self).__init__()
        self.embedding_dim = sent_embedding_dim
        self.hidden_dim = hidden_dim
        # The attribute keeps the name `gru_model` although it holds an LSTM.
        self.gru_model = LayerNormLSTM(word_embedding_dim,
                                self.hidden_dim,
                                dropout=dropout_prob
                            )

    def forward(self, input_x, seq_lens):
        """
        input_x: [batchsize, max_seq_len, embedding_dim]
        seq_lens: per-sample valid lengths, forwarded to the LSTM

        Returns (outputs, last_hidden, last_cell), each transposed back so that
        the batch axis comes first. The zero initial states are created on the
        same device and with the same dtype as ``input_x``.
        """
        batchsize, max_seq_len, emb_dim = input_x.shape
        h0 = torch.zeros([1, batchsize, self.hidden_dim], dtype=input_x.dtype, device=input_x.device)
        c0 = torch.zeros([1, batchsize, self.hidden_dim], dtype=input_x.dtype, device=input_x.device)
        # LayerNormLSTM is time-major, hence the transposes around the call.
        df_outputs, (df_last_state, df_last_cell) = self.gru_model(input_x.transpose(0, 1), (h0, c0), seq_lens)
        return df_outputs.transpose(0, 1), df_last_state.transpose(0, 1), df_last_cell.transpose(0, 1)

class CM_Model_V1(nn.Module):
    """Two-layer feed-forward head that scores ``action_num`` actions per state row.

    Args:
        hidden_dim: Feature width of the incoming state.
        action_num: Number of output scores.
    """

    def __init__(self, hidden_dim, action_num):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.action_num = action_num
        self.DenseLayer = nn.Linear(hidden_dim, 64)
        self.Classifier = nn.Linear(64, action_num)

    def forward(self, rdm_state):
        """
        rdm_state: [batchsize, hidden_dim]

        Returns (stopScore, isStop): the raw scores [batchsize, action_num] and
        the index of the highest score in every row.
        """
        # Unpacking two values rejects anything that is not 2-D.
        _rows, _width = rdm_state.shape
        activated = torch.relu(self.DenseLayer(rdm_state))
        stopScore = self.Classifier(activated)
        return stopScore, stopScore.argmax(dim=1)

class CM_Model(nn.Module):
    """Action scorer that first advances the recurrent model of a given RDM by one call.

    Args:
        sentence_embedding_dim: Stored only; not used by ``forward``.
        hidden_dim: Width of the recurrent output fed to the dense layer.
        action_num: Number of output scores.
    """

    def __init__(self, sentence_embedding_dim, hidden_dim, action_num):
        super().__init__()
        self.sentence_embedding_dim = sentence_embedding_dim
        self.hidden_dim = hidden_dim
        self.action_num = action_num
        self.DenseLayer = nn.Linear(hidden_dim, 64)
        self.Classifier = nn.Linear(64, action_num)

    def forward(self, rdm_model, rl_input, rl_state):
        """
        rl_input: [batchsize, max_word_num, sentence_embedding_dim]
        rl_state: [1, batchsize, hidden_dim]

        Returns (stopScore, isStop, rl_new_state).
        """
        assert rl_input.ndim == 3
        batch_rows, _, _ = rl_input.shape
        recurrent_out, rl_new_state = rdm_model.gru_model(rl_input, rl_state)
        # The recurrent output (not the incoming rl_state) serves as the feature;
        # the original author was unsure which of the two should be used.
        features = recurrent_out.reshape([batch_rows, self.hidden_dim])
        stopScore = self.Classifier(torch.relu(self.DenseLayer(features)))
        return stopScore, stopScore.argmax(dim=1), rl_new_state


# In[13]:


# Populate the dataset through a helper that is not defined in this notebook
# (presumably provided by the `dataUtilsV0` star import -- confirm).
load_data_fast()

# Build the sentence pooler, the recurrent encoder, its 2-way classifier and
# the 2-action control head, and move each of them to the GPU.
rdm_model = RDM_Model(300, 300, 256, 0.2).cuda()
sent_pooler = pooling_layer(300, 300).cuda()
rdm_classifier = nn.Linear(256, 2).cuda()
cm_model = CM_Model_V1(256, 2).cuda()

# Directory used later for the TensorBoard writer and for the saved checkpoint.
log_dir = os.path.join(sys.path[0], "ERD/")

# NOTE(review): the path is relative to the current working directory, so the
# notebook must be launched from the expected folder.
with open("../../config.json", "r") as cr:
    dic = json.load(cr)

class adict(dict):
    """Dictionary whose items can also be read and written as attributes.

    The instance's ``__dict__`` is bound to the dictionary itself, so
    ``d.key`` and ``d["key"]`` always refer to the same entry.
    """

    def __init__(self, *av, **kav):
        super().__init__(*av, **kav)
        # Share one storage between attribute access and item access.
        self.__dict__ = self

# Expose the parsed config so that its keys are reachable as FLAGS.<key>.
FLAGS = adict(dic)

max_sent: 64 ,  max_seq_len: 346
5802 data loaded


  "num_layers={}".format(dropout, num_layers))


In [3]:
# Restore the pooler, encoder and classifier from a saved checkpoint when one
# exists; otherwise pre-train them with the project's TrainRDMModel helper.
# NOTE(review): this path is relative to the working directory, whereas
# `log_dir` is anchored at sys.path[0] -- confirm both point at the same folder.
pretrained_file = "ERD/ERD_best.pkl"
if os.path.exists(pretrained_file):
    # NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
    checkpoint = torch.load(pretrained_file)
    sent_pooler.load_state_dict(checkpoint['sent_pooler'])
    # "rmdModel" (sic) is the key under which the encoder is stored; the save cell uses the same spelling.
    rdm_model.load_state_dict(checkpoint["rmdModel"])
    rdm_classifier.load_state_dict(checkpoint["rdm_classifier"])
    # cm_model is not restored here and keeps its fresh initialization.
else:
    TrainRDMModel(rdm_model, sent_pooler, rdm_classifier, 
                    t_steps=5000, stage=0, new_data_len=[], valid_new_len=[], logger=None, 
                        log_dir=log_dir, cuda=True)


In [4]:
# Remove the notebook-level bindings of the validation arrays; the next cell
# imports them again from dataUtilsV0. Presumably this refreshes names that the
# earlier star import bound before load_data_fast() ran -- TODO confirm.
# Raises NameError if any of the names is not currently defined.
del valid_data_ID
del valid_data_len
del valid_data_y

In [5]:
from dataUtilsV0 import valid_data_ID
from dataUtilsV0 import valid_data_len
from dataUtilsV0 import valid_data_y

In [6]:
# Run-level settings shared by the cells below.
stage = 0  # formatted into the TensorBoard file suffix
t_rw = 0.5  # not read by any cell visible in this notebook
t_steps = 10000  # only printed in the progress lines; the loops use their own ranges
logger = None  # not read by any cell visible in this notebook
cuda=True  # decides whether label tensors are moved to the GPU

In [7]:
# --- Hyper-parameters and running statistics --------------------------------
batch_size = 20
t_acc = 0.9  # was assigned twice with the same value; the redundant copy is removed
gamma = 1.0
lambda1 = -1.0
lambda2 = 0.0 #regularizer
sum_loss = 0.0
sum_acc = 0.0
ret_acc = 0.0

# --- Loss --------------------------------------------------------------------
# Zero initial GRU state for one batch; the training cells below do not read it.
init_states = torch.zeros([1, batch_size, rdm_model.hidden_dim], dtype=torch.float32).cuda()
# Class 0 is weighted twice as heavily as class 1 in the cross-entropy.
weight = torch.tensor([2.0, 1.0], dtype=torch.float32).cuda()
loss_fn = nn.CrossEntropyLoss(weight=weight, reduction='mean')

# --- Optimizer ---------------------------------------------------------------
# One Adagrad instance for all four modules; the control head uses a learning
# rate 100x larger than the pre-trained components.
optim = torch.optim.Adagrad([
                            {'params': sent_pooler.parameters(), 'lr': 2e-5},
                            {'params': rdm_model.parameters(), 'lr': 2e-5},
                            {'params': rdm_classifier.parameters(), 'lr': 2e-5},
                            {'params': cm_model.parameters(), 'lr':2e-3}
                         ]
)

# --- Logging -----------------------------------------------------------------
# NOTE(review): "%3d" pads the stage with spaces, so the suffix contains blanks
# (e.g. "_ERD_CM_stage_  0"); left unchanged to keep existing file names stable.
writer = SummaryWriter(log_dir, filename_suffix="_ERD_CM_stage_%3d"%stage)
best_valid_acc = 0.0
# Ring buffers holding the last ten expected-reward / mean-length values.
rw_arr = np.zeros(10)
len_arr = np.zeros(10)

In [10]:
# Supervised warm-up of the control head: cm_model is trained to predict the
# class label from the encoder state at each sample's last valid step.
# The optimizer holds all four modules, so the gradient also reaches
# sent_pooler and rdm_model; rdm_classifier is not part of this loss.
for step in range(1000):
    # get_df_batch is a project helper; presumably it returns the inputs, their lengths and labels -- confirm.
    x, x_len, y = get_df_batch(step*batch_size, batch_size)        
    seq = sent_pooler(x)
    rdm_hiddens = rdm_model(seq)
    batchsize, max_seq_len, hidden_dim = rdm_hiddens.shape
    # Pick, for every sample, the hidden state at index x_len[i]-1.
    rdm_outs = torch.stack(
        [ rdm_hiddens[i][x_len[i]-1] for i in range(batchsize)] 
    )
    stopScore, isStop = cm_model(
        rdm_outs
    )
    # Labels are reduced with argmax over axis 1 (y looks one-hot encoded -- confirm).
    y_label = torch.tensor(y).argmax(axis=1).cuda() if cuda else torch.tensor(y).argmax(axis=1)
    acc = accuracy_score(y_label.cpu().numpy(), isStop.cpu().numpy())
    loss = loss_fn(stopScore, y_label)
    optim.zero_grad()
    loss.backward()
    optim.step()
    if step%10 == 0:  
        # NOTE(review): the banner says "PG" and the reward/length fields are
        # constant zeros; only the loss and accuracy fields carry information.
        print('*****Optimizing PG***** %3d | %d , train_loss/Expected Reward = %6.8f/%6.7f,  RDM_Loss/RDM Accuracy = %6.8f/%6.7f, mean_len = %2.3f'% (step, t_steps, 
                0.0, 0 , loss, acc, 0.0))

*****Optimizing PG*****   0 | 10000 , train_loss/Expected Reward = 0.00000000/0.0000000,  RDM_Loss/RDM Accuracy = 0.12729970/0.9000000, mean_len = 0.000
*****Optimizing PG*****  10 | 10000 , train_loss/Expected Reward = 0.00000000/0.0000000,  RDM_Loss/RDM Accuracy = 0.35842717/0.9500000, mean_len = 0.000
*****Optimizing PG*****  20 | 10000 , train_loss/Expected Reward = 0.00000000/0.0000000,  RDM_Loss/RDM Accuracy = 0.05993899/0.9500000, mean_len = 0.000
*****Optimizing PG*****  30 | 10000 , train_loss/Expected Reward = 0.00000000/0.0000000,  RDM_Loss/RDM Accuracy = 0.04908651/0.9500000, mean_len = 0.000
*****Optimizing PG*****  40 | 10000 , train_loss/Expected Reward = 0.00000000/0.0000000,  RDM_Loss/RDM Accuracy = 0.03922317/1.0000000, mean_len = 0.000
*****Optimizing PG*****  50 | 10000 , train_loss/Expected Reward = 0.00000000/0.0000000,  RDM_Loss/RDM Accuracy = 0.09170857/0.9500000, mean_len = 0.000
*****Optimizing PG*****  60 | 10000 , train_loss/Expected Reward = 0.00000000/0.00

KeyboardInterrupt: 

In [9]:
# Supervised training of the encoder's own classifier: rdm_classifier predicts
# the class label from the encoder state at each sample's last valid step.
# The gradient reaches rdm_classifier, rdm_model and sent_pooler; cm_model is
# not part of this loss.
# NOTE(review): this cell duplicates the control-head loop except for the head
# being trained; a shared function would remove the copy.
for step in range(1000):
    # get_df_batch is a project helper; presumably it returns the inputs, their lengths and labels -- confirm.
    x, x_len, y = get_df_batch(step*batch_size, batch_size)        
    seq = sent_pooler(x)
    rdm_hiddens = rdm_model(seq)
    batchsize, max_seq_len, hidden_dim = rdm_hiddens.shape
    # Pick, for every sample, the hidden state at index x_len[i]-1.
    rdm_outs = torch.stack(
        [ rdm_hiddens[i][x_len[i]-1] for i in range(batchsize)] 
    )
    rdm_scores = rdm_classifier(
        rdm_outs
    )
    rdm_preds = rdm_scores.argmax(axis=1)
    # Labels are reduced with argmax over axis 1 (y looks one-hot encoded -- confirm).
    y_label = torch.tensor(y).argmax(axis=1).cuda() if cuda else torch.tensor(y).argmax(axis=1)
    acc = accuracy_score(y_label.cpu().numpy(), rdm_preds.cpu().numpy())
    loss = loss_fn(rdm_scores, y_label)
    optim.zero_grad()
    loss.backward()
    optim.step()
    if step%10 == 0:  
        # The reward and length fields are constant zeros in this stage.
        print('*****Optimizing RDM***** %3d | %d , train_loss/Expected Reward = %6.8f/%6.7f,  RDM_Loss/RDM Accuracy = %6.8f/%6.7f, mean_len = %2.3f'% (step, t_steps, 
                0.0, 0 , loss, acc, 0.0))

*****Optimizing RDM*****   0 | 10000 , train_loss/Expected Reward = 0.00000000/0.0000000,  RDM_Loss/RDM Accuracy = 0.11310752/0.9500000, mean_len = 0.000
*****Optimizing RDM*****  10 | 10000 , train_loss/Expected Reward = 0.00000000/0.0000000,  RDM_Loss/RDM Accuracy = 0.41237867/0.9500000, mean_len = 0.000
*****Optimizing RDM*****  20 | 10000 , train_loss/Expected Reward = 0.00000000/0.0000000,  RDM_Loss/RDM Accuracy = 0.04482171/0.9500000, mean_len = 0.000
*****Optimizing RDM*****  30 | 10000 , train_loss/Expected Reward = 0.00000000/0.0000000,  RDM_Loss/RDM Accuracy = 0.03326625/1.0000000, mean_len = 0.000
*****Optimizing RDM*****  40 | 10000 , train_loss/Expected Reward = 0.00000000/0.0000000,  RDM_Loss/RDM Accuracy = 0.02917382/1.0000000, mean_len = 0.000
*****Optimizing RDM*****  50 | 10000 , train_loss/Expected Reward = 0.00000000/0.0000000,  RDM_Loss/RDM Accuracy = 0.10453340/0.9500000, mean_len = 0.000
*****Optimizing RDM*****  60 | 10000 , train_loss/Expected Reward = 0.000000

KeyboardInterrupt: 

In [11]:
# Joint training of the encoder/classifier and the stopping policy.
# For every sample a stopping step t is sampled from cm_model's probabilities;
# the per-sample cost is 0.1*(t+1) plus the weighted cross-entropy of the
# classifier's predictions on steps t .. x_len[j]-1. Even steps back-propagate
# through the cost (with the path probability detached), odd steps through the
# path probability (with the cost detached).
# NOTE(review): neither `random` nor torch is seeded, so runs are not reproducible.
for step in range(4000):
    # get_df_batch is a project helper; presumably it returns the inputs, their lengths and labels -- confirm.
    x, x_len, y = get_df_batch(step*batch_size, batch_size)
    seq = sent_pooler(x)
    rdm_hiddens = rdm_model(seq)
    tau_len = 5  # NOTE(review): never read in this cell

    # Monitoring only: loss/accuracy of the classifier on the last valid step.
    with torch.no_grad():
        batchsize, _, _ = rdm_hiddens.shape
        rdm_outs = torch.cat(
            [ rdm_hiddens[i][x_len[i]-1].unsqueeze(0) for i in range(batchsize)]
            # a list of tensor, where the ndim of tensor is 1 and the shape of tensor is [hidden_size]
        )
        rdm_scores = rdm_classifier(
            rdm_outs
        )
        rdm_preds = rdm_scores.argmax(axis=1)
        y_label = torch.tensor(y).argmax(axis=1).cuda() if cuda else torch.tensor(y).argmax(axis=1)
        acc = accuracy_score(y_label.cpu().numpy(), rdm_preds.cpu().numpy())
        loss = loss_fn(rdm_scores, y_label)

    batchsize, max_seq_len, hidden_dim = rdm_hiddens.shape
    # Use the hidden width read from the tensor instead of a hard-coded 256.
    stopScore, isStop = cm_model(rdm_hiddens.reshape(-1, hidden_dim))
    isStop = isStop.reshape([batchsize, max_seq_len, -1])
    # Per-step probabilities; index 1 is treated as the "stop" action below.
    stopProb = stopScore.reshape([batchsize, max_seq_len, -1]).softmax(axis=-1)

    sum_rw = torch.zeros(len(x_len)).cuda()
    prob = torch.ones(len(x_len)).cuda()
    sum_len = 0.0

    preds_list = []  # NOTE(review): never filled in this cell
    label_list = []

    # Classifier scores for every step of every sample.
    preds = rdm_classifier(rdm_hiddens.reshape(-1, hidden_dim)).reshape(batchsize, max_seq_len, -1)
    for j in range(len(x_len)):
        # Start the roll-out at a random valid step.
        start = random.randint(0, x_len[j]-1)
        delay_punish = -0.1*torch.arange(1, x_len[j]+1).cuda()
        for t in range(start, x_len[j]):
            rnd = random.random()
            if rnd > stopProb[j][t][1]:
                # "continue" sampled: accumulate its probability.
                prob[j] *= stopProb[j][t][0]
            else:
                # "stop" sampled: accumulate its probability, but actually
                # terminate the roll-out only with probability 0.4.
                prob[j] *= stopProb[j][t][1]
                rnd2 = random.random()
                if  rnd2 < 0.4:
                    break
        # t is the step where the roll-out ended (the last valid step if it never broke).
        label_list.append( torch.tensor(y[j]).repeat(x_len[j]-t, 1).cuda() )
        sum_rw[j] = -1*delay_punish[t]+ loss_fn(preds[j][t:x_len[j]], label_list[-1].argmax(axis=1))
        sum_len += (t-start+1)*1.0/x_len[j]

    optim.zero_grad()
    if step %2 == 0:
        E_rw = (prob.detach().cuda()*sum_rw).mean()
    else:
        E_rw = (prob*sum_rw.detach().cuda()).mean()
    E_rw.backward()
    optim.step()
    if step%10 == 0:
        # Both reward fields print the same running mean over the last ten steps.
        print('*****Optimizing RDM & PG***** %3d | %d , train_loss/Expected Reward = %6.8f/%6.7f,  RDM_Loss/RDM Accuracy = %6.8f/%6.7f, mean_len = %2.3f'             % (step, t_steps,
                rw_arr.mean(), rw_arr.mean(), loss, acc, len_arr.mean()
                ))
    rw_arr[int(step%10)] = float(E_rw)
    len_arr[int(step%10)] = sum_len*1.0/batch_size

    writer.add_scalar('RDM Loss', loss, step)
    writer.add_scalar('RDM Accuracy', acc, step)
    writer.add_scalar('Train Loss', float(E_rw), step)
    writer.add_scalar('Expected Reward', float(E_rw), step)

    torch.cuda.empty_cache()


*****Optimizing RDM & PG*****   0 | 10000 , train_loss/Expected Reward = 0.00000000/0.0000000,  RDM_Loss/RDM Accuracy = 0.10292125/0.9000000, mean_len = 0.000
*****Optimizing RDM & PG*****  10 | 10000 , train_loss/Expected Reward = 1.17797074/1.1779707,  RDM_Loss/RDM Accuracy = 0.39256126/0.9500000, mean_len = 0.246
*****Optimizing RDM & PG*****  20 | 10000 , train_loss/Expected Reward = 1.11247169/1.1124717,  RDM_Loss/RDM Accuracy = 0.04693697/0.9500000, mean_len = 0.290
*****Optimizing RDM & PG*****  30 | 10000 , train_loss/Expected Reward = 1.10926030/1.1092603,  RDM_Loss/RDM Accuracy = 0.03507628/1.0000000, mean_len = 0.258
*****Optimizing RDM & PG*****  40 | 10000 , train_loss/Expected Reward = 1.16846640/1.1684664,  RDM_Loss/RDM Accuracy = 0.02621892/1.0000000, mean_len = 0.222
*****Optimizing RDM & PG*****  50 | 10000 , train_loss/Expected Reward = 0.81764104/0.8176410,  RDM_Loss/RDM Accuracy = 0.08326828/0.9500000, mean_len = 0.233
*****Optimizing RDM & PG*****  60 | 10000 , tr

*****Optimizing RDM & PG***** 520 | 10000 , train_loss/Expected Reward = 0.21575976/0.2157598,  RDM_Loss/RDM Accuracy = 0.00859091/1.0000000, mean_len = 0.295
*****Optimizing RDM & PG***** 530 | 10000 , train_loss/Expected Reward = 0.23222566/0.2322257,  RDM_Loss/RDM Accuracy = 0.12520550/0.9500000, mean_len = 0.300
*****Optimizing RDM & PG***** 540 | 10000 , train_loss/Expected Reward = 0.24071087/0.2407109,  RDM_Loss/RDM Accuracy = 0.06673069/1.0000000, mean_len = 0.310
*****Optimizing RDM & PG***** 550 | 10000 , train_loss/Expected Reward = 0.19561233/0.1956123,  RDM_Loss/RDM Accuracy = 0.09258128/0.9500000, mean_len = 0.324
*****Optimizing RDM & PG***** 560 | 10000 , train_loss/Expected Reward = 0.20127381/0.2012738,  RDM_Loss/RDM Accuracy = 0.11965803/0.9500000, mean_len = 0.282
*****Optimizing RDM & PG***** 570 | 10000 , train_loss/Expected Reward = 0.25493366/0.2549337,  RDM_Loss/RDM Accuracy = 0.39542127/0.9000000, mean_len = 0.306
*****Optimizing RDM & PG***** 580 | 10000 , tr

*****Optimizing RDM & PG***** 1040 | 10000 , train_loss/Expected Reward = 0.20455577/0.2045558,  RDM_Loss/RDM Accuracy = 0.03836829/0.9500000, mean_len = 0.278
*****Optimizing RDM & PG***** 1050 | 10000 , train_loss/Expected Reward = 0.20363007/0.2036301,  RDM_Loss/RDM Accuracy = 0.06108401/1.0000000, mean_len = 0.314
*****Optimizing RDM & PG***** 1060 | 10000 , train_loss/Expected Reward = 0.25946083/0.2594608,  RDM_Loss/RDM Accuracy = 0.09433495/0.9500000, mean_len = 0.290
*****Optimizing RDM & PG***** 1070 | 10000 , train_loss/Expected Reward = 0.19577198/0.1957720,  RDM_Loss/RDM Accuracy = 0.00378055/1.0000000, mean_len = 0.280
*****Optimizing RDM & PG***** 1080 | 10000 , train_loss/Expected Reward = 0.22583198/0.2258320,  RDM_Loss/RDM Accuracy = 0.16336639/0.9500000, mean_len = 0.301
*****Optimizing RDM & PG***** 1090 | 10000 , train_loss/Expected Reward = 0.19446086/0.1944609,  RDM_Loss/RDM Accuracy = 0.38705343/0.8500000, mean_len = 0.330
*****Optimizing RDM & PG***** 1100 | 100

*****Optimizing RDM & PG***** 1560 | 10000 , train_loss/Expected Reward = 0.16749661/0.1674966,  RDM_Loss/RDM Accuracy = 0.03922372/1.0000000, mean_len = 0.330
*****Optimizing RDM & PG***** 1570 | 10000 , train_loss/Expected Reward = 0.23415815/0.2341582,  RDM_Loss/RDM Accuracy = 0.01895372/1.0000000, mean_len = 0.291
*****Optimizing RDM & PG***** 1580 | 10000 , train_loss/Expected Reward = 0.21277909/0.2127791,  RDM_Loss/RDM Accuracy = 0.10441211/0.9000000, mean_len = 0.318
*****Optimizing RDM & PG***** 1590 | 10000 , train_loss/Expected Reward = 0.24609045/0.2460904,  RDM_Loss/RDM Accuracy = 0.03919357/1.0000000, mean_len = 0.332
*****Optimizing RDM & PG***** 1600 | 10000 , train_loss/Expected Reward = 0.21059294/0.2105929,  RDM_Loss/RDM Accuracy = 0.33498996/0.9000000, mean_len = 0.300
*****Optimizing RDM & PG***** 1610 | 10000 , train_loss/Expected Reward = 0.26239321/0.2623932,  RDM_Loss/RDM Accuracy = 0.25978473/0.9500000, mean_len = 0.264
*****Optimizing RDM & PG***** 1620 | 100

*****Optimizing RDM & PG***** 2080 | 10000 , train_loss/Expected Reward = 0.19376960/0.1937696,  RDM_Loss/RDM Accuracy = 0.01510480/1.0000000, mean_len = 0.303
*****Optimizing RDM & PG***** 2090 | 10000 , train_loss/Expected Reward = 0.21779183/0.2177918,  RDM_Loss/RDM Accuracy = 0.43525994/0.8500000, mean_len = 0.285
*****Optimizing RDM & PG***** 2100 | 10000 , train_loss/Expected Reward = 0.27215149/0.2721515,  RDM_Loss/RDM Accuracy = 0.12491938/0.9000000, mean_len = 0.284
*****Optimizing RDM & PG***** 2110 | 10000 , train_loss/Expected Reward = 0.22681377/0.2268138,  RDM_Loss/RDM Accuracy = 0.01255062/1.0000000, mean_len = 0.315
*****Optimizing RDM & PG***** 2120 | 10000 , train_loss/Expected Reward = 0.20298302/0.2029830,  RDM_Loss/RDM Accuracy = 0.06105240/1.0000000, mean_len = 0.306
*****Optimizing RDM & PG***** 2130 | 10000 , train_loss/Expected Reward = 0.27512446/0.2751245,  RDM_Loss/RDM Accuracy = 0.03290658/1.0000000, mean_len = 0.285
*****Optimizing RDM & PG***** 2140 | 100

*****Optimizing RDM & PG***** 2600 | 10000 , train_loss/Expected Reward = 0.23711057/0.2371106,  RDM_Loss/RDM Accuracy = 0.07090273/0.9500000, mean_len = 0.276
*****Optimizing RDM & PG***** 2610 | 10000 , train_loss/Expected Reward = 0.24152178/0.2415218,  RDM_Loss/RDM Accuracy = 0.05450480/1.0000000, mean_len = 0.300
*****Optimizing RDM & PG***** 2620 | 10000 , train_loss/Expected Reward = 0.23017306/0.2301731,  RDM_Loss/RDM Accuracy = 0.04295926/1.0000000, mean_len = 0.274
*****Optimizing RDM & PG***** 2630 | 10000 , train_loss/Expected Reward = 0.23178140/0.2317814,  RDM_Loss/RDM Accuracy = 0.07192231/1.0000000, mean_len = 0.299
*****Optimizing RDM & PG***** 2640 | 10000 , train_loss/Expected Reward = 0.22234944/0.2223494,  RDM_Loss/RDM Accuracy = 0.30893037/0.8500000, mean_len = 0.295
*****Optimizing RDM & PG***** 2650 | 10000 , train_loss/Expected Reward = 0.29186646/0.2918665,  RDM_Loss/RDM Accuracy = 0.19960357/0.9500000, mean_len = 0.292
*****Optimizing RDM & PG***** 2660 | 100

*****Optimizing RDM & PG***** 3120 | 10000 , train_loss/Expected Reward = 0.23814025/0.2381403,  RDM_Loss/RDM Accuracy = 0.06603328/0.9500000, mean_len = 0.296
*****Optimizing RDM & PG***** 3130 | 10000 , train_loss/Expected Reward = 0.22489734/0.2248973,  RDM_Loss/RDM Accuracy = 0.24646990/0.9500000, mean_len = 0.293
*****Optimizing RDM & PG***** 3140 | 10000 , train_loss/Expected Reward = 0.25460239/0.2546024,  RDM_Loss/RDM Accuracy = 0.01588176/1.0000000, mean_len = 0.313
*****Optimizing RDM & PG***** 3150 | 10000 , train_loss/Expected Reward = 0.22159871/0.2215987,  RDM_Loss/RDM Accuracy = 0.04992288/0.9500000, mean_len = 0.330
*****Optimizing RDM & PG***** 3160 | 10000 , train_loss/Expected Reward = 0.27174877/0.2717488,  RDM_Loss/RDM Accuracy = 0.01291234/1.0000000, mean_len = 0.278
*****Optimizing RDM & PG***** 3170 | 10000 , train_loss/Expected Reward = 0.27554596/0.2755460,  RDM_Loss/RDM Accuracy = 0.03324949/1.0000000, mean_len = 0.277
*****Optimizing RDM & PG***** 3180 | 100

*****Optimizing RDM & PG***** 3640 | 10000 , train_loss/Expected Reward = 0.25997037/0.2599704,  RDM_Loss/RDM Accuracy = 0.15793009/0.9500000, mean_len = 0.269
*****Optimizing RDM & PG***** 3650 | 10000 , train_loss/Expected Reward = 0.17479647/0.1747965,  RDM_Loss/RDM Accuracy = 0.12125248/0.9000000, mean_len = 0.299
*****Optimizing RDM & PG***** 3660 | 10000 , train_loss/Expected Reward = 0.25612660/0.2561266,  RDM_Loss/RDM Accuracy = 0.22856781/0.9500000, mean_len = 0.292
*****Optimizing RDM & PG***** 3670 | 10000 , train_loss/Expected Reward = 0.29542180/0.2954218,  RDM_Loss/RDM Accuracy = 0.03097360/1.0000000, mean_len = 0.298
*****Optimizing RDM & PG***** 3680 | 10000 , train_loss/Expected Reward = 0.19086605/0.1908661,  RDM_Loss/RDM Accuracy = 0.26477662/0.9500000, mean_len = 0.303
*****Optimizing RDM & PG***** 3690 | 10000 , train_loss/Expected Reward = 0.20057129/0.2005713,  RDM_Loss/RDM Accuracy = 0.07536831/0.9500000, mean_len = 0.299
*****Optimizing RDM & PG***** 3700 | 100

In [12]:
# Persist the state dicts of all four modules in one checkpoint file.
# log_dir already ends with "/", so the formatted path contains a double slash.
erd_save_as = '%s/erdModel_test1.pkl'% (log_dir)
torch.save(
    {
        "sent_pooler":sent_pooler.state_dict(),
        # "rmdModel" (sic) matches the key read back by the checkpoint-loading cell.
        "rmdModel":rdm_model.state_dict(),
        "rdm_classifier": rdm_classifier.state_dict(),
        "cm_model":cm_model.state_dict()
    },
    erd_save_as
)

In [17]:
# Remove the notebook-level bindings of the training and validation arrays;
# the import lines that follow bind them again from dataUtilsV0.
# Presumably this refreshes stale references -- TODO confirm.
# Raises NameError if any of the names is not currently defined.
del data
del data_ID
del data_y
del data_len
del valid_data_ID
del valid_data_len
del valid_data_y

from dataUtilsV0 import data
from dataUtilsV0 import data_ID
from dataUtilsV0 import data_len
from dataUtilsV0 import data_y
from dataUtilsV0 import valid_data_ID
from dataUtilsV0 import valid_data_len
from dataUtilsV0 import valid_data_y

In [16]:
# Run get_new_len_on_valid_data with the sentence pooler, the RDM model and the
# CM model; the result is compared against valid_data_len in the following cell.
# NOTE(review): presumably yields one sequence length per validation sample —
# confirm against the helper's definition.
valid_new_len = get_new_len_on_valid_data(sent_pooler, rdm_model, cm_model, FLAGS, cuda=True)

In [21]:
# Element-wise difference between the stored validation lengths and the newly
# computed ones (displayed as the cell output).
np.array(valid_data_len) - np.array(valid_new_len)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [22]:
# Evaluate on the validation data with the default (stored) sequence lengths.
# NOTE(review): the recorded 3-tuple is presumably (accuracy, precision, recall)
# — confirm against the definition in use when this cell was run.
accuracy_on_valid_data(rdm_model, sent_pooler, rdm_classifier)

(0.854, 0.8960244648318043, 0.8825301204819277)

In [23]:
# Per-sample length override of 1 for every validation sample; when passed as
# new_data_len, each sample is evaluated on a sequence of length one.
# Sized from valid_data_len instead of a hand-pasted literal of zeros (+1), so
# it stays in step with the validation set.
fisrt_len = np.ones(len(valid_data_len), dtype=int)

In [None]:
# NOTE(review): this cell has no execution count (it was not run). It calls
# accuracy_on_valid_data with all defaults, i.e. without any model — confirm
# whether it is still needed.
accuracy_on_valid_data()

In [24]:
# Same evaluation, but with the per-sample lengths overridden by fisrt_len.
accuracy_on_valid_data(rdm_model, sent_pooler, rdm_classifier, new_data_len=fisrt_len)

(0.782, 0.9305019305019305, 0.7259036144578314)

In [16]:
# Remove the notebook-level binding of accuracy_on_valid_data.
# NOTE(review): presumably to make room for the redefinition further down —
# confirm.
del accuracy_on_valid_data

In [25]:
# Call load_data_fast(); the recorded output reports max_sent, max_seq_len and
# the number of items loaded.
# NOTE(review): presumably (re)populates the dataset globals of dataUtilsV0 —
# confirm against its definition.
load_data_fast()

max_sent: 64 ,  max_seq_len: 346
5802 data loaded


In [28]:
# Unbind the dataset names held by the notebook, then bind them again from
# dataUtilsV0.
# NOTE(review): presumably done so the notebook picks up the objects the module
# holds after the preceding data load — confirm.
del data, data_ID, data_y, data_len, valid_data_ID, valid_data_len, valid_data_y

from dataUtilsV0 import (
    data,
    data_ID,
    data_len,
    data_y,
    valid_data_ID,
    valid_data_len,
    valid_data_y,
)

In [44]:
def accuracy_on_valid_data(rdm_model = None, sent_pooler = None, rdm_classifier=None, new_data_len=None, cuda=True):
    """Score the sent_pooler -> rdm_model -> rdm_classifier pipeline on the validation data.

    For every validation sample the first ``length`` entries of its ``'text'``
    list are converted with ``wordlist2wordvecs`` into tensors, the batch is run
    through the three modules, and the hidden state at position ``length - 1``
    is classified. Predictions are compared with the argmax of the label rows.

    Reads the module-level names ``data``, ``valid_data_ID``, ``valid_data_len``,
    ``valid_data_y`` and ``wordlist2wordvecs``.

    Args:
        rdm_model: callable applied to the output of ``sent_pooler``; its result
            is indexed as ``[sample][position]``.
        sent_pooler: callable applied to the batch (a list of lists of tensors).
        rdm_classifier: callable mapping the selected hidden states to scores.
        new_data_len: optional per-sample lengths overriding ``valid_data_len``.
            ``None`` or an empty sequence selects ``valid_data_len``.
        cuda: kept for backward compatibility. Labels are collected on the CPU,
            so no device transfer is performed here.

    Returns:
        Tuple ``(accuracy, precision, recall)`` computed with scikit-learn over
        all validation samples.

    Raises:
        ValueError: if any of the three modules is missing, or if there is no
            validation sample to evaluate.
    """
    # Fail fast: without all three modules nothing can be predicted.
    if rdm_model is None or sent_pooler is None or rdm_classifier is None:
        raise ValueError(
            "accuracy_on_valid_data requires rdm_model, sent_pooler and rdm_classifier"
        )

    batch_size = 20
    n_samples = len(valid_data_ID)
    if n_samples == 0:
        raise ValueError("accuracy_on_valid_data: the validation set is empty")

    if new_data_len is not None and len(new_data_len) > 0:
        t_data_len = new_data_len
    else:
        t_data_len = valid_data_len

    labels = []
    preds = []
    # Step through the validation set in order; the last batch may be shorter
    # than batch_size so that no trailing sample is left out of the metrics.
    for start in range(0, n_samples, batch_size):
        batch_ids = range(start, min(start + batch_size, n_samples))

        data_x = []
        for idx in batch_ids:
            tweets = data[valid_data_ID[idx]]['text']
            data_x.append([
                torch.tensor(np.stack(wordlist2wordvecs(tweets[j])))
                for j in range(t_data_len[idx])
            ])
        m_data_len = np.array([t_data_len[idx] for idx in batch_ids], dtype=np.int32)
        m_data_y = np.array([valid_data_y[idx] for idx in batch_ids], dtype=np.int32)

        with torch.no_grad():
            rdm_hiddens = rdm_model(sent_pooler(data_x))
            batchsize, _, _ = rdm_hiddens.shape
            # Classify the hidden state at the last used position of each
            # sample (index 0 here instead would score the first position).
            rdm_outs = torch.stack(
                [rdm_hiddens[i][m_data_len[i] - 1] for i in range(batchsize)]
            )
            rdm_preds = rdm_classifier(rdm_outs).argmax(dim=1)

        preds.append(rdm_preds.cpu())
        labels.append(torch.tensor(m_data_y).argmax(dim=1))

    pred_array = torch.cat(preds).numpy()
    label_array = torch.cat(labels).numpy()
    return (
        accuracy_score(y_true=label_array, y_pred=pred_array),
        precision_score(y_true=label_array, y_pred=pred_array),
        recall_score(y_true=label_array, y_pred=pred_array),
    )

In [43]:
accuracy_on_valid_data(rdm_model, sent_pooler, rdm_classifier) # use tweet 0 (the first tweet)

(0.836, 0.8255208333333334, 0.9548192771084337)

In [45]:
accuracy_on_valid_data(rdm_model, sent_pooler, rdm_classifier) # use the last tweet

(0.874, 0.8898550724637682, 0.9246987951807228)