# In [0]:
import torch.nn as nn
import numpy as np
import torch
import torch.utils.data as data
import pickle
def mask_padding(arr):
    """Return a 0/1 float mask flagging the strictly positive entries of ``arr``.

    Args:
        arr: NumPy array (presumably a padded sequence of ids in which
            non-positive values mark padding -- confirm against the data).

    Returns:
        ``torch.float32`` tensor shaped like ``arr`` holding 1.0 wherever
        ``arr > 0`` and 0.0 everywhere else.
    """
    positive = np.asarray(arr > 0, dtype=np.float32)
    return torch.from_numpy(positive).float()


class dataset(data.Dataset):
    """Map-style dataset over a pickled, indexable collection of NumPy arrays.

    Each item is returned as ``(input_ids, target_ids, length, mask)`` where
    input and target are the same sequence, ``length`` counts the positive
    mask entries and ``mask`` is whatever the ``mask`` callable produces.

    Args:
        path: Pickle file holding the collection (indexable, supports ``len``).
        mask: Callable mapping one stored array to a tensor mask.
    """

    def __init__(self, path='arr.pkl', mask=mask_padding):
        self.path = path
        # Honour the caller-supplied mask function instead of always using
        # the module-level default.
        self.mask = mask
        # NOTE: pickle.load can execute arbitrary code; only open trusted files.
        with open(self.path, 'rb') as fp:
            self.arr = pickle.load(fp)

    def __getitem__(self, index):
        """Return ``(ids, ids, length, mask)`` for the sample at ``index``."""
        sample = self.arr[index]
        mask1 = self.mask(sample)
        # Number of mask entries greater than zero, as a plain Python int.
        length = (mask1 > 0).sum().item()
        ids = torch.from_numpy(sample).long()
        # Source and target are the same sequence.
        return ids, ids, torch.tensor(length), mask1

    def __len__(self):
        """Number of stored samples."""
        return len(self.arr)
  
# Pre-trained embedding matrix: unpickled as a NumPy array and converted
# straight to a float32 tensor.
with open('word_embed.pkl', 'rb') as fp:
    pretrain_vector = torch.from_numpy(pickle.load(fp)).float()

# Vocabulary mapping (iterated with .items() further down to build the
# reverse index -> word lookup).
with open('word2index.pkl', 'rb') as f:
    word2index = pickle.load(f)






























# seq2seq

# In [0]:

import torch.nn.functional as F


class Seq2SeqRNN(nn.Module):
    """Sequence-to-sequence RNN with attention and tied embedding/output weights.

    An LSTM/GRU encoder (optionally bidirectional) reads a padded, batch-first
    tensor of token ids. A unidirectional decoder of the same cell type emits
    one vocabulary-sized score vector per time step while attending over the
    encoder outputs with Luong (dot-product) or Bahdanau (additive) attention.

    Args:
        rnn_type: 'LSTM' or 'GRU' (case-insensitive).
        hidden_size: Nominal state width. Each encoder direction uses
            ``hidden_size // 2`` units (also when ``bidirectional`` is False).
        embz_size: Embedding width; expected to equal
            ``pre_trained_vector.size(1)``.
        batch_size: Unused; kept so existing call sites keep working.
        attention_type: 'Luong' or 'Bahdanau' (case-insensitive).
        tied_weight_type: 'three_way' shares one weight between encoder
            embedding, decoder embedding and output projection; 'two_way'
            shares only decoder embedding and output projection.
        pre_trained_vector: Tensor of shape ``(vocab_size, embz_size)`` copied
            into the embedding weights.
        num_layers: Number of stacked RNN layers in encoder and decoder.
        encoder_drop: ``(embedding dropout, inter-layer RNN dropout)``.
        decoder_drop: Same pair for the decoder.
        bidirectional: Whether the encoder is bidirectional.
        bias: Whether the linear projection layers carry a bias term.
        teacher_forcing: Feed the ground-truth token as next decoder input
            when targets are available; otherwise feed the greedy prediction.

    Raises:
        ValueError: If ``rnn_type``, ``attention_type`` or
            ``tied_weight_type`` is not one of the supported options.
    """

    def __init__(self, rnn_type,  hidden_size, embz_size,batch_size,
                 attention_type, tied_weight_type, pre_trained_vector,
                 num_layers=1, encoder_drop=(0.2,0.3), decoder_drop=(0.2,0.3),
                 bidirectional=True, bias=False, teacher_forcing=True):

        super().__init__()

        rnn_type = rnn_type.upper()
        attention_type = attention_type.title()
        tied_weight_type = tied_weight_type.lower()

        if rnn_type not in ('LSTM', 'GRU'):
            raise ValueError("""An invalid option for '--rnn_type' was supplied,
                                    options are ['LSTM', 'GRU']""")
        self.rnn_type = rnn_type

        if attention_type not in ('Luong', 'Bahdanau'):
            raise ValueError("""An invalid option for '--attention_type' was supplied,
                                    options are ['Luong', 'Bahdanau']""")
        self.attention_type = attention_type

        if tied_weight_type not in ('three_way', 'two_way'):
            raise ValueError("""An invalid option for '--tied_weight_type' was supplied,
                                    options are ['three_way', 'two_way']""")
        self.tied_weight_type = tied_weight_type

        # initialize model parameters
        self.embz_size, self.hidden_size = embz_size, hidden_size // 2
        self.num_layers, self.pre_trained_vector = num_layers, pre_trained_vector
        self.bidirectional, self.teacher_forcing = bidirectional, teacher_forcing
        self.encoder_drop, self.decoder_drop = encoder_drop, decoder_drop
        # Vocabulary size on both sides comes from the embedding matrix.
        self.input_size = pre_trained_vector.size(0)
        self.output_size = pre_trained_vector.size(0)
        # Stored for callers; not consulted anywhere inside this class.
        if self.teacher_forcing: self.force_prob = 0.5

        # set bidirectional
        self.num_directions = 2 if self.bidirectional else 1
        state_size = self.hidden_size * self.num_directions

        # nn.LSTM / nn.GRU apply dropout only between stacked layers; with a
        # single layer the value has no effect and merely triggers a warning.
        encoder_rnn_drop = self.encoder_drop[1] if self.num_layers > 1 else 0.0
        decoder_rnn_drop = self.decoder_drop[1] if self.num_layers > 1 else 0.0

        # encoder
        self.encoder_dropout = nn.Dropout(self.encoder_drop[0])
        self.encoder_embedding_layer = nn.Embedding(self.input_size, self.embz_size)
        self.encoder_embedding_layer.weight.data.copy_(self.pre_trained_vector)

        self.encoder_rnn = getattr(nn, self.rnn_type)(
                           input_size=self.embz_size,
                           hidden_size=self.hidden_size,
                           num_layers=self.num_layers,
                           dropout=encoder_rnn_drop,
                           bidirectional=self.bidirectional, batch_first=True)
        # Kept so existing checkpoints still load; forward() does not use it.
        self.encoder_vector_layer = nn.Linear(state_size, self.output_size, bias=bias)

        # decoder
        self.decoder_dropout = nn.Dropout(self.decoder_drop[0])
        self.decoder_embedding_layer = nn.Embedding(self.input_size, self.embz_size)
        self.decoder_rnn = getattr(nn, self.rnn_type)(
                           input_size=self.embz_size,
                           hidden_size=state_size,
                           num_layers=self.num_layers,
                           dropout=decoder_rnn_drop, batch_first=True)
        self.decoder_output_layer = nn.Linear(state_size, self.embz_size, bias=bias)
        self.output_layer = nn.Linear(self.embz_size, self.output_size, bias=bias)

        # set tied weights: three way tied weights vs two way tied weights
        if self.tied_weight_type == 'three_way':
            self.decoder_embedding_layer.weight = self.encoder_embedding_layer.weight
        else:
            self.decoder_embedding_layer.weight.data.copy_(self.pre_trained_vector)
        self.output_layer.weight = self.decoder_embedding_layer.weight

        # set attention
        self.encoder_output_layer = nn.Linear(state_size, self.embz_size, bias=bias)
        self.att_vector_layer = nn.Linear(self.embz_size + self.embz_size, self.embz_size, bias=bias)
        if self.attention_type == 'Bahdanau':
            self.decoder_hidden_layer = nn.Linear(state_size, self.embz_size, bias=bias)
            self.att_score = nn.Linear(self.embz_size, 1, bias=bias)

    def init_hidden(self, batch_size=10):
        """Return an all-zero initial encoder state on the model's device.

        Returns a ``(h, c)`` tuple for LSTM and a single tensor for GRU, each
        of shape ``(num_layers * num_directions, batch_size, hidden_size)``.
        """
        # new_zeros follows the parameters' device and dtype, so the model
        # works on CPU as well as on whichever GPU it was moved to.
        reference = self.encoder_embedding_layer.weight
        shape = (self.num_layers * self.num_directions, batch_size, self.hidden_size)
        if self.rnn_type == 'LSTM':
            return (reference.new_zeros(shape), reference.new_zeros(shape))
        return reference.new_zeros(shape)

    def _cat_directions(self, hidden):
        """Merge forward/backward encoder states along the feature axis.

        Turns ``(num_layers * 2, batch, hidden)`` into
        ``(num_layers, batch, 2 * hidden)``; applied to both members of an
        LSTM ``(h, c)`` tuple.
        """
        def _cat(h):
            # Even rows hold one direction, odd rows the other.
            return torch.cat((h[0::2], h[1::2]), dim=2)

        if isinstance(hidden, tuple):
            # LSTM hidden contains a tuple (hidden state, cell state)
            return tuple(_cat(h) for h in hidden)
        # GRU hidden
        return _cat(hidden)

    def bahdanau_attention(self, encoder_output, decoder_hidden, decoder_input):
        """Additive attention computed before the decoder RNN step.

        Args:
            encoder_output: ``(batch, src_len, hidden_size * num_directions)``.
            decoder_hidden: Projected previous decoder state, broadcastable
                against ``(batch, src_len, embz_size)`` -- i.e.
                ``(batch, 1, embz_size)``.
            decoder_input: Embedded current input, ``(batch, embz_size)``.

        Returns:
            ``(att_weight, att_vector)`` with shapes ``(batch, src_len)`` and
            ``(batch, embz_size)``.

        NOTE: padded source positions are not masked out of the softmax.
        """
        projected = self.encoder_output_layer(encoder_output)
        energy = self.att_score(torch.tanh(projected + decoder_hidden))
        att_weight = F.softmax(energy, dim=1)
        context_vector = torch.bmm(att_weight.transpose(-1, 1), projected).squeeze(1)
        att_vector = torch.cat((context_vector, decoder_input), dim=1)
        att_vector = torch.tanh(self.att_vector_layer(att_vector))
        return att_weight.squeeze(-1), att_vector

    def luong_attention(self, encoder_output, decoder_output):
        """Dot-product attention computed after the decoder RNN step.

        Args:
            encoder_output: ``(batch, src_len, hidden_size * num_directions)``.
            decoder_output: Projected decoder output, ``(batch, 1, embz_size)``.

        Returns:
            ``(att_weight, att_vector)`` with shapes ``(batch, src_len)`` and
            ``(batch, embz_size)``.

        NOTE: padded source positions are not masked out of the softmax.
        """
        projected = self.encoder_output_layer(encoder_output)
        att_score = torch.bmm(projected, decoder_output.transpose(-1, 1))
        att_weight = F.softmax(att_score, dim=1)
        context_vector = torch.bmm(att_weight.transpose(-1, 1), projected).squeeze(1)
        att_vector = torch.cat((context_vector, decoder_output.squeeze(1)), dim=1)
        att_vector = torch.tanh(self.att_vector_layer(att_vector))
        return att_weight.squeeze(-1), att_vector

    def decoder_forward(self, batch_size, encoder_output, decoder_hidden, y,length):
        """Run the decoder for ``max(length)`` steps.

        Args:
            batch_size: Number of sequences in the batch.
            encoder_output: Batch-first encoder outputs.
            decoder_hidden: Initial decoder state (tuple for LSTM).
            y: Target ids ``(batch, >= max(length))`` or None.
            length: Per-sequence lengths (tensor or sequence of ints).

        Returns:
            ``(scores, attention)`` stacked over time: ``(steps, batch,
            vocab_size)`` and ``(steps, batch, src_len)``.
        """
        # Index 0 is used as the start-of-sequence input for every sequence.
        decoder_input = torch.zeros(batch_size, dtype=torch.long,
                                    device=encoder_output.device)
        steps = int(length.max()) if torch.is_tensor(length) else int(max(length))
        output_seq_stack, att_stack = [], []

        for i in range(steps):
            embedded = self.decoder_dropout(self.decoder_embedding_layer(decoder_input))
            if self.attention_type == 'Bahdanau':
                if isinstance(decoder_hidden, tuple):
                    top_state = decoder_hidden[0][-1]
                else:
                    top_state = decoder_hidden[-1]
                # (batch, embz) -> (batch, 1, embz) so it broadcasts over the
                # source-length axis of the batch-first encoder outputs.
                prev_hidden = self.decoder_hidden_layer(top_state).unsqueeze(1)
                att, rnn_input = self.bahdanau_attention(encoder_output, prev_hidden, embedded)
                decoder_output, decoder_hidden = self.decoder_rnn(rnn_input.unsqueeze(1), decoder_hidden)
                decoder_output = self.decoder_output_layer(decoder_output.squeeze(1))
            else:
                decoder_output, decoder_hidden = self.decoder_rnn(embedded.unsqueeze(1), decoder_hidden)
                decoder_output = self.decoder_output_layer(decoder_output)
                att, decoder_output = self.luong_attention(encoder_output, decoder_output)
            att_stack.append(att)
            output = self.output_layer(decoder_output)
            output_seq_stack.append(output)

            if self.teacher_forcing and y is not None:
                decoder_input = y[:, i].long()
            else:
                # Free-running decoding: feed back the greedy prediction so the
                # next embedding lookup again receives integer ids.
                decoder_input = output.detach().max(1)[1]

        return torch.stack(output_seq_stack), torch.stack(att_stack)

    def forward(self, seq, y,length):
        """Encode ``seq`` and decode it back.

        Args:
            seq: Padded token ids ``(batch, src_len)``, ordered by decreasing
                length (required by ``pack_padded_sequence`` defaults).
            y: Target ids for teacher forcing, or None.
            length: Per-sequence lengths matching ``seq``.

        Returns:
            ``(scores, sentence_vector)``: decoder scores ``(steps, batch,
            vocab_size)`` and the last-layer encoder hidden state
            ``(batch, hidden_size * num_directions)``.
        """
        batch_size = seq.size(0)

        encoder_hidden = self.init_hidden(batch_size)
        encoder_input = self.encoder_dropout(self.encoder_embedding_layer(seq))

        # pack_padded_sequence requires the lengths to live on the CPU.
        cpu_length = length.cpu() if torch.is_tensor(length) else length
        encoder_packed = torch.nn.utils.rnn.pack_padded_sequence(
            encoder_input, cpu_length, batch_first=True)
        packed_output, encoder_hidden = self.encoder_rnn(encoder_packed, encoder_hidden)
        encoder_outputs, _ = torch.nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=True)
        if self.bidirectional:
            encoder_hidden = self._cat_directions(encoder_hidden)
        output, _ = self.decoder_forward(batch_size, encoder_outputs, encoder_hidden,
                                         y=y, length=cpu_length)
        # For LSTM take the hidden state (not the cell state); for GRU the
        # state is already a single tensor. [-1] selects the last layer.
        if isinstance(encoder_hidden, tuple):
            final_state = encoder_hidden[0]
        else:
            final_state = encoder_hidden
        return output, final_state[-1]

# In [0]:
# Reverse vocabulary lookup: integer index -> word. If several words share an
# index, the one iterated last wins.
index2word = {index: word for word, index in word2index.items()}
  
def print_sentence(pre):
    """Render column 0 of ``pre`` as a space-separated string of words.

    Args:
        pre: Either a 3-D tensor of scores, reduced to ids with an argmax over
            the last axis, or a 2-D tensor that is used as ids directly. The
            call sites in this file pass time-major tensors, so column 0 is
            presumably the first sequence of the batch.

    Returns:
        The words looked up in ``index2word``, joined by single spaces; ids
        missing from the vocabulary are rendered as ``'unknow'``.
    """
    token_ids = pre.max(2)[1] if pre.dim() == 3 else pre
    first_column = token_ids.cpu().numpy()[:, 0]
    return ' '.join(
        index2word[idx] if idx in index2word else 'unknow'
        for idx in first_column
    )
      

# Build the model on the GPU and train it as an autoencoder: the target of
# every batch is its own input sequence.
model=Seq2SeqRNN('LSTM',400,300,10,'Luong','two_way', pretrain_vector).cuda()
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-5)
sentence=dataset()
datasets=data.DataLoader(sentence,batch_size=10,shuffle=True)
for j in range(10):
    for i,(x,y,z,m) in enumerate(datasets):
        # pack_padded_sequence expects sequences ordered by decreasing length,
        # so reorder the whole batch with the sort permutation.
        a,b=z.sort(descending=True)
        x=x[b].cuda()
        y=y[b].cuda()
        # Lengths stay on the CPU: pack_padded_sequence requires CPU lengths.
        z=a
        max_l=int(a[0])
        m=m[b].cuda()
        out,_=model(x,y,z)
        out1=F.log_softmax(out,dim=2)
        # Trim targets/mask to the decoded length and make them time-major
        # with a trailing singleton axis so they line up with `out1`.
        y=y[:,:max_l].t().unsqueeze(2)
        m=m[:,:max_l].t().unsqueeze(2)
        output_loss=torch.gather(out1,2,y)

        # Masked negative log-likelihood, averaged over the batch size.
        total_loss=-(1/(len(a)))*(output_loss*m).sum()
        optimizer.zero_grad()
        total_loss.backward()
        # Clip BEFORE the optimizer step; clipping afterwards has no effect on
        # the update that was just applied.
        nn.utils.clip_grad_norm_(model.parameters(),3)
        optimizer.step()
        if i%500==0:
            print('loss',total_loss.data)
            pre=print_sentence(out)
            rea=print_sentence(y.squeeze(2))
            print(pre,'###',rea)
# Runs once, after the final epoch; note that model.cpu() moves the model off
# the GPU in place.
torch.save(model.cpu().state_dict(),'train:epoach{}.pth'.format(j))
# Disabled evaluation/clustering code, kept as an inert string literal (it is
# never executed).
'''with torch.no_grad():
  model=Seq2SeqRNN('LSTM',400,300,10,'Luong','two_way', pretrain_vector)
  model.load_state_dict(torch.load('train:epoach9.pth'))
  sentence=dataset()
  
  datasets=data.DataLoader(sentence,batch_size=1000,shuffle=False)
  simple=[]
  for i,(x,y,z,m) in enumerate(datasets):
    a,b=z.sort(descending=True)
    x=x[b]
  
    y=y[b]
    z=a
    max_l=max(a)
    m=m[b]
    out1,out=model(x,y,z)
    simple.append(out.numpy())


simple=np.vstack(simple)  
from sklearn.preprocessing import normalize
from sklearn.cluster import KMeans
import numpy as np
from collections import Counter
from operator import itemgetter
from sklearn import metrics



def map_label(true_labels, pred_labels):
    label_pair = list(zip(pred_labels, true_labels))
    count = tuple(Counter(label_pair).items())
    mapping = dict()
    n_label = len(np.unique(true_labels))

    # map most likely labels from prediction to ground truth
    for label in range(n_label):
        tuples = [tup for tup in count if tup[0][0] == label]
        likely_tuple = max(tuples, key=itemgetter(1))[0]
        mapping[likely_tuple[0]] = likely_tuple[1]

    pred_labels_mapped = [mapping[x] for x in pred_labels]
    return pred_labels_mapped


def cluster_quality(true_labels, pred_labels, show=True):
    h, c, v = metrics.homogeneity_completeness_v_measure(true_labels, pred_labels)
    nmi = metrics.normalized_mutual_info_score(true_labels, pred_labels)
    rand = metrics.adjusted_rand_score(true_labels, pred_labels)
    pred_labels_mapped = map_label(true_labels, pred_labels)
    acc = metrics.accuracy_score(true_labels, pred_labels_mapped)
    if show:
        print("Homogeneity: %0.3f" % h)
        print("Completeness: %0.3f" % c)
        print("V-measure: %0.3f" % v)
        print("NMI: %0.3f" % nmi)
        print("Rand score: %0.3f" % rand)
        print("Accuracy: %0.3f" % acc)
    return dict(
        homogeneity=h,
        completeness=c,
        vmeasure=v,
        nmi=nmi,
        rand=rand,
        accuracy=acc,
    )
y=[]
with open('target.txt','r') as f:
    for i in f.readlines():
        y.append(int(i))
    
true_labels = np.array(y)
n_clusters = len(np.unique(y))
print("Number of classes: %d" % n_clusters)
km = KMeans(n_clusters=n_clusters, n_jobs=20)
result = dict()
#input2 = normalize(embed, norm='l2')
km.fit(simple)
pred = km.labels_
print(pred)
a = {'deep': cluster_quality(true_labels, pred)}'''



  
  
