In [193]:
import numpy as np
from collections import defaultdict
%matplotlib nbagg
import random
import matplotlib.pyplot as plt
import dynet
import torch
import torch.nn.functional as F


In [194]:
data_path = 'data/en.pos.train'
sentences = open(data_path, 'r').read().strip().split('\n\n')

In [195]:
word_count, tags = defaultdict(int), set()
for sentence in sentences:
    lines = sentence.strip().split('\n')
    for line in lines:
        word, tag = line.strip().split('\t')
        word_count[word] += 1
        tags.add(tag)
tags = list(tags)

In [196]:
words = [word for word in word_count.keys() if word_count[word]>1]

In [197]:
words = ['<UNK>', '<s>', '</s>'] + words
feat_tags = ['<s>'] + tags
output_tags = tags

In [198]:
word_dict = {word: i for i, word in enumerate(words)}
feat_tags_dict = {tag: i for i, tag in enumerate(feat_tags)}
output_tag_dict = {tag: i for i, tag in enumerate(output_tags)}

In [199]:
def tagid2tag_str(id):
    return output_tags[id]

def tag2id(tag):
    return output_tag_dict[tag]

def feat_tag2id(tag):
    return feat_tags_dict[tag]

def word2id(word):
    return word_dict[word] if word in word_dict else word_dict['<UNK>']

def num_words():
    return len(words)

def num_tag_feats():
    return len(feat_tags)

def num_tags():
    return len(output_tags)

In [237]:
sens = open(data_path, 'r').read().strip().split('\n\n')
writer = open(data_path+'.data', 'w')

for sen in sens:
    lines = sen.strip().split('\n')
    ws, ts = ['<s>', '<s>'], ['<s>', '<s>']
    for line in lines:
        word, tag = line.strip().split()
        ws.append(word)
        ts.append(tag)
    ws += ['</s>', '</s>']

    for i in range(len(lines)):
        feats = [ws[i], ws[i + 1], ws[i + 2], ws[i + 3], ws[i + 4], ts[i], ts[i + 1]]
        label = ts[i + 2]
        writer.write('\t'.join(feats) + '\t' + label + '\n')
writer.close()


In [263]:
word_embed_dim, pos_embed_dim = 100, 100

word_embedding = torch.normal(torch.zeros(len(words),word_embed_dim))
tag_embedding=torch.normal(torch.zeros(len(feat_tags), pos_embed_dim))
radii_we=torch.rand(len(words))
radii_te=torch.rand(len(feat_tags))
we_norm=torch.sum(word_embedding**2,dim=1)
te_norm=torch.sum(tag_embedding**2,dim=1)
correction_we=radii_we/torch.sqrt(we_norm)
correction_te=radii_te/torch.sqrt(te_norm)
corr_tile_we=correction_we.repeat(word_embed_dim,1)
pre_word_embedding=corr_tile_we.transpose(0,1)*word_embedding
hyp_word_embedding=pre_word_embedding.clone().detach().requires_grad_(True)

corr_tile_te=correction_te.repeat(pos_embed_dim,1)
pre_tag_embedding=corr_tile_te.transpose(0,1)*tag_embedding
hyp_tag_embedding=pre_tag_embedding.clone().detach().requires_grad_(True)


In [264]:
def arccosh(x):
    c0 = torch.log(x)
    c1 = torch.log1p(torch.sqrt(x * x - 1) / x)
    return c0 + c1

def hyp_dist(u,v):
    de=2*(torch.sum((u-v)**2,dim=1))/((1-torch.sum(u**2,dim=1))*(1-torch.sum(v**2,dim=1)))
    dist=arccosh(1+de)
        
    return dist

In [272]:
def forward(features):
    
   # extract word and tags ids
    word_ids = [word2id(word_feat) for word_feat in features[0:5]]
    tag_ids = [feat_tag2id(tag_feat) for tag_feat in features[5:]]
    
    """
    # consider outputs for each position and sum them all together
    hyp_con=torch.cat((hyp_word_embedding[word_ids[0]],hyp_word_embedding[word_ids[1]],hyp_word_embedding[word_ids[2]],hyp_word_embedding[word_ids[3]],hyp_word_embedding[word_ids[4]],hyp_tag_embedding[tag_ids[0]],hyp_tag_embedding[tag_ids[1]]),0)
    hyp_conc=hyp_con/(torch.sum(hyp_con**2)+1e-2)
    c=hyp_conc.repeat(hidden_dim,1)
    de=1+2*(torch.sum((hidden_layer-c)**2,dim=1))/((1-torch.sum(hidden_layer**2,dim=1))*(1-torch.sum(c**2,dim=1)))
    print(de)
    hyp_distance=torch.log(de)+torch.log1p(torch.sqrt(de**2-1)/de)
    print(hyp_distance)
    
    hidden_out=F.relu(1-hyp_distance)
    print(hidden_out)
    output=output_layers@hidden_out+output_bias
    print(output)
        
    
    """
    c1=hyp_tag_embedding[tag_ids[0]].repeat(len(feat_tags),1)
    c2=hyp_word_embedding[word_ids[2]].repeat(len(feat_tags),1)
    dist1=hyp_dist(c1,hyp_tag_embedding)
    dist2=hyp_dist(c2,hyp_tag_embedding)
    loss_func=torch.nn.LogSoftmax(dim=0)
    soft1=loss_func(-dist1)
    soft2=loss_func(-dist2)
    output=-(soft1+soft2)
    
    
    # return a list of outputs
    return output
    

In [273]:
def decode(ws):
   # first putting two start symbols
    ws = ['<s>', '<s>'] + ws + ['</s>', '</s>']
    ts = ['<s>', '<s>']

    for i in range(2, len(ws) - 2):
        features = ws[i - 2:i + 3] + ts[i - 2:i]

       # running forward
        dist1,dist2,dist3 = forward(features)
        softmax=torch.nn.Softmax()
        soft1=softmax(-dist1)
        soft2=softmax(-dist2)
        output=soft1*soft2
        

       # getting best tag
        best_tag_id = torch.argmax(output)

       # assigning the best tag
        ts.append(tagid2tag_str(best_tag_id))

    return ts[2:]

In [274]:
train_data_start = open(data_path+'.data', 'r').read().strip().split('\n')
print(len(train_data_start))


280551


In [275]:
train_data=train_data_start[:8000]
minibatch_size=1000






In [276]:
def train_iter(train_data,hyp_word_embedding,hyp_tag_embedding):
        losses = [] # minibatch loss vector
        random.shuffle(train_data) # shuffle the training data.
        loss_values=[]

        for line in train_data:
            fields = line.strip().split('\t')
            with torch.autograd.detect_anomaly():
                features, label, gold_label = fields[:-1], fields[-1], tag2id(fields[-1])
                output = forward(features)
            
                
            
            


            # getting loss with respect to negative log softmax function and the gold label; and appending to the minibatch losses.
                loss = output[gold_label]
                loss.backward()
            losses.append(loss)

            if len(losses) >= minibatch_size:
                minibatch_loss_value= sum(losses) / len(losses) 
                print(minibatch_loss_value)
                


                # printing info and plotting
                loss_values.append(minibatch_loss_value)

                                                    
                minibatch_loss_value.backward(retain_graph=True) # calling pytorch to run backpropagation
                
                    
                hwe_norm_sq=torch.sum(hyp_word_embedding.data**2,dim=1)
                hwe_var=1e-2*(((1-hwe_norm_sq.repeat(word_embed_dim,1).transpose(0,1))**2)/4)*hyp_word_embedding.grad.data
                hyp_word_embedding.data=hyp_word_embedding.data-hwe_var
                if torch.max(torch.sum(hyp_word_embedding.data**2,dim=1))>1:
                    hyp_word_embedding.data=hyp_word_embedding.data/(torch.sum(hyp_word_embedding.data**2,dim=1)+1e-5)
                hyp_word_embedding.grad.data.zero_()
                hte_norm_sq=torch.sum(hyp_tag_embedding.data**2,dim=1)
                hte_var=1e-2*(((1-hte_norm_sq.repeat(pos_embed_dim,1).transpose(0,1))**2)/4)*hyp_tag_embedding.grad.data
                hyp_tag_embedding.data=hyp_tag_embedding.data-hte_var
                if torch.max(torch.sum(hyp_tag_embedding.data**2,dim=1))>1:
                    hyp_tag_embedding.data=hyp_tag_embedding.data/(torch.sum(hyp_tag_embedding.data**2,dim=1)+1e-5)
                hyp_tag_embedding.grad.data.zero_()

                

                


                
                

                # empty the loss vector and refresh the memory of dynetnet
                losses = []
            


In [277]:
def load(filename):
    model.populate(filename)

def save(filename):
    model.save(filename)

In [278]:
for i in range(5):
    print('epoch',i+1) 
    train_iter(train_data,hyp_word_embedding,hyp_tag_embedding)
    
print('finished training!') 

epoch 1


  File "/Users/francescopreta/anaconda3/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/francescopreta/anaconda3/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/francescopreta/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/francescopreta/anaconda3/lib/python3.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/francescopreta/anaconda3/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/Users/francescopreta/anaconda3/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 148, in start
    self.asyncio_loop.run_forever()
  File "/Users/francescopreta/anaconda3/lib/python3.7/asyncio/base_events.py", line 539, in run_forever
    self._run_once()
  File "/Users/francescopreta/anaconda3/lib/python3.7/as

RuntimeError: Function 'DivBackward0' returned nan values in its 1th output.

In [37]:
test_file = 'data/en.pos.dev.raw'
writer = open(test_file+'.output', 'w')
for sentence in open(test_file, 'r'):
    words = sentence.strip().split()
    tags = decode(words)
    output = [word + '\t' + tag for word, tag in zip(words, tags)]
    writer.write('\n'.join(output) + '\n\n')
writer.close()

AttributeError: 'tuple' object has no attribute 'contiguous'