In [1]:
from __future__ import print_function
from collections import OrderedDict

import torch
import torch.nn as nn
from torch.nn import init
from torch.autograd import Variable
from torch import autograd

import time
import _pickle as cPickle

import urllib
import matplotlib.pyplot as plt
plt.rcParams['figure.dpi'] = 80
plt.style.use('seaborn-pastel')

import os
import sys
import codecs
import re
import numpy as np

In [12]:
# Parameters for the model.
# Everything a reader may want to tune lives in this ordered dict; the dict
# itself is pickled next to the mappings in a later cell.
parameters = OrderedDict()
parameters['train'] = "../data/eng.train" #Path to train file
parameters['dev'] = "../data/eng.testa" #Path to dev file
parameters['test'] = "../data/eng.testb" #Path to test file
parameters['tag_scheme'] = "BIOES" #BIO or BIOES
parameters['lower'] = True # Boolean variable to control lowercasing of words
parameters['zeros'] =  True # Boolean variable to control replacement of all digits by 0
# NOTE(review): the visible model-construction cell does not pass char_dim to
# BiLSTM_CRF, so the class default (25) appears to be used instead - confirm.
parameters['char_dim'] = 30 #Char embedding dimension
parameters['word_dim'] = 100 #Token embedding dimension
parameters['word_lstm_dim'] = 200 #Token LSTM hidden layer size
parameters['word_bidirect'] = True #Use a bidirectional LSTM for words
parameters['embedding_path'] = "../data/glove.6B.100d.txt" #Location of pretrained embeddings
parameters['all_emb'] = 1 #Load all embeddings
parameters['crf'] =1 #Use CRF (0 to disable)
parameters['dropout'] = 0.5 #Dropout on the input (0 = no dropout)
parameters['epoch'] =  50 #Number of epochs to run
parameters['weights'] = "" #Path to pretrained weights from a previous run
parameters['name'] = "self-trained-model" # Model name
parameters['gradient_clip']=5.0 # Gradient clipping threshold (copied into gradient_clip in the optimizer cell)
parameters['char_mode']="CNN" # Character-level encoder: "CNN" or "LSTM"
models_path = "../models/" #path to saved models

#GPU
parameters['use_gpu'] = torch.cuda.is_available() #GPU Check
use_gpu = parameters['use_gpu']

# Checkpoint to load in the reload cell; a falsy value skips reloading.
parameters['reload'] = "../models/pre-trained-model" 

#Constants
# Artificial tags added to the tag set to mark sequence start / end for the CRF.
START_TAG = '<START>'
STOP_TAG = '<STOP>'

In [13]:
# Paths to files
# Where the word / char / tag mappings are pickled
mapping_file = '../data/mapping.pkl'

# Where the trained model is stored
name = parameters['name']
model_name = models_path + name #get_name(parameters)

# exist_ok=True creates the directory only when it is missing, without a
# separate exists() check that could race with another process.
os.makedirs(models_path, exist_ok=True)

In [14]:
def zero_digits(s):
    """
    Replace every digit in a string by a zero.

    Args:
        s: input string.

    Returns:
        A copy of ``s`` in which each character matched by the regex ``\\d``
        is replaced with ``'0'``.
    """
    # Raw string: '\d' inside a normal literal is an invalid escape sequence
    # and triggers a warning on recent Python versions.
    return re.sub(r'\d', '0', s)

def load_sentences(path, zeros):
    """
    Load sentences. A line must contain at least a word and its tag.
    Sentences are separated by empty lines.

    Args:
        path: path of a UTF-8 text file with one whitespace-separated token
            record per line.
        zeros: when truthy, every digit of each line is replaced by '0'
            (via ``zero_digits``) before the line is split.

    Returns:
        A list of sentences; each sentence is a list of token records and
        each record is the list of fields found on its line. Sentences whose
        first field contains 'DOCSTART' are dropped.

    Raises:
        AssertionError: if a non-empty line has fewer than two fields.
    """
    sentences = []
    sentence = []
    # The context manager closes the file even if a malformed line raises.
    with codecs.open(path, 'r', 'utf8') as handle:
        for raw_line in handle:
            line = raw_line.rstrip()
            if zeros:
                line = zero_digits(line)
            if line:
                word = line.split()
                assert len(word) >= 2
                sentence.append(word)
                continue
            # Blank line: close the sentence collected so far, if any.
            if len(sentence) > 0:
                if 'DOCSTART' not in sentence[0][0]:
                    sentences.append(sentence)
                sentence = []
    # The file may not end with a blank line; flush the trailing sentence.
    if len(sentence) > 0 and 'DOCSTART' not in sentence[0][0]:
        sentences.append(sentence)
    return sentences

In [15]:
# Read the three data splits with the same digit handling. The generator is
# consumed in order, so the files are read as train, test, dev.
train_sentences, test_sentences, dev_sentences = (
    load_sentences(parameters[split], parameters['zeros'])
    for split in ('train', 'test', 'dev')
)

## Word, Character and Tag Mappings

In [16]:
def create_dico(item_list):
    """
    Count how often each item occurs across a list of item sequences.

    Args:
        item_list: a ``list`` (exactly that type) whose elements are iterables
            of hashable items.

    Returns:
        dict mapping each item to its number of occurrences, with keys in
        order of first appearance.
    """
    assert type(item_list) is list
    counts = {}
    for sequence in item_list:
        for element in sequence:
            counts[element] = counts.get(element, 0) + 1
    return counts

def create_mapping(dico):
    """
    Build item <-> id lookup tables from a frequency dictionary.

    Ids are assigned by decreasing count; items with equal counts are
    ordered by the items themselves (ascending).

    Returns:
        (item_to_id, id_to_item)
    """
    ranked = sorted(dico, key=lambda item: (-dico[item], item))
    id_to_item = dict(enumerate(ranked))
    item_to_id = {item: idx for idx, item in enumerate(ranked)}
    return item_to_id, id_to_item

def word_mapping(sentences, lower):
    """
    Build the word frequency dictionary and the word <-> id mappings.

    The first field of every token record is taken as the word, lower-cased
    when ``lower`` is truthy. An '<UNK>' entry with a very large count is
    added so that it sorts to the front of the mapping.

    Returns:
        (dico, word_to_id, id_to_word)
    """
    if lower:
        words = [[token[0].lower() for token in sent] for sent in sentences]
    else:
        words = [[token[0] for token in sent] for sent in sentences]
    dico = create_dico(words)
    dico['<UNK>'] = 10000000 #UNK tag for unknown words
    word_to_id, id_to_word = create_mapping(dico)
    total_tokens = sum(len(x) for x in words)
    print("Found %i unique words (%i in total)" % (len(dico), total_tokens))
    return dico, word_to_id, id_to_word

def char_mapping(sentences):
    """
    Build the character frequency dictionary and the char <-> id mappings.

    The words of each sentence (first field of every token record, original
    casing) are concatenated into one string, so the characters of the whole
    sentence are counted together.

    Returns:
        (dico, char_to_id, id_to_char)
    """
    chars = []
    for sent in sentences:
        chars.append("".join(token[0] for token in sent))
    dico = create_dico(chars)
    char_to_id, id_to_char = create_mapping(dico)
    print("Found %i unique characters" % len(dico))
    return dico, char_to_id, id_to_char

def tag_mapping(sentences):
    """
    Build the tag frequency dictionary and the tag <-> id mappings.

    The last field of every token record is taken as its tag. START_TAG and
    STOP_TAG are added with negative counts, which places them after every
    real tag in the frequency-sorted mapping.

    Returns:
        (dico, tag_to_id, id_to_tag)
    """
    tags = []
    for sent in sentences:
        tags.append([token[-1] for token in sent])
    dico = create_dico(tags)
    dico[START_TAG] = -1
    dico[STOP_TAG] = -2
    tag_to_id, id_to_tag = create_mapping(dico)
    print("Found %i unique named entity tags" % len(dico))
    return dico, tag_to_id, id_to_tag

In [17]:
# Build the word, character and tag vocabularies from the training split only.
dico_words,word_to_id,id_to_word = word_mapping(train_sentences, parameters['lower'])
dico_chars, char_to_id, id_to_char = char_mapping(train_sentences)
dico_tags, tag_to_id, id_to_tag = tag_mapping(train_sentences)

Found 17493 unique words (203621 in total)
Found 75 unique characters
Found 10 unique named entity tags


## Prepare Dataset

In [18]:
def lower_case(x,lower=False):
    """Return ``x`` lower-cased when ``lower`` is truthy, otherwise ``x`` unchanged."""
    return x.lower() if lower else x

In [19]:
def prepare_dataset(sentences, word_to_id, char_to_id, tag_to_id, lower=False):
    """
    Convert raw sentences into index form.

    Returns one dictionary per sentence with the keys:
        - 'str_words': the original word strings
        - 'words': word ids (words missing from ``word_to_id`` map to '<UNK>')
        - 'chars': one list of character ids per word
        - 'tags': tag ids

    Word lookup uses the lower-cased word when ``lower`` is truthy; character
    lookup always uses the original word string.
    """
    data = []
    for s in sentences:
        str_words = [token[0] for token in s]

        word_ids = []
        for word in str_words:
            key = lower_case(word, lower)
            if key not in word_to_id:
                key = '<UNK>'
            word_ids.append(word_to_id[key])

        # Characters that never occurred in the training set are dropped.
        char_ids = [
            [char_to_id[ch] for ch in word if ch in char_to_id]
            for word in str_words
        ]
        tag_ids = [tag_to_id[token[-1]] for token in s]

        entry = {
            'str_words': str_words,
            'words': word_ids,
            'chars': char_ids,
            'tags': tag_ids,
        }
        data.append(entry)
    return data

# Index all three splits with the vocabularies built from the training data,
# applying the same lower-casing setting to each.
train_data = prepare_dataset(
    train_sentences, word_to_id, char_to_id, tag_to_id, parameters['lower']
)
dev_data = prepare_dataset(
    dev_sentences, word_to_id, char_to_id, tag_to_id, parameters['lower']
)
test_data = prepare_dataset(
    test_sentences, word_to_id, char_to_id, tag_to_id, parameters['lower']
)
print("{} / {} / {} sentences in train / dev / test.".format(len(train_data), len(dev_data), len(test_data)))

14041 / 3250 / 3453 sentences in train / dev / test.


In [20]:
# Show the first three prepared training sentences as a sanity check.
train_data[:3]

[{'str_words': ['EU',
   'rejects',
   'German',
   'call',
   'to',
   'boycott',
   'British',
   'lamb',
   '.'],
  'words': [944, 15473, 198, 590, 8, 3848, 207, 6233, 2],
  'chars': [[31, 48],
   [6, 0, 52, 0, 12, 2, 7],
   [42, 0, 6, 14, 1, 3],
   [12, 1, 9, 9],
   [2, 5],
   [21, 5, 19, 12, 5, 2, 2],
   [36, 6, 4, 2, 4, 7, 11],
   [9, 1, 14, 21],
   [18]],
  'tags': [2, 0, 4, 0, 0, 0, 4, 0, 0]},
 {'str_words': ['Peter', 'Blackburn'],
  'words': [741, 1797],
  'chars': [[40, 0, 2, 0, 6], [36, 9, 1, 12, 27, 21, 13, 6, 3]],
  'tags': [1, 1]},
 {'str_words': ['BRUSSELS', '0000-00-00'],
  'words': [678, 23],
  'chars': [[36, 33, 48, 25, 25, 31, 37, 25],
   [8, 8, 8, 8, 22, 8, 8, 22, 8, 8]],
  'tags': [3, 0]}]

## Word Embeddings

In [21]:
# Read the pretrained vectors: each usable line is "<token> <v1> ... <vN>"
# with N == parameters['word_dim']; lines with any other field count are skipped.
all_word_embeds = {}
# The context manager closes the (large) embedding file once it has been read.
with codecs.open(parameters['embedding_path'], 'r', 'utf-8') as embedding_file:
    for line in embedding_file:
        s = line.strip().split()
        if len(s) == parameters['word_dim'] + 1:
            all_word_embeds[s[0]] = np.array([float(value) for value in s[1:]])

# Initializing the word embedding matrix with small uniform noise; rows for
# words that have a pretrained vector are overwritten below.
word_embeds = np.random.uniform(-np.sqrt(0.06), np.sqrt(0.06), (len(word_to_id), parameters['word_dim']))

for w in word_to_id:
    if w in all_word_embeds:
        word_embeds[word_to_id[w]] = all_word_embeds[w]
    elif w.lower() in all_word_embeds:
        # Fall back to the lower-cased form of the word.
        word_embeds[word_to_id[w]] = all_word_embeds[w.lower()]

print('Loaded %i pretrained embeddings.' % len(all_word_embeds))

Loaded 400000 pretrained embeddings.


In [22]:
# Persist the vocabularies, the parameter dict and the embedding matrix in a
# single pickle at mapping_file.
with open(mapping_file, 'wb') as f:
    mappings = {
        'word_to_id': word_to_id,
        'tag_to_id': tag_to_id,
        'char_to_id': char_to_id,
        'parameters': parameters,
        'word_embeds': word_embeds
    }
    cPickle.dump(mappings, f)

print('word_to_id: ', len(word_to_id))

word_to_id:  17493


## Model

In [22]:
def init_embedding(input_embedding):
    """
    Initialize an embedding weight tensor in place.

    Values are drawn uniformly from [-b, b] with
    b = sqrt(3 / input_embedding.size(1)).

    Args:
        input_embedding: 2D tensor (or Parameter) to fill.
    """
    bound = np.sqrt(3.0 / input_embedding.size(1))
    # uniform_ is the in-place initializer; the un-suffixed name is deprecated.
    nn.init.uniform_(input_embedding, -bound, bound)

In [23]:
def init_linear(input_linear):
    """
    Initialize a linear layer in place.

    The weight is drawn uniformly from [-b, b] with
    b = sqrt(6 / (weight.size(0) + weight.size(1))); the bias, when the layer
    has one, is set to zero.

    Args:
        input_linear: module exposing ``weight`` (2D) and ``bias`` (or None).
    """
    bound = np.sqrt(6.0 / (input_linear.weight.size(0) + input_linear.weight.size(1)))
    # uniform_ is the in-place initializer; the un-suffixed name is deprecated.
    nn.init.uniform_(input_linear.weight, -bound, bound)
    if input_linear.bias is not None:
        input_linear.bias.data.zero_()

In [24]:
def init_lstm(input_lstm):
    """
    Initialize an nn.LSTM in place.

    Weights: every input-hidden (``weight_ih_l{k}``) and hidden-hidden
    (``weight_hh_l{k}``) matrix, including the ``_reverse`` ones of a
    bidirectional LSTM, is drawn uniformly from [-r, r] with
    r = sqrt(6 / (weight.size(0) / 4 + weight.size(1))). PyTorch stacks the
    four gate matrices along dimension 0, hence the division by 4.

    Biases (only when the LSTM was built with bias=True): set to zero except
    the slice [hidden_size : 2 * hidden_size], which is set to 1. In
    PyTorch's gate layout (input, forget, cell, output) that slice is the
    forget gate.

    Args:
        input_lstm: the nn.LSTM module to initialize.
    """
    hidden = input_lstm.hidden_size
    # '' addresses the forward direction, '_reverse' the backward one.
    suffixes = ['', '_reverse'] if input_lstm.bidirectional else ['']

    # Direction is the outer loop so that the parameters are visited (and
    # random numbers consumed) forward-first, then backward.
    for suffix in suffixes:
        for ind in range(input_lstm.num_layers):
            for prefix in ('weight_ih_l', 'weight_hh_l'):
                # getattr looks the parameter up by name without evaluating code.
                weight = getattr(input_lstm, prefix + str(ind) + suffix)
                sampling_range = np.sqrt(6.0 / (weight.size(0) / 4 + weight.size(1)))
                nn.init.uniform_(weight, -sampling_range, sampling_range)

    if input_lstm.bias:
        for suffix in suffixes:
            for ind in range(input_lstm.num_layers):
                for prefix in ('bias_ih_l', 'bias_hh_l'):
                    bias = getattr(input_lstm, prefix + str(ind) + suffix)
                    bias.data.zero_()
                    # Forget-gate slice starts at 1 so the cell initially retains memory.
                    bias.data[hidden: 2 * hidden] = 1

#### Helper functions for the CRF layer: a CRF scores whole tag sequences, so unlike a per-token softmax it can take neighbouring tags into account

In [25]:
def log_sum_exp(vec):
    '''
    Numerically stable log(sum(exp(vec))) for a single row of scores.

    vec 2D: 1 * tagset_size

    The largest entry of the row is subtracted before exponentiating and
    added back afterwards.
    '''
    peak = vec[0, argmax(vec)]
    shifted = vec - peak.view(1, -1).expand(1, vec.size()[1])
    return peak + torch.log(torch.sum(torch.exp(shifted)))
    
def argmax(vec):
    '''
    Return, as a Python number, the index of the largest value along
    dimension 1 for the first row of ``vec``.
    '''
    best_index = torch.max(vec, 1)[1]
    return to_scalar(best_index)

def to_scalar(var):
    '''
    Return the first element of a tensor (after flattening) as a Python number.
    '''
    flat = var.view(-1).data
    return flat[0].item()

In [26]:
def score_sentences(self, feats, tags):
    """
    Score one sentence under a given (gold) tag sequence.

    feats: 2D tensor, len(sentence) * tagset_size (emission scores)
    tags: LongTensor of tag ids, length len(sentence)

    The score is the sum of the transition scores along
    START -> tags[0] -> ... -> tags[-1] -> STOP plus the emission score of
    the given tag at every position.
    """
    start_id = self.tag_to_ix[START_TAG]
    stop_id = self.tag_to_ix[STOP_TAG]

    positions = torch.LongTensor(range(feats.size()[0]))
    if self.use_gpu:
        positions = positions.cuda()
        make_index = torch.cuda.LongTensor
    else:
        make_index = torch.LongTensor

    # previous-tag sequence (START prepended) and current-tag sequence (STOP appended)
    pad_start_tags = torch.cat([make_index([start_id]), tags])
    pad_stop_tags = torch.cat([tags, make_index([stop_id])])

    # transitions is indexed [to, from]
    transition_score = torch.sum(self.transitions[pad_stop_tags, pad_start_tags])
    emission_score = torch.sum(feats[positions, tags])
    return transition_score + emission_score

In [27]:
def forward_alg(self, feats):
    '''
    Run the CRF forward algorithm in log space and return the log of the
    partition function Z(x), i.e. the log-sum-exp over the scores of all
    possible tag sequences for one sentence.

    feats: emission scores, iterated row by row; each row holds one score
        per tag (len(sentence) * tagset_size).
    Returns: the value computed by log_sum_exp over the final scores.
    '''
    # All computation is done in the log domain.
    # Every tag starts at -10000 (used as a stand-in for log(0)) ...
    init_alphas = torch.Tensor(1, self.tagset_size).fill_(-10000.)
    
    # ... except START_TAG, which holds all of the initial score.
    init_alphas[0][self.tag_to_ix[START_TAG]] = 0.
    
    # Wrap in a Variable and move it to the GPU when configured.
    forward_var = autograd.Variable(init_alphas)
    if self.use_gpu:
        forward_var = forward_var.cuda()
        
    # Iterate through the sentence, one token (row of feats) at a time.
    for feat in feats:
        # Column vector (tagset_size, 1): the emission score of each *next*
        # tag, broadcast over all previous tags.
        emit_score = feat.view(-1, 1)
        
        # tag_var[i, j] = forward_var[0, j] + transitions[i, j] + emission[i]:
        # the score of reaching tag i at this step coming from previous tag j.
        tag_var = forward_var + self.transitions + emit_score
        
        # Largest entry of each row (over previous tags j); subtracted below
        # so that the exponentials cannot overflow.
        max_tag_var, _ = torch.max(tag_var, dim=1)
        
        # Shift each row by its maximum.
        tag_var = tag_var - max_tag_var.view(-1, 1)
        
        # Stable log-sum-exp over previous tags; the result is reshaped to
        # (1, tagset_size) and becomes the forward variable of the next step.
        forward_var = max_tag_var + torch.log(torch.sum(torch.exp(tag_var), dim=1)).view(1, -1)
    # Add the transition scores into STOP_TAG from every tag.
    terminal_var = (forward_var + self.transitions[self.tag_to_ix[STOP_TAG]]).view(1, -1)
    alpha = log_sum_exp(terminal_var)
    # Z(x)
    return alpha

In [28]:
def viterbi_algo(self, feats):
    '''
    Decode the highest-scoring tag sequence for one sentence with the
    Viterbi dynamic program.

    feats: emission scores, iterated row by row (one row per token, one
        score per tag).
    Returns: (path_score, best_path) where best_path is a list with one tag
        id per row of feats and path_score is the terminal score of the
        selected final tag.
    Raises: AssertionError if backtracking does not end at START_TAG.
    '''
    backpointers = []
    # The recursion mirrors the forward algorithm, with max in place of log-sum-exp.
    
    # Initialize the viterbi variables in log space: only START_TAG is reachable.
    init_vvars = torch.Tensor(1, self.tagset_size).fill_(-10000.)
    init_vvars[0][self.tag_to_ix[START_TAG]] = 0
    
    # forward_var at step i holds the viterbi variables for step i-1
    forward_var = Variable(init_vvars)
    if self.use_gpu:
        forward_var = forward_var.cuda()
    for feat in feats:
        # next_tag_var[i, j] = forward_var[j] + transitions[i, j]:
        # best score so far ending in previous tag j, then moving to tag i.
        next_tag_var = forward_var.view(1, -1).expand(self.tagset_size, self.tagset_size) + self.transitions
        # For every next tag i, the previous tag j with the highest score.
        _, bptrs_t = torch.max(next_tag_var, dim=1)
        bptrs_t = bptrs_t.squeeze().data.cpu().numpy() # holds the backpointers for this step
        # The scores are copied to numpy here, so decoding is detached from autograd.
        next_tag_var = next_tag_var.data.cpu().numpy() 
        viterbivars_t = next_tag_var[range(len(bptrs_t)), bptrs_t] # holds the viterbi variables for this step
        viterbivars_t = Variable(torch.FloatTensor(viterbivars_t))
        if self.use_gpu:
            viterbivars_t = viterbivars_t.cuda()
            
        # Now add in the emission scores, and assign forward_var to the set
        # of viterbi variables we just computed
        forward_var = viterbivars_t + feat
        backpointers.append(bptrs_t)

    # Transition to STOP_TAG
    terminal_var = forward_var + self.transitions[self.tag_to_ix[STOP_TAG]]
    # STOP_TAG and START_TAG must never be chosen as the tag of the last token.
    terminal_var.data[self.tag_to_ix[STOP_TAG]] = -10000.
    terminal_var.data[self.tag_to_ix[START_TAG]] = -10000.
    best_tag_id = argmax(terminal_var.unsqueeze(0))
    path_score = terminal_var[best_tag_id]
    
    # Follow the back pointers (last step first) to decode the best path.
    best_path = [best_tag_id]
    for bptrs_t in reversed(backpointers):
        best_tag_id = bptrs_t[best_tag_id]
        best_path.append(best_tag_id)
        
    # Pop off the start tag (we dont want to return that to the caller)
    start = best_path.pop()
    assert start == self.tag_to_ix[START_TAG] # Sanity check
    # The path was collected back to front.
    best_path.reverse()
    return path_score, best_path

In [29]:
def forward_calc(self, sentence, chars, chars2_length, d):
    '''
    Predict the tag sequence for one sentence.

    The emission scores come from ``self._get_lstm_features``. With the CRF
    enabled the sequence is decoded by ``self.viterbi_decode``; otherwise the
    highest-scoring tag is picked independently for every token.

    Returns: (score, tag_seq)
    '''
    feats = self._get_lstm_features(sentence, chars, chars2_length, d)

    if not self.use_crf:
        # Per-token argmax over the tag dimension.
        score, best_tags = torch.max(feats, 1)
        return score, list(best_tags.cpu().data)

    # Find the best path, given the features.
    score, tag_seq = self.viterbi_decode(feats)
    return score, tag_seq

In [30]:
def get_lstm_features(self, sentence, chars2, chars2_length, d):
    """
    Compute the emission scores (one row per word, one column per tag) for
    one sentence.

    sentence: tensor of word ids for the sentence.
    chars2: padded character ids, one row per word - presumably of shape
        (n_words, max_word_length); TODO confirm against the caller.
    chars2_length: per-word character counts (only used in 'LSTM' mode).
    d: in 'LSTM' mode, maps a row index of the length-sorted ``chars2`` to
        the word's position in the sentence; not read in 'CNN' mode.

    Returns: the output of ``self.hidden2tag``, a 2D tensor with
        len(sentence) rows.
    """
    
    if self.char_mode == 'LSTM':
        
            # Embed the characters and put the time dimension first.
            chars_embeds = self.char_embeds(chars2).transpose(0, 1)
            
            packed = torch.nn.utils.rnn.pack_padded_sequence(chars_embeds, chars2_length)
            
            lstm_out, _ = self.char_lstm(packed)
            
            outputs, output_lengths = torch.nn.utils.rnn.pad_packed_sequence(lstm_out)
            
            # Back to one row per word.
            outputs = outputs.transpose(0, 1)
            
            chars_embeds_temp = Variable(torch.FloatTensor(torch.zeros((outputs.size(0), outputs.size(2)))))
            
            if self.use_gpu:
                chars_embeds_temp = chars_embeds_temp.cuda()
            
            # Word representation = first char_lstm_dim features at the last
            # valid time step, concatenated with the remaining features at
            # time step 0.
            # NOTE(review): self.char_lstm_dim is not assigned anywhere in the
            # visible __init__ - confirm it is set before using 'LSTM' mode.
            for i, index in enumerate(output_lengths):
                chars_embeds_temp[i] = torch.cat((outputs[i, index-1, :self.char_lstm_dim], outputs[i, 0, self.char_lstm_dim:]))
            
            chars_embeds = chars_embeds_temp.clone()
            
            # Undo the length-sorting: row i of the sorted batch belongs to
            # word d[i] of the sentence.
            for i in range(chars_embeds.size(0)):
                chars_embeds[d[i]] = chars_embeds_temp[i]
    
    
    if self.char_mode == 'CNN':
        # Insert a channel dimension at index 1 for the 2D convolution.
        chars_embeds = self.char_embeds(chars2).unsqueeze(1)

        ## Creating Character level representation using Convolutional Neural Network
        ## followed by a Maxpooling Layer
        chars_cnn_out3 = self.char_cnn3(chars_embeds)
        # Max over the whole character axis, leaving out_channels features per word.
        chars_embeds = nn.functional.max_pool2d(chars_cnn_out3,
                                             kernel_size=(chars_cnn_out3.size(2), 1)).view(chars_cnn_out3.size(0), self.out_channels)

    ## Loading word embeddings
    embeds = self.word_embeds(sentence)

    ## We concatenate the word embeddings and the character level representation
    ## to create unified representation for each word
    embeds = torch.cat((embeds, chars_embeds), 1)

    # Add a dimension of size 1 at index 1 (the sentence is fed to the LSTM
    # as a single sequence).
    embeds = embeds.unsqueeze(1)

    ## Dropout on the unified embeddings
    embeds = self.dropout(embeds)

    ## Word lstm
    ## Takes words as input and generates a output at each step
    lstm_out, _ = self.lstm(embeds)

    ## Reshaping the outputs from the lstm layer to (len(sentence), 2 * hidden_dim)
    lstm_out = lstm_out.view(len(sentence), self.hidden_dim*2)

    ## Dropout on the lstm output
    lstm_out = self.dropout(lstm_out)

    ## Linear layer converts the output vectors to tag space
    lstm_feats = self.hidden2tag(lstm_out)
    
    return lstm_feats

In [31]:
def get_neg_log_likelihood(self, sentence, tags, chars2, chars2_length, d):
    """
    Training loss for one sentence.

    sentence, tags: tensors of word ids / gold tag ids
    The features are a 2D tensor, len(sentence) * self.tagset_size.

    With the CRF enabled the loss is the forward (partition) score minus the
    score of the gold tag sequence; otherwise it is the cross-entropy of the
    features against the gold tags.
    """
    feats = self._get_lstm_features(sentence, chars2, chars2_length, d)

    if not self.use_crf:
        gold = Variable(tags)
        return nn.functional.cross_entropy(feats, gold)

    forward_score = self._forward_alg(feats)
    gold_score = self._score_sentence(feats, tags)
    return forward_score - gold_score

In [32]:
class BiLSTM_CRF(nn.Module):
    """
    Sequence tagger: word embeddings concatenated with a character-level
    representation (CNN or LSTM), fed through a bidirectional LSTM and a
    linear layer into tag space, optionally followed by a CRF transition
    matrix.

    The scoring / decoding methods are plain functions defined earlier in
    the notebook and attached at the bottom of the class body.
    """

    def __init__(self, vocab_size, tag_to_ix, embedding_dim, hidden_dim,
                 char_to_ix=None, pre_word_embeds=None, char_out_dimension=25,char_embedding_dim=25, use_gpu=False
                 , use_crf=True, char_mode='CNN', char_lstm_dim=25):
        '''
        Input parameters:
                
                vocab_size= Size of vocabulary (int)
                tag_to_ix = Dictionary that maps NER tags to indices
                embedding_dim = Dimension of word embeddings (int)
                hidden_dim = The hidden dimension of the LSTM layer (int)
                char_to_ix = Dictionary that maps characters to indices
                pre_word_embeds = Array of pretrained word vectors used as the initial
                    embedding weights (expected: one row per word id)
                char_out_dimension = Output dimension from the CNN encoder for character
                char_embedding_dim = Dimension of the character embeddings
                use_gpu = defines availability of GPU, 
                    when True: CUDA function calls are made
                    else: Normal CPU function calls are made
                use_crf = parameter which decides if you want to use the CRF layer for output decoding
                char_mode = character encoder to use, 'CNN' or 'LSTM'
                char_lstm_dim = hidden size (per direction) of the character LSTM,
                    only used when char_mode is 'LSTM'

        Raises:
                ValueError if char_mode is neither 'CNN' nor 'LSTM'
        '''
        
        super(BiLSTM_CRF, self).__init__()

        # Fail early: any other value would leave self.lstm undefined below.
        if char_mode not in ('CNN', 'LSTM'):
            raise ValueError("char_mode must be 'CNN' or 'LSTM', got %r" % (char_mode,))
        
        #parameter initialization for the model
        self.use_gpu = use_gpu
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size
        self.tag_to_ix = tag_to_ix
        self.use_crf = use_crf
        self.tagset_size = len(tag_to_ix)
        self.out_channels = char_out_dimension
        self.char_mode = char_mode
        # Read by the feature extractor when slicing the character LSTM output.
        self.char_lstm_dim = char_lstm_dim

        if char_embedding_dim is not None:
            self.char_embedding_dim = char_embedding_dim
            
            #Initializing the character embedding layer
            self.char_embeds = nn.Embedding(len(char_to_ix), char_embedding_dim)
            init_embedding(self.char_embeds.weight)
            
            #Performing LSTM encoding on the character embeddings
            if self.char_mode == 'LSTM':
                self.char_lstm = nn.LSTM(char_embedding_dim, char_lstm_dim, num_layers=1, bidirectional=True)
                init_lstm(self.char_lstm)
                
            #Performing CNN encoding on the character embeddings
            if self.char_mode == 'CNN':
                self.char_cnn3 = nn.Conv2d(in_channels=1, out_channels=self.out_channels, kernel_size=(3, char_embedding_dim), padding=(2,0))

        #Creating Embedding layer with dimension of ( number of words * dimension of each word)
        self.word_embeds = nn.Embedding(vocab_size, embedding_dim)
        if pre_word_embeds is not None:
            #Initializes the word embeddings with pretrained word embeddings
            self.pre_word_embeds = True
            self.word_embeds.weight = nn.Parameter(torch.FloatTensor(pre_word_embeds))
        else:
            self.pre_word_embeds = False
    
        #Initializing the dropout layer; the rate is read from the notebook-level `parameters` dict
        self.dropout = nn.Dropout(parameters['dropout'])
        
        #Lstm Layer:
        #input dimension: word embedding dimension + character level representation
        #bidirectional=True, specifies that we are using the bidirectional LSTM
        if self.char_mode == 'LSTM':
            # The character LSTM is bidirectional, hence 2 * char_lstm_dim features per word
            self.lstm = nn.LSTM(embedding_dim+char_lstm_dim*2, hidden_dim, bidirectional=True)
        if self.char_mode == 'CNN':
            self.lstm = nn.LSTM(embedding_dim+self.out_channels, hidden_dim, bidirectional=True)
        
        #Initializing the lstm layer using predefined function for initialization
        init_lstm(self.lstm)
        
        # Linear layer which maps the output of the bidirectional LSTM into tag space.
        self.hidden2tag = nn.Linear(hidden_dim*2, self.tagset_size)
        
        #Initializing the linear layer using predefined function for initialization
        init_linear(self.hidden2tag) 

        if self.use_crf:
            # Matrix of transition parameters.  Entry i,j is the score of transitioning *to* i *from* j.
            # Matrix has a dimension of (total number of tags * total number of tags)
            self.transitions = nn.Parameter(
                torch.zeros(self.tagset_size, self.tagset_size))
            
            # These two statements enforce the constraint that we never transfer
            # to the start tag and we never transfer from the stop tag
            self.transitions.data[tag_to_ix[START_TAG], :] = -10000
            self.transitions.data[:, tag_to_ix[STOP_TAG]] = -10000

    #assigning the functions, which we have defined earlier
    _score_sentence = score_sentences
    _get_lstm_features = get_lstm_features
    _forward_alg = forward_alg
    viterbi_decode = viterbi_algo
    neg_log_likelihood = get_neg_log_likelihood
    forward = forward_calc

In [33]:
# Creating the model using the class defined above.
# Sizes come from the vocabularies and from `parameters`; arguments that are
# not passed here (e.g. the character embedding size) keep the class defaults.
model = BiLSTM_CRF(vocab_size=len(word_to_id),
                   tag_to_ix=tag_to_id,
                   embedding_dim=parameters['word_dim'],
                   hidden_dim=parameters['word_lstm_dim'],
                   use_gpu=use_gpu,
                   char_to_ix=char_to_id,
                   pre_word_embeds=word_embeds,
                   use_crf=parameters['crf'],
                   char_mode=parameters['char_mode'])
print("Model Initialized!!!")

Model Initialized!!!


  


In [34]:
#Reload a saved model, if parameters['reload'] is set to a path
if parameters['reload']:
    if not os.path.exists(parameters['reload']):
        # `import urllib` alone does not guarantee that the `request`
        # submodule is loaded, so import it explicitly before use.
        import urllib.request
        print("downloading pre-trained model")
        model_url="https://github.com/TheAnig/NER-LSTM-CNN-Pytorch/raw/master/trained-model-cpu"
        urllib.request.urlretrieve(model_url, parameters['reload'])
    # NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
    # map_location='cpu' lets a checkpoint saved on a GPU machine load on a
    # CPU-only one; the model is moved to the GPU below when one is available.
    # NOTE(review): the recorded run of this cell failed with a size mismatch
    # (19 tags in the checkpoint vs 10 in the current model). Presumably the
    # checkpoint was trained on BIOES-converted tags, while the visible cells
    # never apply parameters['tag_scheme'] - confirm and add the conversion
    # before building the tag mapping.
    model.load_state_dict(torch.load(parameters['reload'], map_location='cpu'))
    print("model reloaded :", parameters['reload'])

if use_gpu:
    model.cuda()

RuntimeError: Error(s) in loading state_dict for BiLSTM_CRF:
	size mismatch for transitions: copying a param with shape torch.Size([19, 19]) from checkpoint, the shape in current model is torch.Size([10, 10]).
	size mismatch for hidden2tag.weight: copying a param with shape torch.Size([19, 400]) from checkpoint, the shape in current model is torch.Size([10, 400]).
	size mismatch for hidden2tag.bias: copying a param with shape torch.Size([19]) from checkpoint, the shape in current model is torch.Size([10]).

In [35]:
#Initializing the optimizer
#The best results in the paper were achieved using stochastic gradient descent (SGD)
#learning rate=0.015 and momentum=0.9 
#decay_rate=0.05 

learning_rate = 0.015
momentum = 0.9
number_of_epochs = parameters['epoch'] 
decay_rate = 0.05
gradient_clip = parameters['gradient_clip']
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

#variables which will be used in the training process
losses = [] #list to store all losses
loss = 0.0 #Loss Initialization
best_dev_F = -1.0 # Current best F-1 Score on Dev Set
best_test_F = -1.0 # Current best F-1 Score on Test Set
best_train_F = -1.0 # Current best F-1 Score on Train Set
all_F = [[0, 0, 0]] # List storing all the F-1 Scores
eval_every = len(train_data) # Calculate F-1 Score after this many iterations
plot_every = 2000 # Store loss after this many iterations
count = 0 #Counts the number of iterations

In [36]:
def get_chunk_type(tok, idx_to_tag):
    """
    Split the tag name of a token id into its class prefix and entity type.

    The tag name is split on '-': the first piece is the class (e.g. "B")
    and the last piece is the type (e.g. "PER"). A name without '-' (such
    as "O") is returned as both class and type.

    Args:
        tok: id of token, ex 4
        idx_to_tag: dictionary {4: "B-PER", ...}

    Returns:
        tuple: "B", "PER"

    """
    pieces = idx_to_tag[tok].split('-')
    return pieces[0], pieces[-1]

In [37]:
def get_chunks(seq, tags):
    """Given a sequence of tags, group entities and their position

    A chunk is a maximal run of non-"O" tokens of the same entity type. A
    new chunk is also started by a "B" (begin) tag and by an "S" (single
    token entity, BIOES scheme) tag, so that adjacent entities of the same
    type stay separate.

    Args:
        seq: [4, 4, 0, 0, ...] sequence of labels
        tags: dict mapping tag names to ids; must contain "O"

    Returns:
        list of (chunk_type, chunk_start, chunk_end), with chunk_end exclusive

    Raises:
        KeyError: if "O" is missing from tags or seq holds an unknown id

    Example:
        seq = [4, 5, 0, 3]
        tags = {"O": 0, "B-PER": 4, "I-PER": 5, "B-LOC": 3}
        result = [("PER", 0, 2), ("LOC", 3, 4)]

    """
    
    # We assume by default the tags lie outside a named entity
    default = tags["O"]
    
    # Split every tag name once up front instead of once per token:
    # id -> (class prefix, entity type), e.g. 4 -> ("B", "PER").
    idx_to_parts = {}
    for tag, idx in tags.items():
        pieces = tag.split('-')
        idx_to_parts[idx] = (pieces[0], pieces[-1])
    
    chunks = []
    
    chunk_type, chunk_start = None, None
    for i, tok in enumerate(seq):
        if tok == default:
            # An "O" token closes the chunk that is currently open, if any.
            if chunk_type is not None:
                chunks.append((chunk_type, chunk_start, i))
                chunk_type, chunk_start = None, None
            continue

        tok_chunk_class, tok_chunk_type = idx_to_parts[tok]
        if chunk_type is None:
            # First token of a new entity
            chunk_type, chunk_start = tok_chunk_type, i
        elif tok_chunk_type != chunk_type or tok_chunk_class in ("B", "S"):
            # The entity type changed, or the tag explicitly begins a new
            # entity: close the open chunk and start the next one here.
            chunks.append((chunk_type, chunk_start, i))
            chunk_type, chunk_start = tok_chunk_type, i

    # end condition: a chunk still open at the end of the sequence
    if chunk_type is not None:
        chunks.append((chunk_type, chunk_start, len(seq)))

    return chunks

In [38]:
def _prepare_char_inputs(chars2, char_mode):
    """Pad the per-word character ids of one sentence into a 2-D LongTensor.

    Args:
        chars2: list with one list of character ids per word
        char_mode: 'LSTM' or 'CNN'

    Returns:
        (chars2_mask, chars2_length, d) where chars2_mask is the zero-padded
        LongTensor, chars2_length the list of row lengths, and d a dict that
        is empty in 'CNN' mode and maps sorted position -> original position
        in 'LSTM' mode (words are sorted by decreasing length there).

    Raises:
        ValueError: if char_mode is neither 'LSTM' nor 'CNN'
    """
    if char_mode == 'LSTM':
        rows = sorted(chars2, key=len, reverse=True)
        d = {}
        for i, ci in enumerate(chars2):
            for j, cj in enumerate(rows):
                if ci == cj and j not in d and i not in d.values():
                    d[j] = i
                    # i is now mapped, no later j can match it
                    break
    elif char_mode == 'CNN':
        rows = chars2
        d = {}
    else:
        raise ValueError("Unsupported char_mode: {!r}".format(char_mode))

    # Padding each word to the max word size of that sentence
    chars2_length = [len(c) for c in rows]
    char_maxl = max(chars2_length)
    chars2_mask = np.zeros((len(rows), char_maxl), dtype='int')
    for i, c in enumerate(rows):
        chars2_mask[i, :chars2_length[i]] = c
    return Variable(torch.LongTensor(chars2_mask)), chars2_length, d


def evaluating(model, datas, best_F,dataset="Train"):
    '''
    Runs the model over a dataset and computes the chunk-level F-1 score.

    Reads the notebook globals `parameters` (for 'char_mode'), `use_gpu` and
    `tag_to_id`. The model is called without gradient tracking; switching the
    model to evaluation mode is left to the caller.

    Args:
        model: callable as model(words, chars_mask, chars_length, d); the
            second element of its return value is used as the predicted
            tag id sequence
        datas: iterable of dicts with the keys 'tags', 'chars' and 'words'
        best_F: best F-1 score obtained so far on this dataset
        dataset: label used in the printed report

    Returns:
        (best_F, new_F, save): the possibly updated best score, the score of
        this run, and a flag that is True when new_F improved on best_F
        (i.e. the model should be checkpointed)
    '''
    # Initializations
    save = False # Flag that tells us if the model needs to be saved
    correct_preds, total_correct, total_preds = 0., 0., 0. # Count variables

    # Inference only: do not build the autograd graph
    with torch.no_grad():
        for data in datas:
            ground_truth_id = data['tags']

            chars2_mask, chars2_length, d = _prepare_char_inputs(
                data['chars'], parameters['char_mode'])
            dwords = Variable(torch.LongTensor(data['words']))

            # We are getting the predicted output from our model
            if use_gpu:
                _, predicted_id = model(dwords.cuda(), chars2_mask.cuda(), chars2_length, d)
            else:
                _, predicted_id = model(dwords, chars2_mask, chars2_length, d)

            # We use the get chunks function defined above to get the true chunks
            # and the predicted chunks from true labels and predicted labels respectively
            lab_chunks      = set(get_chunks(ground_truth_id, tag_to_id))
            lab_pred_chunks = set(get_chunks(predicted_id, tag_to_id))

            # Updating the count variables
            correct_preds += len(lab_chunks & lab_pred_chunks)
            total_preds   += len(lab_pred_chunks)
            total_correct += len(lab_chunks)

    # Calculating the F1-Score (all zero when nothing was predicted correctly,
    # which also avoids dividing by zero)
    p   = correct_preds / total_preds if correct_preds > 0 else 0
    r   = correct_preds / total_correct if correct_preds > 0 else 0
    new_F  = 2 * p * r / (p + r) if correct_preds > 0 else 0

    print("{}: new_F: {} best_F: {} ".format(dataset,new_F,best_F))

    # If our current F1-Score is better than the previous best, we update the best
    # to current F1 and we set the flag to indicate that we need to checkpoint this model
    if new_F>best_F:
        best_F=new_F
        save=True

    return best_F, new_F, save

In [39]:
def adjust_learning_rate(optimizer, lr):
    """
    Set the learning rate of every parameter group of `optimizer` to `lr`.

    The groups are modified in place; nothing is returned.
    """
    for group in optimizer.param_groups:
        group.update(lr=lr)

In [48]:
# Training cell.
# Relies on names defined in earlier cells: model, optimizer, train_data,
# dev_data, test_data, number_of_epochs, count, loss, losses, plot_every,
# eval_every, gradient_clip, learning_rate, decay_rate, model_name, all_F and
# the best_*_F trackers.
parameters['reload']=False
print(parameters['reload'])
if not parameters['reload']:
    tr = time.time()
    model.train(True)
    # NOTE(review): range(1, number_of_epochs) performs number_of_epochs - 1
    # passes over the data -- confirm this is intended.
    for epoch in range(1,number_of_epochs):
        # Visit the training sentences in a fresh random order every epoch
        for index in np.random.permutation(len(train_data)):
            count += 1
            data = train_data[index]

            ##gradient updates for each data entry
            model.zero_grad()

            sentence_in = data['words']
            sentence_in = Variable(torch.LongTensor(sentence_in))
            tags = data['tags']
            chars2 = data['chars']

            if parameters['char_mode'] == 'LSTM':
                # Words are sorted by decreasing length; d maps the sorted
                # position of each word back to its original position
                chars2_sorted = sorted(chars2, key=lambda p: len(p), reverse=True)
                d = {}
                for i, ci in enumerate(chars2):
                    for j, cj in enumerate(chars2_sorted):
                        if ci == cj and not j in d and not i in d.values():
                            d[j] = i
                            continue
                chars2_length = [len(c) for c in chars2_sorted]
                char_maxl = max(chars2_length)
                chars2_mask = np.zeros((len(chars2_sorted), char_maxl), dtype='int')
                for i, c in enumerate(chars2_sorted):
                    chars2_mask[i, :chars2_length[i]] = c
                chars2_mask = Variable(torch.LongTensor(chars2_mask))

            elif parameters['char_mode'] == 'CNN':

                d = {}

                ## Padding the each word to max word size of that sentence
                chars2_length = [len(c) for c in chars2]
                char_maxl = max(chars2_length)
                chars2_mask = np.zeros((len(chars2_length), char_maxl), dtype='int')
                for i, c in enumerate(chars2):
                    chars2_mask[i, :chars2_length[i]] = c
                chars2_mask = Variable(torch.LongTensor(chars2_mask))

            else:
                # Fail loudly instead of reusing a stale / undefined chars2_mask
                raise ValueError("Unsupported char_mode: {!r}".format(parameters['char_mode']))

            targets = torch.LongTensor(tags)

            #we calculate the negative log-likelihood for the predicted tags using the predefined function
            if use_gpu:
                neg_log_likelihood = model.neg_log_likelihood(sentence_in.cuda(), targets.cuda(), chars2_mask.cuda(), chars2_length, d)
            else:
                neg_log_likelihood = model.neg_log_likelihood(sentence_in, targets, chars2_mask, chars2_length, d)
            # Accumulate the per-token loss of this sentence
            loss += neg_log_likelihood.item() / len(data['words'])
            neg_log_likelihood.backward()

            #we use gradient clipping to avoid exploding gradients
            # (clip_grad_norm_ is the in-place successor of the deprecated clip_grad_norm)
            torch.nn.utils.clip_grad_norm_(model.parameters(), gradient_clip)
            optimizer.step()

            #Storing loss
            if count % plot_every == 0:
                loss /= plot_every
                print(count, ': ', loss)
                # NOTE(review): the very first averaged loss is stored twice;
                # kept as in the original -- confirm this is intended.
                if losses == []:
                    losses.append(loss)
                losses.append(loss)
                loss = 0.0

            #Evaluating on Train, Test, Dev Sets
            # Every 4 * eval_every steps during the first 20 * eval_every
            # steps, then every eval_every steps. ">=" keeps the boundary step
            # count == 20 * eval_every from being skipped by both conditions.
            if count % (eval_every) == 0 and count >= (eval_every * 20) or \
                    count % (eval_every*4) == 0 and count < (eval_every * 20):
                model.train(False)
                best_train_F, new_train_F, _ = evaluating(model, train_data, best_train_F,"Train")
                best_dev_F, new_dev_F, save = evaluating(model, dev_data, best_dev_F,"Dev")
                # Checkpoint only when the dev score improved
                if save:
                    print("Saving Model to ", model_name)
                    torch.save(model.state_dict(), model_name)
                best_test_F, new_test_F, _ = evaluating(model, test_data, best_test_F,"Test")

                all_F.append([new_train_F, new_dev_F, new_test_F])
                model.train(True)

            #Performing decay on the learning rate
            # once per full pass over the training data
            if count % len(train_data) == 0:
                adjust_learning_rate(optimizer, lr=learning_rate/(1+decay_rate*count/len(train_data)))

    print(time.time() - tr)
    plt.plot(losses)
    plt.show()

if not parameters['reload']:
    #reload the best model saved from training
    model.load_state_dict(torch.load(model_name))

False
87.80767822265625




7.9561381340026855
18.61897850036621
24.919023513793945
3.7748756408691406
8.985269546508789
9.748687744140625
17.36376190185547
4.529439449310303
4.638898849487305
17.183273315429688
1.8226242065429688
15.063735961914062
0.2176361083984375
0.6781826019287109
2.5326080322265625
0.27143096923828125
1.052703857421875
1.9632568359375
0.05487060546875
0.020263671875
4.232006072998047
3.1320362091064453
15.813995361328125
0.1656494140625
6.8653564453125
0.137847900390625
0.00183868408203125
0.11017131805419922
0.02655029296875
0.04685497283935547
3.68707275390625
1.844512939453125
0.08181381225585938
0.41595458984375
1.0049209594726562
0.01294708251953125
3.474609375
0.41431427001953125
0.12554931640625
0.75091552734375
0.5019989013671875
0.03411865234375
5.0321044921875
3.3068313598632812
1.6367340087890625
0.08297348022460938
0.00588226318359375
0.26837158203125
0.0624542236328125
0.009319305419921875
0.075958251953125
1.844512939453125
0.3352508544921875
0.1360931396484375
0.024398803710

0.04771614074707031
0.0025482177734375
0.25464630126953125
0.019277572631835938
0.457611083984375
6.103515625e-05
1.707550048828125
2.4145660400390625
0.0304107666015625
0.003917694091796875
0.0011749267578125
0.070526123046875
0.028045654296875
0.0003204345703125
2.288818359375e-05
1.2384147644042969
0.69354248046875
0.0098876953125
0.0002288818359375
0.00710296630859375
0.0012054443359375
5.204864501953125
0.1700439453125
0.00736236572265625
0.08721923828125
3.367919921875
0.002094268798828125
0.63055419921875
0.007965087890625
0.0348968505859375
5.51190185546875
0.00067901611328125
0.0037384033203125
13.913511276245117
0.03338623046875
0.017852783203125
0.0004730224609375
0.0162353515625
0.06976318359375
0.0
0.01580810546875
0.00531768798828125
0.146728515625
0.463958740234375
1.52587890625e-05
0.05744171142578125
3.274871826171875
0.00109100341796875
0.001373291015625
0.000762939453125
0.02325439453125
0.35748291015625
0.0084228515625
0.0037994384765625
0.002197265625
0.34188842773

0.022705078125
0.02130126953125
0.061107635498046875
0.003635406494140625
0.00115966796875
0.0
0.5718841552734375
0.026641845703125
0.19359779357910156
0.00215911865234375
0.005889892578125
0.0024871826171875
0.000823974609375
3.814697265625e-06
0.00025177001953125
-3.814697265625e-06
0.05579376220703125
0.0115509033203125
0.004486083984375
6.103515625e-05
8.392333984375e-05
4.9591064453125e-05
0.073455810546875
0.02294921875
0.0030975341796875
0.0008087158203125
0.00457763671875
9.94830322265625
0.038604736328125
11.829132080078125
0.45654296875
0.32305908203125
0.00152587890625
0.0342254638671875
1.67205810546875
0.005462646484375
0.86846923828125
0.241851806640625
0.0006103515625
0.8521575927734375
0.17052459716796875
7.2479248046875e-05
0.000102996826171875
0.002471923828125
1.52587890625e-05
0.01019287109375
0.00011444091796875
0.10723876953125
0.0033416748046875
2.133575439453125
0.4849853515625
0.0006866455078125
0.1689910888671875
2.288818359375e-05
0.2472076416015625
1.8508758

0.0001220703125
0.000732421875
1.52587890625e-05
0.0069580078125
6.103515625e-05
0.0004425048828125
0.00115966796875
0.0684814453125
0.45847320556640625
0.44671630859375
0.38750457763671875
0.009033203125
0.000820159912109375
0.001922607421875
0.00787353515625
6.103515625e-05
0.033935546875
-3.814697265625e-06
0.000179290771484375
0.00079345703125
0.00030517578125
0.036464691162109375
6.41375732421875
0.995361328125
0.00135040283203125
0.00103759765625
1.52587890625e-05
0.00128173828125
0.0031414031982421875
0.00058746337890625
0.0043182373046875
0.012115478515625
0.00861358642578125
0.008514404296875
0.0045166015625
0.0050201416015625
2.1689834594726562
0.14907073974609375
0.15560150146484375
0.00020599365234375
0.36248779296875
3.814697265625e-06
5.7220458984375e-06
0.0001373291015625
0.000125885009765625
0.004974365234375
0.00921630859375
5.340576171875e-05
0.00323486328125
0.0389404296875
8.392333984375e-05
0.025163650512695312
0.07582855224609375
9.1552734375e-05
0.128955841064453

0.00016021728515625
0.00726318359375
0.0172119140625
0.00066375732421875
0.00058746337890625
0.008758544921875
0.00052642822265625
0.0002040863037109375
0.01409912109375
0.5272674560546875
0.0022735595703125
0.0004425048828125
0.00727081298828125
0.0003204345703125
0.008270263671875
0.020751953125
0.000476837158203125
0.0101470947265625
0.0010356903076171875
0.013397216796875
0.178375244140625
0.2149658203125
0.00054931640625
1.59124755859375
0.0127716064453125
0.0
0.002147674560546875
0.010486602783203125
1.52587890625e-05
0.066619873046875
0.06231689453125
0.03912353515625
0.08475494384765625
0.0002899169921875
0.01157379150390625
9.5367431640625e-07
0.000102996826171875
0.00959014892578125
-1.52587890625e-05
1.1444091796875e-05
0.000244140625
0.039852142333984375
0.00579833984375
0.00677490234375
0.0
0.0002593994140625
0.0005035400390625
0.0
0.0008697509765625
9.1552734375e-05
2.188507080078125
0.003082275390625
0.00018310546875
0.0050506591796875
0.0005645751953125
1.76524353027343

0.0
0.065673828125
0.22951507568359375
0.0064544677734375
0.000728607177734375
0.002655029296875
1.52587890625e-05
2.050567626953125
3.0517578125e-05
0.026592254638671875
0.0443115234375
1.52587890625e-05
1.52587890625e-05
0.00189208984375
0.001495361328125
0.00018310546875
0.000507354736328125
0.0
0.0234832763671875
0.0
0.0003662109375
1.33514404296875e-05
1.83392333984375
-7.62939453125e-06
0.0127716064453125
0.00037384033203125
0.16254615783691406
0.000213623046875
0.0022430419921875
0.0001220703125
0.02704620361328125
0.015594482421875
0.00026702880859375
0.000244140625
0.009429931640625
5.340576171875e-05
0.00012969970703125
0.351318359375
0.8974838256835938
0.0277862548828125
2.339000701904297
0.00408935546875
0.0001373291015625
0.0
0.0574951171875
0.00030517578125
0.0
1.52587890625e-05
1.52587890625e-05
0.0015869140625
0.000331878662109375
0.001068115234375
0.1408538818359375
0.14829254150390625
0.0001983642578125
0.47430419921875
0.0001277923583984375
0.10552978515625
0.0037918

0.001708984375
7.62939453125e-05
0.0002384185791015625
0.0048675537109375
0.01202392578125
0.0001983642578125
7.250732421875
0.0003204345703125
0.4305419921875
0.000244140625
0.021759033203125
8.392333984375e-05
0.0078277587890625
0.1536865234375
0.00244140625
0.866302490234375
0.036834716796875
0.022735595703125
0.0001373291015625
0.01837158203125
0.0003662109375
0.0001220703125
0.58880615234375
0.000701904296875
0.0054779052734375
3.814697265625e-05
0.000732421875
0.0104827880859375
0.0001220703125
0.0005035400390625
9.1552734375e-05
0.0076904296875
0.025577545166015625
0.326690673828125
0.09295654296875
9.5367431640625e-06
0.020313262939453125
2.04180908203125
0.02899169921875
0.37419891357421875
0.04727602005004883
0.00238037109375
0.85882568359375
0.16033172607421875
0.08331298828125
0.015905380249023438
0.007175445556640625
4.57763671875e-05
0.0067138671875
0.11428451538085938
0.01908111572265625
0.0004825592041015625
9.1552734375e-05
0.0009937286376953125
4.545232772827148
9.918

0.000274658203125
0.056365966796875
1.52587890625e-05
0.0001220703125
0.000732421875
0.015289306640625
1.4576416015625
0.057281494140625
9.98492431640625
0.0001659393310546875
0.00018310546875
0.010530471801757812
-7.62939453125e-06
0.28607177734375
0.10453414916992188
3.0517578125e-05
0.0001220703125
1.52587890625e-05
0.005126953125
7.62939453125e-05
2.288818359375e-05
0.44500732421875
9.128959655761719
0.9647216796875
0.375457763671875
0.0535888671875
0.003204345703125
0.0014905929565429688
0.00030517578125
0.0224609375
1.52587890625e-05
0.0009765625
1.15234375
0.001102447509765625
0.0013885498046875
3.4332275390625e-05
0.001190185546875
0.692718505859375
0.103424072265625
0.000171661376953125
0.0419921875
0.021514892578125
1.52587890625e-05
0.00750732421875
0.39703369140625
0.4263572692871094
0.028031349182128906
0.0010528564453125
0.017608642578125
0.01763916015625
-1.52587890625e-05
0.10314178466796875
0.0
0.007293701171875
3.4332275390625e-05
0.00018310546875
0.0504150390625
0.00

0.00019073486328125
1.558441162109375
0.015738487243652344
0.0001220703125
0.01641845703125
0.0
0.00829315185546875
0.000457763671875
0.000244140625
0.0001373291015625
0.0013427734375
0.0002899169921875
0.0071563720703125
0.001220703125
3.814697265625e-06
0.1551361083984375
0.0097503662109375
0.3346061706542969
0.0064697265625
0.034515380859375
0.014892578125
0.000995635986328125
0.0006256103515625
0.0101318359375
0.0029296875
9.1552734375e-05
0.006500244140625
0.000274658203125
0.00048828125
4.188133239746094
0.000579833984375
2.288818359375e-05
0.00146484375
1.52587890625e-05
0.000274658203125
0.0004119873046875
0.000457763671875
4.38690185546875e-05
0.00360107421875
0.0418701171875
0.00042724609375
0.012409210205078125
0.00444793701171875
1.720123291015625
3.0517578125e-05
4.57763671875e-05
3.0517578125e-05
0.00091552734375
1.52587890625e-05
0.0001373291015625
0.077972412109375
3.0517578125e-05
0.000782012939453125
0.00592041015625
0.1132965087890625
-7.62939453125e-06
0.00013351440

0.27911376953125
0.4187469482421875
0.0026397705078125
0.0006103515625
0.37957763671875
0.076568603515625
0.0042266845703125
9.1552734375e-05
0.018096923828125
0.005096435546875
0.7826766967773438
9.1552734375e-05
1.52587890625e-05
0.021697998046875
0.0004730224609375
0.0007781982421875
0.009746551513671875
10.546205520629883
0.07003021240234375
0.000640869140625
0.000213623046875
4.57763671875e-05
0.0030040740966796875
-6.103515625e-05
2.288818359375e-05
0.000823974609375
0.16254425048828125
0.0017547607421875
0.000728607177734375
0.0138092041015625
0.206024169921875
0.0262451171875
3.0517578125e-05
2.528717041015625
0.024139404296875
0.531982421875
21.971725463867188
9.738929748535156
0.0059814453125
0.161346435546875
0.047607421875
0.091827392578125
0.00018310546875
0.0
0.00038909912109375
0.000701904296875
0.19670867919921875
0.747100830078125
0.0075511932373046875
0.03741455078125
0.0002288818359375
-7.62939453125e-06
2.288818359375e-05
0.0061187744140625
1.52587890625e-05
0.00018

13.843048095703125
0.0111083984375
0.000823974609375
1.52587890625e-05
0.783294677734375
0.0001220703125
0.00042724609375
0.00225830078125
0.00534820556640625
0.001129150390625
0.0018463134765625
0.0001068115234375
2.288818359375e-05
0.282989501953125
2.288818359375e-05
0.00970458984375
0.174102783203125
2.288818359375e-05
0.07167816162109375
0.00390625
0.00079345703125
0.00067138671875
0.012481689453125
12.83984375
0.0007476806640625
0.0113525390625
0.00518798828125
0.0
0.0
0.00213623046875
0.0
0.0078582763671875
0.000640869140625
0.0005283355712890625
0.048828125
0.000152587890625
0.0779266357421875
0.0001373291015625
8.392333984375e-05
1.1829376220703125
0.01922607421875
0.00128173828125
0.0064697265625
0.9708251953125
4.57763671875e-05
0.088165283203125
0.0
0.001373291015625
6.866455078125e-05
0.0108642578125
0.000274658203125
7.62939453125e-06
0.013031005859375
4.857421875
0.0408935546875
0.0001678466796875
0.0841522216796875
0.0135955810546875
0.001068115234375
0.0
5.912780761718

0.000579833984375
3.0517578125e-05
5.9288330078125
-3.0517578125e-05
0.000885009765625
0.0274810791015625
0.000152587890625
-3.0517578125e-05
0.00083160400390625
0.076995849609375
0.0745697021484375
0.0
0.003997802734375
2.76605224609375
0.0002288818359375
0.366943359375
1.182586669921875
0.0001983642578125
0.0050048828125
0.01410675048828125
0.0002593994140625
0.0
0.015869140625
1.52587890625e-05
0.026399612426757812
0.005512237548828125
0.0005035400390625
0.00131988525390625
0.06437110900878906
0.010772705078125
0.0291748046875
0.0003662109375
6.103515625e-05
0.002166748046875
0.0
0.518157958984375
-7.62939453125e-06
0.001068115234375
0.00012969970703125
2.288818359375e-05
0.0
3.0517578125e-05
0.000885009765625
0.0024566650390625
0.4165802001953125
0.447509765625
0.00152587890625
0.6793212890625
0.003864288330078125
-0.0001220703125
0.014434814453125
0.000762939453125
0.11798095703125
0.00014495849609375
-3.0517578125e-05
0.00079345703125
0.000141143798828125
0.019775390625
1.0451908

0.016021728515625
0.03924560546875
0.000762939453125
3.0517578125e-05
2.288818359375e-05
0.002960205078125
1.52587890625e-05
0.0003662109375
0.0
4.309070587158203
4.57763671875e-05
0.03936767578125
3.0517578125e-05
0.00164794921875
0.11031723022460938
0.001720428466796875
2.288818359375e-05
-6.103515625e-05
-3.0517578125e-05
0.01153564453125
0.023223876953125
0.000946044921875
0.0108795166015625
1.52587890625e-05
-4.57763671875e-05
0.0003204345703125
3.694671630859375
2.34075927734375
1.1278076171875
0.00115966796875
0.00146484375
0.001220703125
0.033840179443359375
6.103515625e-05
1.52587890625e-05
1.52587890625e-05
0.0003662109375
7.62939453125e-06
0.01461029052734375
0.00109100341796875
0.000457763671875
4.9591064453125e-05
0.0167694091796875
3.0517578125e-05
0.0013427734375
0.000946044921875
0.012054443359375
9.1552734375e-05
0.0
0.000274658203125
3.0517578125e-05
0.0001373291015625
1.9066162109375
1.52587890625e-05
3.0517578125e-05
0.0013580322265625
-0.0001220703125
0.31793212890

2.6702880859375e-05
0.0006103515625
-0.0001220703125
0.015167236328125
0.000335693359375
9.1552734375e-05
0.00262451171875
0.003875732421875
0.0420684814453125
0.7166290283203125
6.866455078125e-05
0.012542724609375
1.1444091796875e-05
1.52587890625e-05
-3.814697265625e-06
4.648307800292969
2.09808349609375e-05
1.755615234375
39.874755859375
0.05670166015625
0.002838134765625
1.52587890625e-05
10.511795043945312
0.0015106201171875
0.0126953125
0.850341796875
0.02463531494140625
0.002471923828125
2.47705078125
0.0005340576171875
0.011199951171875
0.00537109375
0.0059356689453125
0.000213623046875
0.01263427734375
0.00685882568359375
0.0032958984375
7.62939453125e-05
0.020965576171875
2.711181640625
0.0
0.2780303955078125
-3.0517578125e-05
-1.52587890625e-05
0.922271728515625
5.340576171875e-05
0.011016845703125
0.0015411376953125
0.078857421875
1.3199462890625
1.52587890625e-05
-1.52587890625e-05
1.4021530151367188
0.002048492431640625
0.014251708984375
9.1552734375e-05
-3.0517578125e-0

0.00693511962890625
-3.0517578125e-05
0.03546142578125
3.0517578125e-05
3.6964111328125
0.0116424560546875
0.064117431640625
0.0001678466796875
3.0517578125e-05
0.0001964569091796875
0.0203857421875
0.0
3.814697265625e-06
0.3944244384765625
9.1552734375e-05
-3.0517578125e-05
0.021026611328125
0.01385498046875
0.0009002685546875
0.0004119873046875
0.049072265625
5.340576171875e-05
9.1552734375e-05
0.015380859375
0.01708984375
-1.52587890625e-05
0.000274658203125
0.00091552734375
-7.62939453125e-06
0.0
0.0012359619140625
0.0001220703125
0.0003204345703125
0.00141143798828125
0.000125885009765625
1.4542160034179688
0.017578125
0.002044677734375
0.000579833984375
0.0210113525390625
0.006103515625
6.103515625e-05
0.001613616943359375
0.0
0.464447021484375
0.0022125244140625
3.0517578125e-05
0.0002288818359375
0.00011444091796875
8.392333984375e-05
3.0517578125e-05
0.0030670166015625
0.0001983642578125
0.2391357421875
0.001140594482421875
7.62939453125e-06
0.0496826171875
6.103515625e-05
0.0

-0.0001220703125
0.002227783203125
2.302703857421875
0.000274658203125
1.52587890625e-05
-0.0001220703125
0.0031185150146484375
0.2093353271484375
0.05435943603515625
7.62939453125e-06
0.0
0.00110626220703125
-7.62939453125e-06
3.0517578125e-05
1.01373291015625
0.0015869140625
0.00075531005859375
1.52587890625e-05
0.00177001953125
0.02288818359375
-1.52587890625e-05
0.03521728515625
0.0001678466796875
8.0108642578125e-05
0.0013275146484375
0.131439208984375
0.0013427734375
0.0023193359375
4.57763671875e-05
0.01213836669921875
0.001674652099609375
0.000110626220703125
0.00604248046875
0.0
0.007354736328125
1.52587890625e-05
1.778289794921875
0.0074310302734375
0.000270843505859375
0.0001983642578125
0.000576019287109375
0.0001068115234375
0.0112152099609375
0.00048828125
0.0
0.00373077392578125
0.01096343994140625
0.0
0.84429931640625
0.043853759765625
6.103515625e-05
0.02085113525390625
2.0897216796875
0.00012969970703125
0.04754638671875
0.00582122802734375
0.25238037109375
4.57763671

0.0
0.00054931640625
7.62939453125e-06
7.6268463134765625
0.00030517578125
0.000244140625
10.75909423828125
0.2691650390625
0.060882568359375
-1.52587890625e-05
0.0067138671875
1.52587890625e-05
7.62939453125e-06
2.288818359375e-05
5.110649108886719
0.0093994140625
0.0201873779296875
0.0899658203125
-1.52587890625e-05
0.004058837890625
0.526397705078125
0.08331298828125
0.000213623046875
0.00836944580078125
0.001617431640625
0.28753662109375
0.000164031982421875
0.00211334228515625
0.0060882568359375
0.01202392578125
0.13531494140625
0.0
0.0010986328125
0.002410888671875
0.6737289428710938
1.33514404296875e-05
0.0003662109375
0.000701904296875
-0.0001220703125
0.0
0.0155181884765625
0.0
3.0517578125e-05
0.03084564208984375
0.0001220703125
1.52587890625e-05
0.0018768310546875
1.52587890625e-05
0.004077911376953125
0.267822265625
0.0198974609375
8.392333984375e-05
0.5351333618164062
0.000244140625
1.30010986328125
0.2513427734375
0.0403594970703125
0.00299072265625
0.05163145065307617
9.

0.001220703125
0.00124359130859375
0.0001068115234375
0.00048828125
0.001373291015625
0.000732421875
0.000244140625
8.20159912109375e-05
11.456039428710938
9.404876708984375
0.125762939453125
8.536994934082031
7.26519775390625
0.0296630859375
0.0
0.00023651123046875
0.113128662109375
0.00018310546875
0.0
0.0302886962890625
0.0034637451171875
0.0
0.005828857421875
2.2277450561523438
0.001739501953125
6.866455078125e-05
2.09808349609375e-05
0.0
0.00018310546875
0.001434326171875
5.7220458984375e-05
6.103515625e-05
0.00030517578125
9.1552734375e-05
3.814697265625e-05
0.276641845703125
0.004669189453125
0.91802978515625
0.00048828125
0.00091552734375
3.149169921875
0.223663330078125
0.15451812744140625
0.00640869140625
0.0001220703125
-7.62939453125e-06
12.6453857421875
0.0101776123046875
3.0517578125e-05
0.0021209716796875
2.367218017578125
0.005584716796875
0.0027313232421875
0.01177978515625
5.7220458984375e-05
0.025054931640625
0.001373291015625
0.0079193115234375
-3.0517578125e-05
0.0

6.103515625e-05
0.0
0.0
0.0
0.0001983642578125
0.00457763671875
0.0
0.001617431640625
0.000370025634765625
0.49603271484375
0.0
0.0
0.00030517578125
-3.0517578125e-05
0.000518798828125
0.0
0.0001983642578125
0.781494140625
-1.52587890625e-05
0.00028228759765625
3.0517578125e-05
0.9775390625
0.01837158203125
0.00128173828125
7.000213623046875
0.0009918212890625
0.29180908203125
0.5893287658691406
7.62939453125e-06
0.05517578125
0.019561767578125
0.18810272216796875
3.814697265625e-06
0.0004425048828125
0.00052642822265625
0.0002422332763671875
6.103515625e-05
0.00762939453125
0.0908203125
1.52587890625e-05
0.00018310546875
-1.52587890625e-05
0.017913818359375
0.0
0.10272407531738281
0.0
0.00066375732421875
3.0517578125e-05
0.0
0.0001220703125
9.1552734375e-05
0.0001983642578125
3.0517578125e-05
1.183837890625
0.0074005126953125
0.001575469970703125
0.134521484375
0.0
1.52587890625e-05
2.8339080810546875
0.23392486572265625
0.43927001953125
0.7353515625
0.00661468505859375
2.214477539062

6.67572021484375e-06
8.58306884765625e-05
6.103515625e-05
0.171630859375
4.57763671875e-05
3.0517578125e-05
-0.0001220703125
4.76837158203125e-05
0.0
-6.103515625e-05
7.079498291015625
0.003025054931640625
0.0
-0.0001220703125
0.00152587890625
3.0517578125e-05
0.00067138671875
2.288818359375e-05
1.28240966796875
7.82012939453125e-05
3.0517578125e-05
0.008327484130859375
1.52587890625e-05
0.029815673828125
0.2484130859375
1.7943115234375
0.0
0.000823974609375
0.032196044921875
0.116546630859375
0.000530242919921875
7.62939453125e-06
1.7783203125
0.0017852783203125
2.288818359375e-05
7.62939453125e-06
0.000396728515625
0.0
1.68511962890625
1.52587890625e-05
1.52587890625e-05
0.01593017578125
0.00396728515625
-7.62939453125e-06
1.52587890625e-05
0.00054931640625
0.0
1.52587890625e-05
0.1402435302734375
3.0517578125e-05
0.5078277587890625
4.7877197265625
0.0002536773681640625
-3.0517578125e-05
0.000274658203125
7.9654951095581055
0.0
0.006683349609375
0.00299072265625
4.57763671875e-05
-1.

0.072784423828125
4.253257751464844
0.0003662109375
19.96319580078125
0.002101898193359375
0.00156402587890625
-6.103515625e-05
0.0
-6.103515625e-05
0.0006103515625
13.74053955078125
0.004241943359375
0.000579833984375
0.1043701171875
7.62939453125e-06
0.0
0.002288818359375
-3.0517578125e-05
0.000213623046875
1.52587890625e-05
0.00563812255859375
0.000274658203125
0.0006103515625
-3.814697265625e-06
0.00019073486328125
0.009552001953125
-1.52587890625e-05
0.005126953125
0.000701904296875
0.03403472900390625
0.00710296630859375
0.00042724609375
0.0003814697265625
0.00042724609375
5.166229248046875
0.01461029052734375
0.006988525390625
5.27703857421875
3.0517578125e-05
0.0205078125
6.340057373046875
0.0175323486328125
0.518798828125
0.00049591064453125
0.000244140625
19.688720703125
0.0
3.0517578125e-05
3.0517578125e-05
0.0623779296875
0.0027923583984375
8.392333984375e-05
0.164154052734375
0.0003509521484375
0.00099945068359375
0.0002288818359375
0.0001068115234375
1.5904502868652344
0.

9.5367431640625e-07
0.02516937255859375
0.000377655029296875
0.00146484375
-1.52587890625e-05
2.288818359375e-05
7.62939453125e-06
0.0
0.00030517578125
0.000244140625
0.000213623046875
0.017181396484375
-1.52587890625e-05
0.0
3.814697265625e-06
0.000152587890625
0.000244140625
0.0
6.103515625e-05
0.016632080078125
7.62939453125e-06
0.0002079010009765625
0.3541259765625
0.0
5.7220458984375e-05
0.00030517578125
3.0517578125e-05
0.000518798828125
0.5675048828125
0.00018310546875
0.0006866455078125
-0.0001220703125
2.203948974609375
4.57763671875e-05
0.00079345703125
4.57763671875e-05
0.000762939453125
0.0
0.05474853515625
6.103515625e-05
-7.62939453125e-06
1.52587890625e-05
1.52587890625e-05
3.0517578125e-05
0.0
0.10821533203125
1.52587890625e-05
0.1309814453125
0.002391815185546875
0.018524169921875
0.000274658203125
1.52587890625e-05
-6.103515625e-05
0.0
0.000152587890625
0.0038604736328125
0.0003814697265625
0.00262451171875
0.0
0.02886962890625
-1.52587890625e-05
0.00079345703125
7.62

3.814697265625e-06
0.01839447021484375
0.0296630859375
0.0
0.0687103271484375
0.007293701171875
0.20677566528320312
0.09989166259765625
17.7589111328125
0.00775146484375
0.02581787109375
0.0002288818359375
1.9073486328125e-06
13.431427001953125
0.000640869140625
0.00012969970703125
2.390838623046875
6.103515625e-05
0.0
0.003265380859375
0.291748046875
0.0062580108642578125
0.0002422332763671875
6.103515625e-05
0.07904052734375
0.00457763671875
0.00022125244140625
7.62939453125e-06
1.52587890625e-05
0.001953125
0.0
1.52587890625e-05
0.1712646484375
0.9154052734375
-6.103515625e-05
0.0
0.005615234375
0.0078125
0.001331329345703125
5.91278076171875e-05
3.814697265625e-05
0.000335693359375
0.000152587890625
0.0003662109375
0.00091552734375
13.045623779296875
0.0001068115234375
0.000980377197265625
6.4849853515625e-05
0.870849609375
0.0009765625
0.00299072265625
0.000579833984375
0.000152587890625
0.0010166168212890625
0.000457763671875
0.502471923828125
0.0023651123046875
0.085372924804687

0.0
1.52587890625e-05
1.52587890625e-05
0.0054168701171875
-9.1552734375e-05
0.0078125
0.0
3.0517578125e-05
0.002899169921875
0.00054931640625
0.01251220703125
2.3327484130859375
0.06207275390625
0.00030517578125
9.1552734375e-05
0.0
9.173553466796875
0.00128173828125
0.9189453125
0.0
-1.52587890625e-05
0.01861572265625
-1.52587890625e-05
6.103515625e-05
9.1552734375e-05
0.0026092529296875
0.04686880111694336
-3.0517578125e-05
0.000213623046875
7.62939453125e-06
-3.814697265625e-06
0.4752349853515625
0.01068115234375
0.06329345703125
3.0517578125e-05
6.103515625e-05
0.2337646484375
0.024932861328125
0.970367431640625
0.0118408203125
0.00209808349609375
0.002227783203125
0.107666015625
3.814697265625e-06
4.9591064453125e-05
0.0
0.0008392333984375
0.022979736328125
0.8820648193359375
0.00103759765625
6.103515625e-05
0.0
0.10262298583984375
0.10620498657226562
-7.62939453125e-06
0.0166168212890625
0.0001220703125
-1.52587890625e-05
3.814697265625e-06
0.00177001953125
0.0003662109375
0.000

0.027008056640625
1.52587890625e-05
0.001712799072265625
0.01326751708984375
0.000579833984375
0.94195556640625
0.00054931640625
3.814697265625e-06
0.0002899169921875
0.00030517578125
1.2344970703125
0.0019378662109375
0.00140380859375
4.57763671875e-05
1.1444091796875e-05
7.62939453125e-06
0.0008544921875
0.000335693359375
-7.62939453125e-06
-7.62939453125e-06
5.340576171875e-05
0.00019073486328125
0.0
5.373931884765625
8.822052001953125
0.000701904296875
6.103515625e-05
0.0001220703125
0.001194000244140625
6.103515625e-05
0.0361785888671875
0.10015869140625
0.0
7.62939453125e-06
0.7666397094726562
0.955047607421875
0.18243408203125
3.0517578125e-05
0.0214691162109375
0.000156402587890625
0.0001983642578125
0.02681732177734375
6.103515625e-05
0.0
0.00048828125
7.62939453125e-06
1.52587890625e-05
0.0007781982421875
0.0
0.001617431640625
-3.0517578125e-05
0.00099945068359375
0.001068115234375
3.814697265625e-06
0.0139007568359375
0.000110626220703125
1.52587890625e-05
0.000274658203125


0.00225830078125
1.85101318359375
7.62939453125e-06
0.000335693359375
4.158447265625
0.0
-3.0517578125e-05
0.012908935546875
0.00434112548828125
0.00146484375
0.00018310546875
-0.00018310546875
0.00885009765625
0.0
3.0517578125e-05
0.001129150390625
0.00026702880859375
6.5804443359375
-0.0001220703125
0.008392333984375
0.00555419921875
4.57763671875e-05
0.146484375
0.124847412109375
0.180023193359375
0.0029144287109375
0.00196075439453125
0.00225830078125
1.8806037902832031
6.103515625e-05
0.0001678466796875
0.025844573974609375
0.09588623046875
0.04266357421875
0.00018310546875
0.000152587890625
4.116058349609375
-9.1552734375e-05
0.000244140625
9.1552734375e-05
0.3365478515625
0.00018310546875
0.0133056640625
1.16949462890625
0.00140380859375
3.814697265625e-05
0.47637939453125
0.0
0.0008544921875
0.00103759765625
0.24072265625
0.000518798828125
0.00018310546875
0.03179931640625
0.000518798828125
0.031585693359375
-3.0517578125e-05
0.077789306640625
0.0450592041015625
0.0128784179687

0.0
4.439453125
7.62939453125e-06
0.0
0.0
7.62939453125e-06
-3.0517578125e-05
0.00018310546875
1.52587890625e-05
0.5994873046875
0.20147705078125
0.0
-3.0517578125e-05
-7.62939453125e-06
0.112152099609375
0.0089111328125
0.011199951171875
0.00016021728515625
0.575469970703125
0.0001316070556640625
3.966064453125
0.37127685546875
0.0072021484375
-6.103515625e-05
0.0
0.0015869140625
0.0009002685546875
0.0
0.1015167236328125
0.0
0.0
1.52587890625e-05
3.96746826171875
3.0517578125e-05
0.0011444091796875
0.23345947265625
6.103515625e-05
0.0
-6.103515625e-05
0.6936492919921875
6.103515625e-05
1.52587890625e-05
0.706817626953125
0.0
6.103515625e-05
4.57763671875e-05
0.037689208984375
0.001678466796875
0.93402099609375
0.000152587890625
3.1868896484375
0.0
1.52587890625e-05
0.0007781982421875
-7.62939453125e-06
1.52587890625e-05
-6.103515625e-05
7.62939453125e-06
-3.0517578125e-05
0.16571044921875
0.00018310546875
0.000885009765625
0.03301239013671875
2.84417724609375
0.000102996826171875
-3.0

0.0
6.103515625e-05
6.103515625e-05
0.022705078125
0.0437164306640625
0.00018310546875
0.0066986083984375
0.006683349609375
1.470367431640625
0.00103759765625
0.0407867431640625
6.103515625e-05
0.0092010498046875
0.024503707885742188
0.0001068115234375
0.299896240234375
0.0001068115234375
0.02105712890625
4.57763671875e-05
0.00287628173828125
0.0001220703125
0.00067138671875
0.0
1.52587890625e-05
-3.0517578125e-05
-7.62939453125e-06
0.002899169921875
0.0030517578125
2.2716064453125
0.00286865234375
0.0140380859375
3.814697265625e-06
0.000213623046875
6.534446716308594
0.019439697265625
3.26263427734375
3.0517578125e-05
-7.62939453125e-06
7.62939453125e-06
0.00018310546875
0.316253662109375
0.001495361328125
40.624671936035156
0.0023956298828125
0.673828125
0.000335693359375
1.52587890625e-05
0.0007476806640625
0.0
0.00243377685546875
0.04410552978515625
3.70166015625
-3.0517578125e-05
6.103515625e-05
8.508380889892578
0.001434326171875
0.00897216796875
0.001495361328125
0.0095520019531

2.773162841796875
0.00012969970703125
1.52587890625e-05
0.0
0.0267333984375
5.65704345703125
-4.57763671875e-05
0.2222900390625
1.8646240234375
0.407470703125
0.00213623046875
-6.103515625e-05
2.60693359375
0.011989593505859375
0.0001068115234375
6.103515625e-05
0.08026504516601562
0.0181427001953125
0.00022125244140625
3.2662353515625
0.0010986328125
1.67706298828125
7.62939453125e-06
0.27685546875
3.0517578125e-05
0.0
0.011199951171875
0.00128173828125
0.00128173828125
1.52587890625e-05
0.00067138671875
0.001251220703125
0.025054931640625
1.9073486328125e-06
3.0517578125e-05
6.103515625e-05
0.00335693359375
3.0517578125e-05
0.0003814697265625
0.1173095703125
0.01318359375
0.001708984375
0.0732269287109375
0.13665771484375
0.0001068115234375
0.021269798278808594
5.340576171875e-05
0.0001678466796875
0.0018310546875
0.0002117156982421875
0.0205230712890625
0.099700927734375
0.00079345703125
0.002410888671875
0.0034942626953125
0.0001068115234375
0.001312255859375
0.0559844970703125
0.0

-0.000152587890625
0.1072998046875
0.0018310546875
0.00145721435546875
0.02490234375
0.41290283203125
0.0
0.0042724609375
-6.103515625e-05
1.52587890625e-05
0.000152587890625
0.000579833984375
0.006744384765625
0.00052642822265625
7.62939453125e-06
0.0
0.33587646484375
11.045257568359375
7.62939453125e-06
-6.103515625e-05
-7.62939453125e-06
12.23773193359375
0.00189208984375
0.0
0.00377655029296875
0.00103759765625
0.02294921875
17.3284912109375
-1.52587890625e-05
5.340576171875e-05
0.05194091796875
0.0117340087890625
0.00079345703125
0.001708984375
-6.103515625e-05
0.00030517578125
0.0001068115234375
0.02893829345703125
0.0001983642578125
9.1552734375e-05
0.02313232421875
-0.00030517578125
1.8846664428710938
7.62939453125e-06
0.005218505859375
5.170654296875
0.5414581298828125
0.0029296875
0.022735595703125
-1.52587890625e-05
0.0
1.1444091796875e-05
0.00672149658203125
0.0498046875
0.0
3.814697265625e-05
7.62939453125e-05
0.398956298828125
0.007965087890625
0.0001068115234375
0.0
0.00

0.22808837890625
0.0
0.00885009765625
0.002899169921875
0.0
0.00054931640625
1.52587890625e-05
0.0001850128173828125
5.68194580078125
0.09033203125
0.003116607666015625
-1.52587890625e-05
0.001861572265625
0.00311279296875
0.16262054443359375
0.01324462890625
0.0001220703125
1.52587890625e-05
0.0011444091796875
0.01528167724609375
0.1628875732421875
0.0140380859375
0.0001983642578125
0.00579833984375
0.0001220703125
0.005054473876953125
1.52587890625e-05
0.008575439453125
0.000732421875
8.2406005859375
0.0
0.000244140625
0.078643798828125
0.00152587890625
0.20416259765625
1.4518661499023438
5.7220458984375e-06
0.0001220703125
0.000736236572265625
3.0517578125e-05
0.916961669921875
0.0010986328125
0.002288818359375
7.951446533203125
-3.0517578125e-05
0.06140899658203125
-3.0517578125e-05
1.52587890625e-05
0.0001010894775390625
0.00732421875
0.0
0.0001220703125
0.002422332763671875
0.006072998046875
3.0517578125e-05
0.0
3.0517578125e-05
0.0532379150390625
0.2006683349609375
0.03948974609

0.004119873046875
0.6865692138671875
0.0
0.0
1.52587890625e-05
0.0039825439453125
0.000244140625
0.6376152038574219
1.52587890625e-05
0.000606536865234375
0.0113067626953125
0.006317138671875
6.103515625e-05
0.0025482177734375
7.62939453125e-06
1.52587890625e-05
2.288818359375e-05
0.00034332275390625
0.1895751953125
0.10955810546875
0.35223388671875
0.0
0.0
0.0
6.103515625e-05
-3.0517578125e-05
0.0006103515625
2.288818359375e-05
0.00576019287109375
0.0
0.0
6.103515625e-05
0.00146484375
0.00022125244140625
0.0
-1.52587890625e-05
5.340576171875e-05
1.52587890625e-05
1.52587890625e-05
0.005859375
2.0531005859375
-6.103515625e-05
1.52587890625e-05
0.0
5.57977294921875
0.60479736328125
0.00030517578125
0.67889404296875
0.01023101806640625
9.1552734375e-05
3.0517578125e-05
0.004241943359375
9.1552734375e-05
0.065673828125
-3.0517578125e-05
0.0001678466796875
0.0
0.0
-0.00018310546875
0.036712646484375
9.72747802734375e-05
0.00018310546875
0.0008544921875
0.8751373291015625
0.000274658203125


0.0003662109375
0.760711669921875
0.00506591796875
0.01611328125
0.0244140625
3.656402587890625
0.096649169921875
0.0244903564453125
3.814697265625e-05
0.00799560546875
0.02813720703125
0.00030517578125
0.0
0.0001220703125
7.62939453125e-05
3.0517578125e-05
0.30072021484375
0.002109527587890625
-3.0517578125e-05
0.0009765625
4.57763671875e-05
3.8384132385253906
3.45062255859375
0.00030517578125
0.003936767578125
0.00046539306640625
1.1444091796875e-05
-7.62939453125e-06
0.0001220703125
0.10364341735839844
0.001312255859375
0.0012969970703125
0.02447509765625
0.00081634521484375
0.0038299560546875
0.000370025634765625
0.011016845703125
-6.103515625e-05
0.0
0.0
0.0010986328125
0.0028076171875
1.52587890625e-05
0.00030517578125
0.00152587890625
0.000240325927734375
3.0517578125e-05
0.000518798828125
3.449188232421875
2.4576416015625
0.0001220703125
0.0358734130859375
0.00445556640625
0.02459716796875
0.0029296875
0.04833984375
-3.814697265625e-06
0.001369476318359375
-7.62939453125e-06
0.

0.0
0.00164794921875
7.62939453125e-05
3.814697265625e-06
0.000152587890625
1.049163818359375
0.0
0.00152587890625
2.288818359375e-05
3.814697265625e-06
0.09173583984375
0.00067138671875
0.0
0.0
0.00341796875
0.0
0.0
1.52587890625e-05
0.00421142578125
0.000244140625
0.0002899169921875
0.0
0.77569580078125
0.00604248046875
5.7220458984375e-05
0.000484466552734375
0.092041015625
0.0
2.18634033203125
0.05881500244140625
0.11993408203125
0.00048828125
1.38043212890625
0.01123046875
-1.52587890625e-05
1.52587890625e-05
6.103515625e-05
0.07635498046875
0.0
3.0517578125e-05
0.261260986328125
3.802032470703125
0.00518798828125
0.09381103515625
0.0
0.00054931640625
0.0054168701171875
0.01911163330078125
0.0
9.1552734375e-05
0.0
3.0517578125e-05
3.4332275390625e-05
0.0
0.00197601318359375
0.01336669921875
7.62939453125e-05
0.0008087158203125
0.05884742736816406
0.00360107421875
0.1041259765625
1.52587890625e-05
0.5669021606445312
0.0092010498046875
0.07611083984375
-3.0517578125e-05
-9.155273437

6.103515625e-05
3.814697265625e-06
7.62939453125e-05
-6.103515625e-05
-3.0517578125e-05
0.00048828125
5.91278076171875e-05
0.0004425048828125
0.45184326171875
0.089111328125
1.52587890625e-05
3.0517578125e-05
0.068756103515625
1.52587890625e-05
6.103515625e-05
0.0
2.25494384765625
0.0177001953125
0.0
0.005126953125
0.0
0.0005283355712890625
0.00048828125
0.164581298828125
0.0556488037109375
0.008636474609375
0.00048828125
7.62939453125e-06
0.0
0.09939002990722656
0.0023193359375
0.00018310546875
0.0002593994140625
3.814697265625e-06
-3.0517578125e-05
0.0078125
7.62939453125e-05
-7.62939453125e-06
0.00030517578125
0.0
2.873687744140625
7.62939453125e-06
0.0
0.0071563720703125
0.0
-3.0517578125e-05
0.002170562744140625
0.0001220703125
4.57763671875e-05
7.62939453125e-06
0.00018310546875
-0.00018310546875
3.4332275390625e-05
-7.62939453125e-06
1.52587890625e-05
2.288818359375e-05
7.62939453125e-06
0.0010986328125
0.00018310546875
2.288818359375e-05
9.1552734375e-05
0.01033782958984375
0.0

0.00439453125
-6.103515625e-05
0.0042266845703125
0.000244140625
3.02679443359375
0.0
0.4061279296875
6.553070068359375
0.00103759765625
-3.0517578125e-05
-3.814697265625e-06
-3.0517578125e-05
1.52587890625e-05
0.0071086883544921875
2.282958984375
7.62939453125e-06
0.0
0.160247802734375
0.000244140625
0.0
0.0030841827392578125
0.02594757080078125
8.7738037109375e-05
0.00017547607421875
0.0
0.00023651123046875
0.0001068115234375
0.0
1.52587890625e-05
0.0185546875
0.0128173828125
-0.0001220703125
0.182952880859375
2.288818359375e-05
0.00055694580078125
0.010986328125
0.0169677734375
0.0010223388671875
-0.000244140625
0.0
3.814697265625e-06
0.00079345703125
2.50994873046875
0.0
9.047142028808594
-1.52587890625e-05
0.00445556640625
0.0081787109375
0.0037841796875
3.814697265625e-06
6.103515625e-05
0.04510498046875
1.43170166015625
0.00034332275390625
0.0428009033203125
0.010009765625
-0.00030517578125
6.103515625e-05
0.008056640625
0.0
3.24249267578125e-05
-6.103515625e-05
0.07557678222656

4.083099365234375
0.04400634765625
0.0040740966796875
6.4849853515625e-05
0.0072784423828125
0.000335693359375
-3.814697265625e-06
1.52587890625e-05
0.04278564453125
0.0
0.04034423828125
0.0113067626953125
0.0281982421875
0.0001220703125
0.0529632568359375
1.52587890625e-05
6.103515625e-05
0.0008697509765625
0.000347137451171875
0.0
0.00048828125
0.0
0.002899169921875
0.000152587890625
3.0517578125e-05
0.0
9.1552734375e-05
0.0001983642578125
0.06201171875
9.1552734375e-05
0.0
0.1177978515625
0.0
0.00518798828125
0.0
7.62939453125e-06
-1.52587890625e-05
0.009066581726074219
3.0517578125e-05
0.000152587890625
3.0517578125e-05
0.0137939453125
0.0001220703125
0.0073394775390625
3.0517578125e-05
0.00091552734375
0.159881591796875
0.008392333984375
18.70062255859375
0.000244140625
5.340576171875e-05
0.003509521484375
9.1552734375e-05
6.103515625e-05
2.1798095703125
-3.0517578125e-05
0.0018310546875
0.0001373291015625
0.00177001953125
0.00030517578125
0.0001220703125
-1.52587890625e-05
7.6293

KeyboardInterrupt: 

# PYSYFT

In [49]:
%load_ext autoreload
%autoreload 2
import warnings
warnings.filterwarnings("ignore")

import syft as sy
import torch as th
from torch import jit
from torch import nn
from syft.serde import protobuf
import os
from syft.execution.state import State
from syft.execution.placeholder import PlaceHolder



# Set up the PySyft hook. `hook` is not assigned anywhere in this cell, so
# presumably make_hook() injects it into the globals() dict passed in here
# -- TODO confirm against the installed PySyft version.
sy.make_hook(globals())
# force protobuf serialization for tensors
hook.local_worker.framework = None
# Seed torch's RNG so the random tensors created in later cells are repeatable.
# As the last expression of the cell, the returned Generator is what gets displayed.
th.random.manual_seed(1)

Falling back to insecure randomness since the required custom op could not be found for the installed version of TensorFlow. Fix this by compiling custom ops. Missing file was '/home/marcel/Documents/Uni/susml/PySyft/venv/lib/python3.7/site-packages/tf_encrypted/operations/secure_random/secure_random_module_tf_1.15.0.so'



Setting up Sandbox...
Done!


<torch._C.Generator at 0x7fa544e25fb0>

In [50]:
def serialize_to_bin_pb(worker, obj, filename):
    """Serialize ``obj`` to protobuf wire format and write it to ``filename``.

    Args:
        worker: Worker handed to ``protobuf.serde._bufferize`` together with ``obj``.
        obj: Object to serialize; its class name appears in the progress message.
        filename: Destination path. The file is opened in binary write mode,
            so an existing file is overwritten.
    """
    message = protobuf.serde._bufferize(worker, obj)
    # Named ``payload`` rather than ``bin`` so the builtin ``bin()`` is not shadowed.
    payload = message.SerializeToString()
    # abspath() reports the real destination even when ``filename`` is already
    # absolute; gluing it onto os.getcwd() by hand printed a bogus path then.
    print("Writing %s to %s" % (obj.__class__.__name__, os.path.abspath(filename)))
    with open(filename, "wb") as f:
        f.write(payload)


def set_model_params(module, params_list, start_param_idx=0):
    """Set params list into model recursively.

    Replaces every slot of ``module._parameters`` with the next item of
    ``params_list`` and then does the same, depth-first, for each non-``None``
    entry of ``module._modules``.

    Args:
        module: Object exposing ``_parameters`` and ``_modules`` mappings.
        params_list: Indexable sequence of replacement parameters, consumed
            in traversal order.
        start_param_idx: Index in ``params_list`` of the first item to consume.

    Returns:
        Index in ``params_list`` one past the last item consumed.
    """
    param_idx = start_param_idx

    # Only existing keys are reassigned, so the mapping keeps its size while
    # it is being iterated.
    for name in module._parameters:
        module._parameters[name] = params_list[param_idx]
        param_idx += 1

    for child in module._modules.values():
        if child is not None:
            # The recursive call returns an absolute position in params_list
            # (it starts counting from the index passed in), so it must be
            # assigned. Adding it would count the offset twice.
            param_idx = set_model_params(child, params_list, param_idx)

    return param_idx

In [None]:
@sy.func2plan()
def training_plan(X, y, batch_size, lr, model_params):
    """Single training step, written to be traced as a PySyft plan.

    Injects ``model_params`` into the global ``model``, runs one forward and
    backward pass on the batch and returns the loss, the batch accuracy and
    the updated parameters.

    Args:
        X: Batch of model inputs, passed straight to ``model.forward``.
        y: Per-class targets; ``argmax`` over dim 1 is taken as the true class.
        batch_size: Number of samples in the batch. Forwarded to the loss
            function and used to normalise the count of correct predictions.
        lr: Learning rate handed to ``naive_sgd``.
        model_params: List of parameters to load into ``model`` and to update.

    Returns:
        Tuple ``(loss, acc, *updated_params)``.
    """
    # NOTE(review): softmax_cross_entropy_with_logits and naive_sgd are not
    # defined in the part of the notebook visible here -- make sure a cell
    # defines them before this plan is built.

    # inject params into model
    set_model_params(model, model_params)

    # forward pass
    logits = model.forward(X)

    # loss
    loss = softmax_cross_entropy_with_logits(logits, y, batch_size)

    # backprop
    loss.backward()

    # step
    updated_params = [
        naive_sgd(param, lr=lr)
        for param in model_params
    ]

    # accuracy
    pred = th.argmax(logits, dim=1)
    target = th.argmax(y, dim=1)
    acc = pred.eq(target).sum().float() / batch_size

    return (
        loss,
        acc,
        *updated_params
    )


# ---------------------------------------------------------------------------
# Local (non-federated) training loop.
#
# This code had been pasted into the middle of training_plan's body, which cut
# the function in half and left its `return` at module level (a SyntaxError,
# so training_plan was never defined). It is kept here, after the function, as
# ordinary top-level code.
# NOTE(review): this looks like a copy of an earlier training cell -- if so,
# it can be dropped from this cell to avoid re-training when the cell runs.
# ---------------------------------------------------------------------------
parameters['reload']=False
print(parameters['reload'])
if not parameters['reload']:
    tr = time.time()
    model.train(True)
    # range(1, n) performs n - 1 passes over the training data.
    for epoch in range(1,number_of_epochs):
        for i, index in enumerate(np.random.permutation(len(train_data))):
            count += 1
            data = train_data[index]

            ##gradient updates for each data entry
            model.zero_grad()

            sentence_in = data['words']
            sentence_in = Variable(torch.LongTensor(sentence_in))
            tags = data['tags']
            chars2 = data['chars']
            
            if parameters['char_mode'] == 'LSTM':
                # Sort words by length (longest first) and remember, in d, which
                # sorted position j came from which original position i.
                chars2_sorted = sorted(chars2, key=lambda p: len(p), reverse=True)
                d = {}
                for i, ci in enumerate(chars2):
                    for j, cj in enumerate(chars2_sorted):
                        if ci == cj and not j in d and not i in d.values():
                            d[j] = i
                            continue
                chars2_length = [len(c) for c in chars2_sorted]
                char_maxl = max(chars2_length)
                chars2_mask = np.zeros((len(chars2_sorted), char_maxl), dtype='int')
                for i, c in enumerate(chars2_sorted):
                    chars2_mask[i, :chars2_length[i]] = c
                chars2_mask = Variable(torch.LongTensor(chars2_mask))
            
            if parameters['char_mode'] == 'CNN':

                d = {}

                ## Padding the each word to max word size of that sentence
                chars2_length = [len(c) for c in chars2]
                char_maxl = max(chars2_length)
                chars2_mask = np.zeros((len(chars2_length), char_maxl), dtype='int')
                for i, c in enumerate(chars2):
                    chars2_mask[i, :chars2_length[i]] = c
                chars2_mask = Variable(torch.LongTensor(chars2_mask))


            targets = torch.LongTensor(tags)

            #we calculate the negative log-likelihood for the predicted tags using the predefined function
            if use_gpu:
                neg_log_likelihood = model.neg_log_likelihood(sentence_in.cuda(), targets.cuda(), chars2_mask.cuda(), chars2_length, d)
            else:
                neg_log_likelihood = model.neg_log_likelihood(sentence_in, targets, chars2_mask, chars2_length, d)
            # Accumulate the per-token loss of this sentence.
            loss += neg_log_likelihood.data.item() / len(data['words'])
            neg_log_likelihood.backward()

            #we use gradient clipping to avoid exploding gradients
            # clip_grad_norm_ is the in-place successor of the deprecated clip_grad_norm.
            torch.nn.utils.clip_grad_norm_(model.parameters(), gradient_clip)
            optimizer.step()

            #Storing loss
            if count % plot_every == 0:
                loss /= plot_every
                print(count, ': ', loss)
                if losses == []:
                    losses.append(loss)
                losses.append(loss)
                loss = 0.0

            #Evaluating on Train, Test, Dev Sets
            # Every 4 * eval_every steps early on, every eval_every steps later.
            if (count % (eval_every) == 0 and count > (eval_every * 20)) or \
                    (count % (eval_every*4) == 0 and count < (eval_every * 20)):
                model.train(False)
                best_train_F, new_train_F, _ = evaluating(model, train_data, best_train_F,"Train")
                best_dev_F, new_dev_F, save = evaluating(model, dev_data, best_dev_F,"Dev")
                if save:
                    print("Saving Model to ", model_name)
                    torch.save(model.state_dict(), model_name)
                best_test_F, new_test_F, _ = evaluating(model, test_data, best_test_F,"Test")

                all_F.append([new_train_F, new_dev_F, new_test_F])
                model.train(True)

            #Performing decay on the learning rate
            if count % len(train_data) == 0:
                adjust_learning_rate(optimizer, lr=learning_rate/(1+decay_rate*count/len(train_data)))

    print(time.time() - tr)
    plt.plot(losses)
    plt.show()

if not parameters['reload']:
    #reload the best model saved from training
    model.load_state_dict(torch.load(model_name))

In [51]:
# Placeholder inputs: their values are irrelevant, they only give the tracer
# concrete tensors to run the plan on.
model_params = [param for param in model.parameters()]
X = th.randn(3, 28 * 28)
y = nn.functional.one_hot(th.tensor([1, 2, 3]), num_classes=10)
lr = th.tensor([0.01])
batch_size = th.tensor([3.0])

# Trace the plan once (autograd included); the return value is not needed.
_ = training_plan.build(
    X,
    y,
    batch_size,
    lr,
    model_params,
    trace_autograd=True,
)

NameError: name 'training_plan' is not defined