In [None]:
import random
from conlleval import evaluate as conllevaluate
from tqdm import tqdm

directory = 'results/'

def decode(input_length, tagset, score, debug=False):
    viterbi = [[0 for _ in range(input_length)] for _ in range(len(tagset))]
    backpointer = [[0 for _ in range(input_length)] for _ in range(len(tagset))]
    best_path = []

    for i, tag in enumerate(tagset):
        viterbi[i][1] = score(tag, "<START>", 1)
    
    if debug: print(viterbi)
        
    for t in range(2, input_length - 1):
        for s, tag in enumerate(tagset):
            max_val = 0
            max_index = 0
            for b, prev_tag in enumerate(tagset):
                curr_val = viterbi[b][t - 1] + score(tag, prev_tag, t)
                if curr_val > max_val:
                    max_val = curr_val
                    max_index = b
            viterbi[s][t] = max_val
            backpointer[s][t] = max_index

    # Calculate the last column of viterbi matrix
    for i, tag in enumerate(tagset):
        viterbi[i][input_length - 1] = viterbi[i][input_length - 2] + score("<STOP>", tag, input_length - 1)

    # Find the best path probability in the last column
    best_path_prob = 0
    index_to_best_path = 0
    for i in range(len(tagset)):
        if viterbi[i][-1] > best_path_prob:
            best_path_prob = viterbi[i][-1]
            index_to_best_path = i

    # Retrieve the best path
    best_path = ["<STOP>"]
    for i in range(input_length - 1, 0, -1):
        if i == 1:
            best_path.insert(0, "<START>")
        else:
            index_to_best_path = backpointer[index_to_best_path][i]
            best_path.insert(0, tagset[index_to_best_path])
    return best_path

In [None]:
def compute_score(tag_seq, input_length, score):
    """
    Computes the total score of a tag sequence 
    """
    total_score = 0
    for i in range(1, input_length):
        total_score += score(tag_seq[i], tag_seq[i - 1], i)
    return total_score


def compute_features(tag_seq, input_length, features):
    """
    Compute f(xi, yi)
    """
    feats = FeatureVector({})
    for i in range(1, input_length):
        feats.times_plus_equal(1, features.compute_features(tag_seq[i], tag_seq[i - 1], i))
    return feats

In [3]:
import os 

os.path.exists('ner.train')

True

In [None]:
def sgd(training_size, epochs, gradient, parameters, training_observer):
    """
    Stochastic gradient descent
    """
    for i in range(epochs):
        print("epoch: ", i)
        indices = [i for i in range(training_size)]
        random.shuffle(indices)
        for t in tqdm(indices):
            parameters.times_plus_equal(-1, gradient(t))
        print("Running the training observer")
        training_observer(i, parameters)
    return parameters


def train(data, feature_names, tagset, epochs):
    """
    Trains the model on the data and returns the parameters
    """
    parameters = FeatureVector({})  

    def perceptron_gradient(i):
        inputs = data[i]
        input_len = len(inputs['tokens'])
        gold_labels = inputs['gold_tags']
        features = Features(inputs, feature_names)

        
        def score(cur_tag, pre_tag, i):
            return parameters.dot_product(features.compute_features(cur_tag, pre_tag, i))

        tags = decode(input_len, tagset, score)

        fvector = compute_features(tags, input_len, features)       
        fvector.times_plus_equal(-1, compute_features(gold_labels, input_len, features))   
        return fvector

    def training_observer(epoch, parameters):
        """
        Evaluates the parameters on the development data, and writes out the parameters to a 'model.iter'+epoch and
        the predictions to 'ner.dev.out'+epoch.
        """
        dev_data = read_data('ner.dev')[:1000]
        (_, _, f1) = evaluate(dev_data, parameters, feature_names, tagset)
        write_predictions(os.path.join(directory,'ner.dev.out'+str(epoch)), dev_data, parameters, feature_names, tagset)
        parameters.write_to_file(os.path.join(directory, 'model.iter'+str(epoch)))
        return f1

    
    return sgd(len(data), epochs, perceptron_gradient, parameters, training_observer)


def predict(inputs, input_len, parameters, feature_names, tagset):
    features = Features(inputs, feature_names)
    def score(cur_tag, pre_tag, i):
        return parameters.dot_product(features.compute_features(cur_tag, pre_tag, i))

    return decode(input_len, tagset, score)


def make_data_point(sent):
    dic = {}
    sent = [s.strip().split() for s in sent]
    dic['tokens'] = ['<START>'] + [s[0] for s in sent] + ['<STOP>']
    dic['pos'] = ['<START>'] + [s[1] for s in sent] + ['<STOP>']
    dic['NP_chunk'] = ['<START>'] + [s[2] for s in sent] + ['<STOP>']
    dic['gold_tags'] = ['<START>'] + [s[3] for s in sent] + ['<STOP>']
    return dic

def read_data(filename):
    data = []
    
    with open(filename, 'r') as f:
        sent = []
        for line in f.readlines():
            if line.strip():
                sent.append(line)
            else:
                data.append(make_data_point(sent))
                sent = []
        data.append(make_data_point(sent))

    return data


def write_predictions(out_filename, all_inputs, parameters, feature_names, tagset):
    with open(out_filename, 'w', encoding='utf-8') as f:
        for inputs in all_inputs:
            input_len = len(inputs['tokens'])
            tag_seq = predict(inputs, input_len, parameters, feature_names, tagset)
            for i, tag in enumerate(tag_seq[1:-1]):  
                f.write(' '.join([inputs['tokens'][i+1], inputs['pos'][i+1], inputs['NP_chunk'][i+1], inputs['gold_tags'][i+1], tag])+'\n') 
            f.write('\n')


def evaluate(data, parameters, feature_names, tagset):
    all_gold_tags = [ ]
    all_predicted_tags = [ ]
    for inputs in tqdm(data):
        all_gold_tags.extend(inputs['gold_tags'][1:-1])  
        input_len = len(inputs['tokens'])
        all_predicted_tags.extend(predict(inputs, input_len, parameters, feature_names, tagset)[1:-1]) 
    return conllevaluate(all_gold_tags, all_predicted_tags)

def test_decoder():
    
    tagset = ['NN', 'VB']    

    def score_wrap(cur_tag, pre_tag, i):
        retval = score(cur_tag, pre_tag, i)
        print('Score('+cur_tag+','+pre_tag+','+str(i)+') returning '+str(retval))
        return retval

    def score(cur_tag, pre_tag, i):
        if i == 0:
            print("ERROR: Don't call score for i = 0 (that points to <START>, with nothing before it)")
        if i == 1:
            if pre_tag != '<START>':
                print("ERROR: Previous tag should be <START> for i = 1. Previous tag = "+pre_tag)
            if cur_tag == 'NN':
                return 6
            if cur_tag == 'VB':
                return 4
        if i == 2:
            if cur_tag == 'NN' and pre_tag == 'NN':
                return 4
            if cur_tag == 'NN' and pre_tag == 'VB':
                return 9
            if cur_tag == 'VB' and pre_tag == 'NN':
                return 5
            if cur_tag == 'VB' and pre_tag == 'VB':
                return 0
        if i == 3:
            if cur_tag != '<STOP>':
                print('ERROR: Current tag at i = 3 should be <STOP>. Current tag = '+cur_tag)
            if pre_tag == 'NN':
                return 1
            if pre_tag == 'VB':
                return 1

    predicted_tag_seq = decode(4, tagset, score_wrap)
    print('Predicted tag sequence should be = <START> VB NN <STOP>')
    print('Predicted tag sequence = '+' '.join(predicted_tag_seq))
    print("Score of ['<START>','VB','NN','<STOP>'] = "+str(compute_score(['<START>','VB','NN','<STOP>'], 4, score)))
    print('Max score should be = 14')
    print('Max score = '+str(compute_score(predicted_tag_seq, 4, score)))


In [None]:
def main_predict(data_filename, model_filename, use_four_features=False):
    """
    Main function to make predictions.
    """
    data = read_data(data_filename)
    parameters = FeatureVector({})
    parameters.read_from_file(model_filename)

    tagset = ['B-PER', 'B-LOC', 'B-ORG', 'B-MISC', 'I-PER', 'I-LOC', 'I-ORG', 'I-MISC', 'O']

    feature_names = ['tag', 'prev_tag', 'current_word', 'curr_pos_tag']

    write_predictions(data_filename+'.out', data, parameters, feature_names, tagset)
    evaluate(data, parameters, feature_names, tagset)

    return


def main_train():
    """
    Main function to train the model
    :return: None
    """
    print('Reading training data')
    train_data = read_data('ner.train')[:1100]
    
    tagset = ['B-PER', 'B-LOC', 'B-ORG', 'B-MISC', 'I-PER', 'I-LOC', 'I-ORG', 'I-MISC', 'O']
    
    feature_names = ['tag', 'prev_tag', 'current_word', 'curr_pos_tag']
    
    print('Training...')
    parameters = train(train_data, feature_names, tagset, epochs=10)
    print('Training done')
    dev_data = read_data('ner.dev')[:1100]
    evaluate(dev_data, parameters, feature_names, tagset)
    test_data = read_data('ner.test')[:1100]
    
    evaluate(test_data, parameters, feature_names, tagset)
    parameters.write_to_file('model')

    return 



In [None]:
class Features(object):
    def __init__(self, inputs, feature_names):
        self.feature_names = feature_names
        self.inputs = inputs
        self.gazette_dict = {}

        with open('gazetteer.txt', 'r') as file:
            for row in file:
                words = row.split(' ')
                value = words[0]
                for w in words[1:]:
                    if (w in self.gazette_dict.keys()):
                        self.gazette_dict[w].append(value)
                    else:
                        self.gazette_dict[w] = [value]

    def compute_features(self, cur_tag, pre_tag, i):
        
        feats = FeatureVector({})
        curr_word = self.inputs['tokens'][i]
        len_curr_word = len(self.inputs['tokens'][i])
        
        if 'tag' in self.feature_names:
            feats.times_plus_equal(1, FeatureVector({'t='+cur_tag: 1}))
        if 'prev_tag' in self.feature_names:
            feats.times_plus_equal(1, FeatureVector({'ti='+cur_tag+"+ti-1="+pre_tag: 1}))
        if 'current_word' in self.feature_names:
            feats.times_plus_equal(1, FeatureVector({'t='+cur_tag+'+w='+self.inputs['tokens'][i]: 1}))

        # adding more features
        if 'curr_pos_tag' in self.feature_names:
            feats.times_plus_equal(1, FeatureVector({'t='+cur_tag+'+pi='+self.inputs['pos'][i]: 1}))
        
        # 
            
        #     if 'shape_curr_word' in self.feature_names:
        #         word_shape = ''.join(['a' if c.isalpha() else 'A' if c.isupper() else 'd' for c in curr_word])
        #         feats.times_plus_equal(1, FeatureVector({'t='+cur_tag+'si'+word_shape: 1}))
    

        #     if 'len_k' in self.feature_names:
        #         for j in range(1, min(5, len(curr_word) + 1)): 
        #             feats.times_plus_equal(1, FeatureVector({'t='+cur_tag+'+PRE'+str(j)+'='+curr_word[:j]: 1}))

    
           
        #     if 'in_gazetteer' in self.feature_names:
        #         if (curr_word) in self.gazette_dict.keys():
        #             if self.gazette_dict[curr_word] == cur_tag:
        #                 feats.times_plus_equal(1, FeatureVector({'t='+cur_tag+'+GAZ='+'True': 1}))
                

        #     if 'start_cap' in self.feature_names:
        #         if(curr_word[0].isupper()):
        #             feats.times_plus_equal(1, FeatureVector({'t='+cur_tag+'+CAP='+'True': 1}))
            
        return feats



In [7]:
class FeatureVector(object):

    def __init__(self, fdict):
        self.fdict = fdict

    def times_plus_equal(self, scalar, v2):
        try: 
            for key, value in v2.fdict.items():
                self.fdict[key] = scalar * value + self.fdict.get(key, 0)
        except:
            print(v2)

    def dot_product(self, v2):
        retval = 0
        for key, value in v2.fdict.items():
            retval += value * self.fdict.get(key, 0)
        return retval

    def write_to_file(self, filename):
        print('Writing to ' + filename)
        with open(filename, 'w', encoding='utf-8') as f:
            for key, value in self.fdict.items():
                f.write('{} {}\n'.format(key, value))


    def read_from_file(self, filename):
        self.fdict = {}
        with open(filename, 'r') as f:
            for line in f.readlines():
                txt = line.split()
                self.fdict[txt[0]] = float(txt[1])

main_train()   
main_predict('ner.dev', 'model')  

Reading training data
Training...
epoch:  0


100%|██████████| 1100/1100 [00:45<00:00, 24.09it/s]


Running the training observer


100%|██████████| 1000/1000 [00:41<00:00, 23.89it/s]


processed 13018 tokens with 1657 phrases; found: 2815 phrases; correct: 587.
accuracy:  47.89%; (non-O)
accuracy:  77.15%; precision:  20.85%; recall:  35.43%; FB1:  26.25
              LOC: precision:  95.52%; recall:  36.09%; FB1:  52.39  201
             MISC: precision:   0.00%; recall:   0.00%; FB1:   0.00  309
              ORG: precision:  66.67%; recall:   0.61%; FB1:   1.20  3
              PER: precision:  17.07%; recall:  63.29%; FB1:  26.89  2302
Writing to results/model.iter0
epoch:  1


100%|██████████| 1100/1100 [00:45<00:00, 24.10it/s]


Running the training observer


100%|██████████| 1000/1000 [00:41<00:00, 23.92it/s]


processed 13018 tokens with 1657 phrases; found: 3057 phrases; correct: 394.
accuracy:  32.00%; (non-O)
accuracy:  75.98%; precision:  12.89%; recall:  23.78%; FB1:  16.72
              LOC: precision:   0.00%; recall:   0.00%; FB1:   0.00  1
             MISC: precision:   2.53%; recall:  19.43%; FB1:   4.47  1345
              ORG: precision:  31.36%; recall:  16.11%; FB1:  21.29  169
              PER: precision:  19.91%; recall:  49.44%; FB1:  28.39  1542
Writing to results/model.iter1
epoch:  2


100%|██████████| 1100/1100 [00:45<00:00, 24.15it/s]


Running the training observer


100%|██████████| 1000/1000 [00:41<00:00, 23.85it/s]


processed 13018 tokens with 1657 phrases; found: 2958 phrases; correct: 493.
accuracy:  28.32%; (non-O)
accuracy:  75.98%; precision:  16.67%; recall:  29.75%; FB1:  21.37
              LOC: precision:  32.13%; recall:  67.11%; FB1:  43.46  1111
             MISC: precision:   7.10%; recall:   7.43%; FB1:   7.26  183
              ORG: precision:  15.66%; recall:  18.84%; FB1:  17.10  396
              PER: precision:   4.81%; recall:   9.82%; FB1:   6.46  1268
Writing to results/model.iter2
epoch:  3


100%|██████████| 1100/1100 [00:45<00:00, 24.13it/s]


Running the training observer


100%|██████████| 1000/1000 [00:41<00:00, 23.86it/s]


processed 13018 tokens with 1657 phrases; found: 2654 phrases; correct: 533.
accuracy:  34.76%; (non-O)
accuracy:  78.84%; precision:  20.08%; recall:  32.17%; FB1:  24.73
              LOC: precision:  87.25%; recall:  41.17%; FB1:  55.94  251
             MISC: precision:   6.06%; recall:   2.29%; FB1:   3.32  66
              ORG: precision:  16.61%; recall:  47.42%; FB1:  24.61  939
              PER: precision:  11.02%; recall:  24.80%; FB1:  15.26  1398
Writing to results/model.iter3
epoch:  4


100%|██████████| 1100/1100 [00:45<00:00, 24.17it/s]


Running the training observer


100%|██████████| 1000/1000 [00:41<00:00, 23.89it/s]


processed 13018 tokens with 1657 phrases; found: 2973 phrases; correct: 483.
accuracy:  25.85%; (non-O)
accuracy:  74.15%; precision:  16.25%; recall:  29.15%; FB1:  20.86
              LOC: precision:  50.16%; recall:  59.77%; FB1:  54.55  634
             MISC: precision:   0.96%; recall:   1.14%; FB1:   1.04  208
              ORG: precision:  17.10%; recall:  48.02%; FB1:  25.22  924
              PER: precision:   0.41%; recall:   0.81%; FB1:   0.55  1207
Writing to results/model.iter4
epoch:  5


100%|██████████| 1100/1100 [00:46<00:00, 23.74it/s]


Running the training observer


100%|██████████| 1000/1000 [00:42<00:00, 23.52it/s]


processed 13018 tokens with 1657 phrases; found: 1921 phrases; correct: 324.
accuracy:  23.62%; (non-O)
accuracy:  77.35%; precision:  16.87%; recall:  19.55%; FB1:  18.11
              LOC: precision:  86.36%; recall:  39.29%; FB1:  54.01  242
             MISC: precision:   0.00%; recall:   0.00%; FB1:   0.00  50
              ORG: precision:  35.87%; recall:  10.03%; FB1:  15.68  92
              PER: precision:   5.34%; recall:  13.20%; FB1:   7.60  1537
Writing to results/model.iter5
epoch:  6


100%|██████████| 1100/1100 [00:45<00:00, 23.96it/s]


Running the training observer


100%|██████████| 1000/1000 [00:41<00:00, 24.04it/s]


processed 13018 tokens with 1657 phrases; found: 2656 phrases; correct: 739.
accuracy:  50.12%; (non-O)
accuracy:  81.49%; precision:  27.82%; recall:  44.60%; FB1:  34.27
              LOC: precision:  46.38%; recall:  63.91%; FB1:  53.75  733
             MISC: precision:   6.48%; recall:   8.00%; FB1:   7.16  216
              ORG: precision:  53.33%; recall:  12.16%; FB1:  19.80  75
              PER: precision:  21.14%; recall:  55.56%; FB1:  30.63  1632
Writing to results/model.iter6
epoch:  7


100%|██████████| 1100/1100 [00:46<00:00, 23.85it/s]


Running the training observer


100%|██████████| 1000/1000 [00:41<00:00, 23.87it/s]


processed 13018 tokens with 1657 phrases; found: 2742 phrases; correct: 797.
accuracy:  54.62%; (non-O)
accuracy:  82.05%; precision:  29.07%; recall:  48.10%; FB1:  36.24
              LOC: precision:  85.92%; recall:  44.74%; FB1:  58.84  277
             MISC: precision:  13.54%; recall:   7.43%; FB1:   9.59  96
              ORG: precision:  44.61%; recall:  27.66%; FB1:  34.15  204
              PER: precision:  21.02%; recall:  73.27%; FB1:  32.66  2165
Writing to results/model.iter7
epoch:  8


100%|██████████| 1100/1100 [00:45<00:00, 24.20it/s]


Running the training observer


100%|██████████| 1000/1000 [00:41<00:00, 23.96it/s]


processed 13018 tokens with 1657 phrases; found: 2736 phrases; correct: 482.
accuracy:  28.82%; (non-O)
accuracy:  77.69%; precision:  17.62%; recall:  29.09%; FB1:  21.94
              LOC: precision:  32.00%; recall:  63.35%; FB1:  42.52  1053
             MISC: precision:   5.97%; recall:   2.29%; FB1:   3.31  67
              ORG: precision:  37.22%; recall:  25.23%; FB1:  30.07  223
              PER: precision:   4.16%; recall:   9.34%; FB1:   5.76  1393
Writing to results/model.iter8
epoch:  9


100%|██████████| 1100/1100 [00:45<00:00, 23.92it/s]


Running the training observer


100%|██████████| 1000/1000 [00:41<00:00, 23.92it/s]


processed 13018 tokens with 1657 phrases; found: 2626 phrases; correct: 420.
accuracy:  24.48%; (non-O)
accuracy:  76.96%; precision:  15.99%; recall:  25.35%; FB1:  19.61
              LOC: precision:  30.83%; recall:  63.91%; FB1:  41.59  1103
             MISC: precision:   0.00%; recall:   0.00%; FB1:   0.00  59
              ORG: precision:  40.98%; recall:   7.60%; FB1:  12.82  61
              PER: precision:   3.92%; recall:   8.86%; FB1:   5.43  1403
Writing to results/model.iter9
Training done


100%|██████████| 1100/1100 [00:46<00:00, 23.75it/s]


processed 15257 tokens with 1808 phrases; found: 2868 phrases; correct: 446.
accuracy:  24.14%; (non-O)
accuracy:  78.48%; precision:  15.55%; recall:  24.67%; FB1:  19.08
              LOC: precision:  29.85%; recall:  62.16%; FB1:  40.33  1216
             MISC: precision:   0.00%; recall:   0.00%; FB1:   0.00  59
              ORG: precision:  41.54%; recall:   7.48%; FB1:  12.68  65
              PER: precision:   3.66%; recall:   8.43%; FB1:   5.11  1528


100%|██████████| 1100/1100 [00:45<00:00, 24.03it/s]


processed 13063 tokens with 1888 phrases; found: 3000 phrases; correct: 367.
accuracy:  20.70%; (non-O)
accuracy:  72.31%; precision:  12.23%; recall:  19.44%; FB1:  15.02
              LOC: precision:  23.96%; recall:  58.47%; FB1:  33.99  1152
             MISC: precision:  10.00%; recall:   4.39%; FB1:   6.10  90
              ORG: precision:  43.90%; recall:   3.31%; FB1:   6.16  41
              PER: precision:   3.73%; recall:   9.58%; FB1:   5.37  1717
Writing to model


100%|██████████| 3466/3466 [02:28<00:00, 23.27it/s]


processed 51578 tokens with 5917 phrases; found: 9101 phrases; correct: 1375.
accuracy:  22.89%; (non-O)
accuracy:  79.02%; precision:  15.11%; recall:  23.24%; FB1:  18.31
              LOC: precision:  29.44%; recall:  60.33%; FB1:  39.57  3750
             MISC: precision:   7.56%; recall:   1.86%; FB1:   2.99  225
              ORG: precision:  44.02%; recall:   6.04%; FB1:  10.62  184
              PER: precision:   3.50%; recall:   9.44%; FB1:   5.11  4942


In [8]:
test_decoder()

Score(NN,<START>,1) returning 6
Score(VB,<START>,1) returning 4
Score(NN,NN,2) returning 4
Score(NN,VB,2) returning 9
Score(VB,NN,2) returning 5
Score(VB,VB,2) returning 0
Score(<STOP>,NN,3) returning 1
Score(<STOP>,VB,3) returning 1
Predicted tag sequence should be = <START> VB NN <STOP>
Predicted tag sequence = <START> VB NN <STOP>
Score of ['<START>','VB','NN','<STOP>'] = 14
Max score should be = 14
Max score = 14


## Training using SGD (trained on 1100 training examples)

In [9]:
main_train()

Reading training data
Training...
epoch:  0


100%|██████████| 1100/1100 [00:45<00:00, 24.31it/s]


Running the training observer


100%|██████████| 1000/1000 [00:41<00:00, 23.97it/s]


processed 13018 tokens with 1657 phrases; found: 2053 phrases; correct: 540.
accuracy:  40.38%; (non-O)
accuracy:  79.94%; precision:  26.30%; recall:  32.59%; FB1:  29.11
              LOC: precision:  91.76%; recall:  31.39%; FB1:  46.78  182
             MISC: precision:   0.00%; recall:   0.00%; FB1:   0.00  42
              ORG: precision:  47.83%; recall:   3.34%; FB1:   6.25  23
              PER: precision:  20.04%; recall:  58.29%; FB1:  29.83  1806
Writing to results/model.iter0
epoch:  1


100%|██████████| 1100/1100 [00:45<00:00, 23.96it/s]


Running the training observer


100%|██████████| 1000/1000 [00:41<00:00, 24.00it/s]


processed 13018 tokens with 1657 phrases; found: 2812 phrases; correct: 451.
accuracy:  24.36%; (non-O)
accuracy:  76.20%; precision:  16.04%; recall:  27.22%; FB1:  20.18
              LOC: precision:  29.51%; recall:  63.35%; FB1:  40.26  1142
             MISC: precision:   0.00%; recall:   0.00%; FB1:   0.00  48
              ORG: precision:  22.22%; recall:  26.75%; FB1:  24.28  396
              PER: precision:   2.12%; recall:   4.19%; FB1:   2.82  1226
Writing to results/model.iter1
epoch:  2


100%|██████████| 1100/1100 [00:44<00:00, 24.47it/s]


Running the training observer


100%|██████████| 1000/1000 [00:41<00:00, 23.86it/s]


processed 13018 tokens with 1657 phrases; found: 1488 phrases; correct: 107.
accuracy:   5.41%; (non-O)
accuracy:  74.01%; precision:   7.19%; recall:   6.46%; FB1:   6.80
              LOC: precision:  79.66%; recall:  17.67%; FB1:  28.92  118
             MISC: precision:   0.65%; recall:   0.57%; FB1:   0.61  155
              ORG: precision:  56.25%; recall:   2.74%; FB1:   5.22  16
              PER: precision:   0.25%; recall:   0.48%; FB1:   0.33  1199
Writing to results/model.iter2
epoch:  3


100%|██████████| 1100/1100 [00:45<00:00, 24.36it/s]


Running the training observer


100%|██████████| 1000/1000 [00:41<00:00, 23.94it/s]


processed 13018 tokens with 1657 phrases; found: 1721 phrases; correct: 245.
accuracy:  14.62%; (non-O)
accuracy:  75.68%; precision:  14.24%; recall:  14.79%; FB1:  14.51
              LOC: precision:  86.85%; recall:  34.77%; FB1:  49.66  213
             MISC: precision:   5.56%; recall:   2.29%; FB1:   3.24  72
              ORG: precision:  22.16%; recall:  13.07%; FB1:  16.44  194
              PER: precision:   1.05%; recall:   2.09%; FB1:   1.40  1242
Writing to results/model.iter3
epoch:  4


100%|██████████| 1100/1100 [00:45<00:00, 24.26it/s]


Running the training observer


100%|██████████| 1000/1000 [00:41<00:00, 24.19it/s]


processed 13018 tokens with 1657 phrases; found: 2788 phrases; correct: 713.
accuracy:  50.74%; (non-O)
accuracy:  80.38%; precision:  25.57%; recall:  43.03%; FB1:  32.08
              LOC: precision:  93.88%; recall:  34.59%; FB1:  50.55  196
             MISC: precision:  12.24%; recall:   6.86%; FB1:   8.79  98
              ORG: precision:  41.89%; recall:  18.84%; FB1:  26.00  148
              PER: precision:  19.39%; recall:  73.27%; FB1:  30.67  2346
Writing to results/model.iter4
epoch:  5


100%|██████████| 1100/1100 [00:44<00:00, 24.65it/s]


Running the training observer


100%|██████████| 1000/1000 [00:41<00:00, 24.35it/s]


processed 13018 tokens with 1657 phrases; found: 2629 phrases; correct: 685.
accuracy:  49.96%; (non-O)
accuracy:  80.58%; precision:  26.06%; recall:  41.34%; FB1:  31.96
              LOC: precision:  40.46%; recall:  65.79%; FB1:  50.11  865
             MISC: precision:   5.36%; recall:   1.71%; FB1:   2.60  56
              ORG: precision:  92.86%; recall:   3.95%; FB1:   7.58  14
              PER: precision:  18.83%; recall:  51.37%; FB1:  27.56  1694
Writing to results/model.iter5
epoch:  6


100%|██████████| 1100/1100 [00:44<00:00, 24.74it/s]


Running the training observer


100%|██████████| 1000/1000 [00:44<00:00, 22.56it/s]


processed 13018 tokens with 1657 phrases; found: 2601 phrases; correct: 700.
accuracy:  48.43%; (non-O)
accuracy:  80.94%; precision:  26.91%; recall:  42.25%; FB1:  32.88
              LOC: precision:  85.14%; recall:  44.17%; FB1:  58.17  276
             MISC: precision:   0.00%; recall:   0.00%; FB1:   0.00  41
              ORG: precision:  57.14%; recall:   3.65%; FB1:   6.86  21
              PER: precision:  20.02%; recall:  72.95%; FB1:  31.41  2263
Writing to results/model.iter6
epoch:  7


100%|██████████| 1100/1100 [00:46<00:00, 23.89it/s]


Running the training observer


100%|██████████| 1000/1000 [00:41<00:00, 24.34it/s]


processed 13018 tokens with 1657 phrases; found: 2704 phrases; correct: 503.
accuracy:  32.04%; (non-O)
accuracy:  78.20%; precision:  18.60%; recall:  30.36%; FB1:  23.07
              LOC: precision:  34.64%; recall:  62.97%; FB1:  44.70  967
             MISC: precision:  12.75%; recall:   7.43%; FB1:   9.39  102
              ORG: precision:  51.72%; recall:  22.80%; FB1:  31.65  145
              PER: precision:   5.37%; recall:  12.88%; FB1:   7.58  1490
Writing to results/model.iter7
epoch:  8


100%|██████████| 1100/1100 [00:44<00:00, 24.54it/s]


Running the training observer


100%|██████████| 1000/1000 [00:41<00:00, 24.36it/s]


processed 13018 tokens with 1657 phrases; found: 1995 phrases; correct: 476.
accuracy:  32.04%; (non-O)
accuracy:  78.68%; precision:  23.86%; recall:  28.73%; FB1:  26.07
              LOC: precision:  81.79%; recall:  44.74%; FB1:  57.84  291
             MISC: precision:  13.98%; recall:   7.43%; FB1:   9.70  93
              ORG: precision:  29.89%; recall:  16.72%; FB1:  21.44  184
              PER: precision:  11.91%; recall:  27.38%; FB1:  16.60  1427
Writing to results/model.iter8
epoch:  9


100%|██████████| 1100/1100 [00:44<00:00, 24.55it/s]


Running the training observer


100%|██████████| 1000/1000 [00:41<00:00, 24.36it/s]


processed 13018 tokens with 1657 phrases; found: 2655 phrases; correct: 512.
accuracy:  30.76%; (non-O)
accuracy:  78.06%; precision:  19.28%; recall:  30.90%; FB1:  23.75
              LOC: precision:  33.96%; recall:  61.28%; FB1:  43.70  960
             MISC: precision:   7.93%; recall:   7.43%; FB1:   7.67  164
              ORG: precision:  27.75%; recall:  16.11%; FB1:  20.38  191
              PER: precision:   8.96%; recall:  19.32%; FB1:  12.24  1340
Writing to results/model.iter9
Training done


100%|██████████| 1100/1100 [00:45<00:00, 24.21it/s]


processed 15257 tokens with 1808 phrases; found: 2900 phrases; correct: 539.
accuracy:  29.72%; (non-O)
accuracy:  79.37%; precision:  18.59%; recall:  29.81%; FB1:  22.90
              LOC: precision:  32.89%; recall:  59.59%; FB1:  42.39  1058
             MISC: precision:   8.19%; recall:   7.04%; FB1:   7.57  171
              ORG: precision:  25.94%; recall:  15.24%; FB1:  19.20  212
              PER: precision:   8.36%; recall:  18.37%; FB1:  11.49  1459


100%|██████████| 1100/1100 [00:45<00:00, 24.31it/s]

processed 13063 tokens with 1888 phrases; found: 3026 phrases; correct: 450.
accuracy:  25.12%; (non-O)
accuracy:  73.08%; precision:  14.87%; recall:  23.83%; FB1:  18.32
              LOC: precision:  26.54%; recall:  57.63%; FB1:  36.34  1025
             MISC: precision:  10.17%; recall:   8.78%; FB1:   9.42  177
              ORG: precision:  21.86%; recall:   8.66%; FB1:  12.40  215
              PER: precision:   7.02%; recall:  16.92%; FB1:   9.93  1609
Writing to model





In [10]:
main_predict('ner.dev', 'results/model.iter8')

100%|██████████| 3466/3466 [02:37<00:00, 21.99it/s]

processed 51578 tokens with 5917 phrases; found: 7002 phrases; correct: 1566.
accuracy:  29.86%; (non-O)
accuracy:  80.39%; precision:  22.37%; recall:  26.47%; FB1:  24.24
              LOC: precision:  78.13%; recall:  40.22%; FB1:  53.10  942
             MISC: precision:  18.06%; recall:   7.33%; FB1:  10.43  371
              ORG: precision:  29.21%; recall:  16.03%; FB1:  20.70  736
              PER: precision:  11.06%; recall:  29.91%; FB1:  16.15  4953





In [11]:
main_predict('ner.test', 'results/model.iter8')

100%|██████████| 3684/3684 [02:52<00:00, 21.40it/s]

processed 46666 tokens with 5616 phrases; found: 7149 phrases; correct: 1301.
accuracy:  26.80%; (non-O)
accuracy:  77.67%; precision:  18.20%; recall:  23.17%; FB1:  20.38
              LOC: precision:  76.96%; recall:  41.30%; FB1:  53.75  894
             MISC: precision:  12.89%; recall:   5.85%; FB1:   8.05  318
              ORG: precision:  26.28%; recall:  10.63%; FB1:  15.13  666
              PER: precision:   7.53%; recall:  24.78%; FB1:  11.55  5271





In [12]:
data = read_data('ner.test')[:4]
parameters = FeatureVector({})
parameters.read_from_file('results/model.iter8')

tagset = ['B-PER', 'B-LOC', 'B-ORG', 'B-MISC', 'I-PER', 'I-LOC', 'I-ORG', 'I-MISC', 'O']

feature_names = ['tag', 'prev_tag', 'current_word', 'curr_pos_tag']

print(data[0])

{'tokens': ['<START>', 'SOCCER', '-', 'JAPAN', 'GET', 'LUCKY', 'WIN', ',', 'CHINA', 'IN', 'SURPRISE', 'DEFEAT', '.', '<STOP>'], 'pos': ['<START>', 'NN', ':', 'NNP', 'VB', 'NNP', 'NNP', ',', 'NNP', 'IN', 'DT', 'NN', '.', '<STOP>'], 'NP_chunk': ['<START>', 'I-NP', 'O', 'I-NP', 'I-VP', 'I-NP', 'I-NP', 'O', 'I-NP', 'I-PP', 'I-NP', 'I-NP', 'O', '<STOP>'], 'gold_tags': ['<START>', 'O', 'O', 'I-LOC', 'O', 'O', 'O', 'O', 'I-PER', 'O', 'O', 'O', 'O', '<STOP>']}


In [13]:
all_gold_tags = [ ]
all_predicted_tags = [ ]
for inputs in tqdm(data):
    all_gold_tags.append(inputs['gold_tags'][1:-1]) 
    input_len = len(inputs['tokens'])
    all_predicted_tags.append(predict(inputs, input_len, parameters, feature_names, tagset)[1:-1]) 

100%|██████████| 4/4 [00:00<00:00, 19.83it/s]


In [14]:
display_id = 2

In [15]:
print(data[display_id]['tokens'])

['<START>', 'AL-AIN', ',', 'United', 'Arab', 'Emirates', '1996-12-06', '<STOP>']


In [16]:
print(all_gold_tags[display_id])

['I-LOC', 'O', 'I-LOC', 'I-LOC', 'I-LOC', 'O']


In [17]:
print(all_predicted_tags[display_id])

['O', 'O', 'I-LOC', 'O', 'I-MISC', 'B-PER']


In [19]:
!cat "results/model.iter9" | awk '{print $2, $1}' | sort -gr > "results/model.4features.txt"

The file `model.sorted.txt` will be viewable in your Google Drive folder.