# Part 5

In [6]:
import math
import numpy as np
from collections import defaultdict
from scipy.optimize import fmin_l_bfgs_b
import time
from datetime import datetime
import pytz

In [7]:
DATA_FILE = "dataset" # dir where your train/dev.in is stored
FEATURE_OUTPUT_DIRECTORY = "features"
LEARN_CO = 0.1

In [8]:
def read_train_file(directory):
    x_train = []
    y_train = []
    all_tags = []
    all_words = []

    with open(directory) as f:
        x_sent = []
        y = []
        for line in f:
            if line == '\n': # end of a sentence
                x_train.append(x_sent)
                y_train.append(y)
                x_sent=[]
                y=[]
            else:
                temp = line.strip().split()
                x_sent.append(temp[0]) # word
                y.append(temp[1]) # tag

                if temp[1] not in all_tags:
                    all_tags.append(temp[1])
                if temp[0] not in all_words:
                    all_words.append(temp[0])

    return x_train, y_train, all_tags, all_words

In [9]:
x_train, y_train, ALL_TAGS,  ALL_WORDS = read_train_file(DATA_FILE + '/train')
all_tags_len = len(ALL_TAGS)
# x_train
# ALL_TAGS # without START, STOP

In [10]:
def write_feature(feat, output_file):
    with open(output_file, "w") as out:
        for (k, v) in feat.items():
            out.write(f"{k} {v}\n")

In [102]:
def get_feature_dict_part5(x_train, y_train, output_dir):
    
    features = {}
    tags = ALL_TAGS + ["START", "STOP"] 
    unk = "<UNK>"
    all_words = ALL_WORDS +[unk]
    #Initiate transition
    for i in tags:
        for j in tags:
            for e in all_words:
                string = f"transition:{str(i)}+{str(j)}+{str(e)}"
                features[string] = -2**21
    
    #Initiate emission dictionaries 1 and 2
    for j in ALL_TAGS:
        for i in ALL_WORDS:
            string = f"emission1:{str(j)}+{str(i)}"
            features[string] = -2**21

    for j in ALL_TAGS:
        for i in ALL_WORDS:
            string = f"emission2:{str(j)}+{str(i)}"
            features[string] = -2**21
    
    #Initiate transition2
    for i in tags:
        for j in tags:
            string = f"transition2:{str(i)}+{str(j)}"
            features[string] = -2**21
    
    #Initiate emission dictionaries 3
    for j in ALL_TAGS:
        for i in ALL_WORDS:
            string = f"emission3:{str(j)}+{str(i)}"
            features[string] = -2**21


    #Initiate dictionary for counting emissions
    label_dict = defaultdict(int)  # {LABEL : COUNT} e.g: {'o': 24273, 'B-negative': 278, ...}
    word_label_dict_prev = defaultdict(int) # {(LABEL, WORD): COUNT} 
                               # e.g: {('O', 'All'): 3, ('B-positive', 'food'): 131, ...}
    word_label_dict_next = defaultdict(int) 

    label_dict2 = defaultdict(int)  # {LABEL : COUNT} e.g: {'o': 24273, 'B-negative': 278, ...}
    word_label_dict2 = defaultdict(int) # {(LABEL, WORD): COUNT} 
                               # e.g: {('O', 'All'): 3, ('B-positive', 'food'): 131, ...}

    #Populate dictionary for emission 3 with count
    for i in range(len(x_train)):
        for j in range(len(x_train[i])):
            label_dict2[y_train[i][j]] += 1
            word_label_dict2[(y_train[i][j], x_train[i][j])] += 1

    #Populate dictionary for emission 1 with count
    for i in range(len(x_train)):
        for j in range(1,len(x_train[i])):
            label_dict[y_train[i][j]] += 1
            word_label_dict_prev[(y_train[i][j], x_train[i][j-1])] += 1

    #Populate dictionary for emission 2 with count
    for i in range(len(x_train)):
        for j in range(len(x_train[i])-1):
            label_dict[y_train[i][j]] += 1
            word_label_dict_next[(y_train[i][j], x_train[i][j+1])] += 1


    #Populate dictionary of emission 1 with log prob
    emission_prev = defaultdict(int)
    for k in word_label_dict_prev:
        tag = k[0]
        string = f"emission1:{str(k[0])}+{str(k[1])}"
        emission_prev[string] = math.log(float(word_label_dict_prev[k])/label_dict[tag])
    # print(f"emission: {emission}")

    #Populate dictionary of emission 2 with log prob
    emission_next = defaultdict(int)
    for k in word_label_dict_next:
        tag = k[0]
        string = f"emission2:{str(k[0])}+{str(k[1])}"
        emission_next[string] = math.log(float(word_label_dict_next[k])/label_dict[tag])

    emission = defaultdict(int)
    for k in word_label_dict2:
        tag = k[0]
        string = f"emission3:{str(k[0])}+{str(k[1])}"
        emission[string] = math.log(float(word_label_dict2[k])/label_dict2[tag])

    # getting transition
    yi_dict = defaultdict(int)
    yj_dict = defaultdict(int)

    yi_dict2 = defaultdict(int)
    yj_dict2 = defaultdict(int)
    
    for i in range(len(x_train)):
#         if len(y_train[i]) == 0: # this seems to be useless
#             continue
        #For second dictionary for original features
        yi_dict2['START'] += 1
        yj_dict2[('START', y_train[i][0])] += 1
        yj_dict2[(y_train[i][-1],'STOP')] += 1


         # adding START and STOP tag to each sentence
        yi_dict['START'] += 1
        yj_dict[('START', y_train[i][0],x_train[i][0])] += 1
        yj_dict[(y_train[i][-1],'STOP',None)] += 1

        for j in range(1,len(x_train[i])):
            yi_dict[y_train[i][j]] += 1
            yj_dict[(y_train[i][j-1],y_train[i][j],x_train[i][j])] += 1
       
            if j<len(x_train[i])-1:
                yi_dict2[y_train[i][j]] += 1
                yj_dict2[(y_train[i][j],y_train[i][j+1])] += 1
        yi_dict2[y_train[i][-1]] += 1
            
                
    transition = defaultdict(int)
    for k in yj_dict:
        string = f"transition:{str(k[0])}+{str(k[1])}+{str(k[2])}"
        transition[string] = math.log(float(yj_dict[k])/(yi_dict[k[0]]+1)) 
        unk_string = f"transition:{str(k[0])}+{str(k[1])}+{unk}"
        transition[unk_string]= math.log(float(1/yi_dict[k[0]]))
    
  

    #for second dictionary for original features
    transition2 = defaultdict(int)
    for k in yj_dict2:
        string = f"transition2:{str(k[0])}+{str(k[1])}"
        transition2[string] = math.log(float(yj_dict2[k])/yi_dict2[k[0]])
    
    if "transition2:START+STOP" in transition2:
        del transition2["transition2:START+STOP"]

    write_feature(emission_prev, output_dir + "/emission_P5_1.txt") # save emission 1 dictionary
    write_feature(emission_next, output_dir + "/emission_P5_2.txt") # save emission 2 dictionary
    write_feature(transition, output_dir + "/transition_P5.txt") # save transition dictionary
    write_feature(emission, output_dir + "/emission_P5_original.txt") # save emission dictionary
    write_feature(transition2, output_dir + "/transition_P5_original.txt") # save transition dictionary

    for key in emission_prev:
        features[key] = emission_prev[key]
    for key in emission_next:
        features[key] = emission_next[key]
    for key in transition:
        features[key] = transition[key]
    for key in emission:
        features[key] = emission[key]
    for key in transition2:
        features[key] = transition2[key]

    return features

In [103]:
feature_dict = get_feature_dict_part5(x_train, y_train, FEATURE_OUTPUT_DIRECTORY)
write_feature(feature_dict, FEATURE_OUTPUT_DIRECTORY + "/features_P5.txt")

In [104]:
def write_output(directory, x, y):
    with open(directory, 'w') as f:
        for i in range(len(x)):
            for j in range(len(x[i])):
                f.write(f"{x[i][j]} {y[i][j]}\n")
            f.write('\n')

In [149]:
def viterbi_part5(sent, feature_dict): 
    table = []
    for i in range(all_tags_len):
        temp = []
        for j in range(len(sent)):
            temp.append(-2**31)
        table.append(temp)
            
    trace = []
    for j in range(all_tags_len):
        temp = []
        for i in range(len(sent)):
            temp.append(None)
        trace.append(temp)
    
    score = -2**31
    pointer = None
    
    # START -> first tag
    # check first word sent[0]
    for i in range(all_tags_len):
        n = len(sent)
        trace[i][0] = 'START'
        if n!=1:
            
            if (f"transition:START+{ALL_TAGS[i]}+{sent[0]}" in feature_dict):
                if (f"emission2:{ALL_TAGS[i]}+{sent[1]}" in feature_dict):
                    table[i][0] = feature_dict[f"transition:START+{ALL_TAGS[i]}+{sent[0]}"] +\
                                feature_dict[f"emission2:{ALL_TAGS[i]}+{sent[1]}"] +\
                                            feature_dict[f"transition2:START+{ALL_TAGS[i]}"] +\
                                            feature_dict[f"emission3:{ALL_TAGS[i]}+{sent[0]}"]

                else:
                    table[i][0] = feature_dict[f"transition:START+{ALL_TAGS[i]}+{sent[0]}"] +\
                                                feature_dict[f"transition2:START+{ALL_TAGS[i]}"] +\
                                                feature_dict[f"emission3:{ALL_TAGS[i]}+{sent[0]}"]
          
            else:
                if (f"emission2:{ALL_TAGS[i]}+{sent[1]}" in feature_dict):
                    # table[i][0] = feature_dict[f"transition:START+{ALL_TAGS[i]}+{unk}"] +\
                    table[i][0] =feature_dict[f"emission2:{ALL_TAGS[i]}+{sent[1]}"] +\
                                                 feature_dict[f"transition2:START+{ALL_TAGS[i]}"]
                                                # feature_dict[f"emission3:{ALL_TAGS[i]}+{unk}"]
                else:
                    # table[i][0] = feature_dict[f"transition:START+{ALL_TAGS[i]}+{unk}"] +\
                    table[i][0] =   feature_dict[f"transition2:START+{ALL_TAGS[i]}"] 
                                            # feature_dict[f"emission3:{ALL_TAGS[i]}+{unk}"]
        else:
            table[i][0] = feature_dict[f"transition:START+{ALL_TAGS[i]}+{sent[0]}"] +\
                                         feature_dict[f"transition2:START+{ALL_TAGS[i]}"] +\
                                            feature_dict[f"emission3:{ALL_TAGS[i]}+{sent[0]}"]

            
        
            
            
    # iterate through the rest of sent
    for s in range(1, len(sent)):
        for v in range(all_tags_len):
            
            # transition score u -> v
            for u in range(all_tags_len):
                if (f"transition:{ALL_TAGS[u]}+{ALL_TAGS[v]}+{sent[s]}"in feature_dict):
                    if table[v][s] < table[u][s-1] + feature_dict[f"transition:{ALL_TAGS[u]}+{ALL_TAGS[v]}+{sent[s]}"] + feature_dict[f"transition2:{ALL_TAGS[u]}+{ALL_TAGS[v]}"]:
                        table[v][s] = table[u][s-1] + feature_dict[f"transition:{ALL_TAGS[u]}+{ALL_TAGS[v]}+{sent[s]}"] + feature_dict[f"transition2:{ALL_TAGS[u]}+{ALL_TAGS[v]}"]
                        trace[v][s] = u
                else:
                    if table[v][s] < table[u][s-1] +  feature_dict[f"transition2:{ALL_TAGS[u]}+{ALL_TAGS[v]}"]:
                        table[v][s] = table[u][s-1] +  feature_dict[f"transition2:{ALL_TAGS[u]}+{ALL_TAGS[v]}"]
                        trace[v][s] = u

            if (f"emission3:{ALL_TAGS[v]}+{sent[s]}" in feature_dict):
                table[v][s] += feature_dict[f"emission3:{ALL_TAGS[v]}+{sent[s]}"]    
            

            if s!=len(sent)-1:
                if (f"emission2:{ALL_TAGS[v]}+{sent[s+1]}" in feature_dict) and (f"emission1:{ALL_TAGS[v]}+{sent[s-1]}" in feature_dict):
                    table[v][s] += feature_dict[f"emission2:{ALL_TAGS[v]}+{sent[s+1]}"] + feature_dict[f"emission1:{ALL_TAGS[v]}+{sent[s-1]}"]
                elif (f"emission2:{ALL_TAGS[v]}+{sent[s+1]}" in feature_dict):
                    table[v][s] += feature_dict[f"emission2:{ALL_TAGS[v]}+{sent[s+1]}"]
                elif  (f"emission2:{ALL_TAGS[v]}+{sent[s-1]}" in feature_dict):
                    table[v][s] +=feature_dict[f"emission1:{ALL_TAGS[v]}+{sent[s-1]}"]
            else:
                if (f"emission1:{ALL_TAGS[v]}+{sent[s-1]}" in feature_dict):
                    table[v][s] += feature_dict[f"emission1:{ALL_TAGS[v]}+{sent[s-1]}"]
        

    
    # last word -> STOP
    for i in range(all_tags_len):
        

        if score < table[i][-1] + feature_dict[f"transition:{ALL_TAGS[i]}+STOP+{None}"] + feature_dict[f"transition2:{ALL_TAGS[i]}+STOP"]:
            score = table[i][-1] + feature_dict[f"transition:{ALL_TAGS[i]}+STOP+{None}"] + feature_dict[f"transition2:{ALL_TAGS[i]}+STOP"]
            pointer = i
    
    
    output = ['STOP']
    output.append(ALL_TAGS[pointer])
    wanted_tag = pointer
    
    for i in range(len(sent)-1, 0, -1):
        output.append(ALL_TAGS[trace[wanted_tag][i]])
        wanted_tag = trace[wanted_tag][i]
    
    output.append('START')
    return output[::-1], score

In [150]:
viterbi_part5('Great food with an awesome atmosphere !'.split(), feature_dict)

(['START', 'O', 'B-positive', 'O', 'O', 'O', 'B-positive', 'O', 'STOP'],
 -163.96888670538772)

In [137]:
def read_validation_file(directory):
    dev = []
    with open(directory) as f:
        s = []
        for line in f:
            if line == '\n':
                dev.append(s)
                s = []
            else:
                temp = line.strip()
                s.append(temp)
    return dev
x_dev = read_validation_file(DATA_FILE +'/dev.in')
y_dev = []

for i in range(len(x_dev)):
    output,_ = viterbi_part5(x_dev[i], feature_dict)
    y_dev.append(output[1:-1]) #remove START, STOP

write_output(DATA_FILE +'/dev.p5.out', x_dev, y_dev)

In [105]:
def log_sum_exp(n):
    _max = np.max(n)
    i = n - _max
    _sum = np.exp(i).sum()
    return np.log(_sum) + _max

In [106]:
def forward_part5(sent, feature_dict):
    alpha = []
    for j in range(all_tags_len):
        temp = []
        for i in range(len(sent)):
            temp.append(0)
        alpha.append(temp)
    
    # START -> first tag
    # check first word sent[0]
    
    for i in range(all_tags_len):
        
        if len(sent)>1:
            alpha[i][0] =feature_dict[f"transition:START+{ALL_TAGS[i]}+{sent[0]}"] +\
                        feature_dict[f"emission2:{ALL_TAGS[i]}+{sent[1]}"] +\
                                    feature_dict[f"transition2:START+{ALL_TAGS[i]}"] +\
                                    feature_dict[f"emission3:{ALL_TAGS[i]}+{sent[0]}"]
        else:
            alpha[i][0] =feature_dict[f"transition:START+{ALL_TAGS[i]}+{sent[0]}"] +\
                                    feature_dict[f"transition2:START+{ALL_TAGS[i]}"] +\
                                    feature_dict[f"emission3:{ALL_TAGS[i]}+{sent[0]}"]
        
    # iterate through the rest of sent
    for i in range(1, len(sent)):
        for v in range(all_tags_len):
            temp = np.zeros(all_tags_len)
            
            # transition score u -> v
            for u in range(all_tags_len):
               
                temp[u] = alpha[u][i-1] +\
                feature_dict[f"transition:{ALL_TAGS[u]}+{ALL_TAGS[v]}+{sent[i]}"] +\
                feature_dict[f"emission1:{ALL_TAGS[v]}+{sent[i-1]}"] +\
                                feature_dict[f"transition2:{ALL_TAGS[u]}+{ALL_TAGS[v]}"] +\
                                feature_dict[f"emission3:{ALL_TAGS[v]}+{sent[i]}"]
                if i<len(sent)-1:
                    temp[u] += feature_dict[f"emission2:{ALL_TAGS[v]}+{sent[i+1]}"]     
            alpha[v][i] = log_sum_exp(temp)
    
    # last word -> STOP
    temp = np.zeros(all_tags_len)
    for i in range(all_tags_len):
       
        temp[i] = alpha[i][-1] + feature_dict[f"transition:{ALL_TAGS[i]}+STOP+{None}"] +\
                                feature_dict[f"transition2:{ALL_TAGS[i]}+STOP"]
    
    return alpha, log_sum_exp(temp)

In [107]:
def backward_part5(sent, feature_dict):
    beta = []
    for i in range(all_tags_len):
        temp = []
        for j in range(len(sent)):
            temp.append(0)
        beta.append(temp)
    
    for i in range(all_tags_len):
        beta[i][-1] = feature_dict[f"transition:{ALL_TAGS[i]}+STOP+{None}"] +\
                                    feature_dict[f"transition2:{ALL_TAGS[i]}+STOP"]
    
    for i in range(len(sent)-2, -1, -1):
        for v in range(all_tags_len):
            temp = np.zeros(all_tags_len)
            
            for u in range(all_tags_len):
              
                temp[u] = beta[u][i+1] +\
                           feature_dict[f"transition:{ALL_TAGS[v]}+{ALL_TAGS[u]}+{sent[i+1]}"]+\
                           feature_dict[f"emission1:{ALL_TAGS[u]}+{sent[i]}"]+\
                                    feature_dict[f"transition2:{ALL_TAGS[v]}+{ALL_TAGS[u]}"] +\
                                    feature_dict[f"emission3:{ALL_TAGS[u]}+{sent[i+1]}"]
                if i<len(sent)-2:
                           temp[u] += feature_dict[f"emission2:{ALL_TAGS[u]}+{sent[i+2]}"]
                           
                
            beta[v][i] = log_sum_exp(temp)
    return beta

In [108]:
def compute_score_part5(feature_dict, x, y):
    score = 0
    score += feature_dict[f"transition:START+{y[0]}+{x[0]}"] +\
                            feature_dict[f"transition2:START+{y[0]}"]

    
    #Original feature
    if f"emission3:{y[0]}+{x[0]}" in feature_dict:
        score += feature_dict[f"emission3:{y[0]}+{x[0]}"]

    if len(x)>1:
        if f"emission2:{y[0]}+{x[1]}" in feature_dict:
            score += feature_dict[f"emission2:{y[0]}+{x[1]}"]
    
    for i in range(1, len(x)):
        score += feature_dict[f"transition:{y[i-1]}+{y[i]}+{x[i]}"]

        #original feature emission
        if f"emission:{y[i]}+{x[i]}" in feature_dict:
            score += feature_dict[f"emission3:{y[i]}+{x[i]}"]

        #Original feature transmission
        score += feature_dict[f"transition2:{y[i-1]}+{y[i]}"]
        
        if f"emission1:{y[i]}+{x[i-1]}" in feature_dict:
            score += feature_dict[f"emission1:{y[i]}+{x[i-1]}"]

        if i<len(x)-1:
            if f"emission2:{y[i]}+{x[i+1]}" in feature_dict:
                score += feature_dict[f"emission2:{y[i]}+{x[i+1]}"]

    score += feature_dict[f"transition:{y[-1]}+STOP+{None}"]
    
    #original feature transmission STOP
    score += feature_dict[f"transition2:{y[-1]}+STOP"]
    return score

In [112]:
compute_score_part5(feature_dict,
              'Great food with an awesome atmosphere !'.split(),
              'O B-positive O O O B-positive O'.split())

-133.73742006205444

In [113]:
def compute_loss_part5(X_train, y_train, feature_dict):
    loss = 0

    for i in range(len(X_train)):
        gold_score = compute_score_part5(feature_dict, X_train[i], y_train[i])
        _, total_score = forward_part5(X_train[i], feature_dict)
        loss += gold_score - total_score
    return -loss

In [114]:
temp_loss = compute_loss_part5(x_train, y_train, feature_dict)
print('Loss with features from part 1:', temp_loss)

Loss with features from part 1: -147875.36879758135


In [115]:
def compute_soft_count_part5(sent, feature_dict, alpha, beta, score):
    result = defaultdict(int)
    for k in feature_dict:
        result[k] = 0
    
    # transition
    for i in range(all_tags_len):
        update = alpha[i][0] + beta[i][0] - score

        #original
        result[f"transition2:START+{ALL_TAGS[i]}"] += np.exp(update)

        result[f"transition:START+{ALL_TAGS[i]}+{sent[0]}"] += np.exp(update)
    
    for i in range(1, len(sent)):
        for u in range(all_tags_len):
            for v in range(all_tags_len):
                string2 = f"transition2:{ALL_TAGS[v]}+{ALL_TAGS[u]}"
                string = f"transition:{ALL_TAGS[v]}+{ALL_TAGS[u]}+{sent[i]}"
                update = alpha[v][i-1] \
                                + feature_dict[string] \
                                + feature_dict[f"emission1:{ALL_TAGS[u]}+{sent[i-1]}"] \
                                + beta[u][i] - score \
                                        + feature_dict[string2]
                if i<len(sent)-1:
                    update += feature_dict[f"emission2:{ALL_TAGS[u]}+{sent[i+1]}"]
                result[string] += np.exp(update)
                result[string2] += np.exp(update)

    for i in range(all_tags_len):
        update = alpha[i][-1] + beta[i][-1] - score
        result[f"transition:{ALL_TAGS[i]}+STOP+{None}"] += np.exp(update)
        result[f"transition2:{ALL_TAGS[i]}+STOP"] += np.exp(update)
    
    # emission
    for i in range(len(sent)):
        for j in range(all_tags_len):
            string = f"emission1:{ALL_TAGS[j]}+{sent[i-1]}"
            update = alpha[j][i] + beta[j][i] - score
            result[string] += np.exp(update)
    for i in range(len(sent)):
        for j in range(all_tags_len):
            if i < len(sent)-1:
                string = f"emission2:{ALL_TAGS[j]}+{sent[i+1]}"
                update = alpha[j][i] + beta[j][i] - score
            result[string] += np.exp(update)

    # original emission
    for i in range(len(sent)):
        for j in range(all_tags_len):
            string = f"emission3:{ALL_TAGS[j]}+{sent[i]}"
            update = alpha[j][i] + beta[j][i] - score
            result[string] += np.exp(update)
    return result

In [116]:
def compute_hard_count_part5(x, y, feature_dict):
    result = defaultdict(int)
    for k in feature_dict:
        result[k] = 0
    
    # start
    result[f"transition:START+{y[0]}+{x[0]}"] += 1
    if len(x)>1:
        result[f"emission2:{y[0]}+{x[1]}"] += 1
    
    # recursive
    for i in range(1, len(x)):
        result[f"transition:+{y[i-1]}+{y[i]}+{x[i]}"] += 1
        result[f"emission1:{y[i]}+{x[i-1]}"] += 1
        if i<len(x)-1:
            result[f"emission2:{y[i]}+{x[i+1]}"] += 1
    
    # end
    result[f"transition:{y[-1]}+STOP+{None}"] += 1



    #Original Features
     # start
    result[f"transition2:START+{y[0]}"] += 1
    result[f"emission3:{y[0]}+{x[0]}"] += 1
    
    # recursive
    for i in range(1, len(x)):
        result[f"transition2:+{y[i-1]}+{y[i]}"] += 1
        result[f"emission3:{y[i]}+{x[i]}"] += 1
    
    # end
    result[f"transition2:{y[-1]}+STOP"] += 1
    
    return result

In [117]:
def feat_index_mapping(feat_dict):
    feat_mapping = {}
    index_mapping = {}
    index = 0
    
    for key in feat_dict:
        feat_mapping[key] = index
        index_mapping[index] = key
        index += 1
        
    return feat_mapping, index_mapping

In [118]:
def compute_loss_and_grad_part5(x, y, feature_dict):
    grad_dict = defaultdict(int)
    for k in feature_dict:
        grad_dict[k] = 0
    
    loss = 0
    
    for i in range(len(x)):
        alpha, forward_score = forward_part5(x[i], feature_dict)
        beta = backward_part5(x[i], feature_dict)
        
        # loss
        expected_result = compute_score_part5(feature_dict, x[i], y[i])
        loss += expected_result - forward_score
        
        # gradient
        soft_dict = compute_soft_count_part5(x[i], feature_dict, alpha, beta, forward_score)
        hard_dict = compute_hard_count_part5(x[i], y[i], feature_dict)
        for feat in feature_dict:
            update = soft_dict[feat] - hard_dict[feat]
            grad_dict[feat] += update
    
    return -loss, grad_dict

In [119]:
def format_time(t):
    m, s = divmod(t, 60)
    h, m = divmod(m, 60)
    return f"{h:.0f}h {m:.0f}m {s:.2f}s"

def now():
    tz = pytz.timezone('Asia/Singapore')
    now = datetime.now(tz)
    return datetime.strftime(now, "%Y-%m-%d %H:%M:%S")

In [120]:
def get_loss_grad(w, *args):
    '''
    This function will be called by "fmin_l_bfgs_b"
    Arg:
    w: weights, numpy array
    Returns:
    loss: loss, float
    grads: gradients, numpy array
    '''
    start_time = time.time()
    grads = np.zeros(len(w))
    x, y  = args
    
    features_dict = {}
    for i in range(len(w)):
        features_dict[index_mapping[i]] = w[i]
        
    loss, grad_dict = compute_loss_and_grad_part5(x, y, features_dict)
    
    # loss with reg
    loss += LEARN_CO * np.sum(w**2)
    
    # grad with reg
    for key in grad_dict:
        grads[feat_mapping[key]] = grad_dict[key] + 2 * LEARN_CO * w[feat_mapping[key]]
    
    
    print(f"Now:\t\t{now()}")
    print(f"loss:\t\t{loss:.5f}")
    print(f"time taken for computing loss:\t{format_time(time.time() - start_time)}")
    print(f"overall time taken so far:\t{format_time(time.time() - OVERALL_START)}")
    print("\n")
    return loss, grads

In [123]:
feat_mapping, index_mapping = feat_index_mapping(feature_dict)

print("-"*20 + "Start training CRF" + "-"*20)
OVERALL_START = time.time()

opt_w, final_loss, _ = fmin_l_bfgs_b(get_loss_grad,
                 x0 = np.zeros(len(index_mapping)), #initial weight guess
                 pgtol = 0.1,
                 args = (x_train, y_train))

print(f"final loss:\t\t{final_loss}")

print("-"*20 + 'Training finished' + "-"*20)

TT = time.time() - OVERALL_START
print(f"TOTAL TIME TAKEN: {format_time(TT)}")

--------------------Start training CRF--------------------
Now:		2022-08-12 12:32:29
loss:		52775.02915
time taken for computing loss:	0h 14m 13.68s
overall time taken so far:	0h 14m 15.23s


Now:		2022-08-12 12:46:29
loss:		50169.75871
time taken for computing loss:	0h 14m 0.02s
overall time taken so far:	0h 28m 15.46s


Now:		2022-08-12 13:00:41
loss:		43394.44239
time taken for computing loss:	0h 14m 12.48s
overall time taken so far:	0h 42m 28.01s


Now:		2022-08-12 13:15:14
loss:		77099.17454
time taken for computing loss:	0h 14m 32.09s
overall time taken so far:	0h 57m 0.18s


Now:		2022-08-12 13:29:53
loss:		47592.07140
time taken for computing loss:	0h 14m 39.38s
overall time taken so far:	1h 11m 39.62s


Now:		2022-08-12 13:49:59
loss:		44006.43311
time taken for computing loss:	0h 20m 6.32s
overall time taken so far:	1h 31m 46.00s


Now:		2022-08-12 14:04:08
loss:		43488.11777
time taken for computing loss:	0h 14m 8.73s
overall time taken so far:	1h 45m 54.80s


Now:		2022-08-

In [151]:
pred_feat = {}
for i in range(len(opt_w)):
    pred_feat[index_mapping[i]] = opt_w[i]

x_dev = read_validation_file(DATA_FILE + '/dev.in')
y_dev = []

for i in range(len(x_dev)):
    output, _ = viterbi_part5(x_dev[i], pred_feat)
    y_dev.append(output[1:-1])

write_output(DATA_FILE + '/dev.p5.out', x_dev, y_dev)

# Part 5 Results
processed 3809 tokens with 44 phrases; found: 210 phrases; correct: 14.
<br />
accuracy:  50.00%; (non-O)
<br />
accuracy:  92.41%; precision:   6.67%; recall:  31.82%; FB1:  11.02
<br />
         negative: precision:   0.00%; recall:   0.00%; FB1:   0.00  65
         <br />
          neutral: precision:   0.00%; recall:   0.00%; FB1:   0.00  8<br />
         positive: precision:  10.22%; recall:  32.56%; FB1:  15.56  137<br />
((6.666666666666667, 31.818181818181817, 11.023622047244094), 0)<br />