In [100]:
import numpy as np 
import os
import conlleval

train_dir = "dataset/train"
test_dir = "dataset/dev.in"


START_STATE_KEY = "START"
STOP_STATE_KEY = "STOP"

LARGE_NEG = -2**52

In [101]:
def tokenize(file_path):  
    data, lst = [], []
    with open(file_path, 'r') as f:  
        for line in f:
            if line== '\n':
                data.append(lst)
                lst = []    
            else:
                lines = line.replace("\n",'').split(" ")
                lst.append(tuple(lines))
    return data

train_sentences = tokenize(train_dir)
print(train_sentences[:5])      


[[('All', 'O'), ('in', 'O'), ('all', 'O'), (',', 'O'), ('the', 'O'), ('food', 'B-positive'), ('was', 'O'), ('great', 'O'), ('(', 'O'), ('except', 'O'), ('for', 'O'), ('the', 'O'), ('dessserts', 'B-negative'), (')', 'O'), ('.', 'O')], [('I', 'O'), ('have', 'O'), ('NEVER', 'O'), ('been', 'O'), ('disappointed', 'O'), ('in', 'O'), ('the', 'O'), ('Red', 'B-positive'), ('Eye', 'I-positive'), ('.', 'O')], [('Great', 'O'), ('food', 'B-positive'), ('with', 'O'), ('an', 'O'), ('awesome', 'O'), ('atmosphere', 'B-positive'), ('!', 'O')], [('The', 'O'), ('sangria', 'B-positive'), ('was', 'O'), ('pretty', 'O'), ('tasty', 'O'), ('and', 'O'), ('good', 'O'), ('on', 'O'), ('a', 'O'), ('hot', 'O'), ('muggy', 'O'), ('day', 'O'), ('.', 'O')], [('Also', 'O'), (',', 'O'), ('waiters', 'B-negative'), ('try', 'O'), ('to', 'O'), ('push', 'O'), ('more', 'O'), ('food', 'O'), ('on', 'O'), ('you', 'O'), (',', 'O'), ('like', 'O'), ('suggest', 'O'), ('things', 'O'), ('as', 'O'), ('if', 'O'), ('they', 'O'), ('are', 'O'

# PART 1
## 1i)

In [102]:

def MLE_emission_parameters(train_sentences):
    ''' Calculates the emission parameters by count(y->x)/count(y)
    
    :param train_sentences: our train file tokenised sentences
    :type train_sentences: list(tuple())

    :return count_y_dict: Count of labels 
    :rtype: dict()

    :return count_y_to_x_dict: Count of words and labels
    :rtype: dict()

    :param emission_dict: value of Count(labels->words)/Count(labels), keys are tuples of word and label ('emission: O+All', -9.01768561), value MLE
    :rtype: dict

    '''

    count_y_dict = {}
    count_y_to_x_dict = {}
    emission_dict = {}

    for sentence in train_sentences:
        for x_y_pair in sentence:
            word, label = x_y_pair
            if label in count_y_dict:
                count_y_dict[label] = count_y_dict.get(label) + 1
            else:
                count_y_dict[label] = 1
            if (word,label) in count_y_to_x_dict:
                count_y_to_x_dict[(word,label)] = count_y_to_x_dict.get((word,label)) + 1
            else:
                count_y_to_x_dict[(word,label)] = 1
    # print("count(y): \n", count_y_dict, "\n")
    # print("count(y->x): \n",list(count_y_to_x_dict.items())[0:5], len(count_y_to_x_dict), "\n")
    # Calculate our emission
    for key, value in count_y_to_x_dict.items(): # Default is iterate keys()
        word = key[0]
        label = key[1]
        string = f"emission: {label}+{word}" 
        prob =  value / count_y_dict.get(label)
        emission_dict[string] = float(np.where(prob != 0, np.log(prob), 0))
    # print("MLE: \n",list(emission_dict.items())[0:5],len(emission_dict) ,"\n")

    return count_y_dict, count_y_to_x_dict, emission_dict

In [103]:
count_y_dict, count_y_to_x_dict, emission_dict = MLE_emission_parameters(train_sentences)
print(list(emission_dict.items())[0:5])

[('emission: O+All', -9.017685611042436), ('emission: O+in', -4.54034879656423), ('emission: O+all', -5.785564559424215), ('emission: O+,', -3.24728344904484), ('emission: O+the', -3.0916488692569097)]


## 1ii)

In [104]:
def  MLE_transition_parameters(train_dir, emission_dict):
    ''' Calculates the transition parameters by count(y->y-1)/count(y)

    :param train_dir: our train file
    :type train_sentences: str

    :param emission_dict: Count(y->x)/Count(y), keys are tuples of word and label ('emission: O+All', -9.01768561), value MLE
    :type emission_dict: dict()

    :return count_y_to_y_dict: Count of labels and previous label
    :rtype: dict()

    :return emission_transition_dict: value of Count(labels->words)/Count(labels) for emission and Count(prev_labels->labels)/Count(labels) for transmission, keys are tuples of word and label ('emission: O+All', -9.01768561), value MLE
    :rtype: dict()
    '''
    count_y_dict = {}
    count_y_to_y_dict = {}
    prev_label = ""

    with open(train_dir, "r", encoding="utf8") as f:
        for line in f:
            # Parse each line
            if len(line.split(" ")) == 2:
                _, label = line.replace("\n","").split(" ")
            else:
                label = ''
            if label == '' and prev_label != '':
                count_y_dict[STOP_STATE_KEY] = count_y_dict.get(STOP_STATE_KEY) + 1 if count_y_dict.get(STOP_STATE_KEY) else 1
            elif label !='':
                if prev_label == '':
                    count_y_dict[START_STATE_KEY] = count_y_dict.get(START_STATE_KEY) + 1 if count_y_dict.get(START_STATE_KEY) else 1
                if label in count_y_dict:
                    count_y_dict[label] = count_y_dict.get(label)+1
                else:
                    count_y_dict[label] = 1
            if prev_label == '' and label != '':
                if (START_STATE_KEY, label) in count_y_to_y_dict:
                    count_y_to_y_dict[(START_STATE_KEY, label)] = count_y_to_y_dict.get((START_STATE_KEY, label)) + 1
                else:
                    count_y_to_y_dict[(START_STATE_KEY, label)] = 1
            elif label == '' and prev_label != '':
                if (prev_label, STOP_STATE_KEY) in count_y_to_y_dict:
                    count_y_to_y_dict[(prev_label, STOP_STATE_KEY)] = count_y_to_y_dict.get((prev_label, STOP_STATE_KEY)) + 1
                else:
                    count_y_to_y_dict[(prev_label, STOP_STATE_KEY)] = 1
            elif label != '' and prev_label != '':
                if (prev_label, label) in count_y_to_y_dict:
                    count_y_to_y_dict[(prev_label, label)] = count_y_to_y_dict.get((prev_label, label)) + 1
                else:
                    count_y_to_y_dict[(prev_label, label)] = 1
            prev_label = label
    # Calculate our transition
    for key, value in count_y_to_y_dict.items(): # Default is iterate keys()
        prev_label = key[0]
        label = key[1]
        string = f"transition: {prev_label}+{label}" 
        prob =  value / count_y_dict.get(prev_label)
        emission_dict[string] = float(np.where(prob != 0, np.log(prob), 0))
    # print("MLE: \n",list(emission_dict.items()), len(emission_dict) ,"\n")
    emission_transition_dict = emission_dict

    return count_y_to_y_dict, emission_transition_dict

In [105]:
count_y_to_y_dict, emission_transition_dict = MLE_transition_parameters(train_dir, emission_dict)
print(list(emission_transition_dict.items())[:5])
print(list(emission_transition_dict.items())[-5:])

[('emission: O+All', -9.017685611042436), ('emission: O+in', -4.54034879656423), ('emission: O+all', -5.785564559424215), ('emission: O+,', -3.24728344904484), ('emission: O+the', -3.0916488692569097)]
[('transition: B-positive+STOP', -4.252688120309395), ('transition: I-positive+STOP', -4.564348191467836), ('transition: B-positive+B-positive', -7.0859014643656115), ('transition: B-neutral+STOP', -3.245193133185574), ('transition: B-negative+STOP', -4.836281906951478)]


# Part 2

## 2i)

In [106]:
def score(sentence, emission_transition_dict):
    ''' Calculates the score with of a given pair based on emission and transmission features
    
    :param sentences: our  file tokenised sentences
    :type sentences: list(tuple())

    :return emission_transition_dict: value of Count(labels->words)/Count(labels) for emission and Count(prev_labels->labels)/Count(labels) for transmission, keys are tuples of word and label ('emission: O+All', -9.01768561), value MLE
    :type emission_transition_dict: dict()

    :param score: our emission score + transition score for sentence
    :type sentences: float
    '''
    score = 0
    emission_score = 0 
    transition_score = 0
    x_seq = [x[0] for x in sentence]
    y_seq = [START_STATE_KEY]+[y[1] for y in sentence]+[STOP_STATE_KEY]
    
    for i in range(len(x_seq)):
        label = y_seq[i+1]
        word = x_seq[i]
        key = f"emission: {label}+{word}" 
        emission_score += emission_transition_dict[key]
    for j in range(1, len(y_seq)):
        prev_label = y_seq[j-1]
        label = y_seq[j]
        key = f"transition: {prev_label}+{label}" 
        transition_score += emission_transition_dict[key]
    score = emission_score + transition_score
    return score



In [107]:
score(train_sentences[0],emission_transition_dict)

-85.52845366888094

## 2ii)

In [108]:
test_sentences = tokenize(test_dir)
print(test_sentences[:5])   

[[('Loved',), ('it',)], [('The',), ('music',), ('playing',), ('was',), ('very',), ('hip',), (',',), ('20-30',), ('something',), ('pop',), ('music',), (',',), ('but',), ('the',), ('subwoofer',), ('to',), ('the',), ('sound',), ('system',), ('was',), ('located',), ('under',), ('my',), ('seat',), (',',), ('which',), ('became',), ('annoying',), ('midway',), ('through',), ('dinner',), ('.',)], [('This',), ('place',), ('has',), ('ruined',), ('me',), ('for',), ('neighborhood',), ('sushi',), ('.',)], [('I',), ('have',), ('never',), ('eaten',), ('in',), ('the',), ('restaurant',), (',',), ('however',), (',',), ('upon',), ('reading',), ('the',), ('reviews',), ('I',), ('got',), ('take',), ('out',), ('last',), ('week',), ('.',)], [('It',), ("isn't",), ('the',), ('cheapest',), ('sushi',), ('but',), ('has',), ('been',), ('worth',), ('it',), ('every',), ('time',), ('.',)]]


In [109]:
def viterbi_algo(test_sentences, count_y_dict, emission_transition_dict):
    ''' Decoding process that finds greedily finds the best possible labels from past MLE scores, saves file to output folder
    
    :param test_sentences: our file tokenised sentences
    :type test_sentences: list(tuple())

    :param count_y_dict: Count of labels 
    :param count_y_dict: dict()

    :param emission_transition_dict: value of Count(labels->words)/Count(labels) for emission and Count(prev_labels->labels)/Count(labels) for transmission, keys are tuples of word and label ('emission: O+All', -9.01768561), value MLE
    :param emission_transition_dict: dict()
    '''
    
    pi = [{}]
    path = {}
    labels = count_y_dict.keys()
    os.makedirs('output',exist_ok=True)

    with open('output/dev.p2.out', "w") as outfile:
        for sentence in test_sentences:
            # j = 0 (START)
            for label in labels:
                pi[0][label] = emission_transition_dict.get(f"transition: {'START'}+{label}",LARGE_NEG) + emission_transition_dict.get(f"emission: {label}+{sentence[0][0]}",LARGE_NEG)
                path[label] = [label]
            # j = 1 to N-1
            for idx in range(1,len(sentence)):
                pi.append({})
                newpath = {}
                for label_y in labels:
                    (prob, label) = max([(pi[idx-1][prev_label] + emission_transition_dict.get(f"transition: {prev_label}+{label_y}",LARGE_NEG) + emission_transition_dict.get(f"emission: {label_y}+{sentence[idx][0]}",LARGE_NEG), prev_label) 
                                    for prev_label in labels])
                    pi[idx][label_y] = prob
                    newpath[label_y] = path[label] + [label_y]
                path = newpath
            # j = N (STOP)
            idx = len(sentence)
            (prob, label) = max([(pi[idx-1][label_y] + emission_transition_dict.get(f"transition: {label_y}+{'STOP'}", LARGE_NEG), label_y) for label_y in labels])
            
            # handle inconsistent length
            if len(sentence) != len(path[label]):
                print(len(sentence),len(path[label]))
                raise Exception("{} has a different lenght with {}".format(sentence, path[label]))
            
            # write to file
            for i in range(len(sentence)):
                line = "{} {}\n".format(sentence[i][0], path[label][i])
                outfile.write(line)
                
            outfile.write("\n")

In [110]:
viterbi_algo(test_sentences, count_y_dict, emission_transition_dict)

# Evaluation of dev.p2.out

In [111]:
prediction_dir = 'output/dev.p2.out'
truth_dir = 'dataset/dev.out'

def evaluate_results(truth_dir,prediction_dir):
    predictions = []
    prediction_sentences = tokenize(prediction_dir)
    for sentence in prediction_sentences:
        for word_pair in sentence:
            predictions.append(word_pair[1])     
    lines = """"""
    idx = 0
    with open(truth_dir, "r", encoding="utf8") as tstr:
        for line in tstr:
            if len(line) > 1:
                newline = line.replace("\n",f" {predictions[idx]}\n")
                lines += newline
                idx += 1
            else:
                lines += "\n"
    return lines.splitlines()

lines = evaluate_results(truth_dir,prediction_dir)
res = conlleval.evaluate(lines)
print(conlleval.report(res))

processed 3809 tokens with 210 phrases; found: 132 phrases; correct: 63.
accuracy:  93.23%; precision:  47.73%; recall:  30.00%; FB1:  36.84
         negative: precision:  35.29%; recall:   9.23%; FB1:  14.63  17
          neutral: precision:  20.00%; recall:  12.50%; FB1:  15.38  5
         positive: precision:  50.91%; recall:  40.88%; FB1:  45.34  110



# Part 3

In [114]:
def logSumExp(a):
    max = np.max(a)
    sumOfExp = np.exp(a - max).sum()
    return max + np.log(sumOfExp)

def forward_algorithm(sentence, count_y_dict, emission_transition_dict):    
    pi = [{}]
    labels = count_y_dict.keys()
    # j = 0 (START)
    for label in labels:
        pi[0][label] = emission_transition_dict.get(f"transition: {'START'}+{label}",LARGE_NEG) + emission_transition_dict.get(f"emission: {label}+{sentence[0][0]}",LARGE_NEG)

    # j = 1 to N-1
    for idx in range(1,len(sentence)):
        pi.append({})

        for label in labels:
            log_a = []
            for prev_label in labels:
                log_a.append(pi[idx-1][prev_label] + emission_transition_dict.get(f"transition: {prev_label}+{label}",LARGE_NEG) + emission_transition_dict.get(f"emission: {label}+{sentence[idx][0]}",LARGE_NEG))
            pi[idx][label] = logSumExp(log_a)
            
    # j = N (STOP)
    idx = len(sentence)
    log_a = []
    for label in labels:
        log_a.append(pi[idx-1][label] + emission_transition_dict.get(f"transition: {label}+{'STOP'}", LARGE_NEG))
    return pi, logSumExp(log_a)

forward_algorithm(train_sentences[0], count_y_dict, emission_transition_dict)

([{'O': -9.079345204990318,
   'B-positive': -4503599627370499.0,
   'B-negative': -4503599627370501.0,
   'I-positive': -9007199254740992,
   'B-neutral': -4503599627370501.0,
   'I-neutral': -9007199254740992,
   'I-negative': -9007199254740992},
  {'O': -13.766622979615455,
   'B-positive': -4503599627370508.0,
   'B-negative': -4503599627370509.0,
   'I-positive': -4503599627370506.0,
   'B-neutral': -4503599627370511.0,
   'I-neutral': -9007199254741000.0,
   'I-negative': -4503599627370507.0},
  {'O': -19.699116517100578,
   'B-positive': -23.953109923204384,
   'B-negative': -23.93728208854489,
   'I-positive': -9007199254741004.0,
   'B-neutral': -4503599627370516.0,
   'I-neutral': -9007199254741006.0,
   'I-negative': -9007199254741004.0},
  {'O': -23.068893163028275,
   'B-positive': -4503599627370519.0,
   'B-negative': -4503599627370520.0,
   'I-positive': -30.46462428231426,
   'B-neutral': -4503599627370522.0,
   'I-neutral': -9007199254741012.0,
   'I-negative': -450359

In [115]:
def loss_fn(sentences, count_y_dict, emission_transition_dict):
    loss = 0
    for sent in sentences:
        loss+= score(sent, emission_transition_dict)
        _, update = forward_algorithm(sent, count_y_dict, emission_transition_dict)
        loss-= update
    return (-1)*loss
loss_fn(train_sentences, count_y_dict, emission_transition_dict)

2050.74053383538

In [116]:
def backward_algorithm(sentence, count_y_dict, emission_transition_dict):    
    pi = [{} for i in range(len(sentence))]
    labels = count_y_dict.keys()

    # j = N (STOP)
    for label in labels:
        pi[len(sentence)-1][label] = emission_transition_dict.get(f"transition: {label}+{'STOP'}", LARGE_NEG)

    # j = N-1 to 1 
    for idx in range(len(sentence)-1,0,-1):
        for label in labels:
            log_b = []
            for next_label in labels:
                log_b.append(pi[idx][next_label] + emission_transition_dict.get(f"transition: {label}+{next_label}",LARGE_NEG) + emission_transition_dict.get(f"emission: {next_label}+{sentence[idx][0]}",LARGE_NEG))
            pi[idx-1][label] = logSumExp(log_b)

    # j = 0 (START)
    log_b = []
    for label in labels:
        log_b.append(pi[0][label] + emission_transition_dict.get(f"transition: {'START'}+{label}",LARGE_NEG) + emission_transition_dict.get(f"emission: {label}+{sentence[0][0]}",LARGE_NEG))
    
    return pi, logSumExp(log_b)

In [117]:
backward_algorithm(train_sentences[0], count_y_dict, emission_transition_dict)

([{'O': -75.79247295073526,
   'B-positive': -75.96013210037187,
   'B-negative': -75.65394924920902,
   'I-positive': -76.11520236719656,
   'B-neutral': -75.87847553035473,
   'I-neutral': -76.29947044008102,
   'I-negative': -75.64641026599291},
  {'O': -71.10519517611013,
   'B-positive': -71.34020585812414,
   'B-negative': -71.21765957710079,
   'I-positive': -71.52518608374271,
   'B-neutral': -71.21614051255226,
   'I-neutral': -71.63713542227855,
   'I-negative': -71.49666270104414},
  {'O': -65.19764439544767,
   'B-positive': -65.36383891214948,
   'B-negative': -65.28516603961566,
   'I-positive': -65.51792969991837,
   'B-neutral': -65.28364697506713,
   'I-neutral': -65.70464188479342,
   'I-negative': -65.56416916355901},
  {'O': -61.80343196834192,
   'B-positive': -61.890165966505435,
   'B-negative': -61.831706821012546,
   'I-positive': -61.994494944318916,
   'B-neutral': -61.889434547961386,
   'I-neutral': -62.310429457687675,
   'I-negative': -62.020553032988104}

In [127]:
def forward_backward_algorithm(sentence, count_y_dict, emission_transition_dict):
    feature_expectation = {}
    labels = count_y_dict.keys()
    fwd_pi, fwd_score = forward_algorithm(sentence, count_y_dict, emission_transition_dict)
    bkd_pi, bkd_score = backward_algorithm(sentence, count_y_dict, emission_transition_dict)
        
    # idx = 1
    for label in labels:
        string_transition = f"transition: {'START'}+{label}"
        string_emission = f"emission: {label}+{sentence[0][0]}" 

        # update transition features
        update = bkd_pi[0][label] \
                + emission_transition_dict.get(string_transition,0) \
                + emission_transition_dict.get(string_emission,0) \
                - fwd_score
         
        feature_expectation[string_transition] = feature_expectation.get(string_transition,0) + np.exp(update)
        
        # update emission features
        update = fwd_pi[0][label] + bkd_pi[0][label] - fwd_score
        feature_expectation[string_emission] = feature_expectation.get(string_emission,0) + np.exp(update)
        
    # idx = 2 to N-1
    for idx in range(1,len(sentence)):
        for label in labels:
            string_emission = f"emission: {label}+{sentence[idx][0]}" 

            # update transition features
            for prev_label in labels:
                string_transition = f"transition: {prev_label}+{label}" 
                update = fwd_pi[idx-1][prev_label] \
                        + bkd_pi[idx][label] \
                        + emission_transition_dict.get(string_transition,0) \
                        + emission_transition_dict.get(string_emission,0) \
                        - fwd_score
                feature_expectation[string_transition] = feature_expectation.get(string_transition,0) + np.exp(update)

            # update emission features
            update = fwd_pi[idx][label] + bkd_pi[idx][label] - fwd_score      
            feature_expectation[string_emission] = feature_expectation.get(string_emission,0) + np.exp(update)
                
    # idx = N (STOP)
    idx = len(sentence)
    for label in labels:
        # update transition features
        string_transition = f"transition: {label}+{'STOP'}" 
        update = fwd_pi[idx-1][label] + emission_transition_dict.get(string_transition,0) - fwd_score
        feature_expectation[string_transition] = feature_expectation.get(string_transition,0) + np.exp(update)
        
    return feature_expectation
                
forward_backward_algorithm(train_sentences[0], count_y_dict, emission_transition_dict)

{'transition: START+O': 1.0000000000000284,
 'emission: O+All': 1.0000000000000284,
 'transition: START+B-positive': 316.846043142975,
 'emission: B-positive+All': 0.0,
 'transition: START+B-negative': 107.58750082314015,
 'emission: B-negative+All': 0.0,
 'transition: START+I-positive': 6352.576696321845,
 'emission: I-positive+All': 0.0,
 'transition: START+B-neutral': 51.57064280610568,
 'emission: B-neutral+All': 0.0,
 'transition: START+I-neutral': 5283.519442066207,
 'emission: I-neutral+All': 0.0,
 'transition: START+I-negative': 10151.816050862699,
 'emission: I-negative+All': 0.0,
 'transition: O+O': 24628.90013534531,
 'transition: B-positive+O': 0.5428700135957362,
 'transition: B-negative+O': 1.185388749994202,
 'transition: I-positive+O': 0.0005068471541609425,
 'transition: B-neutral+O': 0.04993841289642202,
 'transition: I-neutral+O': 0.0,
 'transition: I-negative+O': 0.0,
 'emission: O+in': 1.0000000000000284,
 'transition: O+B-positive': 2533.9752118767406,
 'transitio

In [119]:
def get_features(sentences, count_y_dict, emission_transition_dict):
    features = {}
    for sent in sentences:
        expect = forward_backward_algorithm(sent, count_y_dict, emission_transition_dict)
        for k,v in expect.items():
            features[k] = features.get(k,0) + v
    return features

In [125]:
def mapping_fn(emission_transition_dict):
    index_mapping = {}
    for idx, value in enumerate(emission_transition_dict): 
        first = value.split(" ")[1].split("+")[0]
        second = value.split(" ")[1].split("+")[1]
        index_mapping[idx] = (first,second)
    return index_mapping

index_map = mapping_fn(emission_transition_dict)

In [97]:
def compute_grad(sentences, count_y_dict, count_y_to_x_dict, count_y_to_y_dict, emission_transition_dict):
    labels = count_y_dict.keys()
    features = get_features(sentences, count_y_dict, emission_transition_dict)
    print(len(features))
    index_map = mapping_fn(count_y_to_x_dict,count_y_to_y_dict)
    print(len(index_map))

    grad_lst = np.zeros(len(emission_transition_dict),)
    for i in range(len(features)):
        if index_map[i] in count_y_to_x_dict:
            grad_lst[i] += (features.get(i,0) - count_y_to_x_dict[index_map[i]])
        elif index_map[i] in count_y_to_y_dict:
            grad_lst[i] += (features.get(i,0) - count_y_to_y_dict[index_map[i]])
        else:
            grad_lst[i] += features.get(i,0)
    return grad_lst

len(compute_grad(train_sentences, count_y_dict, count_y_to_x_dict, count_y_to_y_dict, emission_transition_dict))

27881
4602


KeyError: 4602

# PART 4

In [128]:
def loss_with_reg(w, sentences, count_y_dict, emission_transition_dict, n = 0.1):
    loss = loss_fn(sentences, count_y_dict, emission_transition_dict)
    # regularization
    loss += n*sum(w1*w1 for w1 in w)
    return loss

In [129]:
def grad_with_reg(w, sentences, count_y_dict, count_y_to_x_dict, count_y_to_y_dict, emission_transition_dict, n = 0.1):
    grad_lst = compute_grad(sentences, count_y_dict, count_y_to_x_dict, count_y_to_y_dict, emission_transition_dict)
    for i in range(len(w)):
        grad_lst[i] += w[i]*2*n
    return grad_lst

In [49]:
import time
from scipy.optimize import fmin_l_bfgs_b
def callbackF(w):
    '''
    This function will only be called by "fmin_l_bfgs_b"
    Arg:
    w: weights, numpy array
    '''
    loss = get_loss_grad(w)[0]
    print('Loss:{0:.4f}'.format(loss))
def get_loss_grad(w):
    '''
    This function will only be called by "fmin_l_bfgs_b"
    Arg:
    w: weights, numpy array
    Returns:
    loss: loss, float
    grads: gradients, numpy array
    '''
    # to be completed by you,
    # based on the modified loss and gradients,
    # with L2 regularization included
    return loss, grads
init_w = np.asarray([0 for i in range(len(list(index_map.keys())))])
result = fmin_l_bfgs_b(get_loss_grad, init_w,
pgtol=0.01, callback=callbackF)



def get_loss_grad(w):
    '''
    This function will be called by "fmin_l_bfgs_b"
    Arg:
    w: weights, numpy array
    Returns:
    loss: loss, float
    grads: gradients, numpy array
    '''
    # to be completed by you
    start = time.time()
    emi, trans = {}, {}
    for i in range(len(list(index_mapping.keys()))): #27899
        if i< len(EN_train_emission): #27818
            emi[index_mapping[i]] = w[i]
        else:
            trans[index_mapping[i]] = w[i]
    
    feat_exp = get_feat_exp(EN_train_sentences, EN_all_tags, trans, emi)
    # change feat_exp to list
    ft = []
    for i in range(len(list(index_mapping.keys()))):
        ft.append(feat_exp[index_mapping[i]])
    
    loss = loss_with_reg(w, EN_train_sentences, trans, emi, EN_all_tags, n = 0.1)
    print('loss: '+ str(loss))
    
    grad_lst = grad_with_reg(ft, w, trans, emi, EN_all_tags, n = 0.1)
    #print('grad_lst: '+ str(grad_lst))
    grads = np.asarray(list(grad_lst)) 
    
    print(' time taken: '+ str(time.time()-start) +' total time: '+ str(time.time()-total_start))
    
    return loss, grads
  
  
init_w = np.asarray([0 for i in range(len(list(index_mapping.keys())))])
result = fmin_l_bfgs_b(get_loss_grad, init_w, pgtol=0.01)

NameError: name 'index_mapping' is not defined