# Kaggle Competition Development Notebook

### Import necessary libraries

In [292]:
import math
import pandas as pd
import numpy as np
import re
import time
import unicodedata as ud
import csv
import random
from sklearn.metrics import log_loss

# Import the vocab file

In [293]:
def create_vocab():
    """Load the vocabulary word pairs from ``vocab.csv``.

    The file is read from the current working directory and decoded as
    UTF-8, the same encoding the training and test readers use, so the
    result does not depend on the platform's default encoding.

    Returns:
        list[list[str]]: one list of cell strings per CSV row, in file order.
    """
    # newline='' leaves newline handling to the csv module, as its
    # documentation recommends.
    with open('vocab.csv', encoding='utf8', newline='') as file:
        return list(csv.reader(file))

### Prep training data for N Gram Model

In [345]:
def prepare_data(filename, N):
    """Read a training corpus and tokenise it into padded sentences.

    Each line is NFC-normalised, stripped of the punctuation characters
    ``, . ? " “ ”``, whitespace-collapsed and split on whitespace. The
    token list is then padded with ``N - 1`` ``<s>`` markers in front and
    ``N - 1`` ``</s>`` markers behind.

    Args:
        filename: path of a UTF-8 text file, one sentence per line.
        N: n-gram order; no padding is added when ``N <= 1``.

    Returns:
        list[list[str]]: one padded token list per input line.
    """
    # Raw strings: '\s' in a plain literal is an invalid escape sequence.
    punctuation = re.compile(r'[,.?"“”]')
    whitespace = re.compile(r'\s+')
    pad = max(N - 1, 0)
    train_dat = []

    with open(filename, encoding="utf8") as file:
        for line in file:
            line = ud.normalize("NFC", line)
            line = punctuation.sub('', line)
            line = whitespace.sub(' ', line)
            tokens = line.strip().split()
            # Build the padded list in one go instead of repeated insert(0).
            train_dat.append(["<s>"] * pad + tokens + ["</s>"] * pad)

    return train_dat

## Create a test example from a training example

In [295]:
def create_test_example(training_line,word):
    """Turn a training sentence into a ``{a|b}`` multiple-choice sentence.

    The first whole-token occurrence of ``word[0]`` (or, failing that, of
    ``word[1]``) is replaced by ``{word[0]|word[1]}``; all ``<s>`` and
    ``</s>`` markers are then removed.

    Args:
        training_line: the sentence as one space-separated string.
        word: sequence whose first two items are the two candidate words.

    Returns:
        ``(test_line, prob)`` where ``prob`` is 1 if ``word[0]`` was the
        word found and 0 if ``word[1]`` was; ``None`` (after printing a
        message) when neither word occurs as a token.
    """
    marker = ' {' + word[0] + "|" + word[1] + '} '
    # Pad so a word at the very start or end of the line still has a space
    # on both sides and can be matched as a whole token.
    padded = ' ' + training_line + ' '

    for candidate, prob in ((word[0], 1), (word[1], 0)):
        target = ' ' + candidate + ' '
        if target in padded:
            # Plain string replacement: vocab words are literal text, so
            # regex metacharacters in them must not be interpreted.
            test_line = padded.replace(target, marker, 1)
            test_line = test_line.replace("<s>", "").replace("</s>", "")
            return test_line.strip(), prob

    print("Error, word not found")
    return None

## Determine which vocab word corresponds to location in the training data

In [296]:
def which_vocab(index):
    """Map a training-line index to its vocab entry.

    The index space is cut into 25 consecutive blocks whose lengths are
    listed below; block ``k`` belongs to vocab entry ``k``.
    (Presumably each length is the number of training lines for that vocab
    pair -- verify against the data.)

    Args:
        index: position of a line in the training data.

    Returns:
        ``(vocab_number, block_length)`` for the block containing ``index``,
        or ``None`` when ``index`` is 456571 or larger.
    """
    block_lengths = (
        24200, 24200, 2890, 24200, 24200,
        24200, 7269, 24200, 24200, 24200,
        24200, 24200, 6068, 24200, 24200,
        3396, 24200, 24200, 24200, 24200,
        12105, 11497, 16563, 5049, 4534,
    )
    upper_bound = 0
    for vocab_number, length in enumerate(block_lengths):
        upper_bound += length
        if index < upper_bound:
            return vocab_number, length
    return None

## Split the data into training and development
#### Dev is saved to file, along with dev answers, new training array is returned

In [297]:
def train_dev_split(train_array, percent_split, vocab_df, dev_filename, answers_filename):
    """Randomly hold out part of the training data as a development set.

    Each line is sent to the dev set with probability ``percent_split/100``.
    A held-out line is rewritten as a ``{a|b}`` test sentence and written
    to ``dev_filename``; its expected value is written to
    ``answers_filename`` under an ``Id,Expected`` header, with ids counting
    from 1 over the dev lines only.

    Args:
        train_array: list of token lists.
        percent_split: percentage (0-100) of lines to hold out.
        vocab_df: indexable collection of word pairs, indexed by the vocab
            number that ``which_vocab`` returns.
        dev_filename: output path for the dev sentences (UTF-8).
        answers_filename: output path for the dev answers (UTF-8).

    Returns:
        list: the lines that remain in the training set.
    """
    threshold = percent_split / 100
    new_train = []
    count = 1

    # `with` closes both files even if a line fails part-way through.
    with open(dev_filename, 'w', encoding='utf8') as dev_file, \
            open(answers_filename, 'w', encoding='utf8') as dev_answers:
        dev_answers.write("Id,Expected\n")
        for index, training_line in enumerate(train_array):
            if random.random() >= threshold:
                new_train.append(training_line)
                continue

            vocab_word, _ = which_vocab(index)
            example = create_test_example(" ".join(training_line), vocab_df[vocab_word])
            if example is None:
                # No vocab word in this line: it cannot become a dev item,
                # so keep it for training instead of failing on the unpack.
                new_train.append(training_line)
                continue

            test_line, prob = example
            dev_answers.write(str(count) + "," + str(prob) + "\n")
            dev_file.write(test_line + "\n")
            count += 1

    return new_train

### Actually create the dictionary with all n-grams

In [298]:
def train_model(train_data, N):
    """Count every n-gram of order 1..N in the training data.

    Args:
        train_data: list of token lists.
        N: highest n-gram order to count.

    Returns:
        dict[str, int]: maps each n-gram, written as its tokens joined by
        single spaces, to the number of times it occurs.
    """
    model = {}
    for line in train_data:
        for order in range(1, N + 1):
            # A line of length L holds L - order + 1 n-grams of this order;
            # the "+ 1" keeps the final one, which used to be skipped.
            for start in range(len(line) - order + 1):
                ngram = ' '.join(line[start:start + order])
                model[ngram] = model.get(ngram, 0) + 1
    return model

### Extract N-gram choices from the test data

In [357]:
def prepare_test_data(filename, N):
    """Extract the two candidate n-grams from every test sentence.

    Each row contains one ``{a|b}`` choice. The row is cleaned in the same
    way as the training data (NFC, punctuation removed, whitespace
    collapsed), padded with ``N - 1`` ``<s>``/``</s>`` markers, and the
    choice is returned twice, once with ``a`` and once with ``b``, each
    surrounded by its neighbouring tokens. For ``N - 1`` context tokens,
    ``ceil((N-1)/2)`` come from the left and the rest from the right.

    Args:
        filename: path of a UTF-8 text file, one test sentence per line.
        N: n-gram order.

    Returns:
        list: one ``(ngram_with_a, ngram_with_b)`` tuple per row, or the
        string ``'error'`` for rows where no choice could be located (a
        message is printed for those).
    """
    # Raw strings: '\{', '\|', '\}' and '\s' are invalid escape sequences
    # in ordinary string literals.
    reg_exp_str = r' \{(.*)\|(.*)\} '
    pre_string = ''
    post_string = ''
    for gram in range(1, N):
        pre_string += '<s> '
        post_string += ' </s>'
        # Alternate sides: odd steps add a left-context group, even steps a
        # right-context group.
        if gram % 2 == 0:
            reg_exp_str = reg_exp_str + '([^ ]+) '
        else:
            reg_exp_str = ' ([^ ]+)' + reg_exp_str

    reg_exp = re.compile(reg_exp_str)
    punctuation = re.compile(r'[,.?"“”]')
    whitespace = re.compile(r'\s+')

    # Groups 1..midpoint-1 are left context, midpoint and midpoint+1 are the
    # two alternatives, midpoint+2..N+1 are right context.
    midpoint = N // 2 + 1

    choices = []
    with open(filename, encoding="utf8") as file:
        for row in file:
            row = ud.normalize("NFC", row)
            row = punctuation.sub('', row)
            row = whitespace.sub(' ', row)
            row = pre_string + row.strip() + post_string

            match = reg_exp.search(row)
            if match:
                left = [match.group(g) for g in range(1, midpoint)]
                right = [match.group(g) for g in range(midpoint + 2, N + 2)]
                choice = (
                    ' '.join(left + [match.group(midpoint)] + right),
                    ' '.join(left + [match.group(midpoint + 1)] + right),
                )
            else:
                print('error, no value')
                # Placeholder keeps the list aligned with the row numbers.
                choice = 'error'
            choices.append(choice)
    return choices

In [347]:
# Extract the trigram (N = 3) candidate pairs from the test file.
choices =prepare_test_data('test.txt',3)

## Probability function to determine likelihood of option_1 and option_2

In [356]:
def backoff_probability(option_1,option_2,model):
    """Compare two n-grams by count, backing off to shorter n-grams.

    If neither option was seen, one token is dropped from both (the first
    token when the current length is even, the last when it is odd) and
    the comparison is retried. If exactly one option is unseen it is given
    a pseudo-count of 0.1.

    Args:
        option_1: first candidate n-gram, tokens separated by whitespace.
        option_2: second candidate n-gram.
        model: mapping from n-gram string to count.

    Returns:
        tuple: ``(prob_1, prob_2)``, the options' relative frequencies.
        ``(0.5, 0.5)`` when neither is seen and no shorter n-gram is left,
        the same value ``basic_probability`` gives for two unseen options.
    """
    count_1 = model[option_1] if option_1 in model else 0
    count_2 = model[option_2] if option_2 in model else 0

    if count_1 == 0 and count_2 == 0:
        split_1 = option_1.split()
        split_2 = option_2.split()
        if len(split_1) > 1 and len(split_2) > 1:
            if len(split_1) % 2 == 0:
                split_1, split_2 = split_1[1:], split_2[1:]
            else:
                split_1, split_2 = split_1[:-1], split_2[:-1]
            return backoff_probability(" ".join(split_1), " ".join(split_2), model)
        # Nothing left to back off to; previously this fell through to a
        # 0/0 division.
        return 0.5, 0.5

    if count_1 == 0:
        count_1 = .1
    elif count_2 == 0:
        count_2 = .1

    total = count_1 + count_2
    return count_1 / total, count_2 / total

In [301]:
def basic_probability(option_1,option_2,model):
    """Estimate the two options' probabilities from their raw counts.

    Zero counts are smoothed: when both options are unseen they are treated
    as equally frequent; when exactly one is unseen it is given a
    pseudo-count of 0.1.

    Args:
        option_1: first candidate n-gram string.
        option_2: second candidate n-gram string.
        model: mapping from n-gram string to count.

    Returns:
        tuple: ``(prob_1, prob_2)``, each count divided by their sum.
    """
    seen_1 = model[option_1] if option_1 in model else 0
    seen_2 = model[option_2] if option_2 in model else 0

    if seen_1 == 0 and seen_2 == 0:
        # No evidence for either option: treat them as equally likely.
        seen_1 = seen_2 = 1
    elif seen_1 == 0:
        seen_1 = .1
    elif seen_2 == 0:
        seen_2 = .1

    total = seen_1 + seen_2
    return seen_1 / total, seen_2 / total

In [386]:
def interpolate_probability(option_1,option_2,model,weights):
    """Blend the basic probabilities of progressively shorter n-grams.

    Level 0 scores the options exactly as given. Each further level drops
    one token from both options (the first token when the current length is
    even, the last when it is odd) until either option is down to a single
    token. The per-level probabilities are combined with ``np.dot``.

    Args:
        option_1: first candidate n-gram, tokens separated by whitespace.
        option_2: second candidate n-gram.
        model: mapping from n-gram string to count.
        weights: one weight per level, longest n-gram first; ``np.dot``
            needs its length to equal the number of levels produced.

    Returns:
        tuple: ``(prob_1, prob_2)``, the weighted sums for each option.
    """
    # The first lookup uses the strings untouched; later lookups use tokens
    # re-joined with single spaces.
    level_1, level_2 = basic_probability(option_1, option_2, model)
    scores_1 = [level_1]
    scores_2 = [level_2]

    tokens_1 = option_1.split()
    tokens_2 = option_2.split()
    while len(tokens_1) > 1 and len(tokens_2) > 1:
        if len(tokens_1) % 2 == 0:
            tokens_1, tokens_2 = tokens_1[1:], tokens_2[1:]
        else:
            tokens_1, tokens_2 = tokens_1[:-1], tokens_2[:-1]
        level_1, level_2 = basic_probability(
            " ".join(tokens_1), " ".join(tokens_2), model)
        scores_1.append(level_1)
        scores_2.append(level_2)

    return np.dot(weights, scores_1), np.dot(weights, scores_2)

In [352]:
# Time a single interpolated lookup for test item 4937.
# NOTE(review): `model` is not assigned by any cell above this one;
# presumably it was left in the kernel by an earlier run -- confirm.
start = time.time()
prob = interpolate_probability(choices[4937][0],choices[4937][1],model,[0.1,0.9,0])
end = time.time()
print(end-start, prob)

0.0004715919494628906 (0.0076335877862595426, 0.9923664122137406)


In [342]:
# Show the candidate pair that the timing cell scored.
choices[4937]

('phlúr ban 100\u2005g', 'phlúr bán 100\u2005g')

In [305]:
#expected_runtime = (end-start)*20000
#expected_runtime/60

### Try Bigram Model

In [306]:
def evaluate_model(model, choices):
    """Score every choice with ``basic_probability``.

    Args:
        model: mapping from n-gram string to count.
        choices: sequence of items whose elements 0 and 1 are the two
            candidate n-grams.

    Returns:
        list[list]: an ``['Id', 'Expected']`` header row followed by
        ``[id, probability_of_first_option]`` rows, ids starting at 1.
    """
    header = [['Id', 'Expected']]
    rows = [
        [row_id, basic_probability(choice[0], choice[1], model)[0]]
        for row_id, choice in enumerate(choices, start=1)
    ]
    return header + rows

In [307]:
def evaluate_interp_model(model, choices, weights):
    """Score every choice with ``interpolate_probability``.

    Args:
        model: mapping from n-gram string to count.
        choices: sequence of items whose elements 0 and 1 are the two
            candidate n-grams.
        weights: interpolation weights passed through unchanged.

    Returns:
        list[list]: an ``['Id', 'Expected']`` header row followed by
        ``[id, probability_of_first_option]`` rows, ids starting at 1.
    """
    header = [['Id', 'Expected']]
    rows = [
        [row_id, interpolate_probability(choice[0], choice[1], model, weights)[0]]
        for row_id, choice in enumerate(choices, start=1)
    ]
    return header + rows

In [308]:
# Score the extracted test choices with the plain (non-interpolated) model.
results = evaluate_model(model,choices)

In [309]:
#results

In [310]:
def write_output(filename,results):
    """Write result rows to ``filename`` as two comma-separated columns.

    Args:
        filename: output path; an existing file is overwritten.
        results: iterable of rows; elements 0 and 1 of each row are
            written via ``str()``, one row per line.

    Returns:
        int: always 1.
    """
    # `with` closes the file even if a row is malformed.
    with open(filename, 'w') as out_file:
        out_file.writelines(
            str(line[0]) + "," + str(line[1]) + "\n" for line in results
        )
    return 1
    

In [311]:
def evaluate_results(prediction_file,actual_file):
    """Compute the log loss of a prediction file against an answers file.

    Both files are CSVs with a header row; the second column of every
    later row is parsed as a float.

    Args:
        prediction_file: path of the CSV holding predicted probabilities.
        actual_file: path of the CSV holding the expected values.

    Returns:
        The log loss, or ``None`` (after printing a message) when the two
        files have a different number of rows.
    """
    with open(prediction_file, newline='') as file:
        predictions = list(csv.reader(file))

    with open(actual_file, newline='') as file:
        actual = list(csv.reader(file))

    if len(actual) != len(predictions):
        print("Error: Files not the same length")
        return None

    # Drop the header row and keep the value column only.
    actual = np.array(actual)[1:, 1].astype(np.float64)
    predictions = np.array(predictions)[1:, 1].astype(np.float64)

    # log_loss's `eps` argument was deprecated in scikit-learn 1.3 and
    # removed in 1.5, so apply the same 1e-15 clipping explicitly.
    predictions = np.clip(predictions, 1e-15, 1 - 1e-15)

    return log_loss(actual, predictions)


In [199]:
# Log loss of the saved dev predictions against the saved dev answers.
evaluate_results("First_Dev_Test.csv","dev_answers.csv")

0.8597526024197777

In [312]:
def full_pipeline(output_name,train,test,N):
    """Train an N-gram model and write basic-probability predictions.

    Args:
        output_name: path of the prediction file to write.
        train: path of the training text file.
        test: path of the test text file.
        N: n-gram order used for padding, counting and choice extraction.
    """
    ngram_counts = train_model(prepare_data(train, N), N)
    predictions = evaluate_model(ngram_counts, prepare_test_data(test, N))
    write_output(output_name, predictions)

In [313]:
def full_interp_pipeline(output_name,train,test,N,weights):
    """Train an N-gram model and write interpolated predictions.

    Args:
        output_name: path of the prediction file to write.
        train: path of the training text file.
        test: path of the test text file.
        N: n-gram order used for padding, counting and choice extraction.
        weights: interpolation weights passed to ``evaluate_interp_model``.
    """
    ngram_counts = train_model(prepare_data(train, N), N)
    test_choices = prepare_test_data(test, N)
    predictions = evaluate_interp_model(ngram_counts, test_choices, weights)
    write_output(output_name, predictions)

In [314]:
#full_pipeline("trigram_with_bold_reassignment.csv","train.txt","test.txt",3)

In [371]:
# Write interpolated 5-gram predictions for the test set, one weight per level.
full_interp_pipeline("optimized_weight_pentagram.csv","train.txt","test.txt",5,[0.1, 0.1, 0.7, 0.1, 0])

In [223]:
def full_dev_pipeline(output_name,train,dev_test,dev_answers,N,percent_split,weights):
    """Split off a dev set, train on the rest, and score the dev set.

    Args:
        output_name: path where the dev predictions are written.
        train: path of the training text file.
        dev_test: path where the generated dev sentences are written.
        dev_answers: path where the generated dev answers are written.
        N: n-gram order.
        percent_split: percentage of training lines to hold out.
        weights: interpolation weights.

    Returns:
        Whatever ``evaluate_results`` returns for the written predictions.
    """
    padded_lines = prepare_data(train, N)
    vocab_pairs = create_vocab()
    kept_lines = train_dev_split(
        padded_lines, percent_split, vocab_pairs, dev_test, dev_answers)
    ngram_counts = train_model(kept_lines, N)
    dev_choices = prepare_test_data(dev_test, N)
    write_output(
        output_name, evaluate_interp_model(ngram_counts, dev_choices, weights))
    return evaluate_results(output_name, dev_answers)

In [353]:
# Dev run: 6-gram model, percent_split of 10, six interpolation weights.
score = full_dev_pipeline("First_Dev_Test.csv","train.txt","dev_test.txt","dev_answers.csv",6,10,[0.1, 0.1, 0.6, 0.1, 0.1, 0])
score

0.18732598935190142

In [354]:
# Grid search over the three trigram interpolation weights [x, y, z],
# stepping by 0.1 with z chosen so the three sum to 1.
# NOTE(review): full_dev_pipeline draws a fresh random dev split and retrains
# on every call, so the scores printed here come from different dev sets.
for x in np.arange(0,1.1,0.1):
    if x == 1:
        # NOTE(review): the inner loop below also appears to visit y = 0,
        # z = 0 for this x, so this case looks like it is scored twice -- confirm.
        weights = [x,0,0]
        print(weights,full_dev_pipeline("a.csv","train.txt","dev_test.txt","dev_answers.csv",3,10,weights))
    # The 1.05 upper bound keeps y = 1 - x inside the half-open arange range.
    for y in np.arange (0,1.05-x,0.1):
        # Float subtraction: z can end up a hair below zero instead of exactly 0.
        z = 1-y-x
        weights = [x,y,z]
        print(weights,full_dev_pipeline("a.csv","train.txt","dev_test.txt","dev_answers.csv",3,10,weights))

[0.0, 0.0, 1.0] 0.38062955591667336
[0.0, 0.1, 0.9] 0.34355854928712437
[0.0, 0.2, 0.8] 0.3136351846652727
[0.0, 0.30000000000000004, 0.7] 0.28957332646596795
[0.0, 0.4, 0.6] 0.269573376526344
[0.0, 0.5, 0.5] 0.25193587771123865
[0.0, 0.6000000000000001, 0.3999999999999999] 0.23895886886199058
[0.0, 0.7000000000000001, 0.29999999999999993] 0.22030778975511348
[0.0, 0.8, 0.19999999999999996] 0.20806457540148218
[0.0, 0.9, 0.09999999999999998] 0.20116295263874215
[0.0, 1.0, 0.0] 0.2017953799245475
[0.1, 0.0, 0.9] 0.3358309672764408
[0.1, 0.1, 0.8] 0.30924147987125666
[0.1, 0.2, 0.7000000000000001] 0.2851049845188644
[0.1, 0.30000000000000004, 0.6] 0.26665941676423005
[0.1, 0.4, 0.5] 0.24545792507676092
[0.1, 0.5, 0.4] 0.22822594324829476
[0.1, 0.6000000000000001, 0.29999999999999993] 0.22085445652429167
[0.1, 0.7000000000000001, 0.19999999999999993] 0.20644956598170813
[0.1, 0.8, 0.09999999999999995] 0.19299391606001973
[0.1, 0.9, -2.7755575615628914e-17] 0.18891790102385275
[0.2, 0.0, 0

KeyboardInterrupt: 

In [369]:
# Build the dev split and the 5-gram model once, then grid-search the five
# interpolation weights [x, y, z, a, b] (step 0.1, summing to 1) on that
# same split.
# half_dev_pipeline is the helper that takes no weights and returns the four
# values unpacked here; full_dev_pipeline requires a weights argument and
# returns only a score.
model,choices,output_name,dev_answers = half_dev_pipeline("a.csv","train.txt","dev_test.txt","dev_answers.csv",5,10)
for x in np.arange(0.1,1.1,0.1):
    # The 1.05 upper bounds keep the "remaining weight" value inside each
    # half-open arange range. The nested loops already cover x = 1 (with
    # y = z = a = 0), and y or z can never reach 1 because x >= 0.1, so no
    # special cases are needed.
    for y in np.arange(0,1.05-x,0.1):
        for z in np.arange(0,1.05-x-y,0.1):
            for a in np.arange(0,1.05-x-y-z,0.1):
                # Float subtraction: b can come out a hair below zero.
                b = 1-x-y-z-a
                weights = [x,y,z,a,b]
                print(weights,rest_of_dev(model,choices,output_name,dev_answers,weights))

[0.1, 0.0, 0.0, 0.0, 0.9] 0.3376951047606046
[0.1, 0.0, 0.0, 0.1, 0.8] 0.3097278049747795
[0.1, 0.0, 0.0, 0.2, 0.7] 0.2859965479712128
[0.1, 0.0, 0.0, 0.30000000000000004, 0.6] 0.2652066200119652
[0.1, 0.0, 0.0, 0.4, 0.5] 0.24673296791638963
[0.1, 0.0, 0.0, 0.5, 0.4] 0.23024874405406434
[0.1, 0.0, 0.0, 0.6000000000000001, 0.29999999999999993] 0.21562056157990495
[0.1, 0.0, 0.0, 0.7000000000000001, 0.19999999999999996] 0.2029165785324049
[0.1, 0.0, 0.0, 0.8, 0.09999999999999998] 0.19259657164977567
[0.1, 0.0, 0.0, 0.9, 0.0] 0.1872542119189464
[0.1, 0.0, 0.1, 0.0, 0.8] 0.3048805181897841
[0.1, 0.0, 0.1, 0.1, 0.7000000000000001] 0.2812244523169554
[0.1, 0.0, 0.1, 0.2, 0.6000000000000001] 0.2604772955286064
[0.1, 0.0, 0.1, 0.30000000000000004, 0.5] 0.2420157870799788
[0.1, 0.0, 0.1, 0.4, 0.4] 0.2255141671006279
[0.1, 0.0, 0.1, 0.5, 0.30000000000000004] 0.21083719408998391
[0.1, 0.0, 0.1, 0.6000000000000001, 0.19999999999999996] 0.19804663654712276
[0.1, 0.0, 0.1, 0.7000000000000001, 0.0999

[0.1, 0.2, 0.4, 0.30000000000000004, -1.1102230246251565e-16] 0.1664452460802398
[0.1, 0.2, 0.5, 0.0, 0.19999999999999996] 0.1815789014841877
[0.1, 0.2, 0.5, 0.1, 0.09999999999999995] 0.17068251868335232
[0.1, 0.2, 0.5, 0.2, -5.551115123125783e-17] 0.16461245832316276
[0.1, 0.2, 0.6000000000000001, 0.0, 0.09999999999999987] 0.1693573828306782
[0.1, 0.2, 0.6000000000000001, 0.1, -1.3877787807814457e-16] 0.16331886898007902
[0.1, 0.2, 0.7000000000000001, 0.0, -1.1102230246251565e-16] 0.16320502267840836
[0.1, 0.30000000000000004, 0.0, 0.0, 0.6] 0.25470101336639234
[0.1, 0.30000000000000004, 0.0, 0.1, 0.5] 0.23610370366315586
[0.1, 0.30000000000000004, 0.0, 0.2, 0.39999999999999997] 0.21946809634807343
[0.1, 0.30000000000000004, 0.0, 0.30000000000000004, 0.29999999999999993] 0.2046358239235019
[0.1, 0.30000000000000004, 0.0, 0.4, 0.19999999999999996] 0.1916526394214109
[0.1, 0.30000000000000004, 0.0, 0.5, 0.09999999999999998] 0.18093538829322345
[0.1, 0.30000000000000004, 0.0, 0.600000000

[0.2, 0.0, 0.30000000000000004, 0.5, 0.0] 0.17135476804392552
[0.2, 0.0, 0.4, 0.0, 0.4] 0.21343788089536642
[0.2, 0.0, 0.4, 0.1, 0.30000000000000004] 0.1985407261778008
[0.2, 0.0, 0.4, 0.2, 0.2] 0.18550937205585824
[0.2, 0.0, 0.4, 0.30000000000000004, 0.09999999999999998] 0.17474614496974505
[0.2, 0.0, 0.4, 0.4, 0.0] 0.16868744000218375
[0.2, 0.0, 0.5, 0.0, 0.30000000000000004] 0.19640157637283717
[0.2, 0.0, 0.5, 0.1, 0.20000000000000004] 0.18329702031608086
[0.2, 0.0, 0.5, 0.2, 0.10000000000000003] 0.1724887698852276
[0.2, 0.0, 0.5, 0.30000000000000004, 0.0] 0.16642982652129437
[0.2, 0.0, 0.6000000000000001, 0.0, 0.19999999999999996] 0.1815210321418953
[0.2, 0.0, 0.6000000000000001, 0.1, 0.09999999999999995] 0.17063874197882642
[0.2, 0.0, 0.6000000000000001, 0.2, -5.551115123125783e-17] 0.1646015012704651
[0.2, 0.0, 0.7000000000000001, 0.0, 0.09999999999999998] 0.1693185487264801
[0.2, 0.0, 0.7000000000000001, 0.1, -2.7755575615628914e-17] 0.16331443685751393
[0.2, 0.0, 0.8, 0.0, 0.0]

[0.2, 0.6000000000000001, 0.0, 0.0, 0.19999999999999996] 0.18480265946548152
[0.2, 0.6000000000000001, 0.0, 0.1, 0.09999999999999995] 0.17374861201485522
[0.2, 0.6000000000000001, 0.0, 0.2, -5.551115123125783e-17] 0.16744843413981664
[0.2, 0.6000000000000001, 0.1, 0.0, 0.09999999999999995] 0.172385829291512
[0.2, 0.6000000000000001, 0.1, 0.1, -5.551115123125783e-17] 0.1660938769727161
[0.2, 0.6000000000000001, 0.2, 0.0, -5.551115123125783e-17] 0.16589742950164346
[0.2, 0.7000000000000001, 0.0, 0.0, 0.09999999999999998] 0.17318536051288203
[0.2, 0.7000000000000001, 0.0, 0.1, -2.7755575615628914e-17] 0.16686609336310576
[0.2, 0.7000000000000001, 0.1, 0.0, -2.7755575615628914e-17] 0.16665871741127794
[0.2, 0.8, 0.0, 0.0, 0.0] 0.16762417363697654
[0.30000000000000004, 0.0, 0.0, 0.0, 0.7] 0.2791063555691506
[0.30000000000000004, 0.0, 0.0, 0.1, 0.6] 0.2582486620157603
[0.30000000000000004, 0.0, 0.0, 0.2, 0.49999999999999994] 0.23968612075593937
[0.30000000000000004, 0.0, 0.0, 0.3000000000000

[0.30000000000000004, 0.30000000000000004, 0.1, 0.0, 0.29999999999999993] 0.1988000661179307
[0.30000000000000004, 0.30000000000000004, 0.1, 0.1, 0.19999999999999993] 0.18557408278268261
[0.30000000000000004, 0.30000000000000004, 0.1, 0.2, 0.09999999999999992] 0.17463355812202933
[0.30000000000000004, 0.30000000000000004, 0.1, 0.30000000000000004, -1.1102230246251565e-16] 0.168352781847416
[0.30000000000000004, 0.30000000000000004, 0.2, 0.0, 0.1999999999999999] 0.1837724769102146
[0.30000000000000004, 0.30000000000000004, 0.2, 0.1, 0.0999999999999999] 0.17275666720007085
[0.30000000000000004, 0.30000000000000004, 0.2, 0.2, -1.1102230246251565e-16] 0.16648501361704468
[0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.0, 0.09999999999999987] 0.17140318125922274
[0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.1, -1.3877787807814457e-16] 0.16515011463427287
[0.30000000000000004, 0.30000000000000004, 0.4, 0.0, -1.1102230246251565e-16] 0.16496281248643951
[0

[0.5, 0.0, 0.0, 0.0, 0.5] 0.23544859951652775
[0.5, 0.0, 0.0, 0.1, 0.4] 0.21866829160076487
[0.5, 0.0, 0.0, 0.2, 0.3] 0.20371160445663886
[0.5, 0.0, 0.0, 0.30000000000000004, 0.19999999999999996] 0.19060422369867178
[0.5, 0.0, 0.0, 0.4, 0.09999999999999998] 0.1797487270696324
[0.5, 0.0, 0.0, 0.5, 0.0] 0.17350485137648264
[0.5, 0.0, 0.1, 0.0, 0.4] 0.21615706875994364
[0.5, 0.0, 0.1, 0.1, 0.30000000000000004] 0.20113595410603058
[0.5, 0.0, 0.1, 0.2, 0.2] 0.18798200863786527
[0.5, 0.0, 0.1, 0.30000000000000004, 0.09999999999999998] 0.17708366825415714
[0.5, 0.0, 0.1, 0.4, 0.0] 0.17080585229597747
[0.5, 0.0, 0.2, 0.0, 0.3] 0.1989755046564229
[0.5, 0.0, 0.2, 0.1, 0.19999999999999998] 0.18574852985161766
[0.5, 0.0, 0.2, 0.2, 0.09999999999999998] 0.17480419081360188
[0.5, 0.0, 0.2, 0.30000000000000004, -5.551115123125783e-17] 0.16851726374280115
[0.5, 0.0, 0.30000000000000004, 0.0, 0.19999999999999996] 0.18394834048144043
[0.5, 0.0, 0.30000000000000004, 0.1, 0.09999999999999995] 0.17292958618

In [360]:
# Dev run: 4-gram model with weight 0.1 on the first level and 0.9 on the last.
full_dev_pipeline("a.csv","train.txt","dev_test.txt","dev_answers.csv",4,10,[0.1,0,0,0.9])

0.33740994750617076

In [364]:
def half_dev_pipeline(output_name,train,dev_test,dev_answers,N,percent_split):
    """Run the weight-independent half of the dev pipeline.

    Splits off a dev set, trains the n-gram counts on the remaining lines
    and extracts the dev choices, so that several weight settings can later
    be scored against the same split and model.

    Args:
        output_name: prediction-file path, returned unchanged.
        train: path of the training text file.
        dev_test: path where the generated dev sentences are written.
        dev_answers: path where the generated dev answers are written.
        N: n-gram order.
        percent_split: percentage of training lines to hold out.

    Returns:
        tuple: ``(model, choices, output_name, dev_answers)``.
    """
    padded_lines = prepare_data(train, N)
    vocab_pairs = create_vocab()
    kept_lines = train_dev_split(
        padded_lines, percent_split, vocab_pairs, dev_test, dev_answers)
    ngram_counts = train_model(kept_lines, N)
    dev_choices = prepare_test_data(dev_test, N)
    return ngram_counts, dev_choices, output_name, dev_answers

In [366]:
def rest_of_dev(model,choices,output_name,dev_answers,weights):
    """Score one weight setting against an already-prepared dev split.

    Args:
        model: mapping from n-gram string to count.
        choices: dev choices as returned by ``prepare_test_data``.
        output_name: path where the predictions are written.
        dev_answers: path of the dev answers file.
        weights: interpolation weights to evaluate.

    Returns:
        Whatever ``evaluate_results`` returns for the written predictions.
    """
    write_output(output_name, evaluate_interp_model(model, choices, weights))
    return evaluate_results(output_name, dev_answers)

In [372]:
# Build the dev split and the 6-gram model once for the weight experiments.
# half_dev_pipeline is the helper that returns these four values;
# full_dev_pipeline requires a weights argument and returns only a score.
model,choices,output_name,dev_answers = half_dev_pipeline("a.csv","train.txt","dev_test.txt","dev_answers.csv",6,10)

In [402]:
# Six interpolation weights to try with the 6-gram model.
weights = [0,0.0,.2,.3,.5,0]

In [403]:
# Score the current weights against the prepared dev split.
rest_of_dev(model,choices,output_name,dev_answers,weights)

0.2518376139064784