# Kaggle Competition Development Notebook

### Import necessary libraries

In [173]:
import math
import pandas as pd
import numpy as np
import re
import time
import unicodedata as ud
import csv
import random
from sklearn.metrics import log_loss

# Import the vocab file

In [174]:
def create_vocab():
    with open('vocab.csv') as file:
        reader = csv.reader(file)
        vocab = list(reader)
    return vocab

### Prep training data for N Gram Model

In [175]:
def prepare_data(filename, N):
    pre_string = "<s>"
    post_string = "</s>"
    train_dat = []
        
    with open(filename, encoding="utf8") as file:
        for line in file:
            line = ud.normalize("NFC",line)
            line = re.sub('[,.?"“”]','',line)
            line = re.sub('\s+',' ',line)
            split_line = line.strip().split()
            for gram in range(1,N):
                split_line.insert(0,pre_string)
                split_line.append(post_string)
            train_dat.append(split_line)
    
    return train_dat

## Create a test example from a training example

In [176]:
def create_test_example(training_line,word):
    if " " + word[0] + " " in training_line:
        new_str = ' {'+word[0]+"|"+word[1] + '} '
        test_line = re.sub(" " + word[0] + " ",new_str, training_line,1)
        test_line = re.sub("<s>","",test_line)
        test_line = re.sub("</s>","",test_line)
        prob = 1
        return test_line.strip(),prob
    elif " " + word[1] + " " in training_line:
        new_str = ' {'+word[0]+"|"+word[1] + '} '
        test_line = re.sub(" " + word[1] + " ",new_str, training_line,1)
        test_line = re.sub("<s>","",test_line)
        test_line = re.sub("</s>","",test_line)
        prob = 0
        return test_line.strip(),prob
    else:
        print("Error, word not found")

## Determine which vocab word corresponds to location in the training data

In [177]:
def which_vocab(index):
    if index < 24200:
        return 0, 24200
    elif index < 48400:
        return 1, 24200
    elif index < 51290:
        return 2, 2890
    elif index < 75490:
        return 3, 24200
    elif index < 99690:
        return 4, 24200
    elif index < 123890:
        return 5, 24200
    elif index < 131159:
        return 6, 7269
    elif index < 155359:
        return 7, 24200
    elif index < 179559:
        return 8, 24200
    elif index < 203759:
        return 9, 24200
    elif index < 227959:
        return 10, 24200
    elif index < 252159:
        return 11, 24200
    elif index < 258227:
        return 12, 6068
    elif index < 282427:
        return 13, 24200
    elif index < 306627:
        return 14, 24200
    elif index < 310023:
        return 15, 3396
    elif index < 334223:
        return 16, 24200
    elif index < 358423:
        return 17, 24200
    elif index < 382623:
        return 18, 24200
    elif index < 406823:
        return 19, 24200
    elif index < 418928:
        return 20, 12105
    elif index < 430425:
        return 21, 11497
    elif index < 446988:
        return 22, 16563
    elif index < 452037:
        return 23, 5049
    elif index < 456571:
        return 24, 4534

## Split the data into training and development
#### Dev is saved to file, along with dev answers, new training array is returned

In [178]:
def train_dev_split(train_array, percent_split, vocab_df, dev_filename, answers_filename):
    dev_file = open(dev_filename,'w',encoding='utf8')
    dev_answers = open(answers_filename,'w',encoding='utf8')
    dev_answers.write("Id,Expected\n")
    new_train = []

    count = 1
    vocab_word = 0
    for index,training_line in enumerate(train_array):
        vocab_word, vocab_count = which_vocab(index)  
        if random.random() < percent_split/100:
            test_line, prob = create_test_example(" ".join(training_line),vocab_df[vocab_word])
            dev_answers.write(str(count) + "," + str(prob) + "\n")
            dev_file.write(test_line + "\n")
            count += 1
        else:
            new_train.append(training_line)
        
    dev_file.close()
    dev_answers.close()
    
    return new_train

### Actually create the dictionary with all n-grams

In [179]:
def train_model(train_data, N):
    model = {}
    for line in train_data:
        for each_N in range(1,N+1):
            #for each line, generate all ngrams
            for index in range(0,len(line)-each_N):
                ngram = line[index]
                for n_forward in range(1,each_N):
                    ngram += ' ' + line[index+n_forward]
                if ngram in model:
                    model[ngram] += 1
                else:
                    model[ngram] = 1
    return model            

### Extract N-gram choices from the test data

In [180]:
def prepare_test_data(filename, N):
    choices=[]
    reg_exp_str = ' \{(.*)\|(.*)\} '
    pre_string = ''
    post_string = ''
    for gram in range(1,N):
        pre_string += '<s> '
        post_string += ' </s>'
        if gram % 2 == 0:
            reg_exp_str = reg_exp_str + '([^ ]+) '
        else:
            reg_exp_str = ' ([^ ]+)' + reg_exp_str
    
    reg_exp = re.compile(reg_exp_str)
    with open(filename, encoding="utf8") as file:
        for row in file:
            row = ud.normalize("NFC",row)
            row = re.sub('[,.?"“”]','',row)
            row = re.sub('\s+',' ',row)
            row = pre_string + row.strip() + post_string
            #extract options
            match = reg_exp.search(row)
            if match:
                if N%2 ==0:
                    midpoint = math.ceil((N+2)/2)
                else:
                    midpoint = math.ceil(N/2)
                choice_1 = match.group(midpoint)
                #print(choice_1)
                choice_2 = match.group(midpoint+1)
                #print(choice_2)
                for match_group in range(midpoint+2,N+2):
                    choice_1 += ' ' + match.group(match_group)
                    choice_2 += ' ' + match.group(match_group)
                for match_group in range(midpoint-1,0,-1):
                    choice_1 = match.group(match_group) + ' ' + choice_1
                    choice_2 = match.group(match_group) + ' ' + choice_2
                choice = (choice_1, choice_2)
            else:
                print('error, no value')
                choice = 'error'
            choices.append(choice)
    return choices

In [181]:
choices =prepare_test_data('test.txt',3)

## Probability function to determine likelihood of option_1 and option_2

In [218]:
def backoff_probability(option_1,option_2,model):
    if option_1 in model:
        count_1 = model[option_1]
    else:
        count_1 = 0
        
    if option_2 in model:
        count_2 = model[option_2]
    else:
        count_2 = 0
    
    if abs(count_1 - count_2) < 2:
    #if count_1 < 2 and count_2 < 2:
    #if count_1 == 0 and count_2 == 0:
        split_1 = option_1.split()
        split_2 = option_2.split()
        if len(split_1) > 1 and len(split_2) > 1:
            if len(split_1) % 2 ==0:
                split_1.pop(0)
                split_2.pop(0)
            else:
                split_1.pop()
                split_2.pop()
            new_option_1 = " ".join(split_1)
            new_option_2 = " ".join(split_2)
            #return basic_probability(new_option_1,new_option_2,model)
            return backoff_probability(new_option_1,new_option_2,model)
            #return add_k_probability(new_option_1,new_option_2,model,.1,56951334,18881974)
            
    elif count_1 == 0:
        count_1 = .1
    elif count_2 == 0:
        count_2 = .1
        
    prob_1 = count_1/(count_1+count_2)
    prob_2 = count_2/(count_1+count_2)
    
    return prob_1, prob_2

In [183]:
def basic_probability(option_1,option_2,model):
    if option_1 in model:
        count_1 = model[option_1]
    else:
        count_1 = 0
        
    if option_2 in model:
        count_2 = model[option_2]
    else:
        count_2 = 0
        
    if count_1 == 0 and count_2 == 0:
        count_1 = 1
        count_2 = 1    
    elif count_1 == 0:
        count_1 = .1
    elif count_2 == 0:
        count_2 = .1
        
    prob_1 = count_1/(count_1+count_2)
    prob_2 = count_2/(count_1+count_2)
    
    return prob_1, prob_2

In [219]:
def weighted_probability(option_1,option_2,model,weight):
    if option_1 in model:
        count_1 = model[option_1]
    else:
        count_1 = 0
        
    if option_2 in model:
        count_2 = model[option_2]
    else:
        count_2 = 0
        
    if count_1 == 0 and count_2 == 0:
        count_1 = 1
        count_2 = 1    
    elif count_1 == 0:
        count_1 = .1
    elif count_2 == 0:
        count_2 = .1
        
    prob_1 = count_1/(count_1+count_2) *weight
    prob_2 = count_2/(count_1+count_2) *weight
    
    return prob_1, prob_2

In [84]:
# N1 = sum(1 for x in model.values() if x==1)
# N2 = sum(1 for x in model.values() if x==2)
# N3 = sum(1 for x in model.values() if x==3)
# N4 = sum(1 for x in model.values() if x==4)
# N5 = sum(1 for x in model.values() if x==5)
# print(str(N1)+' '+str(N2)+' '+str(N3)+' '+str(N4)+' '+str(N5))
# print(len(model))
# N = sum(model.values())
# print(N)

14747572 2366058 654353 305936 169488
18881974
56951334


In [86]:
#print((0+1)/(N+len(model)))

1.3186817592079723e-08


In [186]:
def interpolate_probability(option_1,option_2,model,weights):
    prob_1_arr=[]
    prob_2_arr=[]
    n_option_1 = option_1
    n_option_2 = option_2
    
    while True:
        # get basic probabilty and add to overall
        c_prob_1, c_prob_2 = backoff_probability(n_option_1,n_option_2,model)
        #c_prob_1, c_prob_2 = basic_probability(n_option_1,n_option_2,model)
        #c_prob_1, c_prob_2 = add_k_probability(n_option_1,n_option_2,model,.1,56951334,18881974)
        prob_1_arr.append(c_prob_1)
        prob_2_arr.append(c_prob_2)
        
        # split the options
        split_1 = n_option_1.split()
        split_2 = n_option_2.split()
        
        #get option lengths
        len_1 = len(split_1)
        len_2 = len(split_2)
        
        if len_1 <= 1 or len_2 <= 1:
            break
        
        if len(split_1) % 2 ==0:
            split_1.pop(0)
            split_2.pop(0)
        else:
            split_1.pop()
            split_2.pop()
        n_option_1 = " ".join(split_1)
        n_option_2 = " ".join(split_2)   
    
    #multiply probabilities by weights
    prob_1 = np.dot(weights,prob_1_arr)
    prob_2 = np.dot(weights,prob_2_arr)
    
    return prob_1, prob_2

In [187]:
def add_k_probability(option_1,option_2,model,k,N,V):
    n_option_1 = option_1
    n_option_2 = option_2
    
    if option_1 in model:
        count_1 = model[option_1]
    else:
        count_1 = 0
        
    if option_2 in model:
        count_2 = model[option_2]
    else:
        count_2 = 0
        
    count_1 = (count_1+k)*N/(N+k*V)
    count_2 = (count_2+k)*N/(N+k*V)
        
    prob_1 = count_1/(count_1+count_2)
    prob_2 = count_2/(count_1+count_2)
    
    return prob_1, prob_2

In [189]:
start = time.time()
prob = interpolate_probability(choices[4937][0],choices[4937][1],model,[.1,.9,0])
end = time.time()
print(end-start, prob)

0.00047898292541503906 (0.0076335877862595426, 0.9923664122137406)


In [305]:
#expected_runtime = (end-start)*20000
#expected_runtime/60

# Try AdaBoost

In [190]:
def ada_boost(train):
    train_data = prepare_data(train,N)
    vocab = create_vocab()
    new_train = train_dev_split(train_data, percent_split, vocab, dev_test, dev_answers)
    choices = prepare_test_data(dev_test,N)
    
    results_array = []
    #initialize weights
    weights = []
    for x in range(0,len(train)):
        weights.append(1/len(train))
        
    for trial in range(0,K):
        #train unigram with weights
        model = train_model(new_train,N)
        results = evaluate_weighted_model(model,choices,weights)
        results_array.append(results)
        success = write_output(output_name,results)
        score = evaluate_results(output_name,dev_answers)
        adaptive_param = 0.5*log((1-score)/score)
        weights = 1/Z * weights * exp(adaptive_param)
        
    
    train_data = prepare_data(train,N)
    vocab = create_vocab()
    new_train = train_dev_split(train_data, percent_split, vocab, dev_test, dev_answers)
    model = train_model(new_train,N)
    choices = prepare_test_data(dev_test,N)
    results = evaluate_interp_model(model,choices,weights)
    success = write_output(output_name,results)
    score = evaluate_results(output_name,dev_answers)
        

SyntaxError: invalid syntax (<ipython-input-190-8f08a6c7b847>, line 21)

### Try Bigram Model

In [191]:
def evaluate_model(model, choices):
    results = [['Id','Expected']]
    for index, choice in enumerate(choices):
        c1,c2 = basic_probability(choice[0],choice[1],model)
        results.append([index+1,c1])
    return results

In [192]:
def evaluate_interp_model(model, choices, weights):
    results = [['Id','Expected']]
    for index, choice in enumerate(choices):
        c1,c2 = interpolate_probability(choice[0],choice[1],model, weights)
        results.append([index+1,c1])
    return results

In [193]:
def evaulate_weighted_model(model, choices, weights):
    results = [['Id','Expected']]
    for index, choice in enumerate(choices):
        c1,c2 = weighted_probability(choice[0],choice[1],model, weights[index])
        results.append([index+1,c1])
    return results

In [308]:
#results = evaluate_weighted_model(model,choices,)

In [309]:
#results

In [194]:
def write_output(filename,results):
    out_file = open(filename,'w')
    count =0
    for line in results:
        output = str(line[0]) + "," + str(line[1]) + "\n"
        out_file.write(output)
    out_file.close()
    return 1 

In [195]:
def evaluate_results(prediction_file,actual_file):
    with open(prediction_file) as file:
        reader = csv.reader(file)
        predictions = list(reader)
    
    with open(actual_file) as file:
        reader = csv.reader(file)
        actual = list(reader)
    
    if len(actual) != len(predictions):
        print("Error: Files not the same length")
        return
    
    actual = np.array(actual)
    actual = actual[1:,1].astype(np.float64)
    predictions = np.array(predictions)
    predictions = predictions[1:,1].astype(np.float64)
    
    log_loss_score = log_loss(actual, predictions, eps=1e-15)
        
    return log_loss_score

In [199]:
evaluate_results("First_Dev_Test.csv","dev_answers.csv")

0.8597526024197777

In [196]:
def full_pipeline(output_name,train,test,N):
    train_data = prepare_data(train,N)
    model = train_model(train_data,N)
    choices = prepare_test_data(test,N)
    results = evaluate_model(model,choices)
    write_output(output_name,results)

In [197]:
def full_interp_pipeline(output_name,train,test,N,weights):
    train_data = prepare_data(train,N)
    model = train_model(train_data,N)
    choices = prepare_test_data(test,N)
    results = evaluate_interp_model(model,choices,weights)
    write_output(output_name,results)

In [198]:
#full_pipeline("trigram_with_bold_reassignment.csv","train.txt","test.txt",3)

In [199]:
full_interp_pipeline("optimized_weight_pentagram.csv","train.txt","test.txt",5,[0.1, 0.1, 0.7, 0.1, 0])

In [216]:
def full_dev_pipeline(output_name,train,dev_test,dev_answers,N,percent_split,weights):
    train_data = prepare_data(train,N)
    vocab = create_vocab()
    new_train = train_dev_split(train_data, percent_split, vocab, dev_test, dev_answers)
    model = train_model(new_train,N)
    choices = prepare_test_data(dev_test,N)
    results = evaluate_interp_model(model,choices,weights)
    success = write_output(output_name,results)
    score = evaluate_results(output_name,dev_answers)
    return score

In [201]:
score = full_dev_pipeline("First_Dev_Test.csv","train.txt","dev_test.txt","dev_answers.csv",4,10,[0.2, 0.7, 0.1, 0])

KeyboardInterrupt: 

In [354]:
for x in np.arange(0,1.1,0.1):
    if x == 1:
        weights = [x,0,0]
        print(weights,full_dev_pipeline("a.csv","train.txt","dev_test.txt","dev_answers.csv",3,10,weights))
    for y in np.arange (0,1.05-x,0.1):
        z = 1-y-x
        weights = [x,y,z]
        print(weights,full_dev_pipeline("a.csv","train.txt","dev_test.txt","dev_answers.csv",3,10,weights))

[0.0, 0.0, 1.0] 0.38062955591667336
[0.0, 0.1, 0.9] 0.34355854928712437
[0.0, 0.2, 0.8] 0.3136351846652727
[0.0, 0.30000000000000004, 0.7] 0.28957332646596795
[0.0, 0.4, 0.6] 0.269573376526344
[0.0, 0.5, 0.5] 0.25193587771123865
[0.0, 0.6000000000000001, 0.3999999999999999] 0.23895886886199058
[0.0, 0.7000000000000001, 0.29999999999999993] 0.22030778975511348
[0.0, 0.8, 0.19999999999999996] 0.20806457540148218
[0.0, 0.9, 0.09999999999999998] 0.20116295263874215
[0.0, 1.0, 0.0] 0.2017953799245475
[0.1, 0.0, 0.9] 0.3358309672764408
[0.1, 0.1, 0.8] 0.30924147987125666
[0.1, 0.2, 0.7000000000000001] 0.2851049845188644
[0.1, 0.30000000000000004, 0.6] 0.26665941676423005
[0.1, 0.4, 0.5] 0.24545792507676092
[0.1, 0.5, 0.4] 0.22822594324829476
[0.1, 0.6000000000000001, 0.29999999999999993] 0.22085445652429167
[0.1, 0.7000000000000001, 0.19999999999999993] 0.20644956598170813
[0.1, 0.8, 0.09999999999999995] 0.19299391606001973
[0.1, 0.9, -2.7755575615628914e-17] 0.18891790102385275
[0.2, 0.0, 0

KeyboardInterrupt: 

In [217]:
model,choices,output_name,dev_answers = half_dev_pipeline("a.csv","train.txt","dev_test.txt","dev_answers.csv",5,10)
for x in np.arange(0.1,1.1,0.1):
    if x == 1:
        weights = [x,0,0,0,0]
        print(weights, rest_of_dev(model,choices,output_name,dev_answers,weights))
    for y in np.arange (0,1.05-x,0.1):
        if y ==1:
            weights = [0,y,0,0,0]
            print(weights,rest_of_dev(model,choices,output_name,dev_answers,weights))
        for z in np.arange(0,1.05-x-y,0.1):
            if z ==1:
                weights = [0,0,z,0,0]
                print(weights,rest_of_dev(model,choices,output_name,dev_answers,weights))
            for a in np.arange(0,1.05-x-y-z,0.1):
                b = 1-x-y-z-a
                weights = [x,y,z,a,b]
                print(weights,rest_of_dev(model,choices,output_name,dev_answers,weights))

[0.1, 0.0, 0.0, 0.0, 0.9] 0.3592849652183154
[0.1, 0.0, 0.0, 0.1, 0.8] 0.3311490991789249
[0.1, 0.0, 0.0, 0.2, 0.7] 0.30760038048317145
[0.1, 0.0, 0.0, 0.30000000000000004, 0.6] 0.2869986481387449
[0.1, 0.0, 0.0, 0.4, 0.5] 0.26861367988811213
[0.1, 0.0, 0.0, 0.5, 0.4] 0.25205284103686126
[0.1, 0.0, 0.0, 0.6000000000000001, 0.29999999999999993] 0.23710540323869186
[0.1, 0.0, 0.0, 0.7000000000000001, 0.19999999999999996] 0.2237025565816232
[0.1, 0.0, 0.0, 0.8, 0.09999999999999998] 0.21196771460937552
[0.1, 0.0, 0.0, 0.9, 0.0] 0.20269885529067716
[0.1, 0.0, 0.1, 0.0, 0.8] 0.32671077433535384
[0.1, 0.0, 0.1, 0.1, 0.7000000000000001] 0.30329969582707855
[0.1, 0.0, 0.1, 0.2, 0.6000000000000001] 0.28278767939907595
[0.1, 0.0, 0.1, 0.30000000000000004, 0.5] 0.2644590076198142
[0.1, 0.0, 0.1, 0.4, 0.4] 0.24792750612404726
[0.1, 0.0, 0.1, 0.5, 0.30000000000000004] 0.2329846290390802
[0.1, 0.0, 0.1, 0.6000000000000001, 0.19999999999999996] 0.2195603756291684
[0.1, 0.0, 0.1, 0.7000000000000001, 0.

KeyboardInterrupt: 

In [360]:
full_dev_pipeline("a.csv","train.txt","dev_test.txt","dev_answers.csv",4,10,[0.2,0.7,0.1,0])

0.33740994750617076

In [202]:
def half_dev_pipeline(output_name,train,dev_test,dev_answers,N,percent_split):
    train_data = prepare_data(train,N)
    vocab = create_vocab()
    new_train = train_dev_split(train_data, percent_split, vocab, dev_test, dev_answers)
    model = train_model(new_train,N)
    choices = prepare_test_data(dev_test,N)
    return model,choices,output_name,dev_answers

In [203]:
def rest_of_dev(model,choices,output_name,dev_answers,weights):
    results = evaluate_interp_model(model,choices,weights)
    success = write_output(output_name,results)
    score = evaluate_results(output_name,dev_answers)
    return score

In [211]:
model,choices,output_name,dev_answers = half_dev_pipeline("a.csv","train.txt","dev_test.txt","dev_answers.csv",6,10)

In [212]:
weights = [0,0.0,.2,.3,.5,0]

In [213]:
rest_of_dev(model,choices,output_name,dev_answers,weights)

0.18820252692236433

In [207]:
def bootstrap_resample(train_data):
    train_size = len(train_data)
    new_train = []
    for x in range(0,len(train_data)):
        new_train.append(train_data[math.floor(random.random()*train_size)])
    return new_train

In [None]:
def bagging_test(output_name,train,dev_test,dev_answers,N,percent_split,weights,bags):
    train_data = prepare_data(train,N)
    vocab = create_vocab()
    new_train = train_dev_split(train_data, percent_split, vocab, dev_test, dev_answers)
    overall_results_array = []
    mean_array = [['ID','Expected']]
    choices = prepare_test_data(dev_test,N)
    for bag in range(0,bags):
        bootstrapped_train = bootstrap_resample(new_train)
        model = train_model(bootstrapped_train,N)
        results = evaluate_interp_model(model,choices,weights)
        overall_results_array.append(results)
    for x in range(1,len(results)):
        mean = 0
        for result_array in overall_results_array:
            mean += result_array[x][1]
        mean = mean/len(overall_results_array)
        mean_array.append([x,mean])
    success = write_output(output_name,mean_array)
    score = evaluate_results(output_name,dev_answers)
    return score

In [168]:
score = bagging_test("a.csv","train.txt","dev_test.txt","dev_answers.csv",4,10,[0.1,0.1,0.7,0.1,0],5)

In [169]:
score

0.2821542034186898

In [230]:
def bagging_real(output_name,train,test,N,weights,bags):
    train_data = prepare_data(train,N)
    vocab = create_vocab()
    overall_results_array = []
    mean_array = [['ID','Expected']]
    choices = prepare_test_data(test,N)
    for bag in range(0,bags):
        print(bag)
        bootstrapped_train = bootstrap_resample(train_data)
        model = train_model(bootstrapped_train,N)
        results = evaluate_interp_model(model,choices,weights)
        overall_results_array.append(results)
    for x in range(1,len(results)):
        mean = 0
        for result_array in overall_results_array:
            mean += result_array[x][1]
        mean = mean/len(overall_results_array)
        mean_array.append([x,mean])
    success = write_output(output_name,mean_array)
    return output_name

In [231]:
bagging_real("bagging_quad.csv","train.txt","test.txt",4,[0.2,0.7,0.1,0],100)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18


KeyboardInterrupt: 

In [232]:
def compile_best_solutions(names):
    all_predictions = []
    mean_array = [['ID','Expected']]
    pred_len = 0
    for name in names:
        with open(name) as file:
            reader = csv.reader(file)
            predictions = list(reader)
#         predictions = np.array(predictions)
#         predictions = predictions[1:,1].astype(np.float64)
            all_predictions.append(predictions)
            pred_len = len(predictions)
    for x in range(1,pred_len):
        mean = 0
        for ind,pred_list in enumerate(all_predictions):
            mean += float(pred_list[x][1])
        mean = mean/len(all_predictions)
        mean_array.append([x,mean])
    
    return mean_array

In [233]:
names = ['optimized_weight_pentagram.csv','optimized_weight_quadgram.csv','combined_best_tries.csv','a_very_bold_trigram.csv','optimized_weight_trigram.csv']

In [234]:
results = compile_best_solutions(names)

In [235]:
results

[['ID', 'Expected'],
 [1, 0.9856907036585907],
 [2, 0.9255309037074027],
 [3, 0.9486942831189358],
 [4, 0.9423442487264475],
 [5, 0.9551780992875495],
 [6, 0.9995605248642334],
 [7, 0.9420145810372812],
 [8, 0.9858172528653165],
 [9, 0.981202333203405],
 [10, 0.9403607662719293],
 [11, 0.9838988638171461],
 [12, 0.9200377409624592],
 [13, 0.943513327239148],
 [14, 0.9200377409624592],
 [15, 0.9890406849571379],
 [16, 0.9903641428012957],
 [17, 0.9759193572212551],
 [18, 0.06812538536833639],
 [19, 0.9200377409624592],
 [20, 0.9918216872016232],
 [21, 0.9814221444825801],
 [22, 0.9806368642719546],
 [23, 0.9682182883253733],
 [24, 0.9964997376749836],
 [25, 0.9935656578753773],
 [26, 0.5556659158923358],
 [27, 0.9768688187152149],
 [28, 0.9725596173133069],
 [29, 0.924915656241529],
 [30, 0.9906986234614548],
 [31, 0.9898722200990934],
 [32, 0.914658229795603],
 [33, 0.9617844560759921],
 [34, 0.9974307335974298],
 [35, 0.9925418678431808],
 [36, 0.9744834110926149],
 [37, 0.99613016948

In [236]:
write_output('combined_best_tries__no_reassignment.csv',results)

1