In [59]:
import nltk
from nltk.tokenize import word_tokenize, RegexpTokenizer
import re
import copy
from collections import defaultdict , Counter
import math
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def custom_tokenize( text, must_have_textnum=True):
    # Regular expression pattern to match quoted text
    quote_pattern = r'\".*?\"|\`.*?\`|\'.*?\''
    
    # Find all quoted texts
    quoted_texts = nltk.re.findall(quote_pattern, text)

    # Replace quoted texts with placeholders to avoid re-tokenization
    placeholder = "QUOTE_PLACE_HOLDER"
    modified_text = nltk.re.sub(quote_pattern, placeholder, text)

    # Tokenize the modified text using word_tokenize
    tokens = word_tokenize(modified_text)

    # Replace placeholders with the original quoted texts
    final_tokens = []
    quote_index = 0
    for token in tokens:
        if token == placeholder:
            final_tokens.append(quoted_texts[quote_index])
            quote_index += 1
        else:
            final_tokens.append(token)

    return [s for s in final_tokens if re.search(r'[a-zA-Z0-9]', s)] if must_have_textnum else final_tokens

#calculating the similarity score of the two tokenized sentences.
def similarity_meassure( hypothesis_tokens , reference_tokens, in_table_keywords):

    hypothesis_counts = Counter(hypothesis_tokens)
    reference_counts = Counter(reference_tokens)
    clipped_counts = dict()

    for token in hypothesis_tokens:
        if token not in reference_counts.keys():
            reference_counts[token] = 0
        if token in in_table_keywords:
            clipped_counts[token] = min(hypothesis_counts[token], reference_counts[token])
            # print(f'{token}')
        else:
            clipped_counts[token] = min(hypothesis_counts[token], reference_counts[token])/2
    
    total_clipped = sum(clipped_counts.values())

    similarity = (total_clipped*2+0.1) / ( len(hypothesis_tokens) + len(reference_tokens) + 0.1 )

    return similarity

def ensemble( decoded_text_list , inputs_log_prob, batch_text, similarity_func='avg' ):# , past_key_value_tensor ):
    #function for performing ensemble using the bleu metric between the candidate sequences.
    #input:
        #inputs_ids: torch tensor representing the prompt tokens per component with shape (batch_size , input_len , num_beam)
        #inputs_log_prob: torch tensor representing the probability of each input_ids with the shape(batch_size , 1 , num_beam)
        #starting_batch_input_len: The length of the batch before any predictions.
        #extra_added_paddings: torch tensor with size (batch_size , 1 , num_beam) recording the token index of the last token of the prompt (where the answer starts)
        #batch_text: list of string with len()=number_components. Having the prompts for each components.
    #return:
        #ensembled_inputs_ids: torch tensor with size (batch_size , input_len , num_beam)
        #ensembled_inputs_log_prob: torch tensor with size (batch_size , 1 , num_beam)
    # print('extra_added_paddings before ensemble: ' , extra_added_paddings)
    length_penalty = 0.5
    table_creation_part_prompt = batch_text.split('database schema :')[-1]
    # print('table_creation_part_prompt: ', table_creation_part_prompt)
    in_table_keywords = custom_tokenize( table_creation_part_prompt.replace('.' , ' ')  )
    # print('in_table_keywords: ' , in_table_keywords)
    tokenized_responses = []
    for text in decoded_text_list: #number of candidates we have for each question
        tokenized_responses.append( custom_tokenize( text.replace('.' , ' ')))#, must_have_textnum=False ) )
        
    selection_score_list = [] #This scoring is used to select the best candidates. It uses the length penalty to calculate the scores

    tmp_input_log_prob = []
    for text, prob in zip(decoded_text_list , inputs_log_prob):
        gen_text_len = len( tokenizer(text, return_tensors="pt" , padding=True).input_ids )
        tmp_input_log_prob.append( prob/( gen_text_len ** length_penalty ) )

    #Calculating the score for each candidate
    for j in range( len( tokenized_responses ) ): #[tok_component1_beam1, tok_component2_beam1, tok_component3_beam1, ..., tok_component1_beam2, tok_component2_beam2 , ...]
        temp_tokenized_responses = copy.deepcopy( tokenized_responses )
        temp_query_list = decoded_text_list.copy()
        query = temp_query_list.pop(j)
        tokenized_response = temp_tokenized_responses.pop(j)
        selection_score = 0
        selection_denominator = 0
        for index , (other_tokenized_response, other_query) in enumerate( zip( temp_tokenized_responses , temp_query_list ) ):
            if index>=j:
                index+=1
    
            if query=='':
                query='***weirdanswer***'
            elif other_query=='':
                other_query='***weirdanswer***'
            
            jacc_weighted_sim = similarity_meassure( tokenized_response, other_tokenized_response, in_table_keywords )

            if similarity_func == 'jacc':
                similarity = math.log(jacc_weighted_sim)
            elif similarity_func == 'tree':
                tree_based_sim = sql_scoring.unparsed_query_similarity(query , other_query)
                if tree_based_sim == 0:
                    similarity = math.log(jacc_weighted_sim)
                else:
                    similarity = math.log( tree_based_sim )
            elif similarity_func == 'bleu':
                blue_score = sentence_bleu( word_tokenize( query.replace('.' , ' ') ) , word_tokenize( other_query.replace('.' , ' ') ) , smoothing_function = SmoothingFunction().method1)
                if blue_score == 0:
                    similarity = math.log(jacc_weighted_sim)
                else:
                    similarity = math.log( blue_score )
                    
            prob_with_lenpen = torch.tensor( tmp_input_log_prob[ index ] )
            selection_score_with_other_response = prob_with_lenpen + similarity
            
            if selection_score == 0:
                selection_score = selection_score_with_other_response
                selection_denominator = prob_with_lenpen
            else:
                alpha = max( selection_score_with_other_response , selection_score )
                beta = min( selection_score_with_other_response , selection_score )
                selection_score = alpha + torch.log1p(torch.exp(beta-alpha))

                alpha = max( prob_with_lenpen , selection_denominator )
                beta = min( prob_with_lenpen , selection_denominator )
                selection_denominator = alpha + torch.log1p(torch.exp(beta-alpha))

        selection_log_score = (selection_score/ selection_denominator + tmp_input_log_prob[ j ] )#/2
        selection_score_list.append(selection_log_score)

    max_bleu_score_value = max( selection_score_list )
    max_index_bleu_score = selection_score_list.index(max_bleu_score_value)
    
    return decoded_text_list[max_index_bleu_score]


# prompt_list
# responses_list
# probabilities_list
output_list = []
for index in range( len(responses_list[0]) ):
    candids = []
    probs = []
    prompt = prompt_list[index]
    for comp in range( len(responses_list) ):
        candids.append(responses_list[comp][index])
        probs.append(probabilities_list[comp][index])
    
    output_list.append( ensemble( candids , probs , prompt , similarity_func='bleu') )
        
with open('../final_output_CodeS_preprocess_ensemble.pkl' , 'wb') as f:
    pkl.dump(output_list , f)


In [44]:
from codes.utils.db_utils import check_sql_executability, detect_special_char

def get_candidates_from_dirList(dir_list):
    #returns:
        #inputs_log_probs_list: list of list of torch tensors: [  [q0_comp0, q1_comp0 , ...], ... , [q0_compN, q1_compN , ... ] ]
        #all_candidates : list of list of strings: [ [ [comp0_q0_beam0, ... , comp0_q0_beamM] , ... , [comp0_qN_beam0, ... , comp0_qN_beamM] ] , ... , [ [compK_q0_beam0, ... , comp0K_q0_beamM] , ... , [compK_qN_beam0, ... , compK_qN_beamM] ]  ] 
    input_ids_corss_component_list = []
    starting_batch_input_len_cross_comp_lst = []
    inputs_log_probs_list = []
    all_candidates = []
    for the_dir in dir_list:
        with open( the_dir + '/output_sequences_test_lenpen05_input_ids.pkl' , 'rb' ) as f:
            input_ids_corss_component_list.append( pkl.load(f) )
            
        with open( the_dir + '/output_sequences_test_lenpen05_starting_batch_input_len.pkl' , 'rb' ) as f:
            starting_batch_input_len_cross_comp_lst.append( pkl.load(f) )

        with open(the_dir + '/output_sequences_test_lenpen05_inputs_log_prob.pkl', 'rb') as f:
            inputs_log_probs_list.append( pkl.load(f) )
    
    for component in range( len(input_ids_corss_component_list) ):
        candidate_list = []
        for index in range( len( input_ids_corss_component_list[0] ) ):
            start = starting_batch_input_len_cross_comp_lst[component][index]
            the_tensor = input_ids_corss_component_list[component][index][start:, :].transpose(0 , 1)
            candidate_list.append( tokenizer.batch_decode( the_tensor , skip_special_tokens=True ) )
        all_candidates.append(candidate_list)
    return all_candidates, inputs_log_probs_list

def post_process(sql, schema_items):
    sql = sql.replace("\n", " ")
    for table in schema_items:
        for column_name in table["column_names"]:
            if detect_special_char(column_name) and column_name in sql:
                sql = sql.replace(column_name, "`"+column_name+"`")

    while "``" in sql:
        sql = sql.replace("``", "`")

    sql = sql.split(";")[0].strip() + ";"

    return sql

with open('./codes/eval_set.pkl' , 'rb')as f:
    eval_set = pkl.load(f)

def codeS_postProcess( generated_sqls , q_index ):
    eval_data = eval_set[q_index]
    generated_sqls = [ post_process( generated_sql, eval_data["schema"]["schema_items"] ) for generated_sql in generated_sqls ]
    return generated_sqls
    

In [1]:
#This code is written as a prerequisite for my scoring method in sequence-level fashion that has already have had the CodeS ensemble on the components.
#Getting the selected queries by CodeS and their probabilities. Storing them in pkl files
import json
output_file = 'myScoringMethod_treeBased_ens_after_CodeS_ens_5comp.pkl'
dir_list = [ '../bird_0_5_lenpen05' , '../bird_5_10_lenpen05' , '../bird_10_15_lenpen05' , '../bird_15_20_lenpen05' , '../bird_20_25_lenpen05' ]

tokenizer = AutoTokenizer.from_pretrained('seeklhy/codes-1b')

comp1 = '../codeS_pred/codes-1b_beam4_lenpen05_BIRD_table_num_5_column_num_6_5-shot_0_5_max_tokens_8192_max_new_tokens_256.json'
comp2 = '../codeS_pred/codes-1b_beam4_lenpen05_BIRD_table_num_5_column_num_6_5-shot_5_10_max_tokens_8192_max_new_tokens_256.json'
comp3 = '../codeS_pred/codes-1b_beam4_lenpen05_BIRD_table_num_5_column_num_6_5-shot_10_15_max_tokens_8192_max_new_tokens_256.json'
comp4 = '../codeS_pred/codes-1b_beam4_lenpen05_BIRD_table_num_5_column_num_6_5-shot_15_20_max_tokens_8192_max_new_tokens_256.json'
comp5 = '../codeS_pred/codes-1b_beam4_lenpen05_BIRD_table_num_5_column_num_6_5-shot_20_25_max_tokens_8192_max_new_tokens_256.json'

#Reading the selected questions from each component into responses_list
comp_list = [ comp1 , comp2 , comp3 , comp4 , comp5 ]
responses_list = [] # [ [comp0_q0, comp0q1, comp0_q2 , ...] , [comp1_q0, comp1_q1, ...] , ... ]
for comp in comp_list:
    responses = []
    with open(comp , 'r') as f:
        generated_response_file_byte = f.read()
        generated_response = json.loads(generated_response_file_byte)
        for item in generated_response['questions']:
            responses.append(item['response'])
    responses_list.append(responses)

probabilities_list = [ [] for comp in comp_list ] # We want to store the probability of each selected candidate in each component here.
each_comp_candids , inputs_log_probs_list = get_candidates_from_dirList(dir_list) #[ [ [comp0_q0_beam0, ... , comp0_q0_beamM] , ... , [comp0_qN_beam0, ... , comp0_qN_beamM] ] , ... , [ [compK_q0_beam0, ... , comp0K_q0_beamM] , ... , [compK_qN_beam0, ... , compK_qN_beamM] ]  ] 
num_comp = len(comp_list)
for comp_index in range(num_comp) :
    print('the component that we are processing: ' , comp_index)
    this_comp_selected_q = responses_list[comp_index]
    this_comp_candids = each_comp_candids[comp_index]
    this_comp_candid_probs = inputs_log_probs_list[comp_index]
    for q_index in range( len(this_comp_selected_q) ):
        print('The question we are processing: ', q_index)
        selected_q = this_comp_selected_q[q_index]
        last_beams = codeS_postProcess( this_comp_candids[q_index] , q_index)
        candid_probs = this_comp_candid_probs[q_index]
        found_any = False
        for beam_index in range(len(last_beams)):
            if selected_q == last_beams[beam_index]:
                probabilities_list[comp_index].append( candid_probs[0,beam_index].item() )
                found_any = True
                break
        if found_any==False:
            probabilities_list[comp_index].append(candid_probs[0,-1].item())
            print(selected_q)
            for i in last_beams:
                print(i)
            print('---------------------------------------------')

prompt_list = []
with open('../components/codes-1b_BIRD_table_num_5_column_num_6_5-shot_0-5_max_tokens_8192_max_new_tokens_256.json' , 'r') as f:
    generated_response_file_byte = f.read()
    generated_response = json.loads(generated_response_file_byte)
    for item in generated_response['questions']:
            prompt_list.append(item['prompt'])
        
with open('BIRD_CodeS_1-5_prompt_list.pkl' , 'wb') as f:
    pkl.dump(prompt_list , f)

with open('5comp_response_list.pkl' , 'wb') as f:
    pkl.dump(responses_list , f)

with open('5comp_probabilities_list.pkl' , 'wb') as f:
    pkl.dump(probabilities_list , f)


In [3]:
#BLEU score Sequence level ensemble that is ready to have the codeS ensemble after on the sorted candidates
#Sorting in the the following order: 0, 1, 2, 3, 4, 5, 6, ...
#input: the input_ids and starting_batch_input_len of the word-level generated components
#output: a list of strings of size 20*dataset_size ordered by the scoring method
from transformers import AutoTokenizer
import SQLtree_based_similarity as sql_scoring
import pickle as pkl
import torch
from nltk.tokenize import word_tokenize
from nltk.translate.bleu_score import sentence_bleu

output_file = 'output_CodeS_beam5_lenpen05_BLEU_pureSeqlevelTreebased_OneSQLQwen-32b.pkl'
# dir_list = [ '../bird_0_5_lenpen05' , '../bird_5_10_lenpen05' , '../bird_10_15_lenpen05' , '../bird_15_20_lenpen05' , '../bird_20_25_lenpen05' ]
dir_list = [ '../output_codeS_beam5_bird_lenpen05_0-5shot_OneSQLQwen-32b' , '../output_codeS_beam5_bird_lenpen05_5-10shot_OneSQLQwen-32b' , '../output_codeS_beam5_bird_lenpen05_10-15shot_OneSQLQwen-32b' , '../output_codeS_beam5_bird_lenpen05_15-20shot_OneSQLQwen-32b' , '../output_codeS_beam5_bird_lenpen05_20-25shot_OneSQLQwen-32b' ]

# tokenizer = AutoTokenizer.from_pretrained('seeklhy/codes-1b')
tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen2.5-Coder-7B-Instruct')
# tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-Int8')

input_ids_corss_component_list = []
starting_batch_input_len_cross_comp_lst = []
final_sorted_candidates = []
for the_dir in dir_list:
    # with open( the_dir + '/output_sequences_test_lenpen05_input_ids.pkl' , 'rb' ) as f:
    # with open( the_dir + '_input_ids.pkl' , 'rb' ) as f:
    with open( the_dir + '.pkl' , 'rb' ) as f:
        input_ids_corss_component_list.append( pkl.load(f) )
        
#     # with open( the_dir + '/output_sequences_test_lenpen05_starting_batch_input_len.pkl' , 'rb' ) as f:
#     with open( the_dir + '_starting_batch_input_len.pkl' , 'rb' ) as f:
#         starting_batch_input_len_cross_comp_lst.append( pkl.load(f) )
for index in range( 0, len(input_ids_corss_component_list[0]), 5 ):
    print('progress: ', index/5)
    candidate_list = []
    for component in range( len(input_ids_corss_component_list) ):
#         start = starting_batch_input_len_cross_comp_lst[component][index]
#         the_tensor = input_ids_corss_component_list[component][index][start:, :].transpose(0 , 1)
#         candidate_list.extend( tokenizer.batch_decode( the_tensor , skip_special_tokens=True ) )
        candidate_list.extend(input_ids_corss_component_list[component][index:index+5])
    
    tokenized_responses = []
    for text in candidate_list: #number of candidates we have for each question
        tokenized_responses.append( word_tokenize( text.replace('.' , ' ') ) )

    blue_scores = []
    #Calculating the bleu metrics for each candidate
    for j in range( len( tokenized_responses ) ): #[tok_component1_beam1, tok_component2_beam1, tok_component3_beam1, ..., tok_component1_beam2, tok_component2_beam2 , ...]
        temp_tokenized_responses = tokenized_responses.copy()
        tokenized_response = temp_tokenized_responses.pop(j)
        # Case 1
        blue_score = sentence_bleu( temp_tokenized_responses , tokenized_response )
        # Case 2
        # blue_score = 0
        # for i in temp_tokenized_responses:
        #     blue_score += sentence_bleu( [i] , tokenized_response )
        
        blue_scores.append(blue_score)
        
    sorted_queries = []
    for candid in range( len(candidate_list) ):
        max_bleu_score_value = max( blue_scores )
        max_index_bleu_score = blue_scores.index(max_bleu_score_value)
        blue_scores[ max_index_bleu_score ] = -100
        sorted_queries.append( candidate_list[ max_index_bleu_score ] )
        
    
    final_sorted_candidates.extend(sorted_queries)

with open(output_file , 'wb') as f:
    pkl.dump(final_sorted_candidates , f)


progress:  0.0
progress:  1.0
progress:  2.0
progress:  3.0
progress:  4.0
progress:  5.0
progress:  6.0
progress:  7.0
progress:  8.0
progress:  9.0
progress:  10.0
progress:  11.0
progress:  12.0
progress:  13.0
progress:  14.0
progress:  15.0
progress:  16.0
progress:  17.0
progress:  18.0
progress:  19.0
progress:  20.0
progress:  21.0
progress:  22.0
progress:  23.0
progress:  24.0
progress:  25.0
progress:  26.0
progress:  27.0
progress:  28.0
progress:  29.0
progress:  30.0
progress:  31.0
progress:  32.0
progress:  33.0
progress:  34.0
progress:  35.0
progress:  36.0
progress:  37.0
progress:  38.0
progress:  39.0
progress:  40.0
progress:  41.0
progress:  42.0
progress:  43.0


The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


progress:  44.0
progress:  45.0
progress:  46.0
progress:  47.0
progress:  48.0
progress:  49.0
progress:  50.0
progress:  51.0
progress:  52.0
progress:  53.0
progress:  54.0
progress:  55.0
progress:  56.0
progress:  57.0
progress:  58.0
progress:  59.0
progress:  60.0
progress:  61.0
progress:  62.0
progress:  63.0
progress:  64.0
progress:  65.0
progress:  66.0
progress:  67.0
progress:  68.0
progress:  69.0
progress:  70.0
progress:  71.0
progress:  72.0
progress:  73.0
progress:  74.0
progress:  75.0
progress:  76.0
progress:  77.0
progress:  78.0
progress:  79.0
progress:  80.0
progress:  81.0
progress:  82.0
progress:  83.0
progress:  84.0
progress:  85.0
progress:  86.0
progress:  87.0
progress:  88.0
progress:  89.0
progress:  90.0
progress:  91.0
progress:  92.0
progress:  93.0
progress:  94.0
progress:  95.0
progress:  96.0
progress:  97.0
progress:  98.0
progress:  99.0
progress:  100.0
progress:  101.0
progress:  102.0
progress:  103.0
progress:  104.0
progress:  105.0
pr

In [4]:
#Tree-base Sequence level ensemble that is ready to have the codeS ensemble after on the sorted candidates
#input: the input_ids and starting_batch_input_len of the word-level generated components
#output: a list of strings of size 20*dataset_size ordered by the scoring method
from transformers import AutoTokenizer
import SQLtree_based_similarity as sql_scoring
import pickle as pkl
import torch

from utils.post_process import process_duplication
def post_process_get_sql_from_gentext(gen_text):
    # remove \n and extra spaces
    # print(gen_text)
    sql = " ".join(gen_text.replace("\n", " ").split())
    sql = process_duplication(sql)
    # python version should >= 3.8
    if sql.startswith("SELECT"):
        sql = sql
    elif sql.startswith(" "):
        sql = "SELECT" + sql
    else:
        sql = "SELECT " + sql
    return sql
    
output_file = 'output_CodeS_beam5_lenpen05_TreeBased_pureSeqlevelTreebased_OneSQLQwen-32b.pkl'
# dir_list = [ '../bird_0_5_lenpen05' , '../bird_5_10_lenpen05' , '../bird_10_15_lenpen05' , '../bird_15_20_lenpen05' , '../bird_20_25_lenpen05' ]
dir_list = [ '../output_codeS_beam5_bird_lenpen05_0-5shot_OneSQLQwen-32b' , '../output_codeS_beam5_bird_lenpen05_5-10shot_OneSQLQwen-32b' , '../output_codeS_beam5_bird_lenpen05_10-15shot_OneSQLQwen-32b' , '../output_codeS_beam5_bird_lenpen05_15-20shot_OneSQLQwen-32b' , '../output_codeS_beam5_bird_lenpen05_20-25shot_OneSQLQwen-32b' ]

# tokenizer = AutoTokenizer.from_pretrained('seeklhy/codes-1b')
# tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen2.5-Coder-7B-Instruct')
# tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-Int8')

input_ids_corss_component_list = []
starting_batch_input_len_cross_comp_lst = []
final_sorted_candidates = []
for the_dir in dir_list:
    # with open( the_dir + '/output_sequences_test_lenpen05_input_ids.pkl' , 'rb' ) as f:
    # with open( the_dir + '_input_ids.pkl' , 'rb' ) as f:
    with open( the_dir + '.pkl' , 'rb' ) as f:
        input_ids_corss_component_list.append( pkl.load(f) )
        
    # with open( the_dir + '/output_sequences_test_lenpen05_starting_batch_input_len.pkl' , 'rb' ) as f:
    # with open( the_dir + '_starting_batch_input_len.pkl' , 'rb' ) as f:
    #     starting_batch_input_len_cross_comp_lst.append( pkl.load(f) )
for index in range( 0, len(input_ids_corss_component_list[0]), 5 ):
    print('progress: ', index)
    candidate_list = []
    for component in range( len(input_ids_corss_component_list) ):
        # start = starting_batch_input_len_cross_comp_lst[component][index]
        # the_tensor = input_ids_corss_component_list[component][index][start:, :].transpose(0 , 1)
        # candidate_list.extend( tokenizer.batch_decode( the_tensor , skip_special_tokens=True ) )
        candidate_list.extend(input_ids_corss_component_list[component][index:index+5])
    candidate_list_1=[]
    for c in candidate_list:
        candidate_list_1.append(post_process_get_sql_from_gentext(c))
    sorted_queries, _  = sql_scoring.sort_queries(candidate_list_1)
    
    final_sorted_candidates.extend(sorted_queries)
with open(output_file , 'wb') as f:
    pkl.dump(final_sorted_candidates , f)


progress:  0
progress:  5
progress:  10
progress:  15
progress:  20
progress:  25
progress:  30
progress:  35
progress:  40


Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7f3c78835670>>
Traceback (most recent call last):
  File "/home/yadegari/miniconda3/envs/yadegari_cpu/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 


progress:  45
progress:  50
progress:  55
progress:  60
progress:  65
progress:  70
progress:  75
progress:  80
progress:  85
progress:  90
progress:  95
progress:  100
progress:  105
progress:  110
progress:  115
progress:  120
progress:  125
progress:  130
progress:  135
progress:  140
progress:  145
progress:  150
progress:  155
progress:  160
progress:  165
progress:  170
progress:  175
progress:  180
progress:  185
progress:  190
progress:  195
progress:  200
progress:  205
progress:  210
progress:  215
progress:  220
progress:  225
progress:  230
progress:  235
progress:  240
progress:  245
progress:  250
progress:  255
progress:  260
progress:  265
progress:  270
progress:  275
progress:  280
progress:  285
progress:  290
progress:  295
progress:  300
progress:  305
progress:  310
progress:  315
progress:  320
progress:  325
progress:  330
progress:  335
progress:  340
progress:  345
progress:  350
progress:  355
progress:  360
progress:  365
progress:  370
progress:  375
progre