In [None]:
import sys
sys.path.append('../')
import pandas as pd
from operator import add
from functools import reduce
from sklearn.metrics import classification_report
import os
from collections import Counter
import gzip
import shutil
from tqdm import tqdm

In [None]:
from pandarallel import pandarallel
# Initialise pandarallel once, with its progress bar disabled; later cells
# call `.parallel_apply` on pandas objects.
pandarallel.initialize(progress_bar=False)


In [None]:
import re 
def clean(text):
    """Strip list punctuation from ``text`` and split it on whitespace.

    Removes every double quote, single quote, comma and square bracket, then
    splits what is left on runs of whitespace.
    (Presumably ``text`` is a stringified Python list of tags such as
    ``"['B-PER', 'O']"`` -- confirm against the CSV files being read.)

    Args:
        text: The string to clean.

    Returns:
        A list of the whitespace-separated tokens that remain.
    """
    # Raw string + one character class: avoids the invalid "\[" / "\]" escape
    # sequences of a plain string literal and the separate quote-removal pass.
    return re.sub(r"[\"',\[\]]", "", text).split()

In [None]:
def model_ranking(model_scores):
    """Convert a ``{model: score}`` mapping into ``{model: rank}``.

    Ranks are 0-based and assigned in ascending score order, so the model with
    the lowest score gets rank 0 and the one with the highest score gets
    ``len(model_scores) - 1``. Models with equal scores keep their insertion
    order (the sort is stable).
    """
    ascending = sorted(model_scores, key=model_scores.get)
    ranking = {}
    for position, name in enumerate(ascending):
        ranking[name] = position
    return ranking

In [None]:
def ensemble_majority_rank(model_tags_map, scores):
    """Pick a tag by rank-weighted voting.

    Each model votes for the tag it predicted, with a weight equal to its rank
    from ``model_ranking(scores)``. Ranks start at 0 for the lowest-scoring
    model, so that model contributes no weight.

    Args:
        model_tags_map: Mapping of model name -> predicted tag.
        scores: Mapping of model name -> score; must contain every model in
            ``model_tags_map``.

    Returns:
        The tag with the largest positive vote total (the first one seen wins
        ties), or ``'O'`` when no tag has a total greater than zero.

    Raises:
        KeyError: If a model in ``model_tags_map`` is missing from ``scores``.
    """
    ranks = model_ranking(scores)

    votes = {}
    for model, tag in model_tags_map.items():
        # Explicit default instead of a bare ``except:`` around ``+=``.
        votes[tag] = votes.get(tag, 0) + ranks[model]

    winner, best_total = 'O', 0
    for tag, total in votes.items():
        if total > best_total:
            winner, best_total = tag, total

    return winner

In [None]:
def ensemble_majority_weighted(model_tags_map, scores):
    """Pick a tag by score-weighted voting.

    Each model votes for the tag it predicted, with a weight equal to its
    entry in ``scores``.

    Args:
        model_tags_map: Mapping of model name -> predicted tag.
        scores: Mapping of model name -> numeric weight; must contain every
            model in ``model_tags_map``.

    Returns:
        The tag with the largest positive vote total (the first one seen wins
        ties), or ``'O'`` when no tag has a total greater than zero.

    Raises:
        KeyError: If a model in ``model_tags_map`` is missing from ``scores``.
    """
    votes = {}
    for model, tag in model_tags_map.items():
        # Explicit default instead of a bare ``except:`` around ``+=``.
        votes[tag] = votes.get(tag, 0) + scores[model]

    winner, best_total = 'O', 0
    for tag, total in votes.items():
        if total > best_total:
            winner, best_total = tag, total

    return winner

In [None]:
def ensemble_majority(x, scores):
    """Pick the most frequently predicted tag (unweighted majority vote).

    Args:
        x: Mapping of model name -> predicted tag.
        scores: Unused; accepted so all ensemble functions share a signature.

    Returns:
        The tag predicted by the most models (the first one seen wins ties),
        or ``'O'`` when ``x`` is empty.
    """
    tally = Counter(x.values())
    if not tally:
        return 'O'
    # ``max`` returns the first maximal key, matching a strict ``>`` scan.
    return max(tally, key=tally.get)

In [None]:
def combine(x, func, scores):
    """Apply an ensemble function position by position across models.

    Args:
        x: Object with a ``to_dict()`` method returning
            ``{model: sequence_of_tags}``; the caller passes a DataFrame row.
        func: Callable ``func(preds, scores)`` where ``preds`` maps each model
            to its tag at one position.
        scores: Passed through unchanged to ``func``.

    Returns:
        A list with one ``func`` result per position. The number of positions
        is taken from the first model's sequence.
    """
    per_model = x.to_dict()
    n_positions = len(list(per_model.values())[0])
    return [
        func({name: tags[pos] for name, tags in per_model.items()}, scores)
        for pos in range(n_positions)
    ]

In [None]:
def merge_df(lang, eval_set):
    """Load every model's predictions for one language and evaluation split.

    Reads all ``.csv`` files in ``./Test-Dev results/{lang}/{eval_set}``. Each
    file's ``predictions`` column is whitespace-split into a list and becomes
    one column of the result, named after the file with ``'outputs-'`` and
    ``'.csv'`` removed.

    Args:
        lang: Language sub-directory name.
        eval_set: Split sub-directory name. When it equals ``'dev'`` the
            ``true`` column of the first file is also read.

    Returns:
        A tuple ``(merged, true, cols)``:
        ``merged`` is a DataFrame with one column of tag lists per model;
        ``true`` is the flat list of gold tags over all rows for ``'dev'``,
        otherwise ``None``; ``cols`` is the list of column (model) names.

    Raises:
        FileNotFoundError: If the directory contains no ``.csv`` files.
    """
    results_dir = f'./Test-Dev results/{lang}/{eval_set}'
    # os.listdir returns entries in arbitrary order; sorting keeps the column
    # order (and therefore tie-breaking in the ensembles) reproducible.
    files = sorted(f for f in os.listdir(results_dir) if f.endswith('.csv'))
    if not files:
        raise FileNotFoundError(f'No .csv prediction files found in {results_dir!r}')
    dfs = [pd.read_csv(os.path.join(results_dir, f)) for f in files]

    merged = pd.DataFrame()
    for i, model_df in enumerate(dfs):
        col = f'preds{i}'
        merged[col] = model_df.predictions
        merged[col] = merged[col].parallel_apply(lambda x: x.split())

    if eval_set == 'dev':
        merged['true'] = dfs[0].true
        merged['true'] = merged['true'].parallel_apply(lambda x: clean(x))
        # Flatten in a single pass; reduce(add, ...) re-copies the growing
        # list on every step.
        true = [tag for sentence in merged['true'] for tag in sentence]
        merged = merged.drop(columns=['true'])
    else:
        true = None

    cols = [f.replace('outputs-', '').replace('.csv', '') for f in files]
    merged.columns = cols

    return merged, true, cols

In [None]:
def best_f1(df, true, cols):
    """Compute the macro-averaged F1 of each model column against the gold tags.

    Args:
        df: DataFrame whose ``cols`` columns each hold one list of predicted
            tags per row.
        true: Flat list of gold tags over all rows.
        cols: Names of the model columns to score.

    Returns:
        Dict mapping each column name to its ``'macro avg'`` ``'f1-score'``
        from ``classification_report``.
    """
    scores = {}
    for col in cols:
        # Flatten the per-row tag lists in one pass instead of reduce(add, ...),
        # which re-copies the growing list on every step.
        flat_preds = [tag for sentence in df[col] for tag in sentence]
        # Arguments are in the documented (y_true, y_pred) order; the earlier
        # call had them swapped.
        # NOTE(review): F1 is symmetric in precision/recall, so the macro-F1
        # read here should be unchanged by the swap -- confirm.
        report = classification_report(true, flat_preds, output_dict=True, zero_division=1)
        scores[col] = report['macro avg']['f1-score']

    return scores

In [None]:
def ensemble(dataframe, true, func, scores, eval_set='test'):
    """Run one ensemble strategy over every row and optionally score it.

    Args:
        dataframe: Object whose ``parallel_apply(fn, axis=1)`` applies ``fn``
            to each row; rows are handed to ``combine``.
        true: Flat list of gold tags; only read when ``eval_set == 'dev'``.
        func: Ensemble function passed to ``combine`` (called as
            ``func(preds, scores)``).
        scores: Per-model scores forwarded to ``func``.
        eval_set: When ``'dev'``, the macro-averaged F1 is computed.

    Returns:
        A tuple ``(ens, f1)``: ``ens`` is the per-row result of
        ``parallel_apply``; ``f1`` is the ``'macro avg'`` ``'f1-score'`` for
        ``'dev'``, otherwise ``None``.
    """
    ens = dataframe.parallel_apply(lambda row: combine(row, func, scores), axis=1)

    if eval_set == 'dev':
        # Flatten in one pass; reduce(add, ...) re-copies the growing list on
        # every step.
        preds = [tag for sentence in ens for tag in sentence]
        # Arguments are in the documented (y_true, y_pred) order; the earlier
        # call had them swapped.
        # NOTE(review): F1 is symmetric in precision/recall, so the macro-F1
        # read here should be unchanged by the swap -- confirm.
        report = classification_report(true, preds, output_dict=True, zero_division=1)
        f1 = report['macro avg']['f1-score']
    else:
        f1 = None

    return ens, f1

In [None]:
def get_dev_scores(lang):
    """Score every model on the ``'dev'`` split of ``lang``.

    Returns:
        A tuple ``(scores, model, best)``: ``scores`` is a dict of
        ``{model: f1}`` ordered from highest to lowest F1; ``model`` is the
        name of the top-scoring model; ``best`` is that model's F1 already
        multiplied by 100.
    """
    dev_df, dev_true, model_names = merge_df(lang, 'dev')

    # most_common() with no argument sorts all items by value, descending.
    ranked = Counter(best_f1(dev_df, dev_true, model_names)).most_common()

    model, top_f1 = ranked[0]

    return dict(ranked), model, top_f1 * 100

In [None]:
# Language to process; merge_df uses it as a sub-directory name under
# './Test-Dev results/'.
lang = 'en'
# Evaluation split to ensemble; ensemble() only computes an F1 when this is
# 'dev' and returns None for the score otherwise.
eval_set = 'test'
print(lang)


In [None]:
# Per-model dev-set F1 scores are the voting weights for the weighted and
# rank-based ensembles.
scores, model, best = get_dev_scores(lang)

df, true, dfs = merge_df(lang, eval_set)

majority, ef1 = ensemble(df, true, ensemble_majority, scores, eval_set)

weighted, ef2 = ensemble(df, true, ensemble_majority_weighted, scores, eval_set)

rank, ef3 = ensemble(df, true, ensemble_majority_rank, scores,  eval_set)


def _as_percent(f1):
    """Format a 0-1 F1 value as a percentage, or 'n/a' when no F1 was computed."""
    return 'n/a' if f1 is None else f'{f1*100:.2f}'


# `best` is already a percentage (get_dev_scores multiplies it by 100), so it
# is not scaled again. The ensemble F1 values are None unless eval_set is
# 'dev', hence the None-safe formatter.
print(f'{lang}: best_f1 = {best:.2f} majority: {_as_percent(ef1)} weighted: {_as_percent(ef2)} rank: {_as_percent(ef3)}')


