In [1]:
from pathlib import Path
from itertools import product
from collections import Counter, defaultdict
from scipy.spatial.distance import squareform
from sklearn.metrics import precision_recall_curve, roc_auc_score, roc_curve

import re
import os
import sys
import graco
import numpy as np
import pandas as pd
import networkx as nx
import sklearn.metrics

In [2]:
def get_AUCs(df):
    """
    Compute precision-recall and ROC AUC for a pairwise distance matrix.

    Every unordered pair of rows is treated as one sample. A pair is labelled
    positive when both members share the same first-level index label, and it
    is scored with ``1 - distance`` so that closer pairs rank higher.

    Parameters
    ----------
    df : pandas.DataFrame
        Square distance matrix. It must pass SciPy's distance-matrix
        validation (symmetric, zero diagonal). The first level of its index
        and of its columns is used as the group label.
        NOTE(review): assumes index and columns carry the same labels in the
        same order -- confirm against the CSV layout.

    Returns
    -------
    tuple of float
        ``(AUC, ROC_AUC)``: area under the precision-recall curve and area
        under the ROC curve.

    Raises
    ------
    ValueError
        Propagated from ``squareform`` when ``df`` is not a valid distance
        matrix, or from scikit-learn when it rejects the labels/scores.
    """
    # squareform validates the matrix and flattens it to one entry per
    # unordered pair; a smaller distance becomes a larger score.
    y_scores = 1 - squareform(df)

    y_true = pd.DataFrame(np.zeros(df.shape, dtype=int),
                          columns=df.columns,
                          index=df.index)

    # Mark every (row, column) block whose first-level labels coincide.
    for mod in df.index.get_level_values(0).unique():
        y_true.loc[mod, mod] = 1

    # The diagonal was set to 1 as well; checks=False makes squareform skip
    # validation and simply ignore it.
    y_true = squareform(y_true, checks=False)

    # Scores are passed positionally: the keyword for this argument was
    # renamed from `probas_pred` to `y_score` in newer scikit-learn releases,
    # and the positional form works with both.
    prec, reca, _ = precision_recall_curve(y_true, y_scores)
    AUC = sklearn.metrics.auc(reca, prec)

    ROC_AUC = roc_auc_score(y_true=y_true, y_score=y_scores)

    return AUC, ROC_AUC

In [3]:
# -----------------------------------------------------------------------------
#  Configuration
# -----------------------------------------------------------------------------

# Network identifiers; used as the leading part of the distance-file names and
# as column labels of the per-feature score tables.
NETWORK_NAMES = [
    'synt',
    'real',
]

# Feature identifiers; each one selects the '<network>_<feature>_*' files.
FEATURES = [
    'GCM',
    'avg_D',
    'avg_A',
    'avg_G',
    'avg_O',
    'avg_DA',
    'avg_DG',
    'avg_DO',
    'avg_O3',
    'avg_DAGO',
    'avg_3',
    'avg_clu0',
    'avg_clus',
    'avg_com0',
    'avg_comm',
    'avg_Del0',
    'avg_Delt',
    'emd_clu0',
    'emd_clus',
    'emd_com0',
    'emd_comm',
    'emd_Del0',
    'emd_Delt',
]

# Short names of distance metrics.
# Currently disabled: 'braycurtis', 'sqeuclidean', 'cosine'.
METRICS = [
    'city',
    'eucl',
    'cheb',
    'canb',
    'corr',
    'hell',
    'jsdi',
]

# Curve types for which an area-under-curve score is reported.
AUCS = [
    'PR',
    'ROC',
]

In [4]:
def id_metric_cells(html, metric, id_name):
    """
    Tag every table cell holding `metric`, and the cell right after it, with an id.

    Each occurrence of ``<td>metric</td>`` that is followed (after optional
    whitespace) by another plain ``<td>...</td>`` cell has both opening tags
    rewritten to ``<td id=id_name>``. Cells that already carry an attribute
    are left untouched, so successive calls with different metrics do not
    overwrite each other.

    Note that the same id is deliberately applied to several cells; the
    stylesheet of the generated report selects them with ``#id_name``.

    Parameters
    ----------
    html : str
        HTML markup to process.
    metric : str
        Exact cell text to look for. It is matched literally; regex
        metacharacters have no special meaning.
    id_name : str
        Value of the ``id`` attribute to insert.

    Returns
    -------
    str
        The markup with the matching cells tagged.
    """
    # re.escape keeps names such as 'a.b' from matching 'axb'; the non-greedy
    # '.*?' stops at the end of the very next cell even when several cells
    # share one line.
    pattern = re.compile(rf'<td>{re.escape(metric)}</td>\s*<td>\s*.*?</td>')

    def tag_cells(match):
        return match.group(0).replace('<td>', f'<td id={id_name}>')

    # A callable replacement avoids backslash/group interpretation of id_name.
    return pattern.sub(tag_cells, html)

In [5]:
def prepare_dataframe(df):
    """
    Rebuild `df` as a clean square distance matrix with the same labels.

    The matrix is first flattened to SciPy's condensed form without any
    validation (the diagonal is ignored), then expanded back to a square
    matrix, which is symmetric with a zero diagonal by construction.

    Parameters
    ----------
    df : pandas.DataFrame
        Square matrix of pairwise distances.

    Returns
    -------
    pandas.DataFrame
        New frame holding the rebuilt matrix, with `df`'s index and columns.
    """
    condensed = squareform(df, checks=False)
    square = squareform(condensed)
    return pd.DataFrame(square, index=df.index, columns=df.columns)

In [6]:
# Directory holding the distance-matrix files named '<network>_<feature>_<metric>'.
# NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
path = Path("/home/clusterduck123/Desktop/transfere/network_distances/")
final_html = ''

# Number of best-scoring metrics listed per network and curve.
N_TOP_METRICS = 15

for feature in FEATURES:
    # auc_scores[curve][network][metric] -> AUC value. Scores are collected in
    # plain dicts and converted to DataFrames once, so every network column
    # exists (possibly empty) even when no file was found for it.
    auc_scores = {auc: {network: {} for network in NETWORK_NAMES} for auc in AUCS}

    for network_name in NETWORK_NAMES:
        prefix = f'{network_name}_{feature}_'
        # Sorted so the report does not depend on filesystem listing order.
        files = sorted(path.glob(f'{prefix}*'))
        if not files:
            print(f'no distance files matching {prefix}* in {path}', file=sys.stderr)

        for file in files:
            # Everything after the known prefix is the metric name; splitting
            # on '_' would truncate names such as 'GDV_similarity'.
            metric = file.stem[len(prefix):]
            df = pd.read_csv(file, header = [0,1], index_col=[0,1])
            df = prepare_dataframe(df)
            try:
                auc_pr, auc_roc = get_AUCs(df)
            except ValueError as error:
                # Best effort: keep the metric in the table with a sentinel
                # score that sorts last, but say why it failed.
                print(f'{file.name}: AUC computation failed ({error}); using -1',
                      file=sys.stderr)
                auc_pr, auc_roc = -1, -1
            auc_scores['PR' ][network_name][metric] = auc_pr
            auc_scores['ROC'][network_name][metric] = auc_roc

    # One table per curve: rows are metrics, columns are networks.
    GDV_dfs = {}
    for auc in AUCS:
        GDV_dfs[auc] = pd.DataFrame(
            {network: pd.Series(auc_scores[auc][network], dtype=float)
             for network in NETWORK_NAMES},
            columns=NETWORK_NAMES,
        )

    # For every network, put the top metrics of each curve side by side.
    network_df_list = []
    for network in NETWORK_NAMES:
        network_df = pd.concat([GDV_dfs[auc].nlargest(N_TOP_METRICS, network)[[network]].reset_index() for auc in AUCS], axis=1)
        network_df.columns = pd.MultiIndex.from_product([(network,), AUCS, ('metric','AUC')])
        network_df.columns.names = ['network', 'curve', ' ']
        network_df_list.append(network_df)

    feature_df = pd.concat(network_df_list, axis=1)
    # Rank index 1..n, sized from the combined table itself.
    feature_df.index = pd.MultiIndex.from_product([(feature,), range(1,len(feature_df)+1)])

    # Metric names to highlight in the rendered table.
    # NOTE(review): METRICS lists 'canb' while two of these lists use
    # 'canberra' -- confirm which spelling the file names actually use.
    if feature == 'GCM':
        top_metrics = ['GCD11', 'canberra', 'GDV_similarity']
    elif feature.startswith('emd'):
        top_metrics = ['eucl', 'canberra', 'GDV_similarity']
    else:
        top_metrics = ['jsdi', 'hell', 'canb']

    html = feature_df.to_html()

    for nr, top_metric in enumerate(top_metrics,1):
        html = id_metric_cells(html, top_metric, f'metric{nr}')

    final_html += html + '\n\n<hr>\n\n'

In [7]:
# Opening part of the report: doctype, root element and the stylesheet that
# colours the cells tagged with the ids metric1..metric3.
header = (
    "\n"
    "<!DOCTYPE html>\n"
    "<html>\n"
    "<style>\n"
    "#metric1 {background-color:#FFDDC0}\n"
    "#metric2 {background-color:#B8F4B8}\n"
    "#metric3 {background-color:#B0E9E9}\n"
    "</style>\n"
    "\n"
)

# Closing part of the report.
footer = "</html>"

In [8]:
# Destination of the assembled HTML report.
output_path = '/home/clusterduck123/Desktop/test.html'

# Wrap the per-feature tables in the page header and footer and write the
# result out, replacing any existing file.
with open(output_path, 'w') as report_file:
    report_file.write(''.join((header, final_html, footer)))