In [494]:
import sympy as sp
import pandas as pd
import numpy as np

from mlscorecheck.scores import score_specifications

In [495]:
# Collect the score specifications into one table.
# Presumably `score_specifications` maps score names to per-score dicts, so each
# value becomes one row and its keys become the columns -- confirm against mlscorecheck.
df = pd.DataFrame(score_specifications.values())

In [496]:
# Inspect which specification fields are available as columns.
df.columns

Index(['name', 'abbreviation', 'lower_bound', 'upper_bound', 'complement',
       'args', 'formula', 'args_standardized', 'formula_standardized',
       'polynomial_equation', 'higher_better', 'description', 'citation',
       'synonyms', 'nans', 'nans_standardized', 'dependency_breaks',
       'args_short', 'formula_short', 'nans_short', 'sqrt',
       'relationship_breaks'],
      dtype='object')

In [497]:
# Relative widths of the table columns, in the order
# name, abbreviation, standardized form, original definition, description.
_relative_widths = (10, 4, 25, 17, 27)

# The denominator is inflated by 5%, so the five fractions below add up to
# 1 / 1.05 rather than 1 (presumably slack for inter-column spacing).
total = sum(_relative_widths) * 1.05

# Each width becomes a fraction of `total`; later cells prepend these numbers
# to '\textwidth'.
name_width, abbr_width, form_width, short_width, descr_width = (
    width / total for width in _relative_widths
)


In [498]:
# Scores that are left out of the table, identified by abbreviation.
excluded_abbreviations = ['err', 'fnr', 'fpr', 'fdr', 'for_', 'f1p', 'f1n']
keep_row = ~df['abbreviation'].isin(excluded_abbreviations)

# drop=False keeps the previous index as an extra 'index' column.
# NOTE(review): re-running this cell on the already filtered frame would
# presumably fail because that 'index' column already exists -- confirm.
df = df[keep_row].reset_index(drop=False)

In [499]:
# Identifier as it appears in a formula string -> label of the sympy symbol
# that stands in for it when the formula is evaluated.
_symbol_labels = {
    'p': 'p',
    'n': 'n',
    'tp': 'tp',
    'tn': 'tn',
    'beta_positive': 'beta_+',
    'beta_negative': 'beta_-',
    'f1n': 'f^1_-',
    'f1p': 'f^1_+',
    'ppv': 'ppv',
    'sens': 'sens',
    'spec': 'spec',
    'npv': 'npv',
    'mk': 'mk',
    'bm': 'bm',
    'lrp': 'lr_+',
    'lrn': 'lr_-',
}

# Namespace handed to eval() by formula_to_latex.
symbols = {
    identifier: sp.Symbol(label)
    for identifier, label in _symbol_labels.items()
}
# 'sqrt' is the one entry that is a function rather than a symbol.
symbols['sqrt'] = sp.sqrt

In [500]:
def formula_to_latex(formula):
    """Render a formula string as inline LaTeX math.

    The string is evaluated with the module-level ``symbols`` mapping as its
    global namespace, and the result is passed to ``sp.latex`` and wrapped
    in dollar signs.

    Args:
        formula: the formula as a string; any other value (``None``, NaN, ...)
            is treated as "no formula".

    Returns:
        str: the dollar-delimited LaTeX code, or ``''`` when ``formula`` is
        not a string.
    """
    # None is not a str either, so a single isinstance check covers both cases.
    if not isinstance(formula, str):
        return ''

    # eval() uses `symbols` as the globals dict (and, per the Python docs,
    # inserts a '__builtins__' entry into it on first use).
    expression = eval(formula, symbols)
    return '$' + sp.latex(expression) + '$'

In [501]:
# Render both formula variants as LaTeX: (new column, source column).
for latex_column, formula_column in (
    ('short_latex', 'formula_short'),
    ('formula_latex', 'formula_standardized'),
):
    df[latex_column] = df[formula_column].apply(formula_to_latex)

In [502]:
# Display names that cannot be obtained by simply turning underscores into
# spaces. Raw strings keep the LaTeX backslashes literal; the original
# '$\Delta p$' relied on the invalid escape '\D', which newer Python versions
# warn about.
_DISPLAY_NAMES = {
    'fowlkes_mallows_index': 'Fowlkes-Mallows index',
    'jaccard_index': 'Jaccard index',
    'cohens_kappa': "Cohen's kappa",
    'f_beta_positive': r'$f^{\beta}_+$',
    'f_beta_negative': r'$f^{\beta}_-$',
    'matthews_correlation_coefficient': 'Matthews correlation coefficient',
    'delta_p': r'$\Delta p$',
    'err': 'error rate',
    'fnr': 'false negative rate',
    'fpr': 'false positive rate',
    'fdr': 'false discovery rate',
    'for_': 'false omission rate',
}


def rename(name):
    """Translate an internal score identifier into its printed name.

    Args:
        name (str): identifier such as ``'cohens_kappa'`` or ``'fnr'``.

    Returns:
        str: the hand-written display name when one is registered in
        ``_DISPLAY_NAMES``; otherwise ``name`` with every underscore replaced
        by a space.
    """
    if name in _DISPLAY_NAMES:
        return _DISPLAY_NAMES[name]
    return name.replace('_', ' ')

def break_name(name):
    """Turn every single space in ``name`` into a LaTeX line break.

    Args:
        name (str): the text to break up.

    Returns:
        str: ``name`` with each space replaced by a space-padded double
        backslash.
    """
    line_break = ' \\\\ '
    return name.replace(' ', line_break)

In [503]:
def name_column(row):
    """Build the extended name entry: the cited name followed by its synonyms.

    Args:
        row: mapping-like record providing ``'name'`` (str), ``'citation'``
            (str) and ``'synonyms'`` (a list of str; any non-list value is
            treated as "no synonyms").

    Returns:
        str: ``'{<name>} \\cite{<citation>}'`` followed by the synonyms,
        comma separated, with underscores in the name and the synonyms
        replaced by spaces.
    """
    synonyms = row['synonyms'] if isinstance(row['synonyms'], list) else []

    # Only the human-readable parts get their underscores replaced; the
    # citation key is copied verbatim so that \cite still resolves.
    primary = '{' + row['name'].replace('_', ' ') + '}' + ' \\cite{' + row['citation'] + '}'
    readable_synonyms = [synonym.replace('_', ' ') for synonym in synonyms]

    return ', '.join([primary] + readable_synonyms)

In [504]:
def name_citation(row):
    """Build the table cell holding the display name and its citation.

    The name is passed through ``rename`` and ``break_name`` (one word per
    line), followed by the cite command, and wrapped in a parbox whose width
    is ``name_width`` times the text width.

    Args:
        row: mapping-like record providing ``'name'`` and ``'citation'``
            (both str).

    Returns:
        str: the LaTeX code of the cell.
    """
    # NOTE: `name_width` must still hold the numeric fraction here; a later
    # cell rebinds it to a column-specification string.
    label = break_name(rename(row['name']))
    # '\\cite' is spelled with an explicit double backslash: '\c' is not a
    # valid escape sequence and triggers a warning on newer Python versions.
    final = label + ' \\cite{' + row['citation'] + '}'
    return '\\parbox{' + str(name_width) + '\\textwidth}{' + final + '}'

In [505]:
def also_known_as(row):
    """Build the description cell, extended with synonyms and the complement.

    Args:
        row: mapping-like record providing ``'description'`` (str),
            ``'synonyms'`` (a list of str; any non-list value is treated as
            "no synonyms") and ``'complement'`` (str; any non-str value is
            treated as "no complement").

    Returns:
        str: the description, followed by "Also known as: ..." and
        "Complement: ..." sentences where applicable, wrapped in a parbox
        whose width is ``descr_width`` times the text width.
    """
    description = row['description']

    raw_synonyms = row['synonyms'] if isinstance(row['synonyms'], list) else []
    # rename() already replaces underscores in ordinary names. A further
    # blanket replacement would only damage the LaTeX subscripts it returns
    # for f_beta_positive / f_beta_negative, so none is applied here.
    synonyms = [rename(synonym) for synonym in raw_synonyms]

    if synonyms:
        description = description + ' Also known as: ' + ', '.join(synonyms) + '.'

    if isinstance(row['complement'], str):
        description = description + ' Complement: ' + rename(row['complement']) + '.'

    # Typeset plain-text references to the f1 scores as math.
    for plain, typeset in (
        ('f_1', '$f_1$'),
        ('f1_negative', '$f^1_-$'),
        ('f1_positive', '$f^1_+$'),
    ):
        description = description.replace(plain, typeset)

    # NOTE: `descr_width` must still hold the numeric fraction here; a later
    # cell rebinds it to a column-specification string.
    return '\\parbox{' + str(descr_width) + '\\textwidth}{' + description + '}'

In [506]:
# Derive the presentation columns row by row: (new column, row formatter).
for column, formatter in (
    ('name_extended', name_column),
    ('name_citation', name_citation),
    ('description_aka', also_known_as),
):
    df[column] = df.apply(formatter, axis=1)

In [507]:
# Source column -> header printed in the table, in display order.
_printed_headers = {
    'name_citation': 'name',
    'abbreviation': 'abbr.',
    'formula_latex': 'standardized form',
    'short_latex': 'original definition',
    'description_aka': 'description',
}

# Keep only the five presentation columns and give them their printed headers.
tmp = df[list(_printed_headers)].rename(columns=_printed_headers)

In [508]:
# Gap inserted between adjacent columns. A raw string is used because '\h'
# is not a valid escape sequence and triggers a warning on newer Python versions.
column_gap = r'@{\hspace{2pt}}'

# NOTE: from here on the *_width names hold LaTeX column specifications
# instead of numeric fractions. This cell must therefore run exactly once per
# kernel session, and only after the cells that build the parbox cells.
name_width = 'p{' + str(name_width) + '\\textwidth}' + column_gap
abbr_width = 'p{' + str(abbr_width) + '\\textwidth}' + column_gap
form_width = 'p{' + str(form_width) + '\\textwidth}' + column_gap
short_width = 'p{' + str(short_width) + '\\textwidth}' + column_gap
# The last column is not followed by a gap.
descr_width = 'p{' + str(descr_width) + '\\textwidth}'

# Full column specification of the tabular environment.
cols = name_width + abbr_width + form_width + short_width + descr_width

In [509]:
# Products that sympy prints as juxtaposed factors, paired with the explicit
# \cdot form. The order matters: longer / more specific patterns come before
# the shorter ones that overlap with them. Raw strings keep the backslashes
# literal ('\c', '\d' and '\s' are not valid escape sequences).
_explicit_products = [
    ('tp tn', r'tp \cdot tn'),
    ('tn tp', r'tn \cdot tp'),
    ('n tp', r'n \cdot tp'),
    ('n tn', r'n \cdot tn'),
    ('p tn', r'p \cdot tn'),
    ('p tp', r'p \cdot tp'),
    ('bm mk', r'bm \cdot mk'),
    ('sens spec', r'sens \cdot spec'),
    ('ppv sens', r'ppv \cdot sens'),
    ('f^{1}_{+} f^{1}_{-}', r'f^{1}_{+} \cdot f^{1}_{-}'),
]

# Pass the column specification directly instead of patching the default
# 'lllll' afterwards, then switch every fraction to display style.
latex = tmp.to_latex(index=False, column_format=cols).replace('frac', 'dfrac')

for juxtaposed, explicit in _explicit_products:
    latex = latex.replace(juxtaposed, explicit)

# Merge the quotient of separate square roots into a single root.
latex = latex.replace(
    r'\dfrac{\sqrt{tn} \sqrt{tp}}{\sqrt{n} \sqrt{p}}',
    r'\sqrt{\dfrac{tn \cdot tp}{np}}',
)

In [510]:
# print() instead of a bare expression: the repr would show escaped
# backslashes and '\n', while print emits the text exactly as it goes into a .tex file.
print(latex)

\begin{tabular}{p{0.11474469305794606\textwidth}@{\hspace{2pt}}p{0.045897877223178424\textwidth}@{\hspace{2pt}}p{0.28686173264486514\textwidth}@{\hspace{2pt}}p{0.1950659781985083\textwidth}@{\hspace{2pt}}p{0.3098106712564544\textwidth}}
\toprule
name & abbr. & standardized form & original definition & description \\
\midrule
\parbox{0.11474469305794606\textwidth}{accuracy \cite{scores}} & acc & $\dfrac{tn + tp}{n + p}$ &  & \parbox{0.3098106712564544\textwidth}{The proportion of correctly classified items. Complement: error rate.} \\
\parbox{0.11474469305794606\textwidth}{sensitivity \cite{scores}} & sens & $\dfrac{tp}{p}$ &  & \parbox{0.3098106712564544\textwidth}{The proportion of correctly classified positive items. Also known as: recall, true positive rate. Complement: false negative rate.} \\
\parbox{0.11474469305794606\textwidth}{specificity \cite{scores}} & spec & $\dfrac{tn}{n}$ &  & \parbox{0.3098106712564544\textwidth}{The proportion of correctly classified negative items. Al