In [24]:
import json
import Levenshtein
from typing import List, Dict, Tuple
PER = ['Nom-','Prenom-','Age-']
LOC = ['Ville-','Pays-','-rue-','Departement-']
DATE = ['Jour-','Mois-','Annee-','Minute-','Heure-']
JOB = ['Profession-']

MERE_MARIEE = ['mere-mariee']
PERE_MARIEE = ['pere-mariee']
EX_EPOUX = ['ex-epoux','ex-epouse']
PERE_MARI = ['pere-mari']
MERE_MARI = ['mere-mari']
ADMIN = ['mariage','maire']
MARIEE = ['mariee']
MARI = ['mari']
TEMOIN = ['temoin']



def f1_score (labels : dict, golden : dict, levenshtein_treshold : float) -> dict:
    """
    Compute the F1 score for each entity type
    :param labels: dict of labels
    :param golden: dict of golden labels
    :param levenshtein_treshold: treshold of the max ratio between the levenshtein distance and the length of the golden label to consider the label as correct
    :return: dict of F1 scores
    """
    f1_scores = {
    'PER': {
        'TOTAL': 0,
        'FN': 0,
        'FP': 0,
        'PARTIAL': 0,
        'TP': 0,
        'MISS' : 0,
        'TN' : 0
    },
    'LOC': {
        'TN' : 0,
        'TOTAL': 0,
        'FN': 0,
        'FP': 0,
        'PARTIAL': 0,
        'TP': 0,
        'MISS' : 0
    },
    'DATE': {
        'TN' : 0,
        'TOTAL': 0,
        'FN': 0,
        'FP': 0,
        'PARTIAL': 0,
        'TP': 0,
        'MISS' : 0
    },
    'JOB': {
        'TN' : 0,
        'TOTAL': 0,
        'FN': 0,
        'FP': 0,
        'PARTIAL': 0,
        'TP': 0,
        'MISS' : 0

    },
    'MERE_MARIEE': {
        'TN' : 0,
        'TOTAL': 0,
        'FN': 0,
        'FP': 0,
        'PARTIAL': 0,
        'TP': 0,
        'MISS' : 0,
        'details' : {
            'PER': {
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0,
                'TN' : 0
            },
            'LOC': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            },
            'DATE': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            },
            'JOB': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            }
        }
    },
    'PERE_MARIEE': {
        'TN' : 0,
        'TOTAL': 0,
        'FN': 0,
        'FP': 0,
        'PARTIAL': 0,
        'TP': 0,
        'MISS' : 0,
        'details' : {
            'PER': {
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0,
                'TN' : 0
            },
            'LOC': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            },
            'DATE': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            },
            'JOB': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            }
        }
    },
    'EX_EPOUX': {
        'TN' : 0,
        'TOTAL': 0,
        'FN': 0,
        'FP': 0,
        'PARTIAL': 0,
        'TP': 0,
        'MISS' : 0,
        'details' : {
            'PER': {
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0,
                'TN' : 0
            },
            'LOC': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            },
            'DATE': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            },
            'JOB': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            }
        }
    },
    'PERE_MARI': {
        'TN' : 0,
        'TOTAL': 0,
        'FN': 0,
        'FP': 0,
        'PARTIAL': 0,
        'TP': 0,
        'MISS' : 0,
        'details' : {
            'PER': {
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0,
                'TN' : 0
            },
            'LOC': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            },
            'DATE': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            },
            'JOB': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            }
        }
    },
    'MERE_MARI': {
        'TN' : 0,
        'TOTAL': 0,
        'FN': 0,
        'FP': 0,
        'PARTIAL': 0,
        'TP': 0,
        'MISS' : 0,
        'details' : {
            'PER': {
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0,
                'TN' : 0
            },
            'LOC': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            },
            'DATE': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            },
            'JOB': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            }
        }
    },
    'ADMIN': {
        'TN' : 0,
        'TOTAL': 0,
        'FN': 0,
        'FP': 0,
        'PARTIAL': 0,
        'TP': 0,
        'MISS' : 0,
        'details' : {
            'PER': {
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0,
                'TN' : 0
            },
            'LOC': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            },
            'DATE': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            },
            'JOB': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            }
        }
    },
    'MARIEE': {
        'TN' : 0,
        'TOTAL': 0,
        'FN': 0,
        'FP': 0,
        'PARTIAL': 0,
        'TP': 0,
        'MISS': 0,
        'details' : {
            'PER': {
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0,
                'TN' : 0
            },
            'LOC': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            },
            'DATE': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            },
            'JOB': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            }
        }
    },
    'MARI': {
        'TN' : 0,
        'TOTAL': 0,
        'FN': 0,
        'FP': 0,
        'PARTIAL': 0,
        'TP': 0,
        'MISS': 0,
        'details' : {
            'PER': {
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0,
                'TN' : 0
            },
            'LOC': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            },
            'DATE': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            },
            'JOB': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            }
        }
    },
    'TEMOIN': {
        'TN' : 0,
        'TOTAL': 0,
        'FN': 0,
        'FP': 0,
        'PARTIAL': 0,
        'TP': 0,
        'MISS': 0,
        'details' : {
            'PER': {
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0,
                'TN' : 0
            },
            'LOC': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            },
            'DATE': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            },
            'JOB': {
                'TN' : 0,
                'TOTAL': 0,
                'FN': 0,
                'FP': 0,
                'PARTIAL': 0,
                'TP': 0,
                'MISS' : 0
            }
        }
    },
    'TOTAL': 0,
    'FN': 0,
    'FP': 0,
    'PARTIAL': 0,
    'TP': 0,
    'MISS': 0,
    'TN' : 0
}
    
    
    for key in golden.keys():

        if isinstance(golden[key], bool):
            continue
         
        f1_scores['TOTAL'] += 1
        #print(key)

        emplacements = []
        for i in PER :
            if emplacements:
                break
            if i in key :
                emplacements.append('PER')
                break
        for i in LOC :
            if emplacements:
                break
            if i in key :
                emplacements.append('LOC')
                break
        for i in DATE :
            if emplacements:
                break
            if i in key :
                emplacements.append('DATE')
                break
        for i in JOB :
            if emplacements:
                break
            if i in key :
                emplacements.append('JOB')
                break

        for i in MERE_MARIEE :
            if len(emplacements) == 2 :
                break
            if i in key :
                emplacements.append('MERE_MARIEE')
                break
        for i in PERE_MARIEE :
            if len(emplacements) == 2 :
                break
            if i in key :
                emplacements.append('PERE_MARIEE')
                break
        for i in EX_EPOUX :
            if len(emplacements) == 2 :
                break
            if i in key :
                emplacements.append('EX_EPOUX')
                break
        for i in PERE_MARI :
            if len(emplacements) == 2 :
                break
            if i in key :
                emplacements.append('PERE_MARI')
                break
        for i in MERE_MARI :
            if len(emplacements) == 2 :
                break
            if i in key :
                emplacements.append('MERE_MARI')
                break
        for i in ADMIN :
            if len(emplacements) == 2 :
                break
            if i in key :
                emplacements.append('ADMIN')
                break
        for i in MARIEE :
            if len(emplacements) == 2 :
                break
            if i in key :
                emplacements.append('MARIEE')
                break
        for i in MARI :
            if len(emplacements) == 2 :
                break
            if i in key :
                emplacements.append('MARI')
                break
        
        for i in TEMOIN:
            if len(emplacements) == 2:
                break
            if i in key :
                emplacements.append('TEMOIN')
                break
        


        #print(emplacements)

        f1_scores[emplacements[0]]['TOTAL'] += 1
        f1_scores[emplacements[1]]['TOTAL'] += 1
        f1_scores[emplacements[1]]['details'][emplacements[0]]['TOTAL'] += 1
        if key in labels.keys():

            if not labels[key] and not golden[key]:
                f1_scores[emplacements[0]]['TN'] += 1
                f1_scores[emplacements[1]]['TN'] += 1
                f1_scores[emplacements[1]]['details'][emplacements[0]]['TN'] += 1

                f1_scores['TN'] += 1
            
            elif labels[key] == golden[key]:
                f1_scores[emplacements[0]]['TP'] += 1
                f1_scores[emplacements[1]]['TP'] += 1
                f1_scores[emplacements[1]]['details'][emplacements[0]]['TP'] += 1

                f1_scores['TP'] += 1

            elif labels[key] and not golden[key]:
                f1_scores[emplacements[0]]['FP'] += 1
                f1_scores[emplacements[1]]['FP'] += 1
                f1_scores[emplacements[1]]['details'][emplacements[0]]['FP'] += 1

                f1_scores['FP'] += 1

            elif not labels[key] and golden[key]:
                f1_scores[emplacements[0]]['FN'] += 1
                f1_scores[emplacements[1]]['FN'] += 1
                f1_scores[emplacements[1]]['details'][emplacements[0]]['FN'] += 1
                f1_scores['FN'] += 1
            elif labels[key] != golden[key]:
                if (Levenshtein.distance(labels[key], golden[key]) // len(golden[key])) < levenshtein_treshold:
                    f1_scores[emplacements[0]]['PARTIAL'] += 1
                    f1_scores[emplacements[1]]['PARTIAL'] += 1
                    f1_scores[emplacements[1]]['details'][emplacements[0]]['PARTIAL'] += 1
                    f1_scores['PARTIAL'] += 1
                else:
                    f1_scores[emplacements[0]]['MISS'] += 1
                    f1_scores[emplacements[1]]['MISS'] += 1
                    f1_scores[emplacements[1]]['details'][emplacements[0]]['MISS'] += 1
                    f1_scores['MISS'] += 1
        else:
            f1_scores[emplacements[0]]['FN'] += 1
            f1_scores[emplacements[1]]['FN'] += 1
            f1_scores[emplacements[1]]['details'][emplacements[0]]['FN'] += 1
            f1_scores['FN'] += 1

    return f1_scores


def test():
    golden_set = {}
    labels_set = {}

    with open('donnees-test.json', 'r') as file:
        golden_set = json.load(file)

    with open('donnees-test-labels-gpt35.json', 'r') as file:
        labels_set = json.load(file)

    results = {}
    for archive in golden_set.keys():
        if archive not in labels_set.keys():
            #print('Archive ' + archive + ' not found in labels_set')
            continue

        f1_scores = f1_score(labels_set[archive], golden_set[archive]['questions'], 0.5)
        results[archive] = f1_scores
    #print(results)

    #dump json file
    with open('results_extended.json', 'w') as file:
        json.dump(results, file, indent=4)
    #print('--------------------------------------------')


def precision_recall(labels : dict, weight : float, count_tn : bool) -> dict:
    scores = {}
    for archive in labels.keys():
        scores[archive] = { name :{'F1_strict' : 0, 'F1_weighted' : 0, 'F1_ok' : 0} for name in ['PER', 'LOC', 'DATE', 'JOB', 'MERE_MARIEE', 'PERE_MARIEE', 'EX_EPOUX', 'PERE_MARI', 'MERE_MARI', 'ADMIN', 'MARIEE', 'MARI', 'TEMOIN']}

        if not count_tn:
            for tag in scores[archive].keys():

                
                #strict mode
                precision_strict = 0 if (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FP'] + labels[archive][tag]['MISS'] + labels[archive][tag]['PARTIAL'] ) == 0 else (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] / (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FP'] + labels[archive][tag]['MISS'] + labels[archive][tag]['PARTIAL'] ))
                recall_strict = 0 if (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FN'] + labels[archive][tag]['MISS'] + labels[archive][tag]['PARTIAL'] ) == 0 else (labels[archive][tag]['TP'] + labels[archive][tag]['TN']  / (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FN'] + labels[archive][tag]['MISS'] + labels[archive][tag]['PARTIAL'] ))
                f1_strict = 0 if (precision_strict + recall_strict) == 0 else 2 * (precision_strict * recall_strict) / (precision_strict + recall_strict)

                #weighted mode
                precision_weighted = 0 if (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FP'] + labels[archive][tag]['MISS'] + weight * labels[archive][tag]['PARTIAL'] ) == 0 else ((labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + weight * labels[archive][tag]['PARTIAL'] )/ (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FP'] + labels[archive][tag]['MISS'] + weight * labels[archive][tag]['PARTIAL'] ))
                recall_weighted = 0 if (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FN'] + labels[archive][tag]['MISS'] + weight * labels[archive][tag]['PARTIAL'] ) == 0 else ((labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + weight * labels[archive][tag]['PARTIAL'])  / (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FN'] + labels[archive][tag]['MISS'] + weight * labels[archive][tag]['PARTIAL'] ))
                f1_weighted = 0 if (precision_weighted + recall_weighted) == 0 else 2 * (precision_weighted * recall_weighted) / (precision_weighted + recall_weighted)

                #ok mode
                precision_ok = 0 if (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FP'] + labels[archive][tag]['MISS'] + labels[archive][tag]['PARTIAL'] ) == 0 else ((labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['PARTIAL']) / (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FP'] + labels[archive][tag]['MISS'] + labels[archive][tag]['PARTIAL'] ))
                recall_ok = 0 if (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FN'] + labels[archive][tag]['MISS'] + labels[archive][tag]['PARTIAL'] ) == 0 else ((labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['PARTIAL'])  / (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FN'] + labels[archive][tag]['MISS'] + labels[archive][tag]['PARTIAL'] ))
                f1_ok = 0 if (precision_ok + recall_ok) == 0 else 2 * (precision_ok * recall_ok) / (precision_ok + recall_ok)

                scores[archive][tag]['F1_strict'] = f1_strict
                scores[archive][tag]['F1_weighted'] = f1_weighted
                scores[archive][tag]['F1_ok'] = f1_ok

            precision_strict = 0 if (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FP'] + labels[archive]['MISS'] + labels[archive]['PARTIAL'] ) == 0 else (labels[archive]['TP'] + labels[archive]['TN'] / (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FP'] + labels[archive]['MISS'] + labels[archive]['PARTIAL'] ))
            recall_strict = 0 if (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FN'] + labels[archive]['MISS'] + labels[archive]['PARTIAL'] ) == 0 else (labels[archive]['TP'] + labels[archive]['TN']  / (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FN'] + labels[archive]['MISS'] + labels[archive]['PARTIAL'] ))
            f1_strict = 0 if (precision_strict + recall_strict) == 0 else 2 * (precision_strict * recall_strict) / (precision_strict + recall_strict)

            #weighted mode
            precision_weighted = 0 if (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FP'] + labels[archive]['MISS'] + weight * labels[archive]['PARTIAL'] ) == 0 else ((labels[archive]['TP'] + labels[archive]['TN'] + weight * labels[archive]['PARTIAL']) / (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FP'] + labels[archive]['MISS'] + weight * labels[archive]['PARTIAL'] ))
            recall_weighted = 0 if (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FN'] + labels[archive]['MISS'] + weight * labels[archive]['PARTIAL'] ) == 0 else ((labels[archive]['TP'] + labels[archive]['TN'] + weight * labels[archive]['PARTIAL'])  / (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FN'] + labels[archive]['MISS'] + weight * labels[archive]['PARTIAL'] ))
            f1_weighted = 0 if (precision_weighted + recall_weighted) == 0 else 2 * (precision_weighted * recall_weighted) / (precision_weighted + recall_weighted)

            #ok mode
            precision_ok = 0 if (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FP'] + labels[archive]['MISS'] + labels[archive]['PARTIAL'] ) == 0 else ((labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['PARTIAL']) / (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FP'] + labels[archive]['MISS'] + labels[archive]['PARTIAL']))
            recall_ok = 0 if (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FN'] + labels[archive]['MISS'] + labels[archive]['PARTIAL'] ) == 0 else ((labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['PARTIAL'])  / (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FN'] + labels[archive]['MISS'] + labels[archive]['PARTIAL'] ))
            f1_ok = 0 if (precision_ok + recall_ok) == 0 else 2 * (precision_ok * recall_ok) / (precision_ok + recall_ok)

            scores[archive]['F1_strict'] = f1_strict
            scores[archive]['F1_weighted'] = f1_weighted
            scores[archive]['F1_ok'] = f1_ok
            #print(scores[archive]['F1_strict'], scores[archive]['F1_weighted'], scores[archive]['F1_ok'])

        else:
            for tag in scores[archive].keys():

                print(tag)
                #strict mode
                precision_strict = 0 if (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FP'] + labels[archive][tag]['MISS'] + labels[archive][tag]['PARTIAL'] ) == 0 else ((labels[archive][tag]['TP'] + labels[archive][tag]['TN']) / (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FP'] + labels[archive][tag]['MISS'] + labels[archive][tag]['PARTIAL'] ))
                recall_strict = 0 if (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FN'] + labels[archive][tag]['MISS'] + labels[archive][tag]['PARTIAL'] ) == 0 else ((labels[archive][tag]['TP'] + labels[archive][tag]['TN'])  / (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FN'] + labels[archive][tag]['MISS'] + labels[archive][tag]['PARTIAL'] ))
                f1_strict = 0 if (precision_strict + recall_strict) == 0 else 2 * (precision_strict * recall_strict) / (precision_strict + recall_strict)

                #weighted mode
                precision_weighted = 0 if (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FP'] + labels[archive][tag]['MISS'] + weight * labels[archive][tag]['PARTIAL'] ) == 0 else ((labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + weight * labels[archive][tag]['PARTIAL'] )/ (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FP'] + labels[archive][tag]['MISS'] + weight * labels[archive][tag]['PARTIAL'] ))
                recall_weighted = 0 if (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FN'] + labels[archive][tag]['MISS'] + weight * labels[archive][tag]['PARTIAL'] ) == 0 else ((labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + weight * labels[archive][tag]['PARTIAL'])  / (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FN'] + labels[archive][tag]['MISS'] + weight * labels[archive][tag]['PARTIAL'] ))
                f1_weighted = 0 if (precision_weighted + recall_weighted) == 0 else 2 * (precision_weighted * recall_weighted) / (precision_weighted + recall_weighted)

                #ok mode
                precision_ok = 0 if (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FP'] + labels[archive][tag]['MISS'] + labels[archive][tag]['PARTIAL'] ) == 0 else ((labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['PARTIAL']) / (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FP'] + labels[archive][tag]['MISS'] + labels[archive][tag]['PARTIAL'] ))
                recall_ok = 0 if (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FN'] + labels[archive][tag]['MISS'] + labels[archive][tag]['PARTIAL'] ) == 0 else ((labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['PARTIAL'])  / (labels[archive][tag]['TP'] + labels[archive][tag]['TN'] + labels[archive][tag]['FN'] + labels[archive][tag]['MISS'] + labels[archive][tag]['PARTIAL'] ))
                f1_ok = 0 if (precision_ok + recall_ok) == 0 else 2 * (precision_ok * recall_ok) / (precision_ok + recall_ok)

                scores[archive][tag]['F1_strict'] = f1_strict
                scores[archive][tag]['F1_weighted'] = f1_weighted
                scores[archive][tag]['F1_ok'] = f1_ok

            precision_strict = 0 if (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FP'] + labels[archive]['MISS'] + labels[archive]['PARTIAL'] ) == 0 else ((labels[archive]['TP'] + labels[archive]['TN']) / (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FP'] + labels[archive]['MISS'] + labels[archive]['PARTIAL'] ))
            recall_strict = 0 if (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FN'] + labels[archive]['MISS'] + labels[archive]['PARTIAL'] ) == 0 else ((labels[archive]['TP'] + labels[archive]['TN'])  / (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FN'] + labels[archive]['MISS'] + labels[archive]['PARTIAL'] ))
            f1_strict = 0 if (precision_strict + recall_strict) == 0 else 2 * (precision_strict * recall_strict) / (precision_strict + recall_strict)

            #weighted mode
            precision_weighted = 0 if (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FP'] + labels[archive]['MISS'] + weight * labels[archive]['PARTIAL'] ) == 0 else ((labels[archive]['TP'] + labels[archive]['TN'] + weight * labels[archive]['PARTIAL']) / (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FP'] + labels[archive]['MISS'] + weight * labels[archive]['PARTIAL'] ))
            recall_weighted = 0 if (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FN'] + labels[archive]['MISS'] + weight * labels[archive]['PARTIAL'] ) == 0 else ((labels[archive]['TP'] + labels[archive]['TN'] + weight * labels[archive]['PARTIAL'])  / (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FN'] + labels[archive]['MISS'] + weight * labels[archive]['PARTIAL'] ))
            f1_weighted = 0 if (precision_weighted + recall_weighted) == 0 else 2 * (precision_weighted * recall_weighted) / (precision_weighted + recall_weighted)

            #ok mode
            precision_ok = 0 if (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FP'] + labels[archive]['MISS'] + labels[archive]['PARTIAL'] ) == 0 else ((labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['PARTIAL']) / (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FP'] + labels[archive]['MISS'] + labels[archive]['PARTIAL']))
            recall_ok = 0 if (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FN'] + labels[archive]['MISS'] + labels[archive]['PARTIAL'] ) == 0 else ((labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['PARTIAL'])  / (labels[archive]['TP'] + labels[archive]['TN'] + labels[archive]['FN'] + labels[archive]['MISS'] + labels[archive]['PARTIAL'] ))
            f1_ok = 0 if (precision_ok + recall_ok) == 0 else 2 * (precision_ok * recall_ok) / (precision_ok + recall_ok)

            scores[archive]['F1_strict'] = f1_strict
            scores[archive]['F1_weighted'] = f1_weighted
            scores[archive]['F1_ok'] = f1_ok
            #print(scores[archive]['F1_strict'], scores[archive]['F1_weighted'], scores[archive]['F1_ok'])

    return scores


import plotly.graph_objects as go
import plotly.express as px
import pandas as pd

def make_df(scores : dict):
    #create a DataFrame
    #Each line is an archive
    #Index is the archive name


    #Now append multi_index columns : 
    # 1st layer is : ['PER', 'LOC', 'DATE', 'JOB', 'MERE_MARIEE', 'PERE_MARIEE', 'EX_EPOUX', 'PERE_MARI', 'MERE_MARI', 'ADMIN', 'MARIEE', 'MARI', 'TEMOIN']
    # 2nd layer is : ['F1_strict', 'F1_weighted', 'F1_ok']
    #So we have 39 columns
    #We need to create a MultiIndex
    #Then we can append it to the DataFrame

    #Create the MultiIndex
    #First layer
    first_layer = []
    for tag in scores['archives_AD075EC_11M549_0094-right.png-0'].keys():
        if tag == 'F1_strict' or tag == 'F1_weighted' or tag == 'F1_ok':
            continue
        first_layer.append(tag)

    #Second layer
    second_layer = ['F1_strict', 'F1_weighted', 'F1_ok']

    #Create the MultiIndex
    multi_index = pd.MultiIndex.from_product([first_layer, second_layer], names=['tag', 'F1'])
    #print(multi_index)

    #Create the DataFrame
    df2 = pd.DataFrame(columns=multi_index)

    #Fill the DataFrame
    for archive in scores.keys():
        for tag in scores[archive].keys():
            if tag == 'F1_strict' or tag == 'F1_weighted' or tag == 'F1_ok':
                continue
            for f1 in scores[archive][tag].keys():
                df2.loc[archive, (tag, f1)] = scores[archive][tag][f1]
        for f1 in ['F1_strict', 'F1_weighted', 'F1_ok']:
            df2.loc[archive, ('TOTAL', f1)] = scores[archive][f1]

    return df2
 


In [26]:
def test2():
    golden_set = {}
    labels_set = {}

    with open('donnees-test.json', 'r') as file:
        golden_set = json.load(file)

    with open('donnees-test-labels-gpt35.json', 'r') as file:
        labels_set = json.load(file)

    results = {}
    for archive in golden_set.keys():
        if archive not in labels_set.keys():
            #print('Archive ' + archive + ' not found in labels_set')
            continue

        f1_scores = f1_score(labels_set[archive], golden_set[archive]['questions'], 0.5)
        results[archive] = f1_scores
        
    scores = precision_recall(results, 0.5, True)
    #save scores in a json file
    with open('scores_extended.json', 'w') as file:
        json.dump(scores, file, indent=4)
    df = make_df(scores)
    #plot(df)
    return df

df = test2()
df

PER
LOC
DATE
JOB
MERE_MARIEE
PERE_MARIEE
EX_EPOUX
PERE_MARI
MERE_MARI
ADMIN
MARIEE
MARI
TEMOIN
PER
LOC
DATE
JOB
MERE_MARIEE
PERE_MARIEE
EX_EPOUX
PERE_MARI
MERE_MARI
ADMIN
MARIEE
MARI
TEMOIN
PER
LOC
DATE
JOB
MERE_MARIEE
PERE_MARIEE
EX_EPOUX
PERE_MARI
MERE_MARI
ADMIN
MARIEE
MARI
TEMOIN
PER
LOC
DATE
JOB
MERE_MARIEE
PERE_MARIEE
EX_EPOUX
PERE_MARI
MERE_MARI
ADMIN
MARIEE
MARI
TEMOIN
PER
LOC
DATE
JOB
MERE_MARIEE
PERE_MARIEE
EX_EPOUX
PERE_MARI
MERE_MARI
ADMIN
MARIEE
MARI
TEMOIN
PER
LOC
DATE
JOB
MERE_MARIEE
PERE_MARIEE
EX_EPOUX
PERE_MARI
MERE_MARI
ADMIN
MARIEE
MARI
TEMOIN
PER
LOC
DATE
JOB
MERE_MARIEE
PERE_MARIEE
EX_EPOUX
PERE_MARI
MERE_MARI
ADMIN
MARIEE
MARI
TEMOIN
PER
LOC
DATE
JOB
MERE_MARIEE
PERE_MARIEE
EX_EPOUX
PERE_MARI
MERE_MARI
ADMIN
MARIEE
MARI
TEMOIN
PER
LOC
DATE
JOB
MERE_MARIEE
PERE_MARIEE
EX_EPOUX
PERE_MARI
MERE_MARI
ADMIN
MARIEE
MARI
TEMOIN
PER
LOC
DATE
JOB
MERE_MARIEE
PERE_MARIEE
EX_EPOUX
PERE_MARI
MERE_MARI
ADMIN
MARIEE
MARI
TEMOIN
PER
LOC
DATE
JOB
MERE_MARIEE
PERE_MARIEE
EX_EPOUX


tag,PER,PER,PER,LOC,LOC,LOC,DATE,DATE,DATE,JOB,...,MARIEE,MARI,MARI,MARI,TEMOIN,TEMOIN,TEMOIN,TOTAL,TOTAL,TOTAL
F1,F1_strict,F1_weighted,F1_ok,F1_strict,F1_weighted,F1_ok,F1_strict,F1_weighted,F1_ok,F1_strict,...,F1_ok,F1_strict,F1_weighted,F1_ok,F1_strict,F1_weighted,F1_ok,F1_strict,F1_weighted,F1_ok
archives_AD075EC_01M1930_0023-left.png-0,0.935484,1.0,1.0,0.925,0.925,0.925,1.0,1.0,1.0,0.875,...,1.0,0.9375,1.0,1.0,0.8,0.862069,0.866667,0.933333,0.966102,0.966667
archives_AD075EC_01M1930_0023-left.png-1,0.967742,1.0,1.0,0.97561,1.0,1.0,0.909091,1.0,1.0,0.75,...,1.0,0.875,1.0,1.0,1.0,1.0,1.0,0.945055,1.0,1.0
archives_AD075EC_01M1930_0023-left.png-2,0.862069,0.928571,0.931034,0.675676,0.722222,0.72973,1.0,1.0,1.0,0.875,...,0.827586,0.689655,0.884615,0.896552,0.714286,0.777778,0.785714,0.8,0.854545,0.858824
archives_AD075EC_01M1930_0023-right.png-0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.933333,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.994475,0.994475,0.994475
archives_AD075EC_01M1930_0023-right.png-1,0.967742,1.0,1.0,0.97561,1.0,1.0,1.0,1.0,1.0,0.875,...,1.0,0.875,1.0,1.0,1.0,1.0,1.0,0.967033,1.0,1.0
archives_AD075EC_01M1930_0023-right.png-2,0.967742,1.0,1.0,0.962963,0.9875,0.987654,0.909091,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.961326,0.994382,0.994475
archives_AD075EC_01M1940_0020-left.png-0,0.967742,1.0,1.0,0.962963,0.9875,0.987654,1.0,1.0,1.0,1.0,...,0.967742,0.9375,1.0,1.0,1.0,1.0,1.0,0.972376,0.994413,0.994475
archives_AD075EC_01M1940_0020-left.png-3,0.923077,0.923077,0.923077,0.885714,0.885714,0.885714,1.0,1.0,1.0,0.833333,...,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.910256,0.910256,0.910256
archives_AD075EC_01M1940_0020-right.png-0,0.872727,0.872727,0.872727,0.906667,0.906667,0.906667,1.0,1.0,1.0,0.857143,...,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.903614,0.903614,0.903614
archives_AD075EC_01M1940_0020-right.png-1,0.915254,0.948276,0.949153,0.938272,0.9625,0.962963,0.909091,1.0,1.0,0.875,...,1.0,0.875,1.0,1.0,0.9375,1.0,1.0,0.921348,0.965517,0.966292


In [46]:
import numpy as np
def plot(scores : pd.DataFrame):
    
    #for each archive, keep only F1_ok in multi_index
    scores_ok = scores.xs('F1_ok', level='F1', axis=1)
    scores_sctrict = scores.xs('F1_strict', level='F1', axis=1)

    #rename index : remove archives_AD075EC and .png
    scores_ok.index = scores_ok.index.str.replace('archives_AD075EC_', '')
    scores_ok.index = scores_ok.index.str.replace('.png', '')
    scores_sctrict.index = scores_sctrict.index.str.replace('archives_AD075EC_', '')
    scores_sctrict.index = scores_sctrict.index.str.replace('.png', '')


    #plot : x_axis = archives, y_axis = F1 between 0 and 1. For each archive, plot bars for F1_ok (blue) and F1_strict (red) in the same graph
    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=scores_ok.index,
        y=scores_ok['TOTAL'],
        name='F1_ok',
        marker_color='blue',
        opacity=0.5
    ))
    fig.add_trace(go.Bar(
        x=scores_sctrict.index,
        y=scores_sctrict['TOTAL'],
        name='F1_strict',
        marker_color='red',
        opacity=0.5
    ))

    #add a line for the macro average of F1_ok and F1_strict
    fig.add_trace(go.Scatter(
        x=scores_ok.index,
        y=np.full(len(scores_ok['TOTAL']), scores_ok['TOTAL'].mean()),
        name='F1_ok macro average',
        marker_color='blue',
        mode='lines',
        line=dict(
            dash='dash',
            width=4
        )
    ))
    fig.add_trace(go.Scatter(
        x=scores_sctrict.index,
        y=np.full(len(scores_sctrict['TOTAL']), scores_sctrict['TOTAL'].mean()),
        name='F1_strict macro average',
        marker_color='red',
        mode='lines',
        line=dict(
            dash='dash',
            width=4
        )
    ))



    fig.update_layout(
        title='F1 scores',
        xaxis_tickfont_size=14,
        yaxis=dict(
            title='F1',
            titlefont_size=16,
            tickfont_size=14,
        ),
        legend=dict(
            x=0.0,
            y=0.0,
            bgcolor='rgba(255, 255, 255, 0)',
            bordercolor='rgba(255, 255, 255, 0)'
        ),
        barmode='group',
        bargap=0.15, # gap between bars of adjacent location coordinates.
        bargroupgap=0.1 # gap between bars of the same location coordinate.
    )

    fig.show()

plot(df)

In [65]:
import numpy as np
def plot(scores : pd.DataFrame):
    
    score_mean_ok = scores.xs('F1_ok', level='F1', axis=1).mean()
    score_mean_weighted = scores.xs('F1_weighted', level='F1', axis=1).mean()
    score_mean_strict = scores.xs('F1_strict', level='F1', axis=1).mean()

    concat = pd.concat([score_mean_ok, score_mean_weighted, score_mean_strict], axis=1)
    concat.columns = ['F1_ok', 'F1_weighted', 'F1_strict']

    scores = concat.copy()

    #Plot : x_axis = tags, y_axis = F1 between 0 and 1. For each tag, plot bars for F1_ok (blue), F1_weighted (green) and F1_strict (red) in the same graph
    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=scores.index,
        y=scores['F1_ok'],
        name='F1_ok',
        marker_color='blue',
        opacity=0.5
    ))
    fig.add_trace(go.Bar(
        x=scores.index,
        y=scores['F1_strict'],
        name='F1_strict',
        marker_color='red',
        opacity=0.5
    ))

    #add a line for the macro average of F1_ok and F1_strict
    fig.add_trace(go.Scatter(
        x=scores.index,
        y=np.full(len(scores['F1_ok']), scores['F1_ok'].mean()),
        name='F1_ok macro average',
        marker_color='blue',
        mode='lines',
        line=dict(
            dash='dash',
            width=4
        )
    ))
    fig.add_trace(go.Scatter(
        x=scores.index,
        y=np.full(len(scores['F1_strict']), scores['F1_strict'].mean()),
        name='F1_strict macro average',
        marker_color='red',
        mode='lines',
        line=dict(
            dash='dash',
            width=4
        )
    ))

    fig.update_layout(
        title='F1 scores',
        xaxis_tickfont_size=14,
        yaxis=dict(
            title='F1',
            titlefont_size=16,
            tickfont_size=14,
        ),
        legend=dict(
            x=0.0,
            y=0.0,
            bgcolor='rgba(255, 255, 255, 0)',
            bordercolor='rgba(255, 255, 255, 0)'
        ),
        barmode='group',
        bargap=0.15, # gap between bars of adjacent location coordinates.
        bargroupgap=0.1 # gap between bars of the same location coordinate.
    )

    fig.show()


plot(df)


In [89]:
def make_average_for_each_people():
    donnees = {}
    new_results = {}
    with open("results_extended.json", "r") as f:
        donnees = json.load(f)

    for archive in donnees:
        arch_dict = {}
        for people in donnees[archive]:
            if people in ['MERE_MARIEE', 'PERE_MARIEE', 'EX_EPOUX', 'PERE_MARI', 'MERE_MARI', 'ADMIN', 'MARIEE', 'MARI', 'TEMOIN']:
                details = donnees[archive][people]['details']
                arch_dict[people] = details
        
        if new_results == {}:
            new_results = arch_dict
        else:

            def merge_dicts(a, b, op=sum):
                """Merge dictionaries and apply an operator to duplicate keys' values."""
                result = {}
                for key in a.keys() | b.keys():
                    if key in a and key in b:
                        if isinstance(a[key], dict) and isinstance(b[key], dict):
                            result[key] = merge_dicts(a[key], b[key], op)
                        else:
                            result[key] = op([a[key], b[key]])
                    elif key in a:
                        result[key] = a[key]
                    elif key in b:
                        result[key] = b[key]
                return result
            
            new_results = merge_dicts(new_results, arch_dict, op=sum)

    #save the new results
    with open("results_extended_details.json", "w") as f:
        json.dump(new_results, f, indent=4)

    detailes_f1 = {}
    for people in new_results:
        detailes_f1[people] = {}
        for tag in new_results[people]:
            detailes_f1[people][tag] = {}
            #calculate F1_ok :
            TP = new_results[people][tag]['TP'] + new_results[people][tag]['TN'] + new_results[people][tag]['PARTIAL']
            FP = new_results[people][tag]['FP'] + new_results[people][tag]['MISS']
            FN = new_results[people][tag]['FN'] + new_results[people][tag]['MISS']
            precision = 0 if (TP + FP) == 0 else (TP / (TP + FP))
            recall = 0 if (TP + FN) == 0 else (TP / (TP + FN))
            F1_ok = 0 if (precision + recall) == 0 else (2 * (precision * recall) / (precision + recall))

            #calculate F1_strict :
            TP = new_results[people][tag]['TP'] + new_results[people][tag]['TN']
            FP = new_results[people][tag]['FP'] + new_results[people][tag]['MISS']  + new_results[people][tag]['PARTIAL']
            FN = new_results[people][tag]['FN'] + new_results[people][tag]['MISS']  + new_results[people][tag]['PARTIAL']
            precision = 0 if (TP + FP) == 0 else (TP / (TP + FP))
            recall = 0 if (TP + FN) == 0 else TP / (TP + FN)
            F1_strict = 0 if (precision + recall) == 0 else (2 * (precision * recall) / (precision + recall))

            detailes_f1[people][tag]['F1_ok'] = F1_ok
            detailes_f1[people][tag]['F1_strict'] = F1_strict

    #save the new results
    with open("scores_extended_details.json", "w") as f:
        json.dump(detailes_f1, f, indent=4)

    #create a dataframe with the new results.
    #Use a multiindex : 1st layer = tag [per,loc,date,job], 2nd layer = f1 type (F1_ok or F1_strict)
    #index = people
   
    #create the multiindex
    index = []
    for people in detailes_f1:
        for tag in detailes_f1[people]:
            index.append((people, tag))
    index = pd.MultiIndex.from_tuples(index, names=['people', 'tag'])

    #create the dataframe
    scores = pd.DataFrame(columns=['F1_ok', 'F1_strict'], index=index)
    for people in detailes_f1:
        for tag in detailes_f1[people]:
            scores.loc[(people, tag), 'F1_ok'] = detailes_f1[people][tag]['F1_ok']
            scores.loc[(people, tag), 'F1_strict'] = detailes_f1[people][tag]['F1_strict']

    #put tag in columns
    scores = scores.unstack(level=1)

    #PLot bar chart
    #Y-axis : F1 score from 0 to 1, X-axis : people splitted in DATE, LOC, PER, JOB for each people. Use only F1_ok
    #use a color gradient for each people
    
    scores = scores.xs('F1_ok', axis=1, level=0)

    #scores = scores.transpose()

    #use plotly graph objects
    fig = go.Figure()

    people_colors = {
        'JOB': 'rgb(255, 0, 0)',
        'LOC': 'rgb(0, 255, 0)',
        'PER': 'rgb(0, 0, 255)',
        'DATE': 'rgb(255, 255, 0)'
    }



    #add traces
    for people in scores:
        fig.add_trace(go.Bar(
            x=scores.index,
            y=scores[people],
            name=people,
            marker_color=people_colors[people]
        ))


    #update layout
    fig.update_layout(
        title='F1 score for each people',
        xaxis_tickfont_size=14,
        yaxis=dict(
            title='F1 score',
            titlefont_size=16,
            tickfont_size=14,
        ),
        legend=dict(
            x=0,
            y=1.0,
            bgcolor='rgba(255, 255, 255, 0)',
            bordercolor='rgba(255, 255, 255, 0)'
        ),
        barmode='group',
        bargap=0.15,
        bargroupgap=0.1
    )

    fig.show()







    

    
    return scores






make_average_for_each_people()

tag,DATE,JOB,LOC,PER
people,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ADMIN,1.0,0.0,0.83871,0.909091
EX_EPOUX,0.0,0.0,0.0,0.971429
MARI,1.0,0.972222,0.953069,0.968421
MARIEE,0.990741,1.0,0.967857,0.989547
MERE_MARI,0.0,0.971429,0.96,0.971698
MERE_MARIEE,0.0,0.972222,0.938776,0.985915
PERE_MARI,0.0,1.0,0.971429,0.995349
PERE_MARIEE,0.0,0.971429,0.913858,0.971429
TEMOIN,0.0,0.846154,0.834532,0.892779
