## Scan for YAML files:

In [83]:
from pathlib import Path

# Folder scanned for benefit YAML files; the path is relative to the
# current working directory.
benefits_folder = Path('./aides')

def extract_benefits_paths(benefits_folder):
    """Return the paths of the YAML files located directly inside a folder.

    Args:
        benefits_folder: Object exposing ``iterdir()`` (a ``pathlib.Path``
            in this notebook) whose direct entries are inspected.

    Returns:
        A list of ``str`` paths, one per entry whose string form ends with
        ``.yml`` or ``.yaml``. Sub-folders are not explored and the order
        is the one produced by ``iterdir()``.
    """
    yaml_suffixes = ('.yml', '.yaml')
    yaml_paths = []
    for entry in benefits_folder.iterdir():
        entry_as_text = str(entry)
        # str.endswith accepts a tuple: either suffix qualifies the entry.
        if entry_as_text.endswith(yaml_suffixes):
            yaml_paths.append(entry_as_text)
    return yaml_paths

# Collect the YAML file paths found in the benefits folder, then display them.
benefits_files_paths = extract_benefits_paths(benefits_folder)
benefits_files_paths

['aides/haut-de-france-aide-transports-salaries.yml',
 'aides/region-auvergne-rhone-alpes-billet-illico-autocars.yml',
 'aides/hauts-de-france-mon-abo-etudiant-ter.yml',
 'aides/region_bretagne-b-mouve-–-mobilité-des-formations-sanitaires-et-sociales.yml',
 'aides/region_nouvelle_aquitaine-stages-à-létranger-–-infra-bac.yml',
 'aides/caf-territoire-de-belfort-aide-bafa-approfondissement.yml',
 'aides/region-nouvelle-aquitaine-sas-jeunes-orientation-active-vers-emploi.yml',
 'aides/region-auvergne-rhone-alpes-abonnement-illico.yml',
 'aides/caf-haute-loire-aide-bafa-approfondissement.yml',
 'aides/region_la_reunion-bourse-régionale-sanitaire-et-sociale.yml',
 'aides/caf-sarthe-aide-bafa-generale.yml',
 'aides/caf-doubs-aide-bafa-generale.yml',
 'aides/hauts-de-france-ma-carte-ter.yml',
 'aides/hauts-de-france-indemnites-protec-soc-formation-pro.yml',
 'aides/caf-haute-saone-aide-bafa.yml',
 'aides/caf-haute-loire-aide-bafa-generale.yml',
 'aides/caf-pyrenees-orientales-aide-bafa-approfo

## Open and parse files: make a list of dicts

In [84]:
import yaml

def extract_benefit_file_content(aide_path):
    """Parse one benefit YAML file and return its content.

    Args:
        aide_path: Path of the YAML file to read.

    Returns:
        The Python object built by ``yaml.safe_load`` from the document.
    """
    # The context manager guarantees the file handle is closed, even when
    # parsing raises. The encoding is explicit so decoding does not depend on
    # the platform default (assumes the files are UTF-8 -- confirm if some
    # were saved with another encoding).
    with open(aide_path, encoding='utf-8') as benefit_file:
        return yaml.safe_load(benefit_file)

def extract_benefits_from_yamls(path):
    """Load every YAML benefit file found in a folder.

    Args:
        path: Folder handed to ``extract_benefits_paths``.

    Returns:
        A list holding one parsed document per YAML file, in the order the
        file paths were returned by ``extract_benefits_paths``.
    """
    yaml_file_paths = extract_benefits_paths(path)
    return list(map(extract_benefit_file_content, yaml_file_paths))

In [85]:
# Parse every YAML file of the benefits folder into one list.
benefits = extract_benefits_from_yamls(benefits_folder)

## Make a list of every [profils][type] value across all the given aides

In [86]:
def get_profils_names_from_aides_list(path: list):
    """Collect the ``type`` of every profile declared by the given benefits.

    The previous version ignored its argument and silently read the
    module-level ``benefits`` variable; the argument is now the only input.

    Args:
        path: List of parsed benefit dicts, each with a ``'profils'`` list
            whose items carry a ``'type'`` key. The parameter name is kept
            for backward compatibility even though it holds benefits, not a
            filesystem path.

    Returns:
        A flat list of profile types, duplicates included, following the
        order of the benefits and of their profiles.

    Raises:
        KeyError: If a benefit has no ``'profils'`` key or a profile has no
            ``'type'`` key.
    """
    return [
        profile['type']
        for benefit in path
        for profile in benefit['profils']
    ]

# Gather the profile types of all parsed benefits, then display them.
profiles_names = get_profils_names_from_aides_list(benefits)
profiles_names

['salarie',
 'apprenti',
 'apprenti',
 'apprenti',
 'enseignement_superieur',
 'enseignement_superieur',
 'stagiaire',
 'stagiaire',
 'enseignement_superieur',
 'lyceen',
 'lyceen',
 'apprenti',
 'chomeur',
 'enseignement_superieur',
 'stagiaire',
 'chomeur',
 'enseignement_superieur',
 'enseignement_superieur',
 'etudiant',
 'enseignement_superieur',
 'stagiaire',
 'apprenti',
 'professionnalisation',
 'chomeur',
 'service_civique',
 'apprenti',
 'lyceen',
 'apprenti',
 'lyceen',
 'enseignement_superieur',
 'chomeur',
 'enseignement_superieur',
 'chomeur',
 'lyceen',
 'enseignement_superieur',
 'etudiant',
 'lyceen',
 'apprenti',
 'stagiaire',
 'professionnalisation',
 'etudiant',
 'apprenti',
 'enseignement_superieur',
 'lyceen',
 'salarie',
 'independant',
 'service_civique',
 'apprenti',
 'stagiaire',
 'professionnalisation',
 'chomeur',
 'apprenti',
 'chomeur',
 'enseignement_superieur',
 'enseignement_superieur',
 'apprenti',
 'chomeur',
 'etudiant',
 'service_civique',
 'chomeur

# Make a list of all conditions:
	- general conditions ([conditions_generales])
	- profile-specific conditions ([profils][conditions])

In [91]:
from functools import reduce


def extract_conditions(benefit):
    """Return the condition types attached to one benefit.

    Args:
        benefit: Dict with a ``'conditions_generales'`` list and a
            ``'profils'`` list; a profile may or may not have a
            ``'conditions'`` list.

    Returns:
        A list with the ``'type'`` of each general condition first, followed
        by the ``'type'`` of each profile-level condition, duplicates kept.
    """
    condition_types = []
    for general_condition in benefit['conditions_generales']:
        condition_types.append(general_condition['type'])

    for profil in benefit['profils']:
        # Profiles without a 'conditions' entry contribute nothing.
        if 'conditions' not in profil:
            continue
        for profil_condition in profil['conditions']:
            condition_types.append(profil_condition['type'])

    return condition_types

# def accumulate_conditions(conditions: list, benefit: dict):
#     return conditions + extract_conditions(benefit)

def get_all_conditions_from_benefits(path: list):
    """Flatten the condition types of every given benefit into one list.

    The previous version ignored its argument and silently read the
    module-level ``benefits`` variable; the argument is now the only input.

    Args:
        path: List of parsed benefit dicts accepted by
            ``extract_conditions``. The parameter name is kept for backward
            compatibility even though it holds benefits, not a filesystem
            path.

    Returns:
        A flat list of condition types, duplicates included, in benefit
        order.
    """
    return [
        condition
        for benefit in path
        for condition in extract_conditions(benefit)
    ]

# Flatten the condition types of all parsed benefits, then display them.
# The former reduce() call relied on `accumulate_conditions`, which is only
# present as commented-out code, so it raised NameError on a fresh run; the
# helper below yields the same list in the same order.
conditions_names = get_all_conditions_from_benefits(benefits)
conditions_names

['regions',
 'regions',
 'regions',
 'age',
 'boursier',
 'age',
 'boursier',
 'age',
 'boursier',
 'regions',
 'formation_sanitaire_social',
 'regions',
 'age',
 'age',
 'departements',
 'departements',
 'age',
 'age',
 'age',
 'regions',
 'age',
 'departements',
 'regions',
 'formation_sanitaire_social',
 'age',
 'departements',
 'quotient_familial',
 'age',
 'departements',
 'regions',
 'age',
 'regions',
 'age',
 'age',
 'departements',
 'age',
 'departements',
 'regime_securite_sociale',
 'age',
 'age',
 'regime_securite_sociale',
 'departements',
 'age',
 'age',
 'departements',
 'quotient_familial',
 'regions',
 'age',
 'age',
 'age',
 'departements',
 'quotient_familial',
 'age',
 'age',
 'departements',
 'quotient_familial',
 'age',
 'departements',
 'quotient_familial',
 'regions',
 'formation_sanitaire_social',
 'age',
 'departements',
 'regions',
 'age',
 'age',
 'regions',
 'age',
 'departements',
 'age',
 'departements',
 'quotient_familial',
 'regime_securite_sociale',
 

# Count how often each profile and each condition appears

In [92]:
def count_list_item_frequence(list : list):
    """Count how many times each element occurs.

    Args:
        list: Iterable of hashable elements. The parameter name shadows the
            builtin but is kept for backward compatibility.

    Returns:
        A list of ``(element, count)`` tuples sorted by ascending count.
        Elements with equal counts stay in order of first appearance,
        because ``Counter`` preserves insertion order and ``sorted`` is
        stable.
    """
    # Imported locally so the function does not depend on an import made
    # elsewhere.
    from collections import Counter

    elements_counter = Counter(list)
    return sorted(elements_counter.items(), key=lambda item: item[1])

# Occurrence counts of each profile type and of each condition type.
profil_names_frequence = count_list_item_frequence(profiles_names)
condition_frequence = count_list_item_frequence(conditions_names)

In [93]:
# Display the (condition type, count) pairs, least frequent first.
condition_frequence

[('beneficiaire_rsa', 1),
 ('mention_baccalaureat', 5),
 ('boursier', 12),
 ('regime_securite_sociale', 12),
 ('annee_etude', 12),
 ('formation_sanitaire_social', 34),
 ('quotient_familial', 62),
 ('departements', 127),
 ('regions', 184),
 ('age', 272)]

In [94]:
# Display the (profile type, count) pairs, least frequent first.
profil_names_frequence

[('beneficiaire_rsa', 1),
 ('inactif', 1),
 ('independant', 7),
 ('salarie', 11),
 ('professionnalisation', 11),
 ('service_civique', 12),
 ('stagiaire', 21),
 ('etudiant', 23),
 ('lyceen', 37),
 ('apprenti', 41),
 ('chomeur', 51),
 ('enseignement_superieur', 54)]