## **3. Pondération statistique** (TF-IDF / OKapiBM25)  

https://stackoverflow.com/questions/46580932/calculate-tf-idf-using-sklearn-for-n-grams-in-python  
http://scikit-learn.sourceforge.net/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html#sklearn-feature-extraction-text-tfidfvectorizer  
https://scikit-learn.org/stable/modules/feature_extraction.html#text-feature-extraction

https://pypi.org/project/rank-bm25/

In [37]:
path = '../04-filtrage/output/'
acteur = 'msss'
tag = 'deficiences-et-handicaps'

if tag:
    csv_file = acteur + '_' + tag + '_significant-collocations.csv'

else:
    csv_file = acteur + '_significant-collocations.csv'

### **Lire le vocabulaire** (termes retenus au prétraitement)

In [38]:
from pandas import *

with open(path+csv_file, encoding='utf-8') as f:
    csv = read_csv(f).drop(columns = ['Unnamed: 0'])
    # On va traiter les apostrophes et les parenthèses avec le underscore ("_") ; le tokenizer de sklearn ne les aime pas 
    csv['Terme formatté'] = csv["Terme"].apply(lambda x: x.replace("'", "_").replace("(", "_").replace(")", "_").replace("-", "_"))

csv

Unnamed: 0,Terme,Structure syntaxique,Fréquence (TF),LLR,p-value,Fréquence documentaire (DF),isMeSHTerm,isTaxoTerm,Terme formatté
0,services,NOM,1071,-,-,94,False,False,services
1,domicile,NOM,440,-,-,41,False,False,domicile
2,santé,NOM,405,-,-,72,False,False,santé
3,déficience,NOM,393,-,-,81,False,True,déficience
4,pmatcom,NOM,350,-,-,32,False,False,pmatcom
...,...,...,...,...,...,...,...,...,...
605,composant,NOM,16,-,-,27,False,False,composant
606,déploiement,NOM,16,-,-,14,False,False,déploiement
607,gestionnaire secondaire daniel garneau,NOM ADJ ADJ NOM,16,272.9682413133986,2.5586261803234317e-61,16,False,False,gestionnaire secondaire daniel garneau
608,types d'intervention,NOM PRP NOM,16,187.82116953292174,9.50466374208189e-43,6,False,False,types d_intervention


In [39]:
vocabulaire = [t.lower() for t in list(csv['Terme formatté'])]

In [40]:
print('On a un vocabulaire de {} formes.'.format(len(vocabulaire)))

On a un vocabulaire de 610 formes.


In [41]:
vocabulaire

['services',
 'domicile',
 'santé',
 'déficience',
 'pmatcom',
 'tsa',
 'soutien',
 'accès',
 'intervention',
 'services sociaux',
 'sce',
 'trouble',
 'autisme',
 'projet',
 'spectre de l_autisme',
 'spectre',
 'trouble du spectre de l_autisme',
 'trouble du spectre',
 'usager',
 'nombre',
 'besoins',
 'aide',
 'indicateur',
 'données',
 'programme',
 'usagers',
 'communication',
 'ramq',
 'famille',
 'année',
 'type',
 'profil',
 'intellectuelle',
 'système',
 'contrôle',
 'déficience physique',
 'soutien à domicile',
 'msss',
 'évaluation',
 'clsc',
 'aides',
 'équipements',
 'québec',
 'cours',
 'cadre',
 'déficience intellectuelle',
 'adultes',
 'commande',
 'durée',
 'santé physique',
 'forum',
 'pilote',
 'projet pilote',
 'appareil',
 'dispositifs',
 'environnement',
 'service',
 'bluetooth',
 'appareils',
 'gestion',
 'indicateurs',
 'enfants',
 'utilisation',
 'programmes',
 'ministère',
 'alimentation',
 'maladies',
 'établissement',
 'maladies chroniques',
 'calcul',
 'attr

### **Lire le corpus**

In [42]:
import os, shutil, re
from pathlib import Path
from os import path
from pandas import *

base_path = '../03-corpus/2-data/1-fr/'
if tag:
    base_path = path.join(base_path, acteur, acteur + '_' + tag + '.csv')

else:
    base_path = path.join(base_path, acteur +  '.csv')
        
with open(base_path, "r", encoding = "UTF-8") as f:
    data = read_csv(base_path, sep=',')
    text = data['text'].tolist()

In [43]:
text = text[:round(len(text))]

nb_docs = len(text)

print("On a donc un corpus de {} documents.".format(nb_docs))

On a donc un corpus de 112 documents.


### **Nettoyage**

In [44]:
corpus = [str(t).strip('\n').lower().replace('’', '\'') for t in text]
    
punct = '[!#$%&•►*+,;<=>?@[\]^_{|}~©«»—“”–—]'
spaces = '\s+'
postals = '([a-zA-Z]+\d+|\d+[a-zA-Z]+)+'
phones = '\d{3}\s\d{3}-\d{4}' #très simple (trop)

corpus = [str(t).strip('\n').lower().replace('’', '\'') for t in corpus]
corpus = [re.sub(spaces, ' ', t) for t in corpus]
corpus = [re.sub(punct, ' ', t).replace("' ", "'" ) for t in corpus]
corpus = [re.sub(phones, ' STOP ', t) for t in corpus]
corpus = [re.sub(postals, ' STOP ', t) for t in corpus]
corpus = [t.replace("  ", " " ) for t in corpus]
corpus = [t.replace("'", "_").replace("(", "_").replace(")", "_").replace("-", "_") for t in corpus]


On va commencer par utiliser le CountVectorizer pour valider que l'implémentation de sklearn arrive bien au même compte que nous 

In [45]:
from sklearn.feature_extraction.text import CountVectorizer

vectorizer = CountVectorizer(vocabulary=vocabulaire, ngram_range=(1,10), token_pattern='\w+')
X = vectorizer.fit_transform(corpus)
vectorizer.get_feature_names_out()

features_names = vectorizer.get_feature_names_out()
corpus_index = [corpus.index(n) for n in corpus]

import pandas as pd
df = pd.DataFrame(X.T.todense(), index=features_names, columns=corpus_index).transpose()

In [46]:
csv['TF (sklearn)'] = 0
for t in vocabulaire:
    csv.loc[csv['Terme formatté'] == t, 'TF (sklearn)'] = df[t].sum()


csv['DF (sklearn)'] = 0
for t in vocabulaire:
    freqdoc = len(df[df[t] != 0])
    csv.loc[csv['Terme formatté'] == t, 'DF (sklearn)'] = freqdoc
csv

Unnamed: 0,Terme,Structure syntaxique,Fréquence (TF),LLR,p-value,Fréquence documentaire (DF),isMeSHTerm,isTaxoTerm,Terme formatté,TF (sklearn),DF (sklearn)
0,services,NOM,1071,-,-,94,False,False,services,1080,92
1,domicile,NOM,440,-,-,41,False,False,domicile,424,41
2,santé,NOM,405,-,-,72,False,False,santé,401,72
3,déficience,NOM,393,-,-,81,False,True,déficience,372,81
4,pmatcom,NOM,350,-,-,32,False,False,pmatcom,317,30
...,...,...,...,...,...,...,...,...,...,...,...
605,composant,NOM,16,-,-,27,False,False,composant,14,10
606,déploiement,NOM,16,-,-,14,False,False,déploiement,16,14
607,gestionnaire secondaire daniel garneau,NOM ADJ ADJ NOM,16,272.9682413133986,2.5586261803234317e-61,16,False,False,gestionnaire secondaire daniel garneau,16,16
608,types d'intervention,NOM PRP NOM,16,187.82116953292174,9.50466374208189e-43,6,False,False,types d_intervention,16,6


In [47]:
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk import RegexpTokenizer

#toknizer = RegexpTokenizer()

# max_df : ignore words that appear in 85% of documents, 
# min df:  ignore words that appear in less than 1% of documents 
# vocabulary = vocabulaire

# Sans utiliser le vocabulaire
# tfidf = TfidfVectorizer(min_df=0.1, stop_words=None, ngram_range=(2,4), max_df=0.85, use_idf=True)

# vocabulary = vocabulaire
tfidf_vectorizer = TfidfVectorizer(vocabulary=vocabulaire, ngram_range=(1,10), lowercase=False, token_pattern='\w+')
tfidf = tfidf_vectorizer.fit_transform(corpus)

In [48]:
features_names = tfidf_vectorizer.get_feature_names_out()
corpus_index = [corpus.index(n) for n in corpus]

import pandas as pd
df = pd.DataFrame(tfidf.T.todense(), index=features_names, columns=corpus_index).transpose()

In [49]:
terms_tfidf = {term: df[term].max() for term in df}
zeros = {term : value for term, value in terms_tfidf.items() if terms_tfidf[term] == 0}

In [50]:
zeros

{'alimentation': 0.0,
 'maladies': 0.0,
 'maladies chroniques': 0.0,
 'autisme _tsa_': 0.0,
 'élément': 0.0,
 'autonomie des personnes âgées': 0.0,
 'attribution du pmatcom': 0.0,
 'alimentation en données': 0.0,
 'accompagnement': 0.0,
 'heures de service de soutien à domicile': 0.0,
 'accès à un système': 0.0,
 'heures de service de soutien': 0.0,
 'intervenant': 0.0,
 'accès à un système de contrôle de l_environnement': 0.0,
 'accès à un système de contrôle': 0.0,
 'amélioration': 0.0,
 'assurance maladie': 0.0,
 'assurance maladie du québec': 0.0,
 'alimentation électrique': 0.0,
 'autisme gestionnaire principal': 0.0,
 'autisme gestionnaire': 0.0,
 'avenir': 0.0,
 'admissibilité': 0.0,
 'autisme _di_tsa_': 0.0,
 'résidence privée pour personnes âgées': 0.0,
 'alimentation en données i_clsc': 0.0}

In [51]:
len(zeros) / len(vocabulaire) *100

4.2622950819672125

In [52]:
terms_tfidf

{'services': 0.486798735552427,
 'domicile': 0.5423351999916936,
 'santé': 0.350816682261383,
 'déficience': 0.47519445591044845,
 'pmatcom': 0.6175766558373629,
 'tsa': 0.31615062191467064,
 'soutien': 0.3196192993608795,
 'accès': 0.06911579851836507,
 'intervention': 0.2678193547128193,
 'services sociaux': 0.23444672002822486,
 'sce': 0.39120151675435616,
 'trouble': 0.3696061519248175,
 'autisme': 0.08425447664999924,
 'projet': 0.44094524304619453,
 'spectre de l_autisme': 0.2449362659104815,
 'spectre': 0.2449362659104815,
 'trouble du spectre de l_autisme': 0.2449362659104815,
 'trouble du spectre': 0.2449362659104815,
 'usager': 0.09408291448125927,
 'nombre': 0.19033132860658344,
 'besoins': 0.2642209341821946,
 'aide': 0.18464042889862048,
 'indicateur': 0.13123498447473264,
 'données': 0.22450396147132662,
 'programme': 0.45558401831393014,
 'usagers': 0.18624188515210338,
 'communication': 0.29531164761264295,
 'ramq': 0.2309240462235741,
 'famille': 0.24261495912632208,
 

In [53]:
terms_weighted = DataFrame(terms_tfidf.items(), columns=['Terme formatté', 'TF-IDF'])

In [54]:
terms_weighted.sort_values(["TF-IDF"], 
                    axis=0,
                    ascending=[False], 
                    inplace=True)

terms_weighted = terms_weighted.drop_duplicates(keep='first')
terms_weighted

Unnamed: 0,Terme formatté,TF-IDF
120,guide,0.647128
4,pmatcom,0.617577
236,cliniques,0.614804
305,dépistage,0.609639
54,dispositifs,0.580162
...,...,...
398,intervenant,0.000000
393,heures de service de soutien,0.000000
577,admissibilité,0.000000
182,autonomie des personnes âgées,0.000000


In [55]:
terms_weighted = pd.merge(csv, terms_weighted, on='Terme formatté').drop_duplicates(
  subset = ['Terme', 'Fréquence (TF)'],
  keep = 'first').reset_index(drop = True)

terms_weighted

Unnamed: 0,Terme,Structure syntaxique,Fréquence (TF),LLR,p-value,Fréquence documentaire (DF),isMeSHTerm,isTaxoTerm,Terme formatté,TF (sklearn),DF (sklearn),TF-IDF
0,services,NOM,1071,-,-,94,False,False,services,1080,92,0.486799
1,domicile,NOM,440,-,-,41,False,False,domicile,424,41,0.542335
2,santé,NOM,405,-,-,72,False,False,santé,401,72,0.350817
3,déficience,NOM,393,-,-,81,False,True,déficience,372,81,0.475194
4,pmatcom,NOM,350,-,-,32,False,False,pmatcom,317,30,0.617577
...,...,...,...,...,...,...,...,...,...,...,...,...
605,composant,NOM,16,-,-,27,False,False,composant,14,10,0.136940
606,déploiement,NOM,16,-,-,14,False,False,déploiement,16,14,0.079037
607,gestionnaire secondaire daniel garneau,NOM ADJ ADJ NOM,16,272.9682413133986,2.5586261803234317e-61,16,False,False,gestionnaire secondaire daniel garneau,16,16,0.045687
608,types d'intervention,NOM PRP NOM,16,187.82116953292174,9.50466374208189e-43,6,False,False,types d_intervention,16,6,0.092086


## **OKapi BM25**
https://hal.archives-ouvertes.fr/hal-00760158 

In [56]:
from rank_bm25 import BM25Okapi

In [57]:
bm25 = BM25Okapi([t.split() for t in corpus])

In [58]:
vocabulaire

['services',
 'domicile',
 'santé',
 'déficience',
 'pmatcom',
 'tsa',
 'soutien',
 'accès',
 'intervention',
 'services sociaux',
 'sce',
 'trouble',
 'autisme',
 'projet',
 'spectre de l_autisme',
 'spectre',
 'trouble du spectre de l_autisme',
 'trouble du spectre',
 'usager',
 'nombre',
 'besoins',
 'aide',
 'indicateur',
 'données',
 'programme',
 'usagers',
 'communication',
 'ramq',
 'famille',
 'année',
 'type',
 'profil',
 'intellectuelle',
 'système',
 'contrôle',
 'déficience physique',
 'soutien à domicile',
 'msss',
 'évaluation',
 'clsc',
 'aides',
 'équipements',
 'québec',
 'cours',
 'cadre',
 'déficience intellectuelle',
 'adultes',
 'commande',
 'durée',
 'santé physique',
 'forum',
 'pilote',
 'projet pilote',
 'appareil',
 'dispositifs',
 'environnement',
 'service',
 'bluetooth',
 'appareils',
 'gestion',
 'indicateurs',
 'enfants',
 'utilisation',
 'programmes',
 'ministère',
 'alimentation',
 'maladies',
 'établissement',
 'maladies chroniques',
 'calcul',
 'attr

In [59]:
#tokenizer = RegexpTokenizer(r"\w\'|\w+")
## Revoir ça
tokenized_queries = [t.split() for t in set(vocabulaire)]

features_names = [t for t in set(vocabulaire)]
corpus_index = [corpus.index(n) for n in corpus]

tab = [bm25.get_scores(query) for query in tokenized_queries]
df = pd.DataFrame(tab, index=features_names, columns=corpus_index).transpose()

In [60]:
print(len(tokenized_queries))

tokenized_queries

610


[['transfert', 'des', 'connaissances'],
 ['garde'],
 ['android'],
 ['santé', 'et', 'services'],
 ['suppléance', 'à', 'la', 'communication'],
 ['clsc', 'responsable'],
 ['assignation'],
 ['boîtier'],
 ['bilan', 'msss'],
 ['statut', 'de', 'l_indicateur'],
 ['services', 'intégrés'],
 ['communication', 'orale'],
 ['assurance', 'maladie'],
 ['couple'],
 ['experts'],
 ['bluetooth'],
 ['réseau', 'de', 'services'],
 ['services', 'communautaires'],
 ['inhalothérapie'],
 ['fauteuils', 'roulants', 'à', 'propulsion'],
 ['demande', 'de', 'services'],
 ['assurance'],
 ['services', 'professionnels'],
 ['intervenant'],
 ['guide', 'd_attribution', 'du', 'pmatcom'],
 ['services', 'de', 'soutien'],
 ['porte'],
 ['usagers'],
 ['cours', 'de', 'la', 'période'],
 ['base'],
 ['autisme', '_tsa_'],
 ['ergothérapie'],
 ['contrôle'],
 ['chèque'],
 ['pilote'],
 ['modes'],
 ['infirmiers'],
 ['centre'],
 ['évaluation', 'à', 'jour'],
 ['réseau', 'de', 'la', 'santé', 'et', 'des', 'services'],
 ['frais'],
 ['aides', 't

In [61]:
df

Unnamed: 0,transfert des connaissances,garde,android,santé et services,suppléance à la communication,clsc responsable,assignation,boîtier,bilan msss,statut de l_indicateur,...,soins infirmiers,services offerts,réseau,utilisateurs,appel,familles,résultats,résidence privée pour personnes âgées,partie,services sociaux conception
0,1.731188,0.417647,0.000000,4.576606,3.558740,0.378822,2.137819,0.000000,0.515781,3.580367,...,0.000000,2.155848,0.000000,0.000000,0.0000,0.470054,0.616583,3.111465,0.000000,2.968945
1,1.794433,0.399912,0.000000,4.541450,3.613131,0.362735,0.000000,0.000000,0.778570,3.675673,...,0.000000,1.484518,0.000000,0.000000,0.0000,0.000000,0.840083,1.622433,0.000000,3.364545
2,1.790139,0.000000,0.000000,5.084851,3.604857,2.268316,0.000000,0.000000,0.000000,3.574532,...,4.757835,2.153367,0.000000,0.000000,0.0000,0.974129,0.429689,11.021032,0.000000,3.355448
3,1.807064,0.427114,0.000000,5.003811,3.393907,0.540635,0.000000,0.000000,0.831527,4.055542,...,0.000000,1.755573,0.000000,0.000000,0.0000,0.000000,0.630558,2.858098,0.000000,3.907600
4,1.833185,0.371919,0.000000,4.972838,3.540804,2.331024,0.000000,0.000000,0.724071,3.255012,...,0.000000,2.358110,0.000000,0.000000,0.0000,0.659878,0.549073,6.549693,0.000000,4.027228
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58,1.758198,0.316819,0.000000,3.037784,4.054078,0.434974,0.000000,1.905920,0.933621,1.863440,...,0.000000,0.616799,0.000000,0.000000,0.0000,0.000000,0.000000,1.695793,1.477208,1.233598
59,1.804990,0.363121,0.000000,3.521097,5.043281,0.000000,0.000000,0.000000,1.429293,1.865432,...,0.000000,1.033343,0.000000,0.000000,0.0000,0.000000,0.000000,1.722661,0.000000,1.740285
60,1.749294,0.000000,0.000000,2.694497,4.841282,1.313210,0.000000,1.767272,1.471726,1.874913,...,0.000000,0.865704,0.598366,0.000000,2.3164,0.000000,0.423796,2.308161,0.000000,0.865704
61,3.009585,0.000000,1.970767,1.815620,6.247402,0.593785,0.000000,2.601780,0.953897,1.842988,...,0.000000,0.000000,0.000000,1.982656,0.0000,0.000000,0.000000,1.774038,0.000000,0.000000


In [62]:
#df.to_csv(base_path + titre + '_matrice-OkapiBM25.csv') # Si on veut avoir la matrice (mais le fichier peut être très volumineux)

In [63]:
terms_okapi = {term: df[term].max() for term in df}

In [64]:
terms_okapi

{'transfert des connaissances': 8.323777940638806,
 'garde': 0.7177100508994146,
 'android': 3.478458392281331,
 'santé et services': 5.256753615484904,
 'suppléance à la communication': 8.498715984901073,
 'clsc responsable': 2.5429659504466375,
 'assignation': 3.381703807414674,
 'boîtier': 3.1539234995775134,
 'bilan msss': 3.371837561820424,
 'statut de l_indicateur': 4.355964437205828,
 'services intégrés': 4.231037195822164,
 'communication orale': 5.23534331506052,
 'assurance maladie': 5.210778868029406,
 'couple': 4.53168815881725,
 'experts': 2.6182771092048025,
 'bluetooth': 2.407604520604217,
 'réseau de services': 5.3942989397061485,
 'services communautaires': 3.286243329187581,
 'inhalothérapie': 2.098095955506028,
 'fauteuils roulants à propulsion': 8.357554397600824,
 'demande de services': 6.259161779688881,
 'assurance': 5.210778868029406,
 'services professionnels': 4.579194109356917,
 'intervenant': 0.0,
 'guide d_attribution du pmatcom': 8.535725272309271,
 'servi

In [65]:
tab = DataFrame(terms_okapi.items(), columns=['Terme formatté', 'OkapiBM25'])
tab

Unnamed: 0,Terme formatté,OkapiBM25
0,transfert des connaissances,8.323778
1,garde,0.717710
2,android,3.478458
3,santé et services,5.256754
4,suppléance à la communication,8.498716
...,...,...
605,familles,1.564441
606,résultats,0.881212
607,résidence privée pour personnes âgées,11.021032
608,partie,3.206483


In [66]:
tab.sort_values(["OkapiBM25"], 
                    axis=0,
                    ascending=[False], 
                    inplace=True)

tab

Unnamed: 0,Terme formatté,OkapiBM25
563,services psychosociaux pour les jeunes en diff...,17.975347
252,contrôle de l_environnement et à la technologi...,14.940977
58,forum québécois sur le trouble du spectre de l...,14.674800
421,heures de service de soutien à domicile,14.149364
214,réseau de la santé et des services sociaux,14.056642
...,...,...
277,admissibilité,0.000000
550,environnement,0.000000
23,intervenant,0.000000
293,amélioration,0.000000


In [67]:
terms_weighted

Unnamed: 0,Terme,Structure syntaxique,Fréquence (TF),LLR,p-value,Fréquence documentaire (DF),isMeSHTerm,isTaxoTerm,Terme formatté,TF (sklearn),DF (sklearn),TF-IDF
0,services,NOM,1071,-,-,94,False,False,services,1080,92,0.486799
1,domicile,NOM,440,-,-,41,False,False,domicile,424,41,0.542335
2,santé,NOM,405,-,-,72,False,False,santé,401,72,0.350817
3,déficience,NOM,393,-,-,81,False,True,déficience,372,81,0.475194
4,pmatcom,NOM,350,-,-,32,False,False,pmatcom,317,30,0.617577
...,...,...,...,...,...,...,...,...,...,...,...,...
605,composant,NOM,16,-,-,27,False,False,composant,14,10,0.136940
606,déploiement,NOM,16,-,-,14,False,False,déploiement,16,14,0.079037
607,gestionnaire secondaire daniel garneau,NOM ADJ ADJ NOM,16,272.9682413133986,2.5586261803234317e-61,16,False,False,gestionnaire secondaire daniel garneau,16,16,0.045687
608,types d'intervention,NOM PRP NOM,16,187.82116953292174,9.50466374208189e-43,6,False,False,types d_intervention,16,6,0.092086


In [68]:
tab = pd.merge(terms_weighted, tab, on="Terme formatté")
tab.sort_values(["OkapiBM25"], 
                    axis=0,
                    ascending=[False], 
                    inplace=True)

In [69]:
base_path = '../05-transformation/'

In [70]:
if tag:
    file_path = base_path + acteur + '_' + tag + '_weighting_OKapiBM25.csv'

else: 
    file_path = base_path + acteur  + '_weighting_OKapiBM25.csv'
tab.to_csv(file_path)

In [71]:
tab

Unnamed: 0,Terme,Structure syntaxique,Fréquence (TF),LLR,p-value,Fréquence documentaire (DF),isMeSHTerm,isTaxoTerm,Terme formatté,TF (sklearn),DF (sklearn),TF-IDF,OkapiBM25
390,services psychosociaux pour les jeunes en diff...,NOM ADJ PRP DET:ART NOM PRP NOM,24,408.6314092088783,7.278380157027306e-91,4,False,False,services psychosociaux pour les jeunes en diff...,24,4,0.200560,17.975347
380,contrôle de l'environnement et à la technologi...,NOM PRP DET:ART NOM KON PRP DET:ART NOM ADJ,24,330.6300707157602,7.0074096349940544e-74,24,False,False,contrôle de l_environnement et à la technologi...,24,24,0.243830,14.940977
154,forum québécois sur le trouble du spectre de l...,NOM ADJ PRP DET:ART NOM PRP:det NOM PRP DET:AR...,53,648.7195797703292,4.242069274655342e-143,34,False,False,forum québécois sur le trouble du spectre de l...,53,34,0.234287,14.674800
377,heures de service de soutien à domicile,NOM PRP NOM PRP NOM PRP NOM,24,263.5405998774374,2.9024822958443215e-59,6,False,False,heures de service de soutien à domicile,0,0,0.000000,14.149364
283,réseau de la santé et des services sociaux,NOM PRP DET:ART NOM KON PRP:det NOM ADJ,33,389.11574663313723,1.2893797391311264e-86,19,False,False,réseau de la santé et des services sociaux,33,19,0.233344,14.056642
...,...,...,...,...,...,...,...,...,...,...,...,...,...
373,accompagnement,NOM,24,-,-,15,False,False,accompagnement,0,0,0.000000,0.000000
65,alimentation,NOM,107,-,-,49,False,False,alimentation,0,0,0.000000,0.000000
19,nombre,NOM,218,-,-,62,False,False,nombre,212,56,0.190331,0.000000
55,environnement,NOM,121,-,-,34,False,False,environnement,2,2,0.038328,0.000000
