<a href="https://colab.research.google.com/github/MouadFiali/Health-Recommendation-System/blob/main/Notebooks/collaborative_filtering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# <center>Patient - Disease Recommendation system</center>

In [1]:
# import necessary libraries
import pandas as pd
import dask.dataframe as dd
import numpy as np
import ast
from sklearn.preprocessing import MultiLabelBinarizer

## Loading dataset using Dask

In [2]:
from google.colab import drive
drive.mount('/content/gdrive',force_remount=True)

Mounted at /content/gdrive


In [None]:
# load the dataset
# json files
release_evidences_df = pd.read_json('/content/gdrive/MyDrive/Colab/release_evidences.json')
release_conditions_df = pd.read_json('/content/gdrive/MyDrive/Colab/release_conditions.json')
# csv files (the files we need to train the models)
training_df = dd.read_csv('/content/gdrive/MyDrive/Colab/release_train_patients.csv')
testing_df = dd.read_csv('/content/gdrive/MyDrive/Colab/release_test_patients.csv')
validate_df = dd.read_csv('/content/gdrive/MyDrive/Colab/release_validate_patients.csv')

In [None]:
# save the dataset (training, testing and validation) into one file
dataset_df = dd.concat([training_df, testing_df, validate_df])
dataset_df.to_csv('/content/gdrive/MyDrive/Colab/dataset.csv', single_file=True, index=False)

['/content/gdrive/MyDrive/Colab/dataset.csv']

## Exploring data

In [None]:
# Let's see the shape of the dataframes
print('release_evidences_df shape: ', release_evidences_df.shape)
print('release_conditions_df shape: ', release_conditions_df.shape)
print(f'training_df shape: ({training_df.shape[0].compute()}, {training_df.shape[1]})')
print(f'testing_df shape: ({testing_df.shape[0].compute()}, {testing_df.shape[1]})')
print(f'validate_df shape: ({validate_df.shape[0].compute()}, {validate_df.shape[1]})')

release_evidences_df shape:  (9, 223)
release_conditions_df shape:  (7, 49)
training_df shape: (1025602, 6)
testing_df shape: (134529, 6)
validate_df shape: (132448, 6)


In [None]:
training_df.head()

Unnamed: 0,AGE,DIFFERENTIAL_DIAGNOSIS,SEX,PATHOLOGY,EVIDENCES,INITIAL_EVIDENCE
0,18,"[['Bronchite', 0.19171203430383882], ['Pneumon...",M,IVRS ou virémie,"['crowd', 'diaph', 'douleurxx', 'douleurxx_car...",fievre
1,21,"[['VIH (Primo-infection)', 0.5189500564407601]...",M,VIH (Primo-infection),"['adp_dlr', 'atcd_its', 'diaph', 'diarrhee', '...",diaph
2,19,"[['Bronchite', 0.11278064619119596], ['Pneumon...",F,Pneumonie,"['douleurxx', 'douleurxx_carac_@_un_coup_de_co...",expecto
3,34,"[['IVRS ou virémie', 0.23859396799565236], ['C...",F,IVRS ou virémie,"['crowd', 'douleurxx', 'douleurxx_carac_@_une_...",douleurxx
4,36,"[['IVRS ou virémie', 0.23677812769175735], ['P...",M,IVRS ou virémie,"['dayc', 'diaph', 'douleurxx', 'douleurxx_cara...",toux


In [None]:
testing_df.head()

Unnamed: 0,AGE,DIFFERENTIAL_DIAGNOSIS,SEX,PATHOLOGY,EVIDENCES,INITIAL_EVIDENCE
0,49,"[['Bronchite', 0.20230062181160519], ['RGO', 0...",F,RGO,"['douleurxx', 'douleurxx_carac_@_lancinante_/_...",toux
1,2,"[['Asthme exacerbé ou bronchospasme', 0.080220...",M,Bronchite,"['douleurxx', 'douleurxx_carac_@_une_brûlure_o...",douleurxx
2,49,"[['Réaction dystonique aïgue', 0.6267050848165...",M,Réaction dystonique aïgue,"['antipsy_récent', 'laryngospasme', 'nau_psy_r...",laryngospasme
3,64,"[['Bronchite', 0.2748608320637265], ['Laryngit...",M,Laryngite aigue,"['crowd', 'dayc', 'douleurxx', 'douleurxx_cara...",douleurxx
4,70,"[['IVRS ou virémie', 0.21257615919851483], ['P...",F,IVRS ou virémie,"['contact', 'diaph', 'douleurxx', 'douleurxx_c...",toux


In [None]:
validate_df.head()

Unnamed: 0,AGE,DIFFERENTIAL_DIAGNOSIS,SEX,PATHOLOGY,EVIDENCES,INITIAL_EVIDENCE
0,55,"[['Anémie', 0.25071110167158567], ['Fibrillati...",F,Anémie,"['Mauv_aliment', 'atcd_anem', 'atcd_fam_anem',...",pale
1,10,"[['Syndrome de Guillain-Barré', 0.135558991316...",F,Attaque de panique,"['anxiete_s', 'atcdpsyfam', 'diaph', 'douleurx...",psy_depers
2,68,[['Possible influenza ou syndrome virémique ty...,F,Possible influenza ou syndrome virémique typique,"['diaph', 'douleurxx', 'douleurxx_carac_@_une_...",douleurxx
3,13,"[['Anémie', 0.18697604010451876], ['Fibrillati...",M,Anémie,"['Mauv_aliment', 'atcd_anem', 'atcd_fam_anem',...",douleurxx
4,48,"[['Syndrome de Boerhaave', 1.0]]",M,Syndrome de Boerhaave,"['douleurxx', 'douleurxx_carac_@_déchirante', ...",douleurxx


In [None]:
# Concat all the dfs into one df to make the processing easier
df = dd.concat([validate_df, training_df, testing_df])

In [None]:
df.to_csv('/content/gdrive/MyDrive/Colab/dataset.csv', single_file=True, index=False)

## Pre-processing

In [None]:
dataset_df = pd.read_csv('/content/gdrive/MyDrive/Colab/dataset.csv', chunksize=129257) # data size / 10

In [None]:
# first let's get all the possible evidences
# read the csv using dask for faster performance
temp_df = dd.read_csv('/content/gdrive/MyDrive/Colab/dataset.csv')

# get the evidences
all_evidences = temp_df['EVIDENCES'].apply(ast.literal_eval, meta=('object'))

unique_values = set()
for sublist in all_evidences:
    unique_values.update(sublist)

temp_df = None



In [None]:
# Create a MultiLabelBinarizer object
mlb = MultiLabelBinarizer(classes=list(unique_values))

In [None]:
chunk_number = 1

for chunk in dataset_df:
    print('Processing chunk', chunk_number, '----------------------------------')
    
    # Create a MultiLabelBinarizer object
    mlb = MultiLabelBinarizer(classes=list(unique_values))

    # Convert the evidence column into a list of lists
    evidence_lists = chunk['EVIDENCES'].apply(ast.literal_eval)

    # Apply one-hot encoding to the evidence lists
    print('Creating encoded evidences dataframe using one-hot...')
    evidence_encoded = pd.DataFrame(mlb.fit_transform(evidence_lists), columns=mlb.classes_)
    print('Encoded evidences dataframe created.')

    print('size of chunk: ', chunk.shape)
    print('size of encoded: ', evidence_encoded.shape)

    # drop indexes to avoid null values
    chunk.reset_index(drop=True, inplace=True)
    evidence_encoded.reset_index(drop=True, inplace=True)

    # Concatenate the encoded evidence columns with the chunk
    processed_chunk = pd.concat([chunk, evidence_encoded], axis=1)
    print('Chunk processed as pandas dataframe')

    processed_chunk.to_csv('/content/gdrive/MyDrive/Colab/Dataset chunks/chunk_'+ str(chunk_number) + '.csv', index=False)
    print('Chunk added to csv.')
    chunk_number+=1

Processing chunk 1 ----------------------------------
Creating encoded evidences dataframe using one-hot...
Encoded evidences dataframe created.
size of chunk:  (129257, 6)
size of encoded:  (129257, 516)
Chunk processed as pandas dataframe
Chunk added to csv.
Processing chunk 2 ----------------------------------
Creating encoded evidences dataframe using one-hot...
Encoded evidences dataframe created.
size of chunk:  (129257, 6)
size of encoded:  (129257, 516)
Chunk processed as pandas dataframe
Chunk added to csv.
Processing chunk 3 ----------------------------------
Creating encoded evidences dataframe using one-hot...
Encoded evidences dataframe created.
size of chunk:  (129257, 6)
size of encoded:  (129257, 516)
Chunk processed as pandas dataframe
Chunk added to csv.
Processing chunk 4 ----------------------------------
Creating encoded evidences dataframe using one-hot...
Encoded evidences dataframe created.
size of chunk:  (129257, 6)
size of encoded:  (129257, 516)
Chunk proces

In [47]:
ddf = dd.read_csv('/content/gdrive/MyDrive/Colab/Dataset chunks/*.csv')

In [None]:
ddf.isnull().values.any().compute()

False

In [None]:
ddf.shape[0].compute(), ddf.shape[1]

(1292579, 522)

In [None]:
ddf.head()

Unnamed: 0,AGE,DIFFERENTIAL_DIAGNOSIS,SEX,PATHOLOGY,EVIDENCES,INITIAL_EVIDENCE,z80.0,trav1_@_AmerN,protu_langue,lesions_peau_elevee_@_7,...,lesions_peau_prurit_@_2,oedeme_endroitducorps_@_coté_lateral_du_pied_G_,douleurxx_endroitducorps_@_aisselle_D_,ains,douleurxx_endroitducorps_@_amygdale_D_,cont_coq,oedeme,boire_ped,douleurxx_endroitducorps_@_face_dorsale_du_poignet_D_,oedeme_endroitducorps_@_plante_du_pied_G_
0,18,"[['Bronchite', 0.19171203430383882], ['Pneumon...",M,IVRS ou virémie,"['crowd', 'diaph', 'douleurxx', 'douleurxx_car...",fievre,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,21,"[['VIH (Primo-infection)', 0.5189500564407601]...",M,VIH (Primo-infection),"['adp_dlr', 'atcd_its', 'diaph', 'diarrhee', '...",diaph,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,19,"[['Bronchite', 0.11278064619119596], ['Pneumon...",F,Pneumonie,"['douleurxx', 'douleurxx_carac_@_un_coup_de_co...",expecto,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,34,"[['IVRS ou virémie', 0.23859396799565236], ['C...",F,IVRS ou virémie,"['crowd', 'douleurxx', 'douleurxx_carac_@_une_...",douleurxx,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,36,"[['IVRS ou virémie', 0.23677812769175735], ['P...",M,IVRS ou virémie,"['dayc', 'diaph', 'douleurxx', 'douleurxx_cara...",toux,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
ddf['diaph'].compute().iloc[4]

1

In [None]:
# check the shape of the combined data
print(f'Total number of patients: {training_df.shape[0].compute() + testing_df.shape[0].compute() + validate_df.shape[0].compute()}')
print(f'combined df shape: ({ddf.shape[0].compute()}, {ddf.shape[1]})')

Total number of patients: 1292579
combined df shape: (1292579, 6)


In [4]:
# get the data size
data_size = ddf.shape[0].compute()

In [None]:
ddf.repartition(partition_size="100MB")

Unnamed: 0_level_0,AGE,DIFFERENTIAL_DIAGNOSIS,SEX,PATHOLOGY,EVIDENCES,INITIAL_EVIDENCE,z80.0,trav1_@_AmerN,protu_langue,lesions_peau_elevee_@_7,z77.22,lesions_peau_elevee_@_6,perinatality,f17.210,douleurxx_endroitducorps_@_orteil__3__G_,j33,crach_sg,douleurxx_endroitducorps_@_hanche_G_,pertes_vag,douleurxx_irrad_@_arrière_de_tête,trav1_@_AsieSSE,douleurxx_endroitducorps_@_pouce_D_,lesions_peau_endroitducorps_@_vagin,rds_anorexie,douleurxx_endroitducorps_@_petit_orteil__4__G_,psy_depers,douleurxx_endroitducorps_@_oreille_G_,B34.9,douleurxx_endroitducorps_@_côté_du_thorax_G_,histfammigraine,douleurxx_endroitducorps_@_sein_G_,douleurxx_endroitducorps_@_sous_la_machoire,melena,vaccination,dysarthrie,douleurxx_endroitducorps_@_pubis,sialorhee,douleurxx_soudain_@_0,douleurxx_endroitducorps_@_côté_du_cou_G_,I30,douleurxx_endroitducorps_@_creux_poplité_D_,douleurxx_endroitducorps_@_plante_du_pied_D_,rural,lesions_peau_endroitducorps_@_bouche,scombroide,rx_vasodil,tmine,faiblesse_msmi,lesions_peau_endroitducorps_@_scrotum,lesions_peau_plusqu1cm_@_N,contact,douleurxx_endroitducorps_@_tibia_G_,wheez,lesions_peau_couleur_@_jaune,z84.89,allait_prol,perte_poids,douleurxx_intens_@_4,drogues_stimul,douleurxx_carac_@_sensible,douleurxx_endroitducorps_@_biceps_G_,lesions_peau_intens_@_5,move,i25.1,diplopie,lesions_peau_endroitducorps_@_cartilage_thyroidien,oedeme_endroitducorps_@_joue_G_,dayc,J81,regard_dévié,lesions_peau_intens_@_6,dyspn,douleurxx_carac_@_pénible,douleurxx_endroitducorps_@_doigt_majeur__G_,posttus_emesis,atcd_fam_anem,lesions_peau_prurit_@_4,douleurxx_endroitducorps_@_mollet_D_,f41.9,fievre,douleurxx_carac_@_une_crampe,j34.2,lesions_peau_endroitducorps_@_biceps_G_,larmes,ap_fk,douleurxx_carac_@_une_brûlure_ou_chaleur,expecto,dysphagie,ptose,douleurxx_endroitducorps_@_triceps_D_,lesions_peau_endroitducorps_@_ventre,faiblesse faciale,oedeme_endroitducorps_@_mollet_D_,douleurxx_precis_@_0,lesions_peau_prurit_@_10,i50,rectorragie,oedeme_endroitducorps_@_face_dorsale_du_pied_D_,douleurxx_irrad_@_nulle_part,volume_parole,oedeme_endroitducorps_@_tibia_G_,fatig_mod,lesions_peau_endroitducorps_@_fosse_iliaque_D_,douleurxx_endroitducorps_@_épaule_G_,nau_psy_recent,palpit,douleurxx_irrad_@_front,douleurxx_endroitducorps_@_cuisse_G_,douleurxx_endroitducorps_@_doigt_auriculaire__G_,douleurxx_intens_@_2,cortico,douleurxx_endroitducorps_@_cheville_G_,z82.49,lesions_peau_endroitducorps_@_thorax_postérieur_G_,trav1_@_AfriqN,prurit_occ,norvasc,douleurxx_endroitducorps_@_trachée,rhino_clair,apnee,oedeme_endroitducorps_@_coté_lateral_du_pied_D_,douleurxx_soudain_@_7,douleurxx_precis_@_10,drogues_IV,trav1_@_AfriqO,antipsy_récent,douleurxx_intens_@_8,k21,douleurxx_endroitducorps_@_joue_G_,atcdfam_mg,douleurxx_endroitducorps_@_orteil__2__G_,spasmes_msk,k74,douleurxx_endroitducorps_@_face_dorsale_du_pied_G_,vo_sg,douleurxx_endroitducorps_@_doigt_annulaire__D_,douleurxx_endroitducorps_@_omoplate_G_,toux_sev,pdc,drink_energie,douleurxx_carac_@_un_coup_de_couteau,lesions_peau_endroitducorps_@_colonne_dorsale,lesions_peau_endroitducorps_@_cheville_D_,lesions_peau_intens_@_3,laryngospasme,douleurxx_endroitducorps_@_épaule_D_,trav1_@_AfriqSS,sex_vih,douleurxx_endroitducorps_@_orteil__1__G_,tconst,douleurxx_intens_@_0,lesions_peau_endroitducorps_@_colonne_cervicale,lesions_peau_plusqu1cm_@_O,douleurxx_soudain_@_5,oedeme_endroitducorps_@_nez,lesions_peau_endroitducorps_@_fesse_D_,allergie_sev,lesions_peau_endroitducorps_@_cuisse_G_,douleurxx_endroitducorps_@_dessus_de_tête,dyspn_noct,lesions_peau_couleur_@_NA,lesions_peau_endroitducorps_@_cuisse_D_,oedeme_endroitducorps_@_cheville_G_,douleurxx_precis_@_2,lesions_peau_prurit_@_5,lesions_peau_couleur_@_rouge,douleurxx_endroitducorps_@_biceps_D_,douleurxx_carac_@_un_tiraillement,ap_valve,douleurxx_precis_@_9,douleurxx_irrad_@_épaule_G_,douleurxx,e10_e11,douleurxx_endroitducorps_@_omoplate_D_,douleurxx_endroitducorps_@_ischio_G_,itss_risque,lesions_peau_endroitducorps_@_gencive_supérieure,lesions_peau_endroitducorps_@_joue_interne_G_,oedeme_endroitducorps_@_orteil__2__D_,douleurxx_irrad_@_triceps_G_,pls_irreg,douleurxx_endroitducorps_@_genou_D_,lesions_peau_endroitducorps_@_dessus_de_la_langue,c00-d48,lesions_peau_endroitducorps_@_face_dorsale_main_G_,paralysie_visage,cancer_méta,angor_accelere,anxiete_s,douleurxx_irrad_@_sein_G_,lesions_peau_endroitducorps_@_fosse_iliaque_G_,oedeme_endroitducorps_@_cheville_D_,douleurxx_carac_@_écoeurante,lesions_peau_endroitducorps_@_fesse_G_,sahs,douleurxx_intens_@_9,hosptisasm,lesions_peau_endroitducorps_@_épaule_G_,lesions_peau_endroitducorps_@_colonne_lombaire,douleurxx_endroitducorps_@_triceps_G_,J05.0,lesions_peau_prurit_@_9,douleurxx_irrad_@_omoplate_D_,lesions_peau_elevee_@_0,lesions_peau_endroitducorps_@_palais,douleurxx_soudain_@_9,crohn_cu,trav1_@_N,selles_pale,j06.9,lesions_peau_endroitducorps_@_côté_du_cou_D_,douleurxx_endroitducorps_@_oreille_D_,obstipation,lesions_peau_endroitducorps_@_avant-bras_G_,atcd_its,dysp_effort,douleurxx_soudain_@_6,v85.0,douleurxx_soudain_@_4,gain_poids,diaph,msk_dlr,douleurxx_endroitducorps_@_face_dorsale_du_poignet_G_,douleurxx_endroitducorps_@_face_dorsale_du_pied_D_,toux,douleurxx_endroitducorps_@_oeil_D_,ww_bouffe,douleurxx_endroitducorps_@_testicule_G_,douleurxx_endroitducorps_@_arrière_de_tête,atcd_anem,douleurxx_irrad_@_bas_du_thorax,oedeme_endroitducorps_@_joue_D_,etouff,douleurxx_precis_@_5,surg1,lesions_peau_prurit_@_7,urban1,douleurxx_intens_@_10,j44_j42,trav1_@_Asie,douleurxx_endroitducorps_@_pouce_G_,douleurxx_endroitducorps_@_épigastre,hta,douleurxx_endroitducorps_@_creux_poplité_G_,ap_asian,ap_pneumothorax,pyrosis,rds_sg,douleurxx_irrad_@_biceps_D_,lesions_peau_elevee_@_5,douleurxx_endroitducorps_@_doigt_auriculaire__D_,immob1,pale,lesions_peau_endroitducorps_@_lèvre_supérieure_D_,douleurxx_precis_@_3,lesions_peau_endroitducorps_@_sous_la_langue,lesions_peau_endroitducorps_@_arrière_du_cou,m79.7,spasme_trapeze,ménorr,douleurxx_irrad_@_tempe_G_,lesions_peau_endroitducorps_@_petite_lèvre_G_,synd_nephro,angor_repos,fam_atopie,ww_dd,lesions_peau_intens_@_7,lesions_peau_endroitducorps_@_commissure_D_,irc,Mauv_aliment,douleurxx_endroitducorps_@_flanc_D_,lesions_peau_endroitducorps_@_nulle_part,douleurxx_irrad_@_nez,douleurxx_precis_@_4,douleurxx_endroitducorps_@_fosse_iliaque_D_,lesions_peau_intens_@_4,footnumb,rhino_pur,lesions_peau_endroitducorps_@_biceps_D_,lesions_peau_prurit_@_3,oedeme_endroitducorps_@_plante_du_pied_D_,flushing,lesions_peau_intens_@_8,lesions_peau_intens_@_1,douleurxx_irrad_@_thorax_postérieur_G_,K86.1,trav1_@_Cara,douleurxx_endroitducorps_@_ischio_D_,contact_allergie,douleurxx_carac_@_déchirante,oedeme_endroitducorps_@_cuisse_D_,lesions_peau_endroitducorps_@_testicule_D_,douleurxx_endroitducorps_@_testicule_D_,douleurxx_irrad_@_tempe_D_,cafe,douleurxx_irrad_@_omoplate_G_,douleurxx_soudain_@_10,lesions_peau_endroitducorps_@_grande_lèvre_G_,douleurxx_irrad_@_côté_du_thorax_G_,lesions_peau_desquame_@_O,douleurxx_endroitducorps_@_hypochondre_D_,atcd_cluster,douleurxx_endroitducorps_@_hypochondre_G_,lesions_peau_endroitducorps_@_grande_lèvre_D_,douleurxx_soudain_@_2,atcdpsyfam,lesions_peau_prurit_@_0,douleurxx_endroitducorps_@_palais,lesions_peau_endroitducorps_@_avant-bras_D_,douleurxx_endroitducorps_@_orteil__3__D_,rds_paralys_gen,lesions_peau_endroitducorps_@_épigastre,lesions_peau_endroitducorps_@_gencive_inférieure,ww_nuit,s09.90,lesions_peau_prurit_@_8,douleurxx_endroitducorps_@_coté_lateral_du_pied_G_,adp_dlr,etourdissement,oedeme_endroitducorps_@_tibia_D_,oedeme_endroitducorps_@_arrière_de_la_cheville_D_,e78.5,H6690,pneumothorax,trav1_@_AmerS,douleurxx_irrad_@_épaule_D_,douleurxx_carac_@_une_lourdeur_ou_serrement,douleurxx_endroitducorps_@_colonne_cervicale,naco,i60-i69,ww_valsalva,nausee,eampoc1,impression_mort,oedeme_endroitducorps_@_orteil__1__G_,hyponos,douleurxx_endroitducorps_@_talon_D_,douleurxx_endroitducorps_@_occiput,friss,douleurxx_soudain_@_1,douleurxx_endroitducorps_@_flanc_G_,trav1_@_Euro,j32,douleurxx_endroitducorps_@_ventre,oedeme_endroitducorps_@_face_dorsale_du_pied_G_,Z99.2,lesions_peau_endroitducorps_@_face_dorsale_main_D_,z80.1,ap_par,patho_endo,ulcères_bouche,lesions_peau_endroitducorps_@_lèvre_inferieure_D_,douleurxx_endroitducorps_@_mollet_G_,oedeme_endroitducorps_@_nulle_part,lesions_peau_endroitducorps_@_flanc_G_,douleurxx_soudain_@_3,lesions_peau_endroitducorps_@_front,douleurxx_endroitducorps_@_nulle_part,lesions_peau_endroitducorps_@_joue_D_,douleurxx_irrad_@_sein_D_,douleurxx_precis_@_7,vo_violent,e66,douleurxx_endroitducorps_@_tempe_G_,douleurxx_endroitducorps_@_thorax_postérieur_D_,douleurxx_carac_@_violente,lesions_peau_elevee_@_2,tagri,hernie_hiatale,douleurxx_irrad_@_biceps_G_,i10,ebolacase,lesions_peau,douleurxx_intens_@_3,rds_deg,oedeme_endroitducorps_@_mollet_G_,lesions_peau_elevee_@_1,diarrhee,lesions_peau_elevee_@_4,douleurxx_endroitducorps_@_coude_D_,douleurxx_irrad_@_menton,douleurxx_endroitducorps_@_arrière_de_la_cheville_D_,douleurxx_irrad_@_colonne_dorsale,lymphoedème,lesions_peau_intens_@_0,douleurxx_irrad_@_cartilage_thyroidien,lesions_peau_endroitducorps_@_côté_du_cou_G_,douleurxx_endroitducorps_@_doigt_index__G_,douleurxx_irrad_@_côté_du_thorax_D_,lesions_peau_endroitducorps_@_épaule_D_,douleurxx_endroitducorps_@_orteil__2__D_,douleurxx_endroitducorps_@_front,douleurxx_endroitducorps_@_nez,oedeme_endroitducorps_@_arrière_de_la_cheville_G_,douleurxx_endroitducorps_@_bas_du_thorax,ap_hypert4,i80,lesions_peau_endroitducorps_@_joue_G_,trismus,douleurxx_endroitducorps_@_face_palmaire_du_poignet_G_,douleurxx_endroitducorps_@_haut_du_thorax,douleurxx_endroitducorps_@_pharynx,douleurxx_endroitducorps_@_côté_du_cou_D_,ww_bouger,j17_j18,douleurxx_endroitducorps_@_arrière_du_cou,douleurxx_carac_@_NA,douleurxx_endroitducorps_@_coude_G_,douleurxx_carac_@_lancinante_/_choc_électrique,douleurxx_precis_@_6,lesions_peau_endroitducorps_@_joue_interne_D_,lesions_peau_desquame_@_N,smokingpast,douleurxx_irrad_@_trachée,douleurxx_endroitducorps_@_tibia_D_,oedeme_endroitducorps_@_orteil__1__D_,faible,douleurxx_irrad_@_thorax_postérieur_D_,oedeme_endroitducorps_@_cuisse_G_,douleurxx_endroitducorps_@_fosse_iliaque_G_,douleurxx_endroitducorps_@_oeil_G_,douleurxx_endroitducorps_@_petit_orteil__4__D_,lesions_peau_endroitducorps_@_pénis,HIV,douleurxx_irrad_@_mâchoire,lesions_peau_endroitducorps_@_cheville_G_,douleurxx_endroitducorps_@_côté_du_thorax_D_,criseasthm,bw_bending,f32,osteoporose,douleurxx_endroitducorps_@_cheville_D_,douleurxx_irrad_@_triceps_D_,z92.25,oedeme_endroitducorps_@_front,douleurxx_intens_@_5,douleurxx_endroitducorps_@_hanche_D_,douleurxx_endroitducorps_@_thorax_postérieur_G_,douleurxx_endroitducorps_@_tempe_D_,lesions_peau_endroitducorps_@_nez,ww_effort,douleurxx_endroitducorps_@_amygdale_G_,claud_mâchoire,crowd,douleurxx_endroitducorps_@_colonne_dorsale,menarche_hat,confusion,toux_Aboy,rds_paresthesie_gen,douleurxx_carac_@_une_pulsation,lesions_peau_prurit_@_1,douleurxx_endroitducorps_@_plante_du_pied_G_,gorge_dlr,j45,douleurxx_soudain_@_8,irritable,preg1,lesions_peau_couleur_@_pale,lesions_peau_prurit_@_6,fam_j45,douleurxx_endroitducorps_@_doigt_index__D_,lesions_peau_elevee_@_3,perte_appet,douleurxx_intens_@_7,douleurxx_intens_@_1,paresthesies_bilat,douleurxx_endroitducorps_@_doigt_majeur__D_,bode,lesions_peau_intens_@_2,douleurxx_irrad_@_haut_du_thorax,malf_cardiaque,lesions_peau_endroitducorps_@_petite_lèvre_D_,douleurxx_endroitducorps_@_coté_lateral_du_pied_D_,suburb,douleurxx_precis_@_8,stridor,convulsion,douleurxx_endroitducorps_@_cuisse_D_,douleurxx_endroitducorps_@_bouche,douleurxx_endroitducorps_@_face_palmaire_du_poignet_D_,douleurxx_intens_@_6,lesions_peau_endroitducorps_@_flanc_D_,douleurxx_endroitducorps_@_doigt_annulaire__G_,f10.129,douleurxx_carac_@_vive,lesions_peau_endroitducorps_@_testicule_G_,lesions_peau_endroitducorps_@_commissure_G_,wakeup,fatigabilité_msk,douleurxx_carac_@_épuisante,momasthma,douleurxx_carac_@_épeurante,horm1,ww_respi,prurit_nasal,douleurxx_precis_@_1,lesions_peau_endroitducorps_@_thorax_postérieur_D_,douleurxx_endroitducorps_@_arrière_de_la_cheville_G_,erytheme_occ,ballon_abdo,douleurxx_endroitducorps_@_joue_D_,trav1_@_AmerC,douleurxx_irrad_@_sous_la_machoire,douleurxx_endroitducorps_@_sein_D_,douleurxx_endroitducorps_@_orteil__1__D_,i73.9,fatig_ext,douleurxx_endroitducorps_@_genou_G_,lesions_peau_couleur_@_rose,douleurxx_endroitducorps_@_cartilage_thyroidien,g20,insp_siffla,lesions_peau_prurit_@_2,oedeme_endroitducorps_@_coté_lateral_du_pied_G_,douleurxx_endroitducorps_@_aisselle_D_,ains,douleurxx_endroitducorps_@_amygdale_D_,cont_coq,oedeme,boire_ped,douleurxx_endroitducorps_@_face_dorsale_du_poignet_D_,oedeme_endroitducorps_@_plante_du_pied_G_
npartitions=90,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1,Unnamed: 358_level_1,Unnamed: 359_level_1,Unnamed: 360_level_1,Unnamed: 361_level_1,Unnamed: 362_level_1,Unnamed: 363_level_1,Unnamed: 364_level_1,Unnamed: 365_level_1,Unnamed: 366_level_1,Unnamed: 367_level_1,Unnamed: 368_level_1,Unnamed: 369_level_1,Unnamed: 370_level_1,Unnamed: 371_level_1,Unnamed: 372_level_1,Unnamed: 373_level_1,Unnamed: 374_level_1,Unnamed: 375_level_1,Unnamed: 376_level_1,Unnamed: 377_level_1,Unnamed: 378_level_1,Unnamed: 379_level_1,Unnamed: 380_level_1,Unnamed: 381_level_1,Unnamed: 382_level_1,Unnamed: 383_level_1,Unnamed: 384_level_1,Unnamed: 385_level_1,Unnamed: 386_level_1,Unnamed: 387_level_1,Unnamed: 388_level_1,Unnamed: 389_level_1,Unnamed: 390_level_1,Unnamed: 391_level_1,Unnamed: 392_level_1,Unnamed: 393_level_1,Unnamed: 394_level_1,Unnamed: 395_level_1,Unnamed: 396_level_1,Unnamed: 397_level_1,Unnamed: 398_level_1,Unnamed: 399_level_1,Unnamed: 400_level_1,Unnamed: 401_level_1,Unnamed: 402_level_1,Unnamed: 403_level_1,Unnamed: 404_level_1,Unnamed: 405_level_1,Unnamed: 406_level_1,Unnamed: 407_level_1,Unnamed: 408_level_1,Unnamed: 409_level_1,Unnamed: 410_level_1,Unnamed: 411_level_1,Unnamed: 412_level_1,Unnamed: 413_level_1,Unnamed: 414_level_1,Unnamed: 415_level_1,Unnamed: 416_level_1,Unnamed: 417_level_1,Unnamed: 418_level_1,Unnamed: 419_level_1,Unnamed: 420_level_1,Unnamed: 421_level_1,Unnamed: 422_level_1,Unnamed: 423_level_1,Unnamed: 424_level_1,Unnamed: 425_level_1,Unnamed: 426_level_1,Unnamed: 427_level_1,Unnamed: 428_level_1,Unnamed: 429_level_1,Unnamed: 430_level_1,Unnamed: 431_level_1,Unnamed: 432_level_1,Unnamed: 433_level_1,Unnamed: 434_level_1,Unnamed: 435_level_1,Unnamed: 436_level_1,Unnamed: 437_level_1,Unnamed: 438_level_1,Unnamed: 439_level_1,Unnamed: 440_level_1,Unnamed: 441_level_1,Unnamed: 442_level_1,Unnamed: 443_level_1,Unnamed: 444_level_1,Unnamed: 445_level_1,Unnamed: 446_level_1,Unnamed: 447_level_1,Unnamed: 448_level_1,Unnamed: 449_level_1,Unnamed: 450_level_1,Unnamed: 451_level_1,Unnamed: 452_level_1,Unnamed: 453_level_1,Unnamed: 454_level_1,Unnamed: 455_level_1,Unnamed: 456_level_1,Unnamed: 457_level_1,Unnamed: 458_level_1,Unnamed: 459_level_1,Unnamed: 460_level_1,Unnamed: 461_level_1,Unnamed: 462_level_1,Unnamed: 463_level_1,Unnamed: 464_level_1,Unnamed: 465_level_1,Unnamed: 466_level_1,Unnamed: 467_level_1,Unnamed: 468_level_1,Unnamed: 469_level_1,Unnamed: 470_level_1,Unnamed: 471_level_1,Unnamed: 472_level_1,Unnamed: 473_level_1,Unnamed: 474_level_1,Unnamed: 475_level_1,Unnamed: 476_level_1,Unnamed: 477_level_1,Unnamed: 478_level_1,Unnamed: 479_level_1,Unnamed: 480_level_1,Unnamed: 481_level_1,Unnamed: 482_level_1,Unnamed: 483_level_1,Unnamed: 484_level_1,Unnamed: 485_level_1,Unnamed: 486_level_1,Unnamed: 487_level_1,Unnamed: 488_level_1,Unnamed: 489_level_1,Unnamed: 490_level_1,Unnamed: 491_level_1,Unnamed: 492_level_1,Unnamed: 493_level_1,Unnamed: 494_level_1,Unnamed: 495_level_1,Unnamed: 496_level_1,Unnamed: 497_level_1,Unnamed: 498_level_1,Unnamed: 499_level_1,Unnamed: 500_level_1,Unnamed: 501_level_1,Unnamed: 502_level_1,Unnamed: 503_level_1,Unnamed: 504_level_1,Unnamed: 505_level_1,Unnamed: 506_level_1,Unnamed: 507_level_1,Unnamed: 508_level_1,Unnamed: 509_level_1,Unnamed: 510_level_1,Unnamed: 511_level_1,Unnamed: 512_level_1,Unnamed: 513_level_1,Unnamed: 514_level_1,Unnamed: 515_level_1,Unnamed: 516_level_1,Unnamed: 517_level_1,Unnamed: 518_level_1,Unnamed: 519_level_1,Unnamed: 520_level_1,Unnamed: 521_level_1,Unnamed: 522_level_1
,int64,object,object,object,object,object,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64,int64
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [5]:
print('ddf npartitions: ', ddf.npartitions)
print('ddf divisions: ', ddf.divisions)

ddf npartitions:  31
ddf divisions:  (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)


In [None]:
# Save the list of the evidences in case we need them
with open('/content/gdrive/MyDrive/Colab/evidences.txt', 'a', encoding='utf-8') as f:
  f.write(str(list(unique_values)))

In [None]:
# check if the evidences are correctly distributed (naive check)
print('Patient 0 - Evidence (adp_dlr) : ')
print(ddf['adp_dlr'].compute().iloc[0])
print('Patient 1 - Evidence (adp_dlr) : ')
print(ddf['adp_dlr'].compute().iloc[1])
print('Patient 0 - Evidence (fievre) : ')
print(ddf['fievre'].compute().iloc[0])
print('Patient 1 - Evidence (fievre) : ')
print(ddf['fievre'].compute().iloc[1])

Patient 0 - Evidence (adp_dlr) : 
0
Patient 1 - Evidence (adp_dlr) : 
1
Patient 0 - Evidence (fievre) : 
1
Patient 1 - Evidence (fievre) : 
1


We can say that the evidences are correctly distributed using these examples. In fact we are sure that the evidence 'fievre' exists for patient 0 as it's an INITIAL_EVIDENCE. Same thing for 'adp_dlr' and the patient 1.
We can naively say that things went well and we can count on our binarizer.

In [None]:
# Let's make another verif
ddf['INITIAL_EVIDENCE'].compute().iloc[67293] # Let's check an evidence that we are sure to exist

'rds_deg'

In [None]:
# Let's check now for the value of this column
ddf['rds_deg'].compute().iloc[67293]

1

In [51]:
# replace M, F values by 0 and 1
ddf = ddf.map_partitions(pd.DataFrame.replace, {'M': 1, 'F': 0})

In [48]:
# We won't need the INITIAL_EVIDENCE & DIFFERENTIAL_DIAGNOSIS
# The 1st is just a redundant information (we already have it in the other columns)
# The 2nd is just a probability of each pathology possible, that we can use in another type of training
ddf = ddf.drop("INITIAL_EVIDENCE", axis=1)
ddf = ddf.drop("DIFFERENTIAL_DIAGNOSIS", axis=1)
# And of course the EVIDENCES column
ddf = ddf.drop("EVIDENCES", axis=1)

In [52]:
ddf.head()

Unnamed: 0,AGE,SEX,PATHOLOGY,z80.0,trav1_@_AmerN,protu_langue,lesions_peau_elevee_@_7,z77.22,lesions_peau_elevee_@_6,perinatality,...,lesions_peau_prurit_@_2,oedeme_endroitducorps_@_coté_lateral_du_pied_G_,douleurxx_endroitducorps_@_aisselle_D_,ains,douleurxx_endroitducorps_@_amygdale_D_,cont_coq,oedeme,boire_ped,douleurxx_endroitducorps_@_face_dorsale_du_poignet_D_,oedeme_endroitducorps_@_plante_du_pied_G_
0,18,1,IVRS ou virémie,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,21,1,VIH (Primo-infection),0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,19,0,Pneumonie,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,34,0,IVRS ou virémie,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,36,1,IVRS ou virémie,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [53]:
# Save the dataset into one csv for better control with pandas
ddf.to_csv('/content/gdrive/MyDrive/Colab/processed_dataset.csv', single_file=True, index=False)

['/content/gdrive/MyDrive/Colab/processed_dataset.csv']

The collaborative dataset is ready

## Model selection

In [4]:
# import necessary libraries
from sklearn.model_selection import train_test_split

from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import Perceptron
from sklearn.naive_bayes import BernoulliNB
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import accuracy_score, classification_report

In [3]:
df = pd.read_csv('/content/gdrive/MyDrive/Colab/processed_dataset.csv', chunksize=129257)

In [4]:
classes = set()
for chunk in df:
  classes.update(chunk['PATHOLOGY'].unique())
classes = list(classes)

In [5]:
# Split the chunks to training and testing sets
df = pd.read_csv('/content/gdrive/MyDrive/Colab/processed_dataset.csv', chunksize=129257)
X_train_global = []
y_train_global = []
X_test_global = []
y_test_global = []
for chunk in df:
    X = chunk.drop('PATHOLOGY', axis=1)
    y = chunk['PATHOLOGY']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)

    # save the training data
    X_train_global.append(X_train)
    y_train_global.append(y_train)

    # save the test data
    X_test_global.append(X_test)
    y_test_global.append(y_test)

# concat the test data as we will need it together for testing
X_test_global = pd.concat(X_test_global)
y_test_global = pd.concat(y_test_global)

In [12]:
def fit_by_chunks(model, features, targets):
  if(len(features) != len(targets)):
    raise Exception("features should be compatible with targets")

  for i in range(len(features)):
    print('Training model on chunk ', i+1)
    # Train the model by parts
    if i == 0: # pass the classes list in the 1st iteration
      model.partial_fit(features[i], targets[i], classes=classes)
    else:
      model.partial_fit(features[i], targets[i])
    print('Training on chunk ', i+1, ' finished')

    y_pred_test = model.predict(X_test_global)
    test_accuracy = accuracy_score(y_test_global, y_pred_test)
    print('Test Accuracy:', test_accuracy)
    print('-------------------------------------------------------')

### SGDClassifier

In [13]:
sgd = SGDClassifier()

In [14]:
fit_by_chunks(sgd, X_train_global, y_train_global)

Training model on chunk  1
Training on chunk  1  finished
Test Accuracy: 0.9155004216275597
-------------------------------------------------------
Training model on chunk  2
Training on chunk  2  finished
Test Accuracy: 0.9325589311547954
-------------------------------------------------------
Training model on chunk  3
Training on chunk  3  finished
Test Accuracy: 0.9282382157031123
-------------------------------------------------------
Training model on chunk  4
Training on chunk  4  finished
Test Accuracy: 0.9489327794152915
-------------------------------------------------------
Training model on chunk  5
Training on chunk  5  finished
Test Accuracy: 0.9671053140545098
-------------------------------------------------------
Training model on chunk  6
Training on chunk  6  finished
Test Accuracy: 0.9671285229110095
-------------------------------------------------------
Training model on chunk  7
Training on chunk  7  finished
Test Accuracy: 0.9489830652710408
--------------------

### Perceptron

In [15]:
pm = Perceptron()

In [16]:
fit_by_chunks(pm, X_train_global, y_train_global)

Training model on chunk  1
Training on chunk  1  finished
Test Accuracy: 0.9564988666341743
-------------------------------------------------------
Training model on chunk  2
Training on chunk  2  finished
Test Accuracy: 0.9635891722948143
-------------------------------------------------------
Training model on chunk  3
Training on chunk  3  finished
Test Accuracy: 0.9726947803281732
-------------------------------------------------------
Training model on chunk  4
Training on chunk  4  finished
Test Accuracy: 0.974582433990144
-------------------------------------------------------
Training model on chunk  5
Training on chunk  5  finished
Test Accuracy: 0.9786207750210814
-------------------------------------------------------
Training model on chunk  6
Training on chunk  6  finished
Test Accuracy: 0.9814174422292881
-------------------------------------------------------
Training model on chunk  7
Training on chunk  7  finished
Test Accuracy: 0.980450406541803
----------------------

### Naive Bayes

In [17]:
nb = BernoulliNB()

In [18]:
fit_by_chunks(nb, X_train_global, y_train_global)

Training model on chunk  1
Training on chunk  1  finished
Test Accuracy: 0.9973696629300408
-------------------------------------------------------
Training model on chunk  2
Training on chunk  2  finished
Test Accuracy: 0.9974160806430401
-------------------------------------------------------
Training model on chunk  3
Training on chunk  3  finished
Test Accuracy: 0.9974006080720403
-------------------------------------------------------
Training model on chunk  4
Training on chunk  4  finished
Test Accuracy: 0.9974160806430401
-------------------------------------------------------
Training model on chunk  5
Training on chunk  5  finished
Test Accuracy: 0.9974392894995396
-------------------------------------------------------
Training model on chunk  6
Training on chunk  6  finished
Test Accuracy: 0.9974392894995396
-------------------------------------------------------
Training model on chunk  7
Training on chunk  7  finished
Test Accuracy: 0.99742381692854
----------------------

### Multi-Layer Perceptron

In [19]:
mlp = MLPClassifier()

In [20]:
fit_by_chunks(mlp, X_train_global, y_train_global)

Training model on chunk  1
Training on chunk  1  finished
Test Accuracy: 0.9960893076798106
-------------------------------------------------------
Training model on chunk  2
Training on chunk  2  finished
Test Accuracy: 0.993358398898353
-------------------------------------------------------
Training model on chunk  3
Training on chunk  3  finished
Test Accuracy: 0.9973464540735412
-------------------------------------------------------
Training model on chunk  4
Training on chunk  4  finished
Test Accuracy: 0.9973619266445409
-------------------------------------------------------
Training model on chunk  5
Training on chunk  5  finished
Test Accuracy: 0.9973464540735412
-------------------------------------------------------
Training model on chunk  6
Training on chunk  6  finished
Test Accuracy: 0.9974624983560393
-------------------------------------------------------
Training model on chunk  7
Training on chunk  7  finished
Test Accuracy: 0.9954046464130712
---------------------

### Compare models

In [21]:
# list of models
models = [sgd, pm, nb, mlp]
results = []

# Evaluate each model on the test set
for model in models:
    y_pred = model.predict(X_test_global)
    accuracy = accuracy_score(y_test_global, y_pred)
    report = classification_report(y_test_global, y_pred, output_dict=True)
    
    results.append({
        'Model': type(model).__name__,
        'Accuracy': accuracy,
        'Precision': report['weighted avg']['precision'],
        'Recall': report['weighted avg']['recall'],
        'F1-Score': report['weighted avg']['f1-score']
    })

# Create a DataFrame from the results
df_results = pd.DataFrame(results)

In [22]:
df_results.head()

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,SGDClassifier,0.959369,0.989798,0.959369,0.969244
1,Perceptron,0.982106,0.988327,0.982106,0.981558
2,BernoulliNB,0.997428,0.997577,0.997428,0.997396
3,MLPClassifier,0.997451,0.997631,0.997451,0.997417


In [24]:
# save the models
import pickle
pickle.dump(sgd, open('/content/gdrive/MyDrive/Colab/models/sgd_model.pkl', 'wb'))
pickle.dump(pm, open('/content/gdrive/MyDrive/Colab/models/pm_model.pkl', 'wb'))
pickle.dump(nb, open('/content/gdrive/MyDrive/Colab/models/nb_model.pkl', 'wb'))
pickle.dump(mlp, open('/content/gdrive/MyDrive/Colab/models/mlp_model.pkl', 'wb'))

# Test the models (independent part)

In [8]:
import random
import pandas as pd
import numpy as np

In [16]:
import pickle
sgd = pickle.load(open('/content/gdrive/MyDrive/Colab/models/sgd_model.pkl', 'rb'))
pm = pickle.load(open('/content/gdrive/MyDrive/Colab/models/pm_model.pkl', 'rb'))
nb = pickle.load(open('/content/gdrive/MyDrive/Colab/models/nb_model.pkl', 'rb'))
mlp = pickle.load(open('/content/gdrive/MyDrive/Colab/models/mlp_model.pkl', 'rb'))

In [42]:
scores = pd.DataFrame(columns=['Model', 'Score'])
scores.loc[0] = [type(sgd).__name__, sgd.score(X_test_global, y_test_global)]
scores.loc[1] = [type(pm).__name__, pm.score(X_test_global, y_test_global)]
scores.loc[2] = [type(nb).__name__, nb.score(X_test_global, y_test_global)]
scores.loc[3] = [type(mlp).__name__, mlp.score(X_test_global, y_test_global)]
scores

Unnamed: 0,Model,Score
0,SGDClassifier,0.959369
1,Perceptron,0.982106
2,BernoulliNB,0.997428
3,MLPClassifier,0.997451


In [43]:
# save scores for later use
scores.to_csv('/content/gdrive/MyDrive/Colab/models/models_score.csv', index=False)

In [13]:
columns = []
with open('/content/gdrive/MyDrive/Colab/evidences.txt', 'r', encoding='utf-8') as f:
    # the evidences are stored as a list in the file, so we need to convert it to a list
    columns = ast.literal_eval(f.read())

In [39]:
# random number of symptoms but  not equal to the number of symptoms in the dataset
symptoms_num = 7
#random.randint(1, 30)
# random symptoms
symptoms = []
for i in range(symptoms_num):
    symptoms.append(columns[random.randint(0, len(columns)-1)])
# we need to create a dataframe with the same columns as the dataset
symptoms_df = pd.DataFrame(columns=X_test_global.columns)
# we need to add the symptoms to the dataframe
symptoms_df.loc[0] = 0
for symptom in symptoms:
    symptoms_df[symptom] = 1

# Other columns
symptoms_df['AGE'] = random.randint(5, 60)
symptoms_df['SEX'] = random.randint(0, 1)

symptoms

['oedeme_endroitducorps_@_mollet_D_',
 'i10',
 'lesions_peau_endroitducorps_@_palais',
 'douleurxx_carac_@_une_brûlure_ou_chaleur',
 'douleurxx_endroitducorps_@_omoplate_G_',
 'douleurxx_intens_@_2',
 'douleurxx_endroitducorps_@_tibia_G_']

In [40]:
# predict the disease using both models
disease = sgd.predict(symptoms_df)
# confidence = np.max(sgd.predict_proba(symptoms_df)) 

disease1 = pm.predict(symptoms_df)
# confidence1 = np.max(pm.predict_proba(symptoms_df))

disease2 = nb.predict(symptoms_df)
# confidence2 = np.max(nb.predict_proba(symptoms_df))

disease3 = mlp.predict(symptoms_df)
# confidence3 = np.max(mlp.predict_proba(symptoms_df))

In [41]:
# Let's plot a table with the results of the 5 models and the confidence of each model
results = pd.DataFrame(columns=['Model', 'Predicted Disease'])
results.loc[0] = [type(sgd).__name__, disease[0]]
results.loc[1] = [type(pm).__name__, disease1[0]]
results.loc[2] = [type(nb).__name__, disease2[0]]
results.loc[3] = [type(mlp).__name__, disease3[0]]
results

Unnamed: 0,Model,Predicted Disease
0,SGDClassifier,Laryngo-trachéo-bronchite (Croup)
1,Perceptron,Fibrillation auriculaire/Flutter auriculaire
2,BernoulliNB,Laryngo-trachéo-bronchite (Croup)
3,MLPClassifier,Fibrillation auriculaire/Flutter auriculaire
