In [55]:
import pandas as pd
import graphviz
import os
from pyagrum_extra import gum
from pyagrum_extra import predict
import pyAgrum.lib.notebook as gnb 


ot_odr_filename = os.path.join(".", "donnees/OT_ODR.csv.bz2")
ot_odr_df = pd.read_csv(ot_odr_filename,
                        compression="bz2",
                        sep=";")

equipements_filename = os.path.join(".", 'donnees/EQUIPEMENTS.csv')
equipements_df = pd.read_csv(equipements_filename,
                             sep=";")

# on fait un seul tableau à partir de nos 2 csv
merged_df = pd.merge(ot_odr_df, equipements_df, on='EQU_ID')

#on supprime une ligne qui possède des valeurs non définit (kilométrage)
merged_df.drop(178662, inplace=True)

# Définition des limites des intervalles pour chaque classe
kilometrage_bins = [-1, 50000, 100000,150000,200000,250000,float('inf')]

# Définition des étiquettes pour chaque classe
kilometrage_labels = ['0-50000', '50000-100000','100000-150000','150000-200000','200000-250000','250000+']

# Transformation de la variable "kilométrage" en classe
merged_df['KILOMETRAGE_CLASSE'] = pd.cut(merged_df['KILOMETRAGE'], bins=kilometrage_bins, labels=kilometrage_labels)
merged_df = merged_df.sort_values('KILOMETRAGE')

# Conversion de la nouvelle variable en catégorie
merged_df['KILOMETRAGE_CLASSE'] = merged_df['KILOMETRAGE_CLASSE'].astype('category')

"""
COLONNE EN BINAIRE
"""

# Séparation des valeurs de la colonne en utilisant le séparateur "/"
split_values = merged_df["SIG_CONTEXTE"].str.split("/")

# Création des colonnes binaires pour chaque valeur unique
unique_values = set(value for values in split_values for value in values)
var_to_model = []
bin_model    = []
for value in unique_values:
    merged_df[f"{value}_BIN"] = split_values.apply(lambda x: int(value in x))
    var_to_model.append(f"{value}_BIN")
    bin_model.append(f"{value}_BIN")
other_var_to_model = ["SYSTEM_N1", "SIG_OBS","SIG_ORGANE","SYSTEM_N2","SYSTEM_N3","TYPE_TRAVAIL","KILOMETRAGE_CLASSE"]
for var in other_var_to_model:
    var_to_model.append(var)

for var in var_to_model:
    merged_df[var] = merged_df[var].astype('category')

var_bn = {}
for var in var_to_model:
    nb_values = len(merged_df[var].cat.categories)
    var_bn[var] = gum.LabelizedVariable(var, var, nb_values)
    
for var in var_bn:
    for i, modalite in enumerate(merged_df[var].cat.categories):
        var_bn[var].changeLabel(i, str(modalite))
merged_df

Unnamed: 0,OT_ID,ODR_ID,ODR_LIBELLE,TYPE_TRAVAIL,DUREE_TRAVAIL,SYSTEM_N1,SYSTEM_N2,SYSTEM_N3,EQU_ID,DATE_OT,...,HAUT_BIN,A FROID_BIN,EN MONTEE_BIN,REMORQUE_BIN,DROIT_BIN,CENTRALE_BIN,A L'OUVERTURE_BIN,INTERIEUR_BIN,AU POINT MORT_BIN,POSTE CONDUITE_BIN
505655,OT000326415,OM000404264,REMPLACEMENT DU BOITIER TECHNIQUE SAM,EQUIPEMENT EMBARQUE,0.08,EQUIPEMENT EMBARQUE,AIDE A L'EXPLOITATION-MAINTENANCE,BOITIER TECHNIQUE SAM,E00340050,2018-09-04 17:56:07,...,0,0,0,0,0,0,0,0,0,1
433726,OT000031333,OM000036159,REMPLACEMENT D'UN ECLAIRAGE EXT,MECANIQUE,0.25,EQUIPEMENT ELECTRIQUE,ECLAIRAGE-SIGNALISATION,ECLAIRAGE-SIGNALISATION EXT,E00070292,2014-04-10 21:50:55,...,0,0,0,0,0,0,0,0,0,0
439684,OT000038308,OM000045741,REMPLACEMENT D'UNE COURROIE D'ALTERNATEUR,MECANIQUE,0.33,EQUIPEMENT ELECTRIQUE,CIRCUIT DE CHARGE,ORGANE DU CIRCUIT DE CHARGE,E00073750,2014-05-30 19:03:18,...,0,0,0,0,0,0,0,0,1,0
439685,OT000053873,OM000065763,REMPLACEMENT DU BOITIER PUPITRE ICS,EQUIPEMENT EMBARQUE,0.08,EQUIPEMENT EMBARQUE,AIDE A L'EXPLOITATION-MAINTENANCE,PUPITRE ICS,E00073750,2014-09-22 19:11:40,...,0,0,0,0,0,0,0,1,0,0
497676,OT000205971,OM000251438,REMPLACEMENT D'UN BALAI D'ESSUIE-GLACE,MECANIQUE,0.16,EQUIPEMENT DE CARROSSERIE,ACCESSOIRE DE VISIBILITE,ESSUIE-GLACE,E00302275,2017-01-10 21:33:15,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10494,OT000019365,OM000021853,REMPLACEMENT D'UNE LAMPE DE CODE / PHARE,ELECTRICITE,0.04,EQUIPEMENT ELECTRIQUE,ECLAIRAGE-SIGNALISATION,ECLAIRAGE-SIGNALISATION EXT,E00004844,2014-01-24 19:05:37,...,0,0,0,0,1,0,0,0,0,0
10495,OT000021641,OM000024406,REMPLACEMENT D'UNE LAMPE DE FEU CLIGNOTANT,ELECTRICITE,0.04,EQUIPEMENT ELECTRIQUE,ECLAIRAGE-SIGNALISATION,ECLAIRAGE-SIGNALISATION EXT,E00004844,2014-02-06 23:15:18,...,0,0,0,0,1,0,0,0,0,0
10496,OT000021642,OM000024405,REMPLACEMENT D'UNE LAMPE DE FEU STOP,ELECTRICITE,0.04,EQUIPEMENT ELECTRIQUE,ECLAIRAGE-SIGNALISATION,ECLAIRAGE-SIGNALISATION EXT,E00004844,2014-02-06 23:15:27,...,0,0,0,0,0,0,0,0,0,0
10497,OT000023623,OM000026980,REMPLACEMENT D'UNE LAMPE DE FEU STOP,ELECTRICITE,0.04,EQUIPEMENT ELECTRIQUE,ECLAIRAGE-SIGNALISATION,ECLAIRAGE-SIGNALISATION EXT,E00004844,2014-02-21 20:41:49,...,0,0,0,0,1,1,0,0,0,0


In [56]:
bn = gum.BayesNet("modèle simple")
for var in var_bn.values():
    bn.add(var)
for i in range(len(bin_model)):
    bn.addArc("SYSTEM_N2",str(bin_model[i]))
bn.addArc("SYSTEM_N1", "SIG_OBS")
bn.addArc("SYSTEM_N1", "SIG_ORGANE")
bn.addArc("SYSTEM_N2", "SYSTEM_N1")
bn.addArc("SYSTEM_N3", "SYSTEM_N2")
bn.addArc("TYPE_TRAVAIL", "SYSTEM_N3")




#vérification de notre RB
import pyAgrum.lib.notebook as gnb 
bn

# bn.cpt("SYSTEM_N1")

In [57]:
# bn.cpt("SIG_OBS")

In [58]:
bn.fit_bis(merged_df, verbose_mode=True)
# bn.cpt("SIG_OBS")


- Learn CPT BAS_BIN
- Learn CPT ROTONDE_BIN
- Learn CPT AVANT_BIN
- Learn CPT SYSTEM_N3
- Learn CPT EN VIRAGE_BIN
- Learn CPT ARRIERE_BIN
- Learn CPT EN CHARGE_BIN
- Learn CPT EN DESCENTE_BIN
- Learn CPT CENTRALE_BIN
- Learn CPT PLATE FORME_BIN
- Learn CPT INTERIEUR_BIN
- Learn CPT EXTERIEUR_BIN
- Learn CPT A FROID_BIN
- Learn CPT AU RALENTI_BIN
- Learn CPT AU FREINAGE_BIN
- Learn CPT AU POINT MORT_BIN
- Learn CPT KILOMETRAGE_CLASSE
- Learn CPT A L'OUVERTURE_BIN
- Learn CPT POSTE CONDUITE_BIN
- Learn CPT SIG_ORGANE
- Learn CPT TABLEAU DE BORD_BIN
- Learn CPT LATERAL_BIN
- Learn CPT EN MONTEE_BIN
- Learn CPT DROIT_BIN
- Learn CPT HAUT_BIN
- Learn CPT A LA FERMETURE_BIN
- Learn CPT REMORQUE_BIN
- Learn CPT AU DEMARRAGE_BIN
- Learn CPT A VIDE_BIN
- Learn CPT SYSTEM_N1
- Learn CPT SIG_OBS
- Learn CPT CENTRE_BIN
- Learn CPT PORTE_BIN
- Learn CPT A L'ACCELERATION_BIN
- Learn CPT A L'ARRET_BIN
- Learn CPT SYSTEM_N2
- Learn CPT GAUCHE_BIN
- Learn CPT TYPE_TRAVAIL
- Learn CPT A CHAUD_BIN
- Lear

In [59]:
# bn.cpt("SYSTEM_N1")



In [60]:
bin_model.append("SIG_OBS")
bin_model.append("SIG_ORGANE")
bin_model.append("KILOMETRAGE_CLASSE")
pred_SystemN1 = bn.predict(merged_df[bin_model].iloc[-5000:], 
                  var_target="SYSTEM_N1",
                  show_progress=True)
pred_SystemN2 = bn.predict(merged_df[bin_model].iloc[-5000:], 
                  var_target="SYSTEM_N2",
                  show_progress=True)
pred_SystemN3 = bn.predict(merged_df[bin_model].iloc[-5000:], 
                  var_target="SYSTEM_N3",
                  show_progress=True)
pred_TypeTravail = bn.predict(merged_df[bin_model].iloc[-5000:], 
                  var_target="TYPE_TRAVAIL",
                  show_progress=True)
print((merged_df["SYSTEM_N1"].iloc[-5000:] == pred_SystemN1).mean())
print((merged_df["SYSTEM_N2"].iloc[-5000:] == pred_SystemN2).mean())
print((merged_df["SYSTEM_N3"].iloc[-5000:] == pred_SystemN3).mean())
print((merged_df["TYPE_TRAVAIL"].iloc[-5000:] == pred_TypeTravail).mean())



0.8076t progress: 100%
0.6728
0.5448
0.7022


In [61]:
bn
