In [23]:
import pandas as pd
import graphviz
import numpy as np
import os
from pyagrum_extra import gum
from pyagrum_extra import predict
import pyAgrum.lib.notebook as gnb 


ot_odr_filename = os.path.join(".", "donnees/OT_ODR.csv.bz2")
ot_odr_df = pd.read_csv(ot_odr_filename,
                        compression="bz2",
                        sep=";")

equipements_filename = os.path.join(".", 'donnees/EQUIPEMENTS.csv')
equipements_df = pd.read_csv(equipements_filename,
                             sep=";")

# on fait un seul tableau à partir de nos 2 csv
merged_df = pd.merge(ot_odr_df, equipements_df, on='EQU_ID')
#on supprime une ligne qui possède des valeurs non définit (kilométrage)
merged_df.drop(178662, inplace=True)

# Définition des limites des intervalles pour chaque classe
kilometrage_bins = [-1, 50000, 100000,150000,200000,250000,float('inf')]

# Définition des étiquettes pour chaque classe
kilometrage_labels = ['0-50000', '50000-100000','100000-150000','150000-200000','200000-250000','250000+']

# Transformation de la variable "kilométrage" en classe
merged_df['KILOMETRAGE_CLASSE'] = pd.cut(merged_df['KILOMETRAGE'], bins=kilometrage_bins, labels=kilometrage_labels)
merged_df = merged_df.sort_values('KILOMETRAGE')

# Conversion de la nouvelle variable en catégorie
merged_df['KILOMETRAGE_CLASSE'] = merged_df['KILOMETRAGE_CLASSE'].astype('category')

"""
COLONNE EN BINAIRE
"""
# Séparation des valeurs de la colonne en utilisant le séparateur "/"
split_values = merged_df["SIG_CONTEXTE"].str.split("/")

# Création des colonnes binaires pour chaque valeur unique
unique_values = set(value for values in split_values for value in values)
var_to_model = []
bin_model    = []
for value in unique_values:
    merged_df[f"{value}_BIN"] = split_values.apply(lambda x: int(value in x))
    var_to_model.append(f"{value}_BIN")

bin_model = var_to_model.copy()     
binForWeb = bin_model.copy()
other_var_to_model = ["SYSTEM_N1", "SIG_OBS","SIG_ORGANE","SYSTEM_N2","SYSTEM_N3","TYPE_TRAVAIL","MODELE","KILOMETRAGE_CLASSE","SIG_CONTEXTE"]
for var in other_var_to_model:
    var_to_model.append(var)

for var in other_var_to_model:
    merged_df[var] = merged_df[var].astype('category')

var_bn = {}
for var in other_var_to_model:
    nb_values = len(merged_df[var].cat.categories)
    var_bn[var] = gum.LabelizedVariable(var, var, nb_values)
    
for var in var_bn:
    for i, modalite in enumerate(merged_df[var].cat.categories):
        var_bn[var].changeLabel(i, str(modalite))
merged_df

Unnamed: 0,OT_ID,ODR_ID,ODR_LIBELLE,TYPE_TRAVAIL,DUREE_TRAVAIL,SYSTEM_N1,SYSTEM_N2,SYSTEM_N3,EQU_ID,DATE_OT,...,A LA FERMETURE_BIN,EN MONTEE_BIN,PORTE_BIN,INTERIEUR_BIN,AU RALENTI_BIN,EN DESCENTE_BIN,EN CHARGE_BIN,A L'ARRET_BIN,AU FREINAGE_BIN,BAS_BIN
505655,OT000326415,OM000404264,REMPLACEMENT DU BOITIER TECHNIQUE SAM,EQUIPEMENT EMBARQUE,0.08,EQUIPEMENT EMBARQUE,AIDE A L'EXPLOITATION-MAINTENANCE,BOITIER TECHNIQUE SAM,E00340050,2018-09-04 17:56:07,...,0,0,0,0,0,0,0,0,0,0
433726,OT000031333,OM000036159,REMPLACEMENT D'UN ECLAIRAGE EXT,MECANIQUE,0.25,EQUIPEMENT ELECTRIQUE,ECLAIRAGE-SIGNALISATION,ECLAIRAGE-SIGNALISATION EXT,E00070292,2014-04-10 21:50:55,...,0,0,0,0,0,0,0,0,0,0
439684,OT000038308,OM000045741,REMPLACEMENT D'UNE COURROIE D'ALTERNATEUR,MECANIQUE,0.33,EQUIPEMENT ELECTRIQUE,CIRCUIT DE CHARGE,ORGANE DU CIRCUIT DE CHARGE,E00073750,2014-05-30 19:03:18,...,0,0,0,0,0,0,0,0,0,0
439685,OT000053873,OM000065763,REMPLACEMENT DU BOITIER PUPITRE ICS,EQUIPEMENT EMBARQUE,0.08,EQUIPEMENT EMBARQUE,AIDE A L'EXPLOITATION-MAINTENANCE,PUPITRE ICS,E00073750,2014-09-22 19:11:40,...,0,0,0,1,0,0,0,0,0,0
497676,OT000205971,OM000251438,REMPLACEMENT D'UN BALAI D'ESSUIE-GLACE,MECANIQUE,0.16,EQUIPEMENT DE CARROSSERIE,ACCESSOIRE DE VISIBILITE,ESSUIE-GLACE,E00302275,2017-01-10 21:33:15,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10494,OT000019365,OM000021853,REMPLACEMENT D'UNE LAMPE DE CODE / PHARE,ELECTRICITE,0.04,EQUIPEMENT ELECTRIQUE,ECLAIRAGE-SIGNALISATION,ECLAIRAGE-SIGNALISATION EXT,E00004844,2014-01-24 19:05:37,...,0,0,0,0,0,0,0,0,0,0
10495,OT000021641,OM000024406,REMPLACEMENT D'UNE LAMPE DE FEU CLIGNOTANT,ELECTRICITE,0.04,EQUIPEMENT ELECTRIQUE,ECLAIRAGE-SIGNALISATION,ECLAIRAGE-SIGNALISATION EXT,E00004844,2014-02-06 23:15:18,...,0,0,0,0,0,0,0,0,0,0
10496,OT000021642,OM000024405,REMPLACEMENT D'UNE LAMPE DE FEU STOP,ELECTRICITE,0.04,EQUIPEMENT ELECTRIQUE,ECLAIRAGE-SIGNALISATION,ECLAIRAGE-SIGNALISATION EXT,E00004844,2014-02-06 23:15:27,...,0,0,0,0,0,0,0,0,0,0
10497,OT000023623,OM000026980,REMPLACEMENT D'UNE LAMPE DE FEU STOP,ELECTRICITE,0.04,EQUIPEMENT ELECTRIQUE,ECLAIRAGE-SIGNALISATION,ECLAIRAGE-SIGNALISATION EXT,E00004844,2014-02-21 20:41:49,...,0,0,0,0,0,0,0,0,0,1


In [24]:
bn = gum.BayesNet("modèle simple")
for var in var_bn.values():
    bn.add(var)
bn.addArc("SYSTEM_N2","SIG_CONTEXTE")
bn.addArc("SIG_OBS", "SYSTEM_N1")
bn.addArc("SIG_ORGANE", "SYSTEM_N1")
bn.addArc("MODELE", "SYSTEM_N1")
bn.addArc("SYSTEM_N1", "SYSTEM_N2")
bn.addArc("SYSTEM_N2", "SYSTEM_N3")
bn.addArc("SYSTEM_N3", "TYPE_TRAVAIL")
bn.addArc("TYPE_TRAVAIL", "KILOMETRAGE_CLASSE")

#vérification de notre RB
import pyAgrum.lib.notebook as gnb 
bn

# bn.cpt("SYSTEM_N1")

NotFound: [pyAgrum] Object not found: No element with the key <SIG_CONTEXT>

In [16]:
# bn.cpt("SIG_OBS")

In [17]:
bn.fit(merged_df, verbose_mode=True)
# bn.cpt("SIG_OBS")


- Learn CPT AVANT_BIN
- Learn CPT PLATE FORME_BIN
- Learn CPT BAS_BIN
- Learn CPT MODELE
- Learn CPT CENTRE_BIN
- Learn CPT A L'OUVERTURE_BIN
- Learn CPT POSTE CONDUITE_BIN
- Learn CPT REMORQUE_BIN
- Learn CPT A L'ACCELERATION_BIN
- Learn CPT SIG_OBS
- Learn CPT GAUCHE_BIN
- Learn CPT SYSTEM_N1
- Learn CPT HAUT_BIN
- Learn CPT A VIDE_BIN
- Learn CPT DESSOUS_BIN
- Learn CPT EN MONTEE_BIN
- Learn CPT INTERIEUR_BIN
- Learn CPT AU RALENTI_BIN
- Learn CPT A L'ARRET_BIN
- Learn CPT TYPE_TRAVAIL
- Learn CPT ARRIERE_BIN
- Learn CPT AU FREINAGE_BIN
- Learn CPT SYSTEM_N3
- Learn CPT SIG_ORGANE
- Learn CPT A FROID_BIN
- Learn CPT PORTE_BIN
- Learn CPT EN CHARGE_BIN
- Learn CPT ROTONDE_BIN
- Learn CPT A CHAUD_BIN
- Learn CPT EXTERIEUR_BIN
- Learn CPT LATERAL_BIN
- Learn CPT AU DEMARRAGE_BIN
- Learn CPT DROIT_BIN
- Learn CPT EN DESCENTE_BIN
- Learn CPT PLAFOND_BIN
- Learn CPT SYSTEM_N2
- Learn CPT CENTRALE_BIN
- Learn CPT A LA FERMETURE_BIN
- Learn CPT TABLEAU DE BORD_BIN
- Learn CPT EN VIRAGE_BIN


In [18]:
# bn.cpt("SYSTEM_N1")



In [19]:


proba = bn.predict_proba(merged_df[["SIG_OBS","SIG_ORGANE","MODELE"]].iloc[-1:], 
                var_target="SYSTEM_N1",
                show_progress=True)
test2 = bn.predict(merged_df[["SIG_OBS","SIG_ORGANE","MODELE"]].iloc[-1:], 
                var_target="SYSTEM_N1",
                show_progress=True)

labels = var_bn["SYSTEM_N1"].labels()
def cinqMeilleur(labels,proba):
    indices_plus_haut = np.argsort(proba[0])[-5:]
    tabProba = []
    tabLabel = []
    for ind in indices_plus_haut:
        #Récupération des valeurs possibles de la variable cible
        tabProba.append(proba[0][ind])
        tabLabel.append(labels[ind])
    return tabLabel,tabProba
cinqMeilleur(labels,proba)

predict progress:  0%

(['EQUIPEMENT CLIMATIQUE',
  'EQUIPEMENT DE MOTORISATION',
  'EQUIPEMENT EMBARQUE',
  'EQUIPEMENT DE CARROSSERIE',
  'EQUIPEMENT ELECTRIQUE'],
 [0.000589101620029455,
  0.000589101620029455,
  0.000589101620029455,
  0.02297496318114875,
  0.9749631811487481])

In [20]:
pred_SystemN1 = bn.predict(merged_df[["SIG_OBS","SIG_ORGANE","MODELE"]].iloc[-5000:], 
                  var_target="SYSTEM_N1",
                  show_progress=True)
bin_model.append("SIG_OBS")
bin_model.append("SIG_ORGANE")
bin_model.append("MODELE")
bin_model.append("SYSTEM_N1")
pred_SystemN2 = bn.predict(merged_df[bin_model].iloc[-5000:], 
                  var_target="SYSTEM_N2",
                  show_progress=True)
print(bin_model)
bin_model.append("SYSTEM_N2")
pred_SystemN3 = bn.predict(merged_df[bin_model].iloc[-5000:], 
                  var_target="SYSTEM_N3",
                  show_progress=True)
bin_model.append("SYSTEM_N3")
pred_TypeTravail = bn.predict(merged_df[bin_model].iloc[-5000:], 
                  var_target="TYPE_TRAVAIL",
                  show_progress=True)

print((merged_df["SYSTEM_N1"].iloc[-5000:] == pred_SystemN1).mean())
print((merged_df["SYSTEM_N2"].iloc[-5000:] == pred_SystemN2).mean())
print((merged_df["SYSTEM_N3"].iloc[-5000:] == pred_SystemN3).mean())
print((merged_df["TYPE_TRAVAIL"].iloc[-5000:] == pred_TypeTravail).mean())




['PLATE FORME_BIN', 'ROTONDE_BIN', 'A VIDE_BIN', 'POSTE CONDUITE_BIN', 'PLAFOND_BIN', 'ARRIERE_BIN', 'TABLEAU DE BORD_BIN', 'DESSOUS_BIN', 'CENTRE_BIN', 'GAUCHE_BIN', 'EXTERIEUR_BIN', 'EN VIRAGE_BIN', "A L'OUVERTURE_BIN", 'AU DEMARRAGE_BIN', 'HAUT_BIN', 'AU POINT MORT_BIN', 'A FROID_BIN', 'A CHAUD_BIN', 'DROIT_BIN', 'REMORQUE_BIN', "A L'ACCELERATION_BIN", 'LATERAL_BIN', 'CENTRALE_BIN', 'AVANT_BIN', 'A LA FERMETURE_BIN', 'EN MONTEE_BIN', 'PORTE_BIN', 'INTERIEUR_BIN', 'AU RALENTI_BIN', 'EN DESCENTE_BIN', 'EN CHARGE_BIN', "A L'ARRET_BIN", 'AU FREINAGE_BIN', 'BAS_BIN', 'SIG_OBS', 'SIG_ORGANE', 'MODELE', 'SYSTEM_N1']
0.881ct progress: 100%
0.7634
0.7502
0.7874


In [21]:
print(bn)


BN{nodes: 41, arcs: 40, domainSize: 10^21.7641, dim: 3767914, mem: 31Mo 968Ko 0o}


In [22]:
"""
WEB CONTENT
"""
def getAllSig_Obs():
    return set(merged_df["SIG_OBS"])
    
def getAllSig_Organe():
    return set(merged_df["SIG_ORGANE"])

def getAllConstructeur():
    return set(merged_df["CONSTRUCTEUR"])

def getAllModeleWithConstruct(constructeurValue):
    allLineWithConstructeur = merged_df.loc[merged_df['CONSTRUCTEUR'] == constructeurValue]
    return set(allLineWithConstructeur["MODELE"])

def getAllModele():
    return set(merged_df["MODELE"])

def getAllSigContexte():
    return set(merged_df["SIG_CONTEXTE"])

def getAllKilometrage():
    return set(merged_df["KILOMETRAGE_CLASSE"])

def predictionWeb(bn,dict,bin_web):
    VAR_PRED = ["MODELE","KILOMETRAGE_CLASSE","SIG_OBS","SIG_ORGANE"]
    dictPrediction = dict()
    context_ToBinary(dictPrediction,bin_web)
    for var in VAR_PRED:
        dictPrediction[var] = dict[var]
        bin_web.append(var)
    # res = bn.predict_proba(dictPrediction[[bin_web]],var_target="SYSTEM_N1",show_progress=True)
    # print(res)
def context_ToBinary(dict,bin_web):
    # Séparation des valeurs de la colonne en utilisant le séparateur "/"
    split_values = dict["SIG_CONTEXTE"].str.split("/")
    # Création des colonnes binaires pour chaque valeur unique
    for value in bin_web:
        dict[f"{value}_BIN"] = split_values.apply(lambda x: int(value in x))