### Chargement des données
Dans cette section, nous allons charger les données à partir d'un fichier Excel. Nous utiliserons la bibliothèque `ExcelReaders` pour lire le fichier et la bibliothèque `DataFrames` pour manipuler les données.


In [1]:
using DataFrames
using ExcelReaders
using Plots
using DataValues
using XLSX
using Statistics
using Tables
import Plots: plot!, vline!


In [2]:
"""
    GV

Mutable record stored as the value of the leaf nodes of the `NAryTreeNode` tree.
It gathers, for one `numero`, its position in the tree and the data sets that the
notebook attaches to it.
"""
mutable struct GV
    # Identifier, read from the NUMERO column of the spreadsheets
    numero::Float64
    # Values of the enclosing tree nodes
    palier::String
    unite::String
    sous_unite::String
    circuit::String
    # Value read from the REFERENCE column
    ref::String
    reg_ref::Vector{String}
    # true when the record hangs under an "avant_RGV" node
    avant_RGV::Bool
    # NUMERO of the record on the same circuit under the other state node, if any
    numero_suc_ou_pred::Union{Float64, Missing}
    maintenances::DataFrame
    IND_COL_1::DataFrame
    PE_max_IND_COL_1::String
    IND_COL_2::DataFrame
    IND_COL_3::DataFrame
    IND_ENC::DataFrame
    # Largest HEURES_MAT value seen in the attached data, rounded down
    date_max::Int

    # Strictly-typed constructor: every argument must already have its field's type.
    function GV(
        numero::Float64,
        palier::String,
        unite::String,
        sous_unite::String,
        circuit::String,
        ref::String,
        reg_ref::Vector{String},
        avant_RGV::Bool,
        numero_suc_ou_pred::Union{Float64, Missing},
        maintenances::DataFrame,
        IND_COL_1::DataFrame,
        PE_max_IND_COL_1::String,
        IND_COL_2::DataFrame,
        IND_COL_3::DataFrame,
        IND_ENC::DataFrame,
        date_max::Int,
    )
        return new(
            numero, palier, unite, sous_unite, circuit, ref, reg_ref, avant_RGV,
            numero_suc_ou_pred, maintenances, IND_COL_1, PE_max_IND_COL_1,
            IND_COL_2, IND_COL_3, IND_ENC, date_max,
        )
    end
end


In [3]:
# Smoke test of the constructor with empty placeholder values
GV(
    0.0, "", "", "", "", "", [""], false, missing,
    DataFrame(), DataFrame(), "", DataFrame(), DataFrame(), DataFrame(), 0,
)

GV(0.0, "", "", "", "", "", [""], false, missing, [1m0×0 DataFrame[0m, [1m0×0 DataFrame[0m, "", [1m0×0 DataFrame[0m, [1m0×0 DataFrame[0m, [1m0×0 DataFrame[0m, 0)

In [4]:
"""
    NAryTreeNode

Node of a tree with an arbitrary number of children. `parent` is `missing` for a
root; `value` holds a label (`String`), a `GV` record, or `missing`.
"""
mutable struct NAryTreeNode
    parent::Union{NAryTreeNode, Missing}
    value::Union{String, GV, Missing}
    children::Vector{NAryTreeNode}

    # Strictly-typed constructor; it does not register the node in `parent.children`.
    function NAryTreeNode(
        parent::Union{NAryTreeNode, Missing},
        value::Union{String, GV, Missing},
        children::Vector{NAryTreeNode},
    )
        return new(parent, value, children)
    end
end

### Avant de commencer à exécuter les cellules du code
- Copier dans la variable `chemin_données` le chemin d'accès vers le dossier qui contient `Données_EDF_240611`, terminé par `/` (le code y ajoute lui-même `Données_EDF_240611/...`)
- Copier dans la variable `chemin_projet`, le chemin d'accès vers le dossier `JuliaStatsProject`

In [5]:
# Machine-specific paths; both end with "/" because file names are appended to them
# by plain string concatenation.
# Folder that contains the `Données_EDF_240611` directory
chemin_données = "/home/AD/faidy/JuliaStatsProject/data/real_data/"
# Root folder of the project
chemin_projet = "/home/AD/faidy/JuliaStatsProject/"

"/home/AD/faidy/JuliaStatsProject/"

Dans cette section, nous chargeons les données à partir des fichiers Excel `IND-COL-1.xls` et `IND-COL-2.xls` situés dans le répertoire spécifié.

In [6]:
# Read the "IND-COL-1" sheet of IND-COL-1.xls into a matrix (header row included)
data_matrix_ind_col1 = readxlsheet(
    chemin_données * "Données_EDF_240611/Colmatage/IND-COL-1.xls",
    "IND-COL-1",
)

1083×17 Matrix{Any}:
 "UNITE"  "SOUS_UNITE"  "CIRCUIT"   "NUMERO"  …  "TUBISTE"  "CONSTITUTION"
 "U4"     "S1"          "C3"       1.27          "S"        "CONST2"
 "U4"     "S1"          "C3"       1.27          "S"        "CONST2"
 "U4"     "S1"          "C3"       1.27          "S"        "CONST2"
 "U4"     "S1"          "C3"       1.27          "S"        "CONST2"
 "U4"     "S1"          "C3"       1.27       …  "S"        "CONST2"
 "U4"     "S1"          "C3"       1.27          "S"        "CONST2"
 "U4"     "S1"          "C3"       1.27          "S"        "CONST2"
 "U4"     "S2"          "C1"       1.43          "V"        "CONST2"
 "U4"     "S2"          "C1"       1.43          "V"        "CONST2"
 ⋮                                            ⋱  ⋮          
 "U19"    "S3"          "C2"       2.56          "V"        "CONST3"
 "U19"    "S3"          "C2"       2.56       …  "V"        "CONST3"
 "U19"    "S3"          "C2"       2.56          "V"        "CONST3"
 "U19"    "S4" 

In [7]:
# Read the "IND-COL-2" sheet of IND-COL-2.xls into a matrix (header row included)
data_matrix_ind_col2 = readxlsheet(
    chemin_données * "Données_EDF_240611/Colmatage/IND-COL-2.xls",
    "IND-COL-2",
)

18722×17 Matrix{Any}:
 "UNITE"  "SOUS_UNITE"  "CIRCUIT"   "NUMERO"  …  "TUBISTE"  "CONSTITUTION"
 "U1"     "S1"          "C1"       1.4           "W"        "CONST2"
 "U1"     "S1"          "C1"       1.4           "W"        "CONST2"
 "U1"     "S1"          "C1"       1.4           "W"        "CONST2"
 "U1"     "S1"          "C1"       1.4           "W"        "CONST2"
 "U1"     "S1"          "C1"       1.4        …  "W"        "CONST2"
 "U1"     "S1"          "C1"       1.4           "W"        "CONST2"
 "U1"     "S1"          "C1"       1.4           "W"        "CONST2"
 "U1"     "S1"          "C1"       1.4           "W"        "CONST2"
 "U1"     "S1"          "C1"       1.4           "W"        "CONST2"
 ⋮                                            ⋱  ⋮          
 "U19"    "S4"          "C3"       2.8           "V"        "CONST3"
 "U19"    "S4"          "C3"       2.8           "V"        "CONST3"
 "U19"    "S4"          "C3"       2.8        …  "V"        "CONST3"
 "U19"    "S4"

In [8]:
# Empty spreadsheet cells are read as DataValue{Union{}} objects; they are replaced
# by `missing`, Julia's marker for missing data.
isNA(x) = x isa DataValue{Union{}}

# NOTE(review): `valeurs` is not used in the visible part of the notebook.
valeurs = replace(data_matrix_ind_col1) do cell
    isNA(cell) ? missing : cell
end

data_matrix_ind_col1 = replace(data_matrix_ind_col1) do cell
    isNA(cell) ? missing : cell
end
data_matrix_ind_col2 = replace(data_matrix_ind_col2) do cell
    isNA(cell) ? missing : cell
end

18722×17 Matrix{Any}:
 "UNITE"  "SOUS_UNITE"  "CIRCUIT"   "NUMERO"  …  "TUBISTE"  "CONSTITUTION"
 "U1"     "S1"          "C1"       1.4           "W"        "CONST2"
 "U1"     "S1"          "C1"       1.4           "W"        "CONST2"
 "U1"     "S1"          "C1"       1.4           "W"        "CONST2"
 "U1"     "S1"          "C1"       1.4           "W"        "CONST2"
 "U1"     "S1"          "C1"       1.4        …  "W"        "CONST2"
 "U1"     "S1"          "C1"       1.4           "W"        "CONST2"
 "U1"     "S1"          "C1"       1.4           "W"        "CONST2"
 "U1"     "S1"          "C1"       1.4           "W"        "CONST2"
 "U1"     "S1"          "C1"       1.4           "W"        "CONST2"
 ⋮                                            ⋱  ⋮          
 "U19"    "S4"          "C3"       2.8           "V"        "CONST3"
 "U19"    "S4"          "C3"       2.8           "V"        "CONST3"
 "U19"    "S4"          "C3"       2.8        …  "V"        "CONST3"
 "U19"    "S4"

### Conversion de Matrice en DataFrame

La fonction `MatrixToDataFrame(mat)` prend en entrée une matrice `mat` et retourne un DataFrame `DF_mat` : la première ligne de `mat` fournit les noms de colonnes et n'est pas incluse dans les données.

#### Paramètres :

- `mat` : Une matrice représentant les données à convertir en DataFrame.

#### Sortie :

Un objet DataFrame `DF_mat` où les données de la première ligne de `mat` sont utilisées comme noms de colonnes.

Cette fonction est utile pour convertir des données structurées sous forme de matrice en un format plus facile à manipuler et à analyser à l'aide de la bibliothèque `DataFrames` en Julia.


In [9]:
"""
    MatrixToDataFrame(mat)

Build a `DataFrame` from the matrix `mat`: row 1 supplies the column names
(converted with `string`), rows 2 to the end supply the data.
"""
function MatrixToDataFrame(mat)
    contenu = mat[2:end, 1:end]
    entetes = string.(mat[1, 1:end])
    return DataFrame(contenu, entetes)
end

MatrixToDataFrame (generic function with 1 method)

In [10]:
# IND-COL-1 sheet as a DataFrame (column names taken from the first matrix row)
df1 = MatrixToDataFrame(data_matrix_ind_col1)

Row,UNITE,SOUS_UNITE,CIRCUIT,NUMERO,BR,HEURES_MAT,PE,AVANT_NET,APRES_NET,INFER,VALEUR,INCERTITUDE,TAUX_H,REFERENCE,ACIERISTE,TUBISTE,CONSTITUTION
Unnamed: 0_level_1,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any
1,U4,S1,C3,1.27,F,215612.0,E9,false,false,false,28.0,5.0,6.0,R12,S,S,CONST2
2,U4,S1,C3,1.27,F,224510.0,E9,true,false,false,30.0,5.0,missing,R12,S,S,CONST2
3,U4,S1,C3,1.27,F,237085.0,E9,false,false,true,15.0,missing,missing,R12,S,S,CONST2
4,U4,S1,C3,1.27,F,245389.0,E5,false,false,true,10.0,missing,missing,R12,S,S,CONST2
5,U4,S1,C3,1.27,F,245389.0,E6,false,false,false,missing,missing,missing,R12,S,S,CONST2
6,U4,S1,C3,1.27,F,245389.0,E1,false,false,false,missing,missing,missing,R12,S,S,CONST2
7,U4,S1,C3,1.27,F,245389.0,E9,false,false,true,15.0,missing,missing,R12,S,S,CONST2
8,U4,S2,C1,1.43,F,148255.0,E9,false,false,true,10.0,missing,missing,R12,HI,V,CONST2
9,U4,S2,C1,1.43,F,148255.0,E5,false,false,false,missing,missing,missing,R12,HI,V,CONST2
10,U4,S2,C1,1.43,F,148255.0,E4,false,false,false,missing,missing,missing,R12,HI,V,CONST2


In [11]:
# IND-COL-2 sheet as a DataFrame (column names taken from the first matrix row)
df2 = MatrixToDataFrame(data_matrix_ind_col2)

Row,UNITE,SOUS_UNITE,CIRCUIT,NUMERO,BR,HEURES_MAT,PE,AVANT_NET,APRES_NET,SONDE,INFER,VALEUR,INCERTITUDE,REFERENCE,ACIERISTE,TUBISTE,CONSTITUTION
Unnamed: 0_level_1,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any
1,U1,S1,C1,1.4,C,128388.0,E2,false,false,IND-COL-2,false,25.9,missing,R12,H,W,CONST2
2,U1,S1,C1,1.4,C,128388.0,E5,false,false,IND-COL-2,false,25.7,missing,R12,H,W,CONST2
3,U1,S1,C1,1.4,C,128388.0,E7,false,false,IND-COL-2,false,25.8,missing,R12,H,W,CONST2
4,U1,S1,C1,1.4,C,128388.0,E1,false,false,IND-COL-2,false,23.7,missing,R12,H,W,CONST2
5,U1,S1,C1,1.4,C,128388.0,E9,false,false,IND-COL-2,false,20.7,missing,R12,H,W,CONST2
6,U1,S1,C1,1.4,C,128388.0,E3,false,false,IND-COL-2,false,27.8,missing,R12,H,W,CONST2
7,U1,S1,C1,1.4,C,128388.0,E4,false,false,IND-COL-2,false,26.6,missing,R12,H,W,CONST2
8,U1,S1,C1,1.4,C,128388.0,E6,false,false,IND-COL-2,false,25.9,missing,R12,H,W,CONST2
9,U1,S1,C1,1.4,C,128388.0,E8,false,false,IND-COL-2,false,19.5,missing,R12,H,W,CONST2
10,U1,S1,C1,1.4,C,189934.0,E9,false,false,IND-COL-2,false,31.2,missing,R12,H,W,CONST2


### Suppression des valeurs manquantes

Les lignes suivantes suppriment les lignes de `df1` et `df2` où la colonne `:VALEUR` contient des valeurs manquantes.


In [12]:
# Keep only the rows of df1 whose VALEUR is present
df1 = dropmissing(df1, :VALEUR)

Row,UNITE,SOUS_UNITE,CIRCUIT,NUMERO,BR,HEURES_MAT,PE,AVANT_NET,APRES_NET,INFER,VALEUR,INCERTITUDE,TAUX_H,REFERENCE,ACIERISTE,TUBISTE,CONSTITUTION
Unnamed: 0_level_1,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any
1,U4,S1,C3,1.27,F,215612.0,E9,false,false,false,28.0,5.0,6.0,R12,S,S,CONST2
2,U4,S1,C3,1.27,F,224510.0,E9,true,false,false,30.0,5.0,missing,R12,S,S,CONST2
3,U4,S1,C3,1.27,F,237085.0,E9,false,false,true,15.0,missing,missing,R12,S,S,CONST2
4,U4,S1,C3,1.27,F,245389.0,E5,false,false,true,10.0,missing,missing,R12,S,S,CONST2
5,U4,S1,C3,1.27,F,245389.0,E9,false,false,true,15.0,missing,missing,R12,S,S,CONST2
6,U4,S2,C1,1.43,F,148255.0,E9,false,false,true,10.0,missing,missing,R12,HI,V,CONST2
7,U4,S2,C1,1.43,F,158664.0,E1,false,false,true,10.0,missing,missing,R12,HI,V,CONST2
8,U4,S2,C1,1.43,F,158664.0,E9,false,false,true,10.0,missing,missing,R12,HI,V,CONST2
9,U4,S2,C1,1.43,F,179900.0,E1,false,false,true,10.0,missing,missing,R12,HI,V,CONST2
10,U4,S2,C1,1.43,F,179900.0,E5,false,false,true,10.0,missing,missing,R12,HI,V,CONST2


In [13]:
# Keep only the rows of df2 whose VALEUR is present
df2 = dropmissing(df2, :VALEUR)

Row,UNITE,SOUS_UNITE,CIRCUIT,NUMERO,BR,HEURES_MAT,PE,AVANT_NET,APRES_NET,SONDE,INFER,VALEUR,INCERTITUDE,REFERENCE,ACIERISTE,TUBISTE,CONSTITUTION
Unnamed: 0_level_1,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any
1,U1,S1,C1,1.4,C,128388.0,E2,false,false,IND-COL-2,false,25.9,missing,R12,H,W,CONST2
2,U1,S1,C1,1.4,C,128388.0,E5,false,false,IND-COL-2,false,25.7,missing,R12,H,W,CONST2
3,U1,S1,C1,1.4,C,128388.0,E7,false,false,IND-COL-2,false,25.8,missing,R12,H,W,CONST2
4,U1,S1,C1,1.4,C,128388.0,E1,false,false,IND-COL-2,false,23.7,missing,R12,H,W,CONST2
5,U1,S1,C1,1.4,C,128388.0,E9,false,false,IND-COL-2,false,20.7,missing,R12,H,W,CONST2
6,U1,S1,C1,1.4,C,128388.0,E3,false,false,IND-COL-2,false,27.8,missing,R12,H,W,CONST2
7,U1,S1,C1,1.4,C,128388.0,E4,false,false,IND-COL-2,false,26.6,missing,R12,H,W,CONST2
8,U1,S1,C1,1.4,C,128388.0,E6,false,false,IND-COL-2,false,25.9,missing,R12,H,W,CONST2
9,U1,S1,C1,1.4,C,128388.0,E8,false,false,IND-COL-2,false,19.5,missing,R12,H,W,CONST2
10,U1,S1,C1,1.4,C,189934.0,E9,false,false,IND-COL-2,false,31.2,missing,R12,H,W,CONST2


In [14]:
# Read the "NETTOYAGES" sheet as a raw matrix, then as a DataFrame
nettoyages = readxlsheet(
    chemin_données * "Données_EDF_240611/Nettoyages/NETTOYAGES.xls",
    "NETTOYAGES",
)
df_nettoyages = MatrixToDataFrame(nettoyages)

Row,UNITE,SOUS_UNITE,CIRCUIT,REFERENCE,NUMERO,HEURES_MAT,Plaque à tube - curatif,Plaque à tube - préventif,PE_CURATIF,PE_PREVENTIF,INTERVENANT,ZONE,PROCEDE,BOUE_KG,CUIVRE_KG,EFFICACITE_DECOLMATAGE_ESTIMEE_MIN,EFFICACITE_DECOLMATAGE_ESTIMEE_MAX,ACIERISTE,TUBISTE,CONSTITUTION,INTERET ANALYSE,FAMILLE PROCEDE
Unnamed: 0_level_1,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any
1,U14,S1,C1,R12,1.5,12110.0,true,false,true,false,I3,GV Entier,PROC5,1827.0,3.0,,,I,V,CONST2,HORS SCOPE,
2,U14,S1,C2,R12,1.48,12110.0,true,false,true,false,I3,GV Entier,PROC5,1565.0,6.0,,,H,W,CONST2,HORS SCOPE,
3,U14,S1,C3,R12,1.47,12110.0,true,false,true,false,I3,GV Entier,PROC5,1882.0,6.0,,,I,V,CONST2,HORS SCOPE,
4,U14,S1,C4,R12,1.52,12110.0,true,false,true,false,I3,GV Entier,PROC5,1788.0,1.0,,,I,V,CONST2,HORS SCOPE,
5,U17,S2,C1,R12,1.35,23613.0,true,false,false,false,I3,GV Partiel,PROC5,348.0,1.0,,,I,V,CONST2,HORS SCOPE,
6,U17,S2,C2,R12,1.34,23613.0,true,false,false,false,I3,GV Partiel,PROC5,334.0,2.0,,,I,V,CONST2,HORS SCOPE,
7,U17,S2,C3,R12,1.32,23613.0,true,false,false,false,I3,GV Partiel,PROC5,285.0,3.0,,,H,W,CONST2,HORS SCOPE,
8,U17,S2,C4,R12,1.33,23613.0,true,false,false,false,I3,GV Partiel,PROC5,259.0,2.0,,,S,S,CONST2,HORS SCOPE,
9,U10,S2,C1,R3,0.04,101049.0,true,false,true,false,I3,GV Entier,PROC5,3028.0,155.0,,,S,S,CONST1,HORS SCOPE,
10,U10,S2,C2,R3,0.05,101049.0,true,false,true,false,I3,GV Entier,PROC5,3183.0,190.0,,,S,S,CONST1,HORS SCOPE,


In [15]:
# Read the "HEURES" sheet and convert it straight to a DataFrame
df_Infos_gen = MatrixToDataFrame(
    readxlsheet(
        chemin_données * "Données_EDF_240611/Informations générales 9-7/HEURES.xls",
        "HEURES",
    ),
)

Row,UNITE,SOUS_UNITE,CIRCUIT,NUMERO,REFERENCE,ACIERISTE,TUBISTE,CONSTITUTION,HEURES_MAT,D'ORIGINE ou REMPLACEMENT ?
Unnamed: 0_level_1,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any
1,U1,S1,C1,1.4,R12,H,W,CONST2,0.0,ORIGINE
2,U1,S1,C1,1.4,R12,H,W,CONST2,12224.0,ORIGINE
3,U1,S1,C1,1.4,R12,H,W,CONST2,19844.0,ORIGINE
4,U1,S1,C1,1.4,R12,H,W,CONST2,27585.0,ORIGINE
5,U1,S1,C1,1.4,R12,H,W,CONST2,37660.0,ORIGINE
6,U1,S1,C1,1.4,R12,H,W,CONST2,45787.0,ORIGINE
7,U1,S1,C1,1.4,R12,H,W,CONST2,54790.0,ORIGINE
8,U1,S1,C1,1.4,R12,H,W,CONST2,62897.0,ORIGINE
9,U1,S1,C1,1.4,R12,H,W,CONST2,74223.0,ORIGINE
10,U1,S1,C1,1.4,R12,H,W,CONST2,84306.0,ORIGINE


In [16]:
"""
    IND_COL_3_and_EncMatToDataFrame(mat, circuit::String)

Extract, for one circuit, two two-column DataFrames from the sheet matrix `mat`.

Rows 3 to the end of column 1 form the first column of both results, named after
`mat[1, 1]`. The second column is taken from columns 3-6 (suffix `_ENC`) for the
first result and from columns 15-18 (suffix `_IND_COL_3`) for the second; its base
name comes from row 2. `"C1"`, `"C2"` and `"C3"` select the 1st, 2nd and 3rd column
of each group; any other value selects the 4th.

Returns the tuple `(df_enc, df_ind_col_3)`.
"""
function IND_COL_3_and_EncMatToDataFrame(mat, circuit::String)
    # Position of the circuit inside each group of four columns
    decalage = circuit == "C1" ? 0 : circuit == "C2" ? 1 : circuit == "C3" ? 2 : 3

    # Column labels (all of them are read, as the sheet is expected to be complete)
    nom_abscisse = mat[1, 1]
    noms_enc = ["$(mat[2, j])_ENC" for j in 3:6]
    noms_ind_col_3 = ["$(mat[2, j])_IND_COL_3" for j in 15:18]

    df_enc = DataFrame()
    df_enc[!, nom_abscisse] = mat[3:end, 1]
    df_enc[!, noms_enc[1 + decalage]] = mat[3:end, 3 + decalage]

    df_ind_col_3 = DataFrame()
    df_ind_col_3[!, nom_abscisse] = mat[3:end, 1]
    df_ind_col_3[!, noms_ind_col_3[1 + decalage]] = mat[3:end, 15 + decalage]

    return df_enc, df_ind_col_3
end


IND_COL_3_and_EncMatToDataFrame (generic function with 1 method)

In [17]:
# Unit nodes of palier P1, created without parent and without children
U2, U3, U5, U9, U10, U18, U19, U8, U13 = map(
    nom -> NAryTreeNode(missing, nom, NAryTreeNode[]),
    ("U2", "U3", "U5", "U9", "U10", "U18", "U19", "U8", "U13"),
)

# Palier node owning those units
P1 = NAryTreeNode(missing, "P1", NAryTreeNode[U2, U3, U5, U9, U10, U18, U19, U8, U13])

# Point every unit back to its palier
foreach(enfant -> enfant.parent = P1, P1.children)


In [18]:
# Unit nodes of palier P2, created without parent and without children
U1, U4, U11, U12, U14, U15, U16, U17 = map(
    nom -> NAryTreeNode(missing, nom, NAryTreeNode[]),
    ("U1", "U4", "U11", "U12", "U14", "U15", "U16", "U17"),
)

# Palier node owning those units
P2 = NAryTreeNode(missing, "P2", NAryTreeNode[U1, U4, U11, U12, U14, U15, U16, U17])

# Point every unit back to its palier
foreach(enfant -> enfant.parent = P2, P2.children)

In [19]:
# Unit nodes of palier P3, created without parent and without children
U6, U7 = map(nom -> NAryTreeNode(missing, nom, NAryTreeNode[]), ("U6", "U7"))

# Palier node owning those units
P3 = NAryTreeNode(missing, "P3", NAryTreeNode[U6, U7])

# Point every unit back to its palier
foreach(enfant -> enfant.parent = P3, P3.children)

In [20]:
paliers = [P1, P2, P3]

# Under every unit, add one node per distinct SOUS_UNITE listed for it in df_Infos_gen
for palier in paliers, unite in palier.children
    lignes_unite = df_Infos_gen[df_Infos_gen.UNITE .== unite.value, :]
    for nom_sous_unite in unique(lignes_unite.SOUS_UNITE)
        push!(unite.children, NAryTreeNode(unite, nom_sous_unite, NAryTreeNode[]))
    end
end

In [21]:

"""
    print_tree_from_paliers(paliers::Vector{NAryTreeNode}, filename::String)

Write an indented text dump of the tree rooted at `paliers` into `filename`
(the file is overwritten). Six levels are walked with nested loops: palier, unit,
sub-unit, state, circuit and leaf. A leaf is printed through `value.numero` and is
skipped when its value is `missing`.
"""
function print_tree_from_paliers(paliers::Vector{NAryTreeNode}, filename::String)
    open(filename, "w") do io
        for palier in paliers
            println(io, "Palier: ", string(palier.value))
            for unite in palier.children
                println(io, "   |- Unité: ", string(unite.value))
                for sous_unite in unite.children
                    println(io, "       |- Sous-unité: ", string(sous_unite.value))
                    for etat in sous_unite.children
                        println(io, "           |- ", string(etat.value))
                        for circuit in etat.children
                            println(io, "               |- ", string(circuit.value))
                            for feuille in circuit.children
                                # Circuits without a usable record carry a `missing` leaf
                                ismissing(feuille.value) && continue
                                println(io, "                 |- ", string(feuille.value.numero))
                            end
                        end
                    end
                end
            end
        end
    end
end



print_tree_from_paliers (generic function with 1 method)

In [22]:
# Give every sub-unit its two state nodes, in this order: "avant_RGV", then "apres_RGV"
for palier in paliers, unite in palier.children, sous_unite in unite.children
    for nom_etat in ("avant_RGV", "apres_RGV")
        push!(sous_unite.children, NAryTreeNode(sous_unite, nom_etat, NAryTreeNode[]))
    end
end

In [23]:
# Under every state node, add one node per distinct CIRCUIT found in df_Infos_gen for
# that unit / sub-unit; column 10 tells whether a row is "ORIGINE" or "Remplacement".
for palier in paliers, unite in palier.children, sous_unite in unite.children
    circuits_pour = origine -> unique(
        filter(
            row -> row.UNITE == unite.value &&
                   row.SOUS_UNITE == sous_unite.value &&
                   row[10] == origine,
            df_Infos_gen,
        ).CIRCUIT,
    )
    circuits_avant = circuits_pour("ORIGINE")
    circuits_apres = circuits_pour("Remplacement")

    for etat in sous_unite.children
        # Any state other than "avant_RGV" receives the replacement circuits
        circuits_etat = etat.value == "avant_RGV" ? circuits_avant : circuits_apres
        for circuit in circuits_etat
            push!(etat.children, NAryTreeNode(etat, circuit, NAryTreeNode[]))
        end
    end
end

In [24]:
"""
    max_string(v::AbstractVector)

Return the element of `v` whose first `E<digits>` pattern carries the largest
number (e.g. `"E9"` among `"E1"`, `"E5"`, `"E9"`).

Elements without such a pattern and `missing` elements are ignored. On ties the
first element wins. Returns `""` when no element matches (including empty `v`).
"""
function max_string(v::AbstractVector)
    max_str = ""
    # Parsed numbers are never negative, so -1 is below every candidate
    max_num = -1
    for s in v
        ismissing(s) && continue
        # Extract the number that follows the letter E
        m = match(r"E(\d+)", s)
        m === nothing && continue
        num = parse(Int, m.captures[1])
        if num > max_num
            max_num = num
            max_str = s
        end
    end
    return max_str
end


max_string (generic function with 1 method)

In [25]:

# Directory of the "Encrassement" workbooks. The trailing "/" is required because
# file names are appended to `directory` by string concatenation further down.
directory = string(chemin_données, "Données_EDF_240611/Encrassement/")

# Names of the files and sub-directories found there
files = readdir(directory)

# Keep the .xlsx workbooks only. Names starting with "~\$" are the lock files Excel
# creates next to a workbook while it is open; they are not readable workbooks, so
# all of them are skipped instead of a hard-coded pair of names.
xlsx_files = filter(file -> endswith(file, ".xlsx") && !startswith(file, "~\$"), files)

56-element Vector{String}:
 "IND-ENC_U11S1_1,24 puis 4-1,22 puis 3,98-0,84 puis 3,95-1,21 puis 3,94.xlsx"
 "IND-ENC_U11S2_1,37-1,39-1,3-1,38.xlsx"
 "IND-ENC_U12S1_1,82-1,81-1,79-1,8.xlsx"
 "IND-ENC_U12S2_2,02-2-2,14-2,01.xlsx"
 "IND-ENC_U13S1_2,22-2,23-2,21.xlsx"
 "IND-ENC_U13S2_2,4-2,41-2,42.xlsx"
 "IND-ENC_U13S3_9,13-9,12-9,11.xlsx"
 "IND-ENC_U13S4_2,48-2,46-2,47.xlsx"
 "IND-ENC_U13S5_3,86-3,84-3,92.xlsx"
 "IND-ENC_U13S6_3,91-3,9-3,85.xlsx"
 ⋮
 "IND-ENC_U7S2_2,33-2,35-2,34-2,36.xlsx"
 "IND-ENC_U8S1_9,43-9,42-9,41.xlsx"
 "IND-ENC_U8S2_1,04-1,05-1,07.xlsx"
 "IND-ENC_U8S3_1,09-1,08-1,06.xlsx"
 "IND-ENC_U8S4_9,32-9,23-9,33.xlsx"
 "IND-ENC_U9S1_1,9-2,08-1,91.xlsx"
 "IND-ENC_U9S2_2,82-2,83-2,84.xlsx"
 "IND-ENC_U9S3_2,38-2,37-2,39.xlsx"
 "IND-ENC_U9S4_2,87-2,89-2,9.xlsx"

In [26]:
using Printf

"""
    find_in_vector(substring::String, vec::Vector{String})

Return, in their original order, the elements of `vec` that contain `substring`.
The result is a new vector; it is empty when nothing matches.
"""
function find_in_vector(substring::String, vec::Vector{String})
    return filter(candidat -> occursin(substring, candidat), vec)
end

"""
    float_to_comma_string(num::Float64)::String

Format `num` with exactly two decimals and a comma as decimal separator,
e.g. `0.7` gives `"0,70"`.
"""
function float_to_comma_string(num::Float64)::String
    deux_decimales = @sprintf("%.2f", num)
    return replace(deux_decimales, '.' => ',')
end


# Demo: list the workbook names that contain the formatted number.
# Despite its name, `indices` receives the matching strings, not their positions.
substring = float_to_comma_string(0.7)
indices = find_in_vector(substring, xlsx_files)
println("Indices found: ", indices)


Indices found: String

[]


In [27]:
# Example DataFrames with hours expressed as integers; B has no rows
A = DataFrame(HEURES_MAT = [8, 12, 16])
B = DataFrame(HEURES_MAT = [])

"""
    count_previous_cleanings(ind_col_1::DataFrame, nettoyages::DataFrame)::Vector{Int}

For every row of `ind_col_1`, count the rows of `nettoyages` whose `HEURES_MAT` is
strictly smaller than the row's `HEURES_MAT`, plus one when the row's `APRES_NET`
flag is true.

When `nettoyages` is empty the result is all zeros and `APRES_NET` is not read.
"""
function count_previous_cleanings(ind_col_1::DataFrame, nettoyages::DataFrame)::Vector{Int}
    nb_lignes = nrow(ind_col_1)
    isempty(nettoyages) && return zeros(Int8, nb_lignes)
    return [
        sum(nettoyages.HEURES_MAT .< ind_col_1.HEURES_MAT[i]) +
        (ind_col_1.APRES_NET[i] ? 1 : 0) for i in 1:nb_lignes
    ]
end

# Add the new column to the example DataFrame A (B is empty, so every count is 0)
A.NETTOYAGES_PRECEDENTS = count_previous_cleanings(A, B)

# Show the updated DataFrame
println(A)


[1m3×2 DataFrame[0m
[1m Row [0m│[1m HEURES_MAT [0m[1m NETTOYAGES_PRECEDENTS [0m
     │[90m Int64      [0m[90m Int64                 [0m
─────┼───────────────────────────────────
   1 │          8                      0
   2 │         12                      0
   3 │         16                      0


In [28]:
# Attach one leaf to every circuit node of the tree: a GV record when the circuit maps
# to exactly one NUMERO in df_Infos_gen, a `missing` leaf otherwise.
for palier in paliers
    unites = palier.children
    for unite in unites
        sous_unites = unite.children
        for sous_unite in sous_unites
            data_sous_unite = filter(row -> row.UNITE .== unite.value && row.SOUS_UNITE .== sous_unite.value,
                                 df_Infos_gen)
            # Split the rows on column 10 ("ORIGINE" / "Remplacement")
            dict = Dict{String, DataFrame}()
            dict["avant_RGV"] = data_sous_unite[data_sous_unite[:, 10] .== "ORIGINE", :]
            dict["apres_RGV"] = data_sous_unite[data_sous_unite[:, 10] .== "Remplacement", :]
            # PE label with the largest number among the IND-COL-1 rows of this sub-unit.
            # It does not depend on the state, so it is computed once per sub-unit.
            PE_max = max_string(filter(row -> row.UNITE .== unite.value && row.SOUS_UNITE .== sous_unite.value, df1).PE)
            for etat in sous_unite.children
                for circuit in etat.children
                    num = unique(dict[etat.value][dict[etat.value].CIRCUIT .== circuit.value, :].NUMERO)
                    if length(num) == 1
                        num = num[1]

                        ref = unique(dict[etat.value][dict[etat.value].CIRCUIT .== circuit.value, :].REFERENCE)[1]

                        reg_ref = Vector([""])

                        # NUMERO found on the same circuit under the other state, if that state has rows
                        numero_suc_ou_pred = missing
                        if etat.value=="avant_RGV" && !isempty(dict["apres_RGV"])
                            numero_suc_ou_pred = unique(dict["apres_RGV"][dict["apres_RGV"].CIRCUIT .== circuit.value, :].NUMERO)[1]
                        end
                        if etat.value=="apres_RGV" && !isempty(dict["avant_RGV"])
                            numero_suc_ou_pred = unique(dict["avant_RGV"][dict["avant_RGV"].CIRCUIT .== circuit.value, :].NUMERO)[1]
                        end

                        nettoyages = DataFrame()
                        ind_col_1 = DataFrame()
                        ind_col_2 = DataFrame()
                        df_enc, df_ind_col_3 = DataFrame(), DataFrame()
                        date_max = 0
                        if !ismissing(num)
                            nettoyages = df_nettoyages[df_nettoyages.NUMERO .== num, :]
                            nettoyages = nettoyages[:, [6, 7]] # column 6: HEURES_MAT, column 7: curative flag
                            
                            ind_col_1 = df1[df1.NUMERO .== num, :]
                            ind_col_1 = ind_col_1[ind_col_1.PE .== PE_max, :]
                            select!(ind_col_1, Not([:ACIERISTE, :TUBISTE, :CONSTITUTION]))
                            ind_col_1_C = ind_col_1[ind_col_1.BR .== "C", :]
                            ind_col_1_F = ind_col_1[ind_col_1.BR .== "F", :]
                            # NOTE(review): when only one of the C / F subsets has rows, ind_col_1
                            # keeps its raw layout (no VALEUR_CHAUD / VALEUR_FROID columns) although
                            # it is not empty — confirm the plotting code copes with that.
                            if !isempty(ind_col_1_C) && !isempty(ind_col_1_F)
                                date_max = maximum(ind_col_1.HEURES_MAT)

                                # Drop the BR column
                                select!(ind_col_1_C, Not(:BR))
                                select!(ind_col_1_F, Not(:BR))

                                rename!(ind_col_1_C, :VALEUR => :VALEUR_CHAUD)
                                rename!(ind_col_1_F, :VALEUR => :VALEUR_FROID)

                                ind_col_1_C.nb_nettoyages_precedents = count_previous_cleanings(ind_col_1_C, nettoyages)
                                ind_col_1_F.nb_nettoyages_precedents = count_previous_cleanings(ind_col_1_F, nettoyages)

                                # One row per measurement that exists in both the C and the F subsets
                                ind_col_1 = innerjoin(ind_col_1_C, ind_col_1_F, 
                                on = [:UNITE, :SOUS_UNITE, :CIRCUIT, :NUMERO, :HEURES_MAT, :PE, :REFERENCE,
                                :nb_nettoyages_precedents], 
                                makeunique = true)
                            end
                            
                            ind_col_2 = df2[df2.NUMERO .== num, :]
                            ind_col_2 = ind_col_2[ind_col_2.PE .== PE_max, :]
                            select!(ind_col_2, Not([:ACIERISTE, :TUBISTE, :CONSTITUTION]))
                            ind_col_2_C = ind_col_2[ind_col_2.BR .== "C", :]
                            ind_col_2_F = ind_col_2[ind_col_2.BR .== "F", :]
                            if !isempty(ind_col_2)
                                date_max = max(date_max, maximum(ind_col_2.HEURES_MAT))

                                # Drop the BR column
                                select!(ind_col_2_C, Not(:BR))
                                select!(ind_col_2_F, Not(:BR))

                                rename!(ind_col_2_C, :VALEUR => :VALEUR_CHAUD)
                                rename!(ind_col_2_F, :VALEUR => :VALEUR_FROID)

                                ind_col_2_C.nb_nettoyages_precedents = count_previous_cleanings(ind_col_2_C, nettoyages)
                                ind_col_2_F.nb_nettoyages_precedents = count_previous_cleanings(ind_col_2_F, nettoyages)

                                # One row per measurement that exists in both the C and the F subsets
                                ind_col_2 = innerjoin(ind_col_2_C, ind_col_2_F, 
                                            on = [:UNITE, :SOUS_UNITE, :CIRCUIT, :NUMERO, :HEURES_MAT, :PE, :REFERENCE,
                                            :nb_nettoyages_precedents], 
                                            makeunique = true)
                            end

                            # The workbook names list the numbers after the last "_" without
                            # trailing zeros ("1,8", "2", "1,24 puis 4"), so a substring search
                            # for the two-decimal form ("1,80", "2,00") misses them. Each number
                            # of the name is therefore normalised to that form and compared as
                            # a whole token.
                            num_str = float_to_comma_string(num)
                            enc_files = filter(xlsx_files) do file
                                numeros = last(split(first(splitext(file)), '_'))
                                any(m -> float_to_comma_string(parse(Float64, replace(m.match, "," => "."))) == num_str,
                                    eachmatch(r"\d+(?:,\d+)?", numeros))
                            end
                            if !isempty(enc_files)
                                enc_file = enc_files[1]
                                df_enc, df_ind_col_3 = IND_COL_3_and_EncMatToDataFrame(XLSX.readdata(string(directory, "$enc_file"), 
                                "Feuil1", "A1:V1000"), circuit.value)
                                
                                df_enc, df_ind_col_3 = dropmissing(df_enc, :HEURES_MAT), dropmissing(df_ind_col_3, :HEURES_MAT)
                                date_max = max(date_max, maximum(df_enc.HEURES_MAT))
                            end

                        end
                        
                        # GV stores date_max as an Int
                        date_max = Int(floor(date_max))
                        gv = GV(num, palier.value, unite.value, sous_unite.value, circuit.value, ref, reg_ref, 
                        etat.value=="avant_RGV", numero_suc_ou_pred, nettoyages, ind_col_1, PE_max, 
                        ind_col_2, df_ind_col_3, df_enc, date_max)
                        push!(circuit.children, NAryTreeNode(circuit, gv, Vector{NAryTreeNode}()))
                    else
                        # Zero or several NUMERO values for this circuit: keep a placeholder leaf
                        num = missing
                        push!(circuit.children, NAryTreeNode(circuit, num, Vector{NAryTreeNode}()))
                    end
                    
                end
            end
        end
    end
end


In [29]:
# Write the tree, starting from the paliers, to the text file "arbre.txt"
print_tree_from_paliers(paliers, "arbre.txt")

In [30]:
# Marker shape that encodes each circuit number
circuit_shapes = Dict{String, Symbol}(
    "C1" => :circle,
    "C2" => :diamond,
    "C3" => :star5,
    "C4" => :rect,
)

# Colour assigned to each circuit
circuit_enc_colors = Dict{String, Symbol}(
    "C1" => :pink,
    "C2" => :purple,
    "C3" => :grey,
    "C4" => :magenta,
)


:magenta

In [31]:
"""
    plot_with_condition(p, x, y, z, color)

Add to plot `p` a dashed segment in `color` between every pair of consecutive
points `(x[i], y[i])`, `(x[i+1], y[i+1])` for which `z[i] == z[i+1]`. Pairs with
different `z` values are left unconnected. Returns `nothing`.
"""
function plot_with_condition(p, x, y, z, color)
    for (debut, fin) in zip(1:length(x) - 1, 2:length(x))
        z[debut] == z[fin] || continue
        plot!(p, x[debut:fin], y[debut:fin], linestyle=:dash,
        color=color, legend = false)
    end
end

plot_with_condition (generic function with 1 method)

In [32]:
# Draw the VALEUR_CHAUD and VALEUR_FROID columns of one IND_COL table onto `p`.
# For each column: dashed segments between consecutive points that share the same
# `nb_nettoyages_precedents` value, then the points themselves as markers whose
# shape is looked up from `circuit_shapes[circuit]`.
function _plot_ind_col!(p, df, circuit, hot_color, cold_color)
    series = ((:VALEUR_CHAUD, hot_color), (:VALEUR_FROID, cold_color))
    for (column, color) in series
        ys = df[!, column]
        plot_with_condition(p, df.HEURES_MAT, ys, df.nb_nettoyages_precedents, color)
        scatter!(p, df.HEURES_MAT, ys; alpha=0.75, label=false,
            markershape=circuit_shapes[circuit], markercolor=color,
            legend=false, markerstrokewidth=0.5)
    end
    return nothing
end

# Draw the second column of `df` against `df.HEURES_MAT` onto `p` as a dashed line
# plus markers, using the colour and marker shape registered for `circuit`.
# Only the marker series carries a legend label (the circuit identifier).
function _plot_circuit_series!(p, df, circuit)
    color = circuit_enc_colors[circuit]
    ys = df[:, 2]
    plot!(p, df.HEURES_MAT, ys; color=color, label=false, linestyle=:dash)
    scatter!(p, df.HEURES_MAT, ys; alpha=0.75, label=circuit,
        markershape=circuit_shapes[circuit], markercolor=color,
        markerstrokewidth=0.5)
    return nothing
end

"""
    plot!(gv::GV, p_1_2, p_3, p_enc)

Add the data stored in `gv` to three existing plots.

# Arguments
- `gv`: the `GV` whose tables are drawn.
- `p_1_2`: receives `gv.IND_COL_1` (hot in yellow, cold in green) and
  `gv.IND_COL_2` (hot in red, cold in blue).
- `p_3`: receives the second column of `gv.IND_COL_3`.
- `p_enc`: receives the second column of `gv.IND_ENC`.

Empty tables are skipped. For every row of `gv.maintenances`, a black vertical
line is added to all three plots at the value found in the first column; the line
is solid when the second column equals `1` and dashed otherwise. Finally the
x-axis of each plot is set to `0 .. 1.1 * gv.date_max`.

The plots may use any Plots backend. Returns the result of the last `xlims!` call.
"""
function plot!(gv::GV, p_1_2::Plots.Plot, p_3::Plots.Plot, p_enc::Plots.Plot)
    if !isempty(gv.IND_COL_1)
        _plot_ind_col!(p_1_2, gv.IND_COL_1, gv.circuit, :yellow, :green)
    end

    if !isempty(gv.IND_COL_2)
        _plot_ind_col!(p_1_2, gv.IND_COL_2, gv.circuit, :red, :blue)
    end

    if !isempty(gv.IND_COL_3)
        _plot_circuit_series!(p_3, gv.IND_COL_3, gv.circuit)
    end

    if !isempty(gv.IND_ENC)
        _plot_circuit_series!(p_enc, gv.IND_ENC, gv.circuit)
    end

    # Columns are addressed by position, as the table's column names are not
    # relied upon here: column 1 is the x position, column 2 selects the style.
    for row in eachrow(gv.maintenances)
        style = row[2] == 1 ? :solid : :dash
        for p in (p_1_2, p_3, p_enc)
            vline!(p, [row[1]]; color=:black, linestyle=style, label=false)
        end
    end

    # Leave a 10 % margin to the right of the latest recorded value.
    x_max = gv.date_max * 11 / 10
    xlims!(p_1_2, 0, x_max)
    xlims!(p_3, 0, x_max)
    return xlims!(p_enc, 0, x_max)
end

plot! (generic function with 5 methods)

In [33]:
# Create the three empty, pre-labelled panels (IND-COL, IND-COL-3, IND-ENC) that
# make up one column of the combined figure.
function _empty_panels()
    p_1_2 = plot(ylabel="IND-COL", titlefont=10)
    p_3 = plot(ylabel="IND-COL-3", titlefont=10)
    p_enc = plot(xlabel="HEURES_MAT", ylabel="IND-ENC", titlefont=10)
    return (p_1_2=p_1_2, p_3=p_3, p_enc=p_enc)
end

"""
    plot!(sous_unite::NAryTreeNode; filename="combined_plot.png")

Build a 3-by-2 figure for the sub-unit node `sous_unite` and save it to `filename`.

The children of `sous_unite` are state nodes and their children are circuit nodes.
For every circuit node, the value of its first child is drawn with
`plot!(gv, p_1_2, p_3, p_enc)`: onto the left-hand panels when the state node's
value is `"avant_RGV"`, onto the right-hand panels otherwise. Circuit nodes
without children, or whose first child does not hold a `GV` (for example a
placeholder node whose value is `missing`), are skipped.

Each panel title is made of the parent's value and the sub-unit's value joined by
a dash, followed by one `circuit-numero-PE` entry per plotted `GV`.

# Keywords
- `filename`: path passed to `savefig`; defaults to `"combined_plot.png"`.

Returns the result of `savefig`.
"""
function plot!(sous_unite::NAryTreeNode; filename::AbstractString="combined_plot.png")
    panels_avant = _empty_panels()
    panels_apres = _empty_panels()
    labels_avant = String[]
    labels_apres = String[]

    for etat in sous_unite.children
        for circuit in etat.children
            isempty(circuit.children) && continue
            gv = first(circuit.children).value
            # Placeholder nodes carry `missing` instead of a GV: nothing to draw.
            gv isa GV || continue

            label = "$(gv.circuit)-$(gv.numero)-$(gv.PE_max_IND_COL_1) "
            if etat.value == "avant_RGV"
                plot!(gv, panels_avant.p_1_2, panels_avant.p_3, panels_avant.p_enc)
                push!(labels_avant, label)
            else
                plot!(gv, panels_apres.p_1_2, panels_apres.p_3, panels_apres.p_enc)
                push!(labels_apres, label)
            end
        end
    end

    prefix = "$(sous_unite.parent.value)-$(sous_unite.value) "
    titre_avant = string(prefix, join(labels_avant))
    titre_apres = string(prefix, join(labels_apres))

    for panel in panels_avant
        title!(panel, titre_avant)
    end
    for panel in panels_apres
        title!(panel, titre_apres)
    end

    # Left column: before; right column: after. One row per indicator.
    p = plot(
        panels_avant.p_1_2, panels_apres.p_1_2,
        panels_avant.p_3, panels_apres.p_3,
        panels_avant.p_enc, panels_apres.p_enc;
        layout=(3, 2), size=(1800, 1200),
    )

    return savefig(p, filename)
end

plot! (generic function with 6 methods)

In [34]:
# Build and save the combined figure for every sub-unit "S5" found under a unit
# "U3", echoing the unit and sub-unit values before each figure.
for palier in paliers, unite in palier.children
    unite.value == "U3" || continue
    for sous_unite in unite.children
        sous_unite.value == "S5" || continue
        display(unite.value)
        display(sous_unite.value)
        plot!(sous_unite)
    end
end

"U3"

"S5"