In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import math
import re
import ast

In [73]:
aero = pd.read_csv('aeronefs.csv')
compo = pd.read_csv('composants.csv')
deg = pd.read_csv('degradations_2024-02-05_FULL.csv')
vol = pd.read_csv('logs_vols_2024-02-05_FULL.csv')

In [74]:
# passage au type datetime des df
aero['debut_service'] = pd.to_datetime(aero['debut_service'])
aero['last_maint'] = pd.to_datetime(aero['last_maint'])
aero['end_maint'] = pd.to_datetime(aero['end_maint'],errors='coerce')
aero['end_maint'] = aero['end_maint'].dt.strftime('%Y-%m-%d')
aero['end_maint'] = pd.to_datetime(aero['end_maint'],errors='coerce')
deg['measure_day'] = pd.to_datetime(deg['measure_day'])
vol['jour_vol'] = pd.to_datetime(vol['jour_vol'])

# modification des noms de colonne servant de clés, la base gardé étant le nom du csv 'aeronefs' à savoir 'ref_aero'
colonne_ref = 'ref_aero'
vol = vol.rename(columns={'aero_linked':colonne_ref})
deg = deg.rename(columns={'linked_aero':colonne_ref})
compo = compo.rename(columns={'aero':colonne_ref})

# modif noms de colonne des composants
deg = deg.rename(columns={'compo_concerned':'ref_compo'})

# application d'un arrondi sur les floats 
compo['taux_usure_actuel'] = compo['taux_usure_actuel'].apply(lambda x: math.ceil(x * 100) / 100)
deg['usure_nouvelle'] = deg['usure_nouvelle'].apply(lambda x: math.ceil(x * 100) / 100)

# !!! Peut être temporaire !!!
# drop de l'avion B737_4325 qui est en double dans la table aeronef et donc génère des doublons dans les autres tables.
# la correction est possible mais on ne peut pas l'automatisé car il n'exitse pas de colonne dernier vol dans les df que l'on récupère
# Liste des DataFrames
list_of_dataframes = [aero, compo, deg, vol]

# Boucle pour itérer sur chaque DataFrame
for df in list_of_dataframes:
    
    indices_a_supprimer = df[df['ref_aero'] == 'B737_4325'].index
    
    df.drop(indices_a_supprimer, inplace=True)
# !!! Peut être temporaire !!!

In [75]:
compo = compo.drop(columns=['desc'],axis=1)
deg = deg[['ref_aero','ref_compo','usure_nouvelle','measure_day']]
vol = vol[['ref_aero','jour_vol','time_en_air','etat_voyant']]

In [76]:
# création du df maitre
maitre = aero.merge(compo, on='ref_aero', how ='left')

In [77]:
# travail sur le df degradation pour préparer la création des colonnes
pivot_deg = deg.pivot_table(
    values='usure_nouvelle',
    index=['ref_aero','ref_compo'],
    columns=['measure_day'],
    aggfunc='first'
).reset_index()

# fonction pour créer les colonnes de mesure/jour et classement
def remove_time_from_date(date_str):
    return re.sub(r'\s00:00:00$', '', str(date_str))

pivot_deg.columns = pivot_deg.columns[:2].tolist() + ['usure_' + remove_time_from_date(col) if col != 'mesure_day' else str(col) for col in pivot_deg.columns[2:]]


In [78]:
# Récupérer le nom des colonnes
colonnes = pivot_deg.columns

for t in range (33):
    # Parcourir les colonnes en commençant par la dernière jusqu'à la quatrième
    for i in range(len(colonnes) - 1, 3, -1):
        nom_colonne = colonnes[i]
        # Vérifier si la colonne contient des valeurs NaN
        if pivot_deg[nom_colonne].isnull().any():
            # Trouver la prochaine colonne non-NaN
            j = i - 1
            while j >= 0 and pivot_deg[colonnes[j]].isnull().all():
                j -= 1
            if j >= 0:
                # Remplacer les NaN par les valeurs de la prochaine colonne non-NaN
                next_non_nan_column = pivot_deg[colonnes[j]]
                pivot_deg.loc[pivot_deg[nom_colonne].isnull(), nom_colonne] = next_non_nan_column

In [79]:
for t in range (35):
# Parcourir les colonnes en commençant par la quatrième jusqu'à la dernière
    for i in range(2, len(colonnes)):
        nom_colonne = colonnes[i]
    # Vérifier si la colonne contient des valeurs NaN
        if pivot_deg[nom_colonne].isnull().any():
        # Trouver la prochaine colonne non-NaN
            j = i + 1
            while j < len(colonnes) and pivot_deg[colonnes[j]].isnull().all():
                j += 1
            if j < len(colonnes):
            # Remplacer les NaN par les valeurs de la prochaine colonne non-NaN
                next_non_nan_column = pivot_deg[colonnes[j]]
                pivot_deg.loc[pivot_deg[nom_colonne].isnull(), nom_colonne] = next_non_nan_column

In [80]:
# Sélectionner uniquement les colonnes de mesure, en excluant les colonnes 'ref_aero' et 'ref_compo'
mesure_columns = [col for col in pivot_deg.columns if col.startswith('usure')]

# Créer un DataFrame pour stocker les colonnes d'évolution
evolution_df = pd.DataFrame()


for i in range(1, len(mesure_columns)):
    # Nom de la nouvelle colonne d'évolution
    evolution_col_name = f"evolution_{mesure_columns[i]}"
    
    # Calculer la différence entre chaque paire de colonnes adjacentes, en ignorant les valeurs nulles
    evolution_df[evolution_col_name] = pivot_deg[mesure_columns[i]] - pivot_deg[mesure_columns[i-1]]

    # Remplacer les valeurs où la mesure précédente ou la mesure actuelle est nulle par NaN
    evolution_df.loc[pivot_deg[mesure_columns[i]] == 0, evolution_col_name] = pd.NA
    evolution_df.loc[pivot_deg[mesure_columns[i-1]] == 0, evolution_col_name] = pd.NA

# Concaténer le DataFrame d'évolution avec le DataFrame original
pivot_deg = pd.concat([pivot_deg, evolution_df], axis=1)


In [81]:
# Réorganiser les colonnes par date croissante
colonnes_triees = sorted(pivot_deg.columns[2:], key=lambda x: pd.to_datetime(x.split('_')[-1]))

# Créer une liste des colonnes dans l'ordre souhaité
colonnes_ordre = pivot_deg.columns[:2].tolist() + colonnes_triees

# Réorganiser les colonnes dans le DataFrame
pivot_deg = pivot_deg[colonnes_ordre]


In [82]:
pivot_deg

Unnamed: 0,ref_aero,ref_compo,usure_2024-01-04,usure_2024-01-05,evolution_usure_2024-01-05,usure_2024-01-06,evolution_usure_2024-01-06,usure_2024-01-07,evolution_usure_2024-01-07,usure_2024-01-08,...,usure_2024-02-01,evolution_usure_2024-02-01,usure_2024-02-02,evolution_usure_2024-02-02,usure_2024-02-03,evolution_usure_2024-02-03,usure_2024-02-04,evolution_usure_2024-02-04,usure_2024-02-05,evolution_usure_2024-02-05
0,A320_0691,AILA320-A320_0691-20,45.99,45.99,0.0,45.99,0.0,45.99,0.0,45.99,...,51.20,0.64,51.34,0.14,51.34,0.00,51.57,0.23,52.08,0.51
1,A320_0691,AUTA320-A320_0691-4,41.03,41.03,0.0,41.03,0.0,41.03,0.0,41.03,...,46.56,0.68,46.71,0.15,46.71,0.00,46.96,0.25,47.50,0.54
2,A320_0691,DETA320-A320_0691-37,5.47,5.47,0.0,5.47,0.0,5.47,0.0,5.47,...,12.18,0.82,12.36,0.18,12.36,0.00,12.66,0.30,13.32,0.66
3,A320_0691,ECLA320-A320_0691-30,76.27,76.27,0.0,76.27,0.0,76.27,0.0,76.27,...,1.45,0.64,1.60,0.15,1.60,0.00,1.83,0.23,2.35,0.52
4,A320_0691,ECLA320-A320_0691-38,35.17,35.17,0.0,35.17,0.0,35.17,0.0,35.17,...,40.86,0.70,41.02,0.16,41.02,0.00,41.27,0.25,41.83,0.56
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10345,E175_6334,SYSE175-E175_6334-9,72.52,72.52,0.0,72.52,0.0,72.52,0.0,72.52,...,75.11,0.00,75.58,0.47,75.92,0.34,75.92,0.00,75.92,0.00
10346,E175_6334,TOIE175-E175_6334-35,51.36,51.36,0.0,51.36,0.0,51.36,0.0,51.36,...,54.63,0.00,55.22,0.59,55.64,0.42,55.64,0.00,55.64,0.00
10347,E175_6334,TRAE175-E175_6334-17,79.07,79.07,0.0,79.07,0.0,79.07,0.0,79.07,...,1.27,0.00,1.82,0.55,2.21,0.39,2.21,0.00,2.21,0.00
10348,E175_6334,TRAE175-E175_6334-18,41.57,41.57,0.0,41.57,0.0,41.57,0.0,41.57,...,44.62,0.00,45.17,0.55,45.56,0.39,45.56,0.00,45.56,0.00


In [83]:
# merge sur le maitre
maitre = maitre.merge(pivot_deg, on=['ref_aero', 'ref_compo'], how='left')

In [84]:
def create_pivot_column(df, variable):
    pivot_result = df.pivot_table(
        index='ref_aero',
        columns=['jour_vol'],
        values=[variable],
        aggfunc='sum',
        fill_value=0
    ).reset_index()

    pivot_result.columns = ['{}_{}'.format(variable, re.sub(r'^{}_'.format(variable), '', str(col[1])).split()[0]) if col[1] else col[0] for col in pivot_result.columns]
    pivot_result = pivot_result.rename(columns={'{}_NaT'.format(variable): 'ref_aero'})

    return pivot_result

# Appliquer la fonction à chaque variable
variables = ['etat_voyant','time_en_air']

for variable in variables:
    pivot_result = create_pivot_column(vol, variable)
    vol = pd.merge(vol, pivot_result, on='ref_aero', how='left')
    vol = vol.drop([variable], axis=1)

In [85]:
vol = vol.drop('jour_vol',axis=1)

In [86]:
# merge sur le maitre
maitre = maitre.merge(vol, on=['ref_aero'], how='left')

In [87]:
maitre = maitre.drop_duplicates(subset='ref_compo')

In [88]:
maitre = maitre.drop("taux_usure_actuel",axis=1)

In [89]:
maitre = maitre .drop(['usure_2024-01-04',
'usure_2024-01-05',
'evolution_usure_2024-01-05',
'usure_2024-01-06',
'evolution_usure_2024-01-06',
'usure_2024-01-07',
'evolution_usure_2024-01-07',
'usure_2024-01-08',
'evolution_usure_2024-01-08',
'usure_2024-01-09',
'evolution_usure_2024-01-09',
'usure_2024-01-10',
'evolution_usure_2024-01-10'], axis = 1)

In [90]:
maitre.columns

Index(['ref_aero', 'type_model', 'debut_service', 'last_maint',
       'en_maintenance', 'end_maint', 'ref_compo', 'categorie', 'lifespan',
       'cout',
       ...
       'time_en_air_2024-01-22', 'time_en_air_2024-01-23',
       'time_en_air_2024-01-29', 'time_en_air_2024-01-30',
       'time_en_air_2024-01-31', 'time_en_air_2024-02-01',
       'time_en_air_2024-02-02', 'time_en_air_2024-02-03',
       'time_en_air_2024-02-04', 'time_en_air_2024-02-05'],
      dtype='object', length=104)

In [91]:
from datetime import datetime
# Calculer la date actuelle
date_actuelle = datetime.now()

# Calculer l'âge de l'avion en soustrayant la date de début de service de la date actuelle
maitre['age_avion'] = (date_actuelle - maitre['debut_service']).dt.days  # Âge en jours


In [92]:
for i in maitre.columns:
    print(i)

ref_aero
type_model
debut_service
last_maint
en_maintenance
end_maint
ref_compo
categorie
lifespan
cout
usure_2024-01-11
evolution_usure_2024-01-11
usure_2024-01-12
evolution_usure_2024-01-12
usure_2024-01-13
evolution_usure_2024-01-13
usure_2024-01-14
evolution_usure_2024-01-14
usure_2024-01-15
evolution_usure_2024-01-15
usure_2024-01-16
evolution_usure_2024-01-16
usure_2024-01-17
evolution_usure_2024-01-17
usure_2024-01-18
evolution_usure_2024-01-18
usure_2024-01-19
evolution_usure_2024-01-19
usure_2024-01-20
evolution_usure_2024-01-20
usure_2024-01-21
evolution_usure_2024-01-21
usure_2024-01-22
evolution_usure_2024-01-22
usure_2024-01-23
evolution_usure_2024-01-23
usure_2024-01-24
evolution_usure_2024-01-24
usure_2024-01-25
evolution_usure_2024-01-25
usure_2024-01-26
evolution_usure_2024-01-26
usure_2024-01-27
evolution_usure_2024-01-27
usure_2024-01-28
evolution_usure_2024-01-28
usure_2024-01-29
evolution_usure_2024-01-29
usure_2024-01-30
evolution_usure_2024-01-30
usure_2024-01-31

In [93]:
# Liste des colonnes de time_en_air_ à parcourir
colonnes_time_en_air = [colonne for colonne in maitre.columns if colonne.startswith("time_en_air_")]

# Parcours des colonnes de time_en_air_
for colonne_time_en_air in colonnes_time_en_air:
    # Extraire la date de la colonne time_en_air_
    date = colonne_time_en_air.split("_")[-1]
    
    # Vérifier si la valeur de time_en_air_ pour cette date est > 0
    if maitre[colonne_time_en_air].iloc[0] > 0:
        # Calculer le taux d'usure
        colonne_evolution_usure = f"evolution_usure_{date}"
        taux_usure = maitre[colonne_evolution_usure] / maitre[colonne_time_en_air]
        
        # Créer une nouvelle colonne taux_usure_
        colonne_taux_usure = f"taux_usure_{date}"
        maitre[colonne_taux_usure] = taux_usure


In [159]:
# travail sur le maitre pour organiser les colonnes
columns = maitre.columns

# Extraire les dates de chaque colonne et créer une liste de tuples (date, colonne)
date_column_tuples = [(re.search(r'\d{4}-\d{2}-\d{2}', col).group(), col) for col in columns[8:] if re.search(r'\d{4}-\d{2}-\d{2}', col)]

# Trier la liste de tuples par date
sorted_date_column_tuples = sorted(date_column_tuples, key=lambda x: x[0])

# Extraire la liste triée des colonnes
sorted_columns = [col[1] for col in sorted_date_column_tuples]

# Réorganiser les colonnes selon la spécification demandée
desired_order = ['type_model', 'ref_aero', 'debut_service','age_avion', 'last_maint', 'end_maint', 'ref_compo', 'categorie','cout', 'lifespan']
sorted_columns = desired_order + sorted_columns

# Créer un DataFrame avec les colonnes triées
maitre = maitre[sorted_columns]

In [146]:
df_ml_1 =  maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-01-11','evolution_usure_2024-01-11','etat_voyant_2024-01-11','time_en_air_2024-01-11','taux_usure_2024-01-11']]

df_ml_2 = maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-01-12','evolution_usure_2024-01-12','etat_voyant_2024-01-12','time_en_air_2024-01-12','taux_usure_2024-01-12']]

df_ml_3 = maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-01-13','evolution_usure_2024-01-13','etat_voyant_2024-01-13','time_en_air_2024-01-13','taux_usure_2024-01-13']]

df_ml_4 = maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-01-14','evolution_usure_2024-01-14','etat_voyant_2024-01-14','time_en_air_2024-01-14','taux_usure_2024-01-14']]

df_ml_5 = maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-01-15','evolution_usure_2024-01-15','etat_voyant_2024-01-15','time_en_air_2024-01-15','taux_usure_2024-01-15']]

df_ml_6 = maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-01-16','evolution_usure_2024-01-16','etat_voyant_2024-01-16','time_en_air_2024-01-16','taux_usure_2024-01-16']]

df_ml_7 = maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-01-17','evolution_usure_2024-01-17','etat_voyant_2024-01-17','time_en_air_2024-01-17','taux_usure_2024-01-17']]

df_ml_8 = maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-01-18','evolution_usure_2024-01-18','etat_voyant_2024-01-18','time_en_air_2024-01-18']]

df_ml_9 = maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-01-19','evolution_usure_2024-01-19','etat_voyant_2024-01-19','time_en_air_2024-01-19']]

df_ml_10 = maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-01-20','evolution_usure_2024-01-20','etat_voyant_2024-01-20','time_en_air_2024-01-20']]

df_ml_11 = maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-01-21','evolution_usure_2024-01-21','etat_voyant_2024-01-21','time_en_air_2024-01-21']]

df_ml_12 = maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-01-22','evolution_usure_2024-01-22','etat_voyant_2024-01-22','time_en_air_2024-01-22']]

df_ml_13 = maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-01-23','evolution_usure_2024-01-23','etat_voyant_2024-01-23','time_en_air_2024-01-23']]

df_ml_14 = maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-01-29','evolution_usure_2024-01-29','etat_voyant_2024-01-29','time_en_air_2024-01-29']]

df_ml_15 = maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-01-30','evolution_usure_2024-01-30','etat_voyant_2024-01-30','time_en_air_2024-01-30','taux_usure_2024-01-30']]

df_ml_16 = maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-01-31','evolution_usure_2024-01-31','etat_voyant_2024-01-31','time_en_air_2024-01-31','taux_usure_2024-01-31']]

df_ml_17 = maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-02-01','evolution_usure_2024-02-01','etat_voyant_2024-02-01','time_en_air_2024-02-01','taux_usure_2024-02-01']]

df_ml_18= maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-02-02','evolution_usure_2024-02-02','etat_voyant_2024-02-02','time_en_air_2024-02-02','taux_usure_2024-02-02']]

df_ml_19= maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-02-03','evolution_usure_2024-02-03','etat_voyant_2024-02-03','time_en_air_2024-02-03','taux_usure_2024-02-03']]

df_ml_20 = maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-02-04','evolution_usure_2024-02-04','etat_voyant_2024-02-04','time_en_air_2024-02-04']]

df_ml_21= maitre[['type_model','ref_aero','age_avion','ref_compo','categorie','cout','lifespan',
'usure_2024-02-05','evolution_usure_2024-02-05','etat_voyant_2024-02-05','time_en_air_2024-02-05']]

In [147]:
df_ml_list = [df_ml_1,df_ml_2,df_ml_3,df_ml_4,df_ml_5,df_ml_6,df_ml_7,df_ml_8,df_ml_9,df_ml_10,df_ml_11,df_ml_12,
              df_ml_13,df_ml_14,df_ml_15,df_ml_16,df_ml_17,df_ml_18,df_ml_19,df_ml_20,df_ml_21]
column_names = ['time_en_air_2024-01-11', 'time_en_air_2024-01-12', 'time_en_air_2024-01-13',
                'time_en_air_2024-01-14', 'time_en_air_2024-01-15', 'time_en_air_2024-01-16',
                'time_en_air_2024-01-17', 'time_en_air_2024-01-18', 'time_en_air_2024-01-19',
                'time_en_air_2024-01-20', 'time_en_air_2024-01-21', 'time_en_air_2024-01-22',
                'time_en_air_2024-01-23', 'time_en_air_2024-01-29', 'time_en_air_2024-01-30',
                'time_en_air_2024-01-31', 'time_en_air_2024-02-01', 'time_en_air_2024-02-02',
                'time_en_air_2024-02-03', 'time_en_air_2024-02-04', 'time_en_air_2024-02-05']

for df_ml in df_ml_list:
    for column_name in column_names:
        if column_name in df_ml.columns:
            df_ml.drop(df_ml[df_ml[column_name] == 0].index, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ml.drop(df_ml[df_ml[column_name] == 0].index, inplace=True)


In [148]:
df_ml_8['taux_usure_2024-01-18']=df_ml_8['evolution_usure_2024-01-18']/ df_ml_8['time_en_air_2024-01-18']
df_ml_9['taux_usure_2024-01-19']=df_ml_9['evolution_usure_2024-01-19']/ df_ml_9['time_en_air_2024-01-19']
df_ml_10['taux_usure_2024-01-20']=df_ml_10['evolution_usure_2024-01-20']/ df_ml_10['time_en_air_2024-01-20']
df_ml_11['taux_usure_2024-01-21']=df_ml_11['evolution_usure_2024-01-21']/ df_ml_11['time_en_air_2024-01-21']
df_ml_12['taux_usure_2024-01-22']=df_ml_12['evolution_usure_2024-01-22']/ df_ml_12['time_en_air_2024-01-22']
df_ml_13['taux_usure_2024-01-23']=df_ml_13['evolution_usure_2024-01-23']/df_ml_13['time_en_air_2024-01-23']
df_ml_14['taux_usure_2024-01-29']=df_ml_14['evolution_usure_2024-01-29']/ df_ml_14['time_en_air_2024-01-29']
df_ml_20['taux_usure_2024-02-04']=df_ml_20['evolution_usure_2024-02-04']/ df_ml_20['time_en_air_2024-02-04']
df_ml_21['taux_usure_2024-02-05']=df_ml_21['evolution_usure_2024-02-05']/ df_ml_21['time_en_air_2024-02-05']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ml_8['taux_usure_2024-01-18']=df_ml_8['evolution_usure_2024-01-18']/ df_ml_8['time_en_air_2024-01-18']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ml_9['taux_usure_2024-01-19']=df_ml_9['evolution_usure_2024-01-19']/ df_ml_9['time_en_air_2024-01-19']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-

In [25]:
# liste des colonnes servant de base de travail
#colonnes_time_en_air = maitre.filter(like='time_en_air_').columns.tolist()

# Afficher la liste des colonnes
#print(colonnes_time_en_air)

['time_en_air_2024-01-11', 'time_en_air_2024-01-12', 'time_en_air_2024-01-13', 'time_en_air_2024-01-14', 'time_en_air_2024-01-15', 'time_en_air_2024-01-16', 'time_en_air_2024-01-17', 'time_en_air_2024-01-18', 'time_en_air_2024-01-19', 'time_en_air_2024-01-20', 'time_en_air_2024-01-21', 'time_en_air_2024-01-22', 'time_en_air_2024-01-23', 'time_en_air_2024-01-29', 'time_en_air_2024-01-30', 'time_en_air_2024-01-31', 'time_en_air_2024-02-01', 'time_en_air_2024-02-02', 'time_en_air_2024-02-03', 'time_en_air_2024-02-04', 'time_en_air_2024-02-05']


In [26]:
# Parcours des colonnes time_en_air_
#for colonne_time_en_air in colonnes_time_en_air:
    # Extraire la date de la colonne time_en_air_
    #date = colonne_time_en_air.split("_")[-1]
    
    # Sélectionner toutes les colonnes de maitre jusqu'à la date de time_en_air inclus
    #colonnes_jusqu_a_date = maitre.columns[:maitre.columns.get_loc(colonne_time_en_air) + 1]
    
    # Créer un DataFrame contenant les colonnes sélectionnées
    #df_ml = maitre[colonnes_jusqu_a_date]
    
    # Renommer la colonne précédente pour inclure la date suivante
    #avant_dernier_colonne = df_ml.columns[-2]
    #nouvelle_avant_derniere_colonne = f'taux_usure_{date}'
    #df_ml = df_ml.rename(columns={avant_dernier_colonne: nouvelle_avant_derniere_colonne})
    
    # Afficher les informations sur le DataFrame pour vérification
    #print(df_ml.info())
    
    # Ajouter le DataFrame à l'environnement local avec le nom spécifié
    #globals()[f'df_ml_{date}'] = df_ml

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10350 entries, 0 to 79868
Data columns (total 14 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   type_model                  10350 non-null  object        
 1   ref_aero                    10350 non-null  object        
 2   debut_service               10350 non-null  datetime64[ns]
 3   age_avion                   10350 non-null  int64         
 4   last_maint                  10350 non-null  datetime64[ns]
 5   end_maint                   1755 non-null   datetime64[ns]
 6   ref_compo                   10350 non-null  object        
 7   categorie                   10350 non-null  object        
 8   cout                        10350 non-null  int64         
 9   lifespan                    10350 non-null  int64         
 10  usure_2024-01-11            10350 non-null  float64       
 11  evolution_usure_2024-01-11  10350 non-null  float64   

In [28]:
# Récupérer toutes les variables globales
#variables_globales = globals()

# Filtrer les variables pour trouver celles qui commencent par "df_ml_"
#df_variables = [nom_variable for nom_variable in variables_globales.keys() if nom_variable.startswith("df_ml_2024")]

# Afficher les noms des DataFrames df_ml créés
#for nom_variable in df_variables:
    #print(nom_variable)

df_ml_2024-01-11
df_ml_2024-01-12
df_ml_2024-01-13
df_ml_2024-01-14
df_ml_2024-01-15
df_ml_2024-01-16
df_ml_2024-01-17
df_ml_2024-01-18
df_ml_2024-01-19
df_ml_2024-01-20
df_ml_2024-01-21
df_ml_2024-01-22
df_ml_2024-01-23
df_ml_2024-01-29
df_ml_2024-01-30
df_ml_2024-01-31
df_ml_2024-02-01
df_ml_2024-02-02
df_ml_2024-02-03
df_ml_2024-02-04
df_ml_2024-02-05


In [40]:
#df_ml.filter(items=cols)

Unnamed: 0,type_model,ref_aero,debut_service,age_avion,last_maint,end_maint,ref_compo,categorie,cout,lifespan,usure_2024-02-05,evolution_usure_2024-02-05,time_en_air_2024-02-05,taux_usure_2024-02-05
855,B777,B777_1214,2003-10-21,7414,2023-11-06,2023-11-13 00:00:00,REAB777-B777_1214-0,Composants Critiques,19845,11950,61.89,0.08,0.7,0
866,B777,B777_1214,2003-10-21,7414,2023-11-06,2023-11-13 00:00:00,REAB777-B777_1214-1,Composants Critiques,17416,14014,5.62,0.09,0.7,0
877,B777,B777_1214,2003-10-21,7414,2023-11-06,2023-11-13 00:00:00,SYSB777-B777_1214-2,Composants Critiques,16184,12519,60.75,0.08,0.7,0
888,B777,B777_1214,2003-10-21,7414,2023-11-06,2023-11-13 00:00:00,ORDB777-B777_1214-3,Composants Critiques,18574,11612,53.67,0.08,0.7,0
899,B777,B777_1214,2003-10-21,7414,2023-11-06,2023-11-13 00:00:00,AUTB777-B777_1214-4,Composants Critiques,18769,10655,79.30,0.07,0.7,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79840,B767,B767_6363,2016-07-06,2772,2023-10-07,0,SYSB767-B767_6363-40,Composants Secondaires,4344,12602,48.86,1.07,8.5,0
79847,B767,B767_6363,2016-07-06,2772,2023-10-07,0,PORB767-B767_6363-41,Composants Secondaires,2970,11227,39.82,0.95,8.5,0
79854,B767,B767_6363,2016-07-06,2772,2023-10-07,0,HUBB767-B767_6363-42,Composants Secondaires,2567,14930,44.29,1.27,8.5,0
79861,B767,B767_6363,2016-07-06,2772,2023-10-07,0,ECLB767-B767_6363-43,Composants Secondaires,4034,11837,49.90,1.01,8.5,0


In [41]:
# Parcourir chaque DataFrame df_ml_
#for nom_variable in df_variables:
    # Récupérer la date à partir du nom du DataFrame
    #date = nom_variable.split("_")[-1]
    
    # Récupérer le DataFrame correspondant
    #df_ml = variables_globales[nom_variable]
    
    # Sélectionner les colonnes spécifiées pour chaque DataFrame, y compris etat_voyant
    #cols = ['type_model', 'ref_aero', 'debut_service', 'age_avion', 'last_maint', 'end_maint', 'ref_compo', 'categorie', 'cout', 'lifespan', f'usure_{date}', f'evolution_usure_{date}', f'etat_voyant_{date}', f'time_en_air_{date}', f'taux_usure_{date}']
  
    #df_ml = df_ml.filter(items=cols)
  
    # Supprimer les lignes où la valeur de la colonne time_en_air_ pour la date est égale à zéro
    #df_ml = df_ml[df_ml[f"time_en_air_{date}"] != 0]
    
    # Mettre à jour le DataFrame dans l'environnement global
    #variables_globales[nom_variable] = df_ml

    # Imprimer les informations .info() pour le DataFrame mis à jour
    #print(f"Informations pour le DataFrame {nom_variable}:")
    #print(df_ml.info())
    #print("\n")

Informations pour le DataFrame df_ml_2024-01-11:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 3960 entries, 0 to 78518
Data columns (total 14 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   type_model                  3960 non-null   object        
 1   ref_aero                    3960 non-null   object        
 2   debut_service               3960 non-null   datetime64[ns]
 3   age_avion                   3960 non-null   int64         
 4   last_maint                  3960 non-null   datetime64[ns]
 5   end_maint                   3960 non-null   object        
 6   ref_compo                   3960 non-null   object        
 7   categorie                   3960 non-null   object        
 8   cout                        3960 non-null   int64         
 9   lifespan                    3960 non-null   int64         
 10  usure_2024-01-11            3960 non-null   float64       
 11  evolut

In [42]:
#print(globals()['df_ml_2024-01-11'].info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3960 entries, 0 to 78518
Data columns (total 14 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   type_model                  3960 non-null   object        
 1   ref_aero                    3960 non-null   object        
 2   debut_service               3960 non-null   datetime64[ns]
 3   age_avion                   3960 non-null   int64         
 4   last_maint                  3960 non-null   datetime64[ns]
 5   end_maint                   3960 non-null   object        
 6   ref_compo                   3960 non-null   object        
 7   categorie                   3960 non-null   object        
 8   cout                        3960 non-null   int64         
 9   lifespan                    3960 non-null   int64         
 10  usure_2024-01-11            3960 non-null   float64       
 11  evolution_usure_2024-01-11  3960 non-null   float64    

In [149]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [43]:
df = maitre[['type_model',
'ref_aero',
'age_avion',
'ref_compo',
'categorie',
'cout',
'lifespan',
'usure_2024-01-11',
'evolution_usure_2024-01-11',
'etat_voyant_2024-01-11',
'time_en_air_2024-01-11',
'taux_usure_2024-01-11']]

In [44]:
df = df.fillna(0)

In [150]:
#def prepa(xp):
    #df_ml_1 = df_ml_1.fillna(0)
df_ml_1['type_model'] = pd.factorize(df_ml_1['type_model'])[0]
df_ml_1['ref_compo2']= df_ml_1['ref_compo']
df_ml_1['ref_aero'] = pd.factorize(df_ml_1['ref_aero'])[0]
df_ml_1['ref_compo2'] = pd.factorize(df_ml_1['ref_compo2'])[0]
df_ml_1['categorie'] = pd.factorize(df_ml_1['categorie'])[0]
# Séparation des caractéristiques et de la cible
X = df_ml_1.drop(['etat_voyant_2024-01-11','ref_compo'], axis=1)
y = df_ml_1['etat_voyant_2024-01-11']  # Choisissez la colonne d'état du voyant à prédire

# Division des données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Création du modèle Random Forest
random_forest_model1 = RandomForestClassifier(n_estimators= 300, min_samples_split= 5, min_samples_leaf= 4, max_depth= 20, bootstrap= False)

# Entraînement du modèle
random_forest_model1.fit(X_train, y_train)

# Prédiction sur l'ensemble de test
pred = random_forest_model1.predict(X_test)

    #return pred

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ml_1['type_model'] = pd.factorize(df_ml_1['type_model'])[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ml_1['ref_compo2']= df_ml_1['ref_compo']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ml_1['ref_aero'] = pd.factorize(df_ml_1['ref_aero'])[0]
A value is trying to be set on a copy o

In [139]:
# Évaluation du modèle
print("Rapport de classification (Random Forest) :\n", classification_report(y_test,pred))

# Calcul de la précision avec une validation croisée à 5 plis
rf_scores = cross_val_score(random_forest_model1, X, y, cv=5, scoring='accuracy')

# Affichage des performances de la validation croisée
print("Précision moyenne (Random Forest) :", rf_scores.mean())
print("Écart-type des précisions (Random Forest) :", rf_scores.std())

Rapport de classification (Random Forest) :
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       563
           1       1.00      1.00      1.00        34
           2       1.00      1.00      1.00       195

    accuracy                           1.00       792
   macro avg       1.00      1.00      1.00       792
weighted avg       1.00      1.00      1.00       792

Précision moyenne (Random Forest) : 0.5898989898989899
Écart-type des précisions (Random Forest) : 0.18041935245570714


In [115]:
"""
#def train_model_and_predict(df_ml, date_column):
    # Vérifier si les colonnes sont présentes
    #required_columns = ['etat_voyant_'+date_column, 'ref_aero']
    #missing_columns = [col for col in required_columns if col not in df_ml.columns]
    if missing_columns:
        raise KeyError(f"Les colonnes suivantes sont manquantes dans le DataFrame : {missing_columns}")
    
    # Prétraitement des données
    df_ml['type_model'] = pd.factorize(df_ml['type_model'])[0]
    df_ml['ref_aero2'] = df_ml['ref_aero']
    df_ml['ref_aero2'] = pd.factorize(df_ml['ref_aero2'])[0]
    df_ml['ref_compo'] = pd.factorize(df_ml['ref_compo'])[0]
    df_ml['categorie'] = pd.factorize(df_ml['categorie'])[0]
    
    # Séparation des caractéristiques et de la cible
    X = df_ml.drop(required_columns, axis=1)
    y = df_ml['etat_voyant_'+date_column]
    
    # Division des données en ensembles d'entraînement et de test
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Création du modèle Random Forest
    random_forest_model = RandomForestClassifier(n_estimators=300, min_samples_split=5, min_samples_leaf=4, max_depth=20, bootstrap=False)
    
    # Entraînement du modèle
    random_forest_model.fit(X_train, y_train)
    
    # Prédiction sur l'ensemble de test
    predictions = random_forest_model.predict(X_test)
    
    return predictions

# Liste des DataFrame df_ml
df_ml_list = [df_ml_2, df_ml_3, df_ml_4, df_ml_5, df_ml_6, df_ml_7, df_ml_8, df_ml_9, df_ml_10,
              df_ml_11, df_ml_12, df_ml_13, df_ml_14, df_ml_15, df_ml_16, df_ml_17, df_ml_18, df_ml_19, df_ml_20, df_ml_21]

# Liste des dates correspondantes
date_columns = ['2024-01-12', '2024-01-13', '2024-01-14', '2024-01-15', '2024-01-16', '2024-01-17',
                '2024-01-18', '2024-01-19', '2024-01-20', '2024-01-21', '2024-01-22', '2024-01-23', '2024-01-29',
                '2024-01-30', '2024-01-31', '2024-02-01', '2024-02-02', '2024-02-03', '2024-02-04', '2024-02-05']

# Appliquer le modèle à chaque DataFrame
for df_ml, date_column in zip(df_ml_list, date_columns):
    predictions = train_model_and_predict(df_ml, date_column)
    # Réinitialiser l'index du DataFrame df_ml
    df_ml.reset_index(drop=True, inplace=True)

    df_ml[f'predi_{date_column}'] = predictions
"""

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ml['type_model'] = pd.factorize(df_ml['type_model'])[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ml['ref_aero2'] = df_ml['ref_aero']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ml['ref_aero2'] = pd.factorize(df_ml['ref_aero2'])[0]
A value is trying to be set on a copy of a slice f

ValueError: Length of values (792) does not match length of index (3960)

In [151]:
df_ml_2['type_model'] = pd.factorize(df_ml_2['type_model'])[0]
df_ml_2['ref_compo2']= df_ml_2['ref_compo']
df_ml_2['ref_compo2'] = pd.factorize(df_ml_2['ref_compo2'])[0]
df_ml_2['ref_aero'] = pd.factorize(df_ml_2['ref_aero'])[0]
df_ml_2['categorie'] = pd.factorize(df_ml_2['categorie'])[0]
# Séparation des caractéristiques et de la cible
X = df_ml_2.drop(['etat_voyant_2024-01-12','ref_aero','ref_compo'], axis=1)
y = df_ml_2['etat_voyant_2024-01-12']  # Choisissez la colonne d'état du voyant à prédire

# Division des données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Création du modèle Random Forest
random_forest_model2 = RandomForestClassifier(n_estimators= 300, min_samples_split= 5, min_samples_leaf= 4, max_depth= 20, bootstrap= False)

# Entraînement du modèle
random_forest_model2.fit(X_train, y_train)

# Prédiction sur l'ensemble de test
pred2 = random_forest_model2.predict(X_test)

df_ml_3['type_model'] = pd.factorize(df_ml_3['type_model'])[0]
df_ml_3['ref_compo2']= df_ml_3['ref_compo']
df_ml_3['ref_compo2'] = pd.factorize(df_ml_3['ref_compo2'])[0]
df_ml_3['ref_aero'] = pd.factorize(df_ml_3['ref_aero'])[0]
df_ml_3['categorie'] = pd.factorize(df_ml_3['categorie'])[0]
# Séparation des caractéristiques et de la cible
X = df_ml_3.drop(['etat_voyant_2024-01-13','ref_aero','ref_compo'], axis=1)
y = df_ml_3['etat_voyant_2024-01-13']  # Choisissez la colonne d'état du voyant à prédire

# Division des données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Création du modèle Random Forest
random_forest_model3 = RandomForestClassifier(n_estimators= 300, min_samples_split= 5, min_samples_leaf= 4, max_depth= 20, bootstrap= False)

# Entraînement du modèle
random_forest_model3.fit(X_train, y_train)

# Prédiction sur l'ensemble de test
pred3 = random_forest_model3.predict(X_test)

df_ml_4['type_model'] = pd.factorize(df_ml_4['type_model'])[0]
df_ml_4['ref_compo2']= df_ml_4['ref_compo']
df_ml_4['ref_compo2'] = pd.factorize(df_ml_4['ref_compo2'])[0]
df_ml_4['ref_aero'] = pd.factorize(df_ml_4['ref_aero'])[0]
df_ml_4['categorie'] = pd.factorize(df_ml_4['categorie'])[0]
# Séparation des caractéristiques et de la cible
X = df_ml_4.drop(['etat_voyant_2024-01-14','ref_aero','ref_compo'], axis=1)
y = df_ml_4['etat_voyant_2024-01-14']  # Choisissez la colonne d'état du voyant à prédire

# Division des données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Création du modèle Random Forest
random_forest_model4 = RandomForestClassifier(n_estimators= 300, min_samples_split= 5, min_samples_leaf= 4, max_depth= 20, bootstrap= False)

# Entraînement du modèle
random_forest_model4.fit(X_train, y_train)

# Prédiction sur l'ensemble de test
pred4 = random_forest_model4.predict(X_test)

df_ml_5['type_model'] = pd.factorize(df_ml_5['type_model'])[0]
df_ml_5['ref_compo2']= df_ml_5['ref_compo']
df_ml_5['ref_compo2'] = pd.factorize(df_ml_5['ref_compo2'])[0]
df_ml_5['ref_aero'] = pd.factorize(df_ml_5['ref_aero'])[0]
df_ml_5['categorie'] = pd.factorize(df_ml_5['categorie'])[0]
# Séparation des caractéristiques et de la cible
X = df_ml_5.drop(['etat_voyant_2024-01-15','ref_aero','ref_compo'], axis=1)
y = df_ml_5['etat_voyant_2024-01-15']  # Choisissez la colonne d'état du voyant à prédire

# Division des données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Création du modèle Random Forest
random_forest_model5 = RandomForestClassifier(n_estimators= 300, min_samples_split= 5, min_samples_leaf= 4, max_depth= 20, bootstrap= False)

# Entraînement du modèle
random_forest_model5.fit(X_train, y_train)

# Prédiction sur l'ensemble de test
pred5 = random_forest_model5.predict(X_test)

df_ml_6['type_model'] = pd.factorize(df_ml_6['type_model'])[0]
df_ml_6['ref_compo2']= df_ml_6['ref_compo']
df_ml_6['ref_compo2'] = pd.factorize(df_ml_6['ref_compo2'])[0]
df_ml_6['ref_aero'] = pd.factorize(df_ml_6['ref_aero'])[0]
df_ml_6['categorie'] = pd.factorize(df_ml_6['categorie'])[0]
# Séparation des caractéristiques et de la cible
X = df_ml_6.drop(['etat_voyant_2024-01-16','ref_aero','ref_compo'], axis=1)
y = df_ml_6['etat_voyant_2024-01-16']  # Choisissez la colonne d'état du voyant à prédire

# Division des données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Création du modèle Random Forest
random_forest_model6 = RandomForestClassifier(n_estimators= 300, min_samples_split= 5, min_samples_leaf= 4, max_depth= 20, bootstrap= False)

# Entraînement du modèle
random_forest_model6.fit(X_train, y_train)

# Prédiction sur l'ensemble de test
pred6 = random_forest_model6.predict(X_test)

df_ml_7['type_model'] = pd.factorize(df_ml_7['type_model'])[0]
df_ml_7['ref_compo2']= df_ml_7['ref_compo']
df_ml_7['ref_compo2'] = pd.factorize(df_ml_7['ref_compo2'])[0]
df_ml_7['ref_aero'] = pd.factorize(df_ml_7['ref_aero'])[0]
df_ml_7['categorie'] = pd.factorize(df_ml_7['categorie'])[0]
# Séparation des caractéristiques et de la cible
X = df_ml_7.drop(['etat_voyant_2024-01-17','ref_aero','ref_compo'], axis=1)
y = df_ml_7['etat_voyant_2024-01-17']  # Choisissez la colonne d'état du voyant à prédire

# Division des données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Création du modèle Random Forest
random_forest_model7 = RandomForestClassifier(n_estimators= 300, min_samples_split= 5, min_samples_leaf= 4, max_depth= 20, bootstrap= False)

# Entraînement du modèle
random_forest_model7.fit(X_train, y_train)

# Prédiction sur l'ensemble de test
pred7 = random_forest_model7.predict(X_test)

df_ml_8['type_model'] = pd.factorize(df_ml_8['type_model'])[0]
df_ml_8['ref_compo2']= df_ml_8['ref_compo']
df_ml_8['ref_compo2'] = pd.factorize(df_ml_8['ref_compo2'])[0]
df_ml_8['ref_aero'] = pd.factorize(df_ml_8['ref_aero'])[0]
df_ml_8['categorie'] = pd.factorize(df_ml_8['categorie'])[0]
# Séparation des caractéristiques et de la cible
X = df_ml_8.drop(['etat_voyant_2024-01-18','ref_aero','ref_compo'], axis=1)
y = df_ml_8['etat_voyant_2024-01-18']  # Choisissez la colonne d'état du voyant à prédire

# Division des données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Création du modèle Random Forest
random_forest_model8 = RandomForestClassifier(n_estimators= 300, min_samples_split= 5, min_samples_leaf= 4, max_depth= 20, bootstrap= False)

# Entraînement du modèle
random_forest_model8.fit(X_train, y_train)

# Prédiction sur l'ensemble de test
pred8 = random_forest_model8.predict(X_test)

df_ml_9['type_model'] = pd.factorize(df_ml_9['type_model'])[0]
df_ml_9['ref_compo2']= df_ml_9['ref_compo']
df_ml_9['ref_compo2'] = pd.factorize(df_ml_9['ref_compo2'])[0]
df_ml_9['ref_aero'] = pd.factorize(df_ml_9['ref_aero'])[0]
df_ml_9['categorie'] = pd.factorize(df_ml_9['categorie'])[0]
# Séparation des caractéristiques et de la cible
X = df_ml_9.drop(['etat_voyant_2024-01-19','ref_aero','ref_compo'], axis=1)
y = df_ml_9['etat_voyant_2024-01-19']  # Choisissez la colonne d'état du voyant à prédire

# Division des données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Création du modèle Random Forest
random_forest_model9 = RandomForestClassifier(n_estimators= 300, min_samples_split= 5, min_samples_leaf= 4, max_depth= 20, bootstrap= False)

# Entraînement du modèle
random_forest_model9.fit(X_train, y_train)

# Prédiction sur l'ensemble de test
pred9 = random_forest_model9.predict(X_test)

df_ml_10['type_model'] = pd.factorize(df_ml_10['type_model'])[0]
df_ml_10['ref_compo2']= df_ml_10['ref_compo']
df_ml_10['ref_compo2'] = pd.factorize(df_ml_10['ref_compo2'])[0]
df_ml_10['ref_aero'] = pd.factorize(df_ml_10['ref_aero'])[0]
df_ml_10['categorie'] = pd.factorize(df_ml_10['categorie'])[0]
# Séparation des caractéristiques et de la cible
X = df_ml_10.drop(['etat_voyant_2024-01-20','ref_aero','ref_compo'], axis=1)
y = df_ml_10['etat_voyant_2024-01-20']  # Choisissez la colonne d'état du voyant à prédire

# Division des données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Création du modèle Random Forest
random_forest_model10 = RandomForestClassifier(n_estimators= 300, min_samples_split= 5, min_samples_leaf= 4, max_depth= 20, bootstrap= False)

# Entraînement du modèle
random_forest_model10.fit(X_train, y_train)

# Prédiction sur l'ensemble de test
pred10 = random_forest_model10.predict(X_test)


df_ml_11['type_model'] = pd.factorize(df_ml_11['type_model'])[0]
df_ml_11['ref_compo2']= df_ml_11['ref_compo']
df_ml_11['ref_compo2'] = pd.factorize(df_ml_11['ref_compo2'])[0]
df_ml_11['ref_aeroo'] = pd.factorize(df_ml_11['ref_aero'])[0]
df_ml_11['categorie'] = pd.factorize(df_ml_11['categorie'])[0]
# Séparation des caractéristiques et de la cible
X = df_ml_11.drop(['etat_voyant_2024-01-21','ref_aero','ref_compo'], axis=1)
y = df_ml_11['etat_voyant_2024-01-21']  # Choisissez la colonne d'état du voyant à prédire

# Division des données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Création du modèle Random Forest
random_forest_model11 = RandomForestClassifier(n_estimators= 300, min_samples_split= 5, min_samples_leaf= 4, max_depth= 20, bootstrap= False)

# Entraînement du modèle
random_forest_model11.fit(X_train, y_train)

# Prédiction sur l'ensemble de test
pred11 = random_forest_model11.predict(X_test)

df_ml_12['type_model'] = pd.factorize(df_ml_12['type_model'])[0]
df_ml_12['ref_compo2']= df_ml_12['ref_compo']
df_ml_12['ref_compo2'] = pd.factorize(df_ml_12['ref_compo2'])[0]
df_ml_12['ref_aero'] = pd.factorize(df_ml_12['ref_aero'])[0]
df_ml_12['categorie'] = pd.factorize(df_ml_12['categorie'])[0]
# Séparation des caractéristiques et de la cible
X = df_ml_12.drop(['etat_voyant_2024-01-22','ref_aero','ref_compo'], axis=1)
y = df_ml_12['etat_voyant_2024-01-22']  # Choisissez la colonne d'état du voyant à prédire

# Division des données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Création du modèle Random Forest
random_forest_model12 = RandomForestClassifier(n_estimators= 300, min_samples_split= 5, min_samples_leaf= 4, max_depth= 20, bootstrap= False)

# Entraînement du modèle
random_forest_model12.fit(X_train, y_train)

# Prédiction sur l'ensemble de test
pred12 = random_forest_model12.predict(X_test)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ml_2['type_model'] = pd.factorize(df_ml_2['type_model'])[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ml_2['ref_compo2']= df_ml_2['ref_compo']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ml_2['ref_compo2'] = pd.factorize(df_ml_2['ref_compo2'])[0]
A value is trying to be set on a co

In [152]:
df_ml_1['predi_2024-01-11'] = random_forest_model1.predict(df_ml_1.drop(['etat_voyant_2024-01-11','ref_compo'], axis=1))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ml_1['predi_2024-01-11'] = random_forest_model1.predict(df_ml_1.drop(['etat_voyant_2024-01-11','ref_compo'], axis=1))


In [153]:
df_ml_2['predi_2024-01-12'] = random_forest_model2.predict(df_ml_2.drop(['etat_voyant_2024-01-12','ref_aero','ref_compo'], axis=1))
df_ml_3['predi_2024-01-13'] = random_forest_model3.predict(df_ml_3.drop(['etat_voyant_2024-01-13','ref_aero','ref_compo'], axis=1))
df_ml_4['predi_2024-01-14'] = random_forest_model4.predict(df_ml_4.drop(['etat_voyant_2024-01-14','ref_aero','ref_compo'], axis=1))
df_ml_5['predi_2024-01-15'] = random_forest_model5.predict(df_ml_5.drop(['etat_voyant_2024-01-15','ref_aero','ref_compo'], axis=1))
df_ml_6['predi_2024-01-16'] = random_forest_model6.predict(df_ml_6.drop(['etat_voyant_2024-01-16','ref_aero','ref_compo'], axis=1))
df_ml_7['predi_2024-01-17'] = random_forest_model7.predict(df_ml_7.drop(['etat_voyant_2024-01-17','ref_aero','ref_compo'], axis=1))
df_ml_8['predi_2024-01-18'] = random_forest_model8.predict(df_ml_8.drop(['etat_voyant_2024-01-18','ref_aero','ref_compo'], axis=1))
df_ml_9['predi_2024-01-19'] = random_forest_model9.predict(df_ml_9.drop(['etat_voyant_2024-01-19','ref_aero','ref_compo'], axis=1))
df_ml_10['predi_2024-01-20'] = random_forest_model10.predict(df_ml_10.drop(['etat_voyant_2024-01-20','ref_aero','ref_compo'], axis=1))
df_ml_11['predi_2024-01-21'] = random_forest_model11.predict(df_ml_11.drop(['etat_voyant_2024-01-21','ref_aero','ref_compo'], axis=1))
df_ml_12['predi_2024-01-22'] = random_forest_model12.predict(df_ml_12.drop(['etat_voyant_2024-01-22','ref_aero','ref_compo'], axis=1))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ml_2['predi_2024-01-12'] = random_forest_model2.predict(df_ml_2.drop(['etat_voyant_2024-01-12','ref_aero','ref_compo'], axis=1))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ml_3['predi_2024-01-13'] = random_forest_model3.predict(df_ml_3.drop(['etat_voyant_2024-01-13','ref_aero','ref_compo'], axis=1))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable

In [55]:
df_ml_1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3960 entries, 0 to 78518
Data columns (total 13 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   type_model                  3960 non-null   int64  
 1   ref_aero                    3960 non-null   int64  
 2   age_avion                   3960 non-null   int64  
 3   ref_compo                   3960 non-null   int64  
 4   categorie                   3960 non-null   int64  
 5   cout                        3960 non-null   int64  
 6   lifespan                    3960 non-null   int64  
 7   usure_2024-01-11            3960 non-null   float64
 8   evolution_usure_2024-01-11  3960 non-null   float64
 9   etat_voyant_2024-01-11      3960 non-null   int64  
 10  time_en_air_2024-01-11      3960 non-null   float64
 11  taux_usure_2024-01-11       3960 non-null   float64
 12  predi_2024-01-11            3960 non-null   int64  
dtypes: float64(4), int64(9)
memory u

In [154]:
df_ml_1 = df_ml_1[['ref_compo','predi_2024-01-11']]
df_ml_2 = df_ml_2[['ref_compo','predi_2024-01-12']]
df_ml_3 = df_ml_3[['ref_compo','predi_2024-01-13']]
df_ml_4 = df_ml_4[['ref_compo','predi_2024-01-14']]
df_ml_5 = df_ml_5[['ref_compo','predi_2024-01-15']]
df_ml_6 = df_ml_6[['ref_compo','predi_2024-01-16']]
df_ml_7 = df_ml_7[['ref_compo','predi_2024-01-17']]
df_ml_8 = df_ml_8[['ref_compo','predi_2024-01-18']]
df_ml_9 = df_ml_9[['ref_compo','predi_2024-01-19']]
df_ml_10 = df_ml_10[['ref_compo','predi_2024-01-20']]
df_ml_11 = df_ml_11[['ref_compo','predi_2024-01-21']]
df_ml_12 = df_ml_12[['ref_compo','predi_2024-01-22']]

In [155]:
df_ml_1

Unnamed: 0,ref_compo,predi_2024-01-11
0,REAE175-E175_4124-0,0
12,REAE175-E175_4124-1,0
24,SYSE175-E175_4124-2,0
36,ORDE175-E175_4124-3,0
48,AUTE175-E175_4124-4,0
...,...,...
78490,SYSA380-A380_6805-40,2
78497,PORA380-A380_6805-41,2
78504,HUBA380-A380_6805-42,2
78511,ECLA380-A380_6805-43,2


In [132]:
maitre['ref_compo'].info()

<class 'pandas.core.series.Series'>
Int64Index: 10350 entries, 0 to 79868
Series name: ref_compo
Non-Null Count  Dtype 
--------------  ----- 
10350 non-null  object
dtypes: object(1)
memory usage: 419.8+ KB


In [156]:
maitre = maitre.merge(df_ml_1, on='ref_compo', how= 'left')
maitre = maitre.merge(df_ml_2, on='ref_compo', how= 'left')
maitre = maitre.merge(df_ml_3, on='ref_compo', how= 'left')
maitre = maitre.merge(df_ml_4, on='ref_compo', how= 'left')
maitre = maitre.merge(df_ml_5, on='ref_compo', how= 'left')
maitre = maitre.merge(df_ml_6, on='ref_compo', how= 'left')
maitre = maitre.merge(df_ml_7, on='ref_compo', how= 'left')
maitre = maitre.merge(df_ml_8, on='ref_compo', how= 'left')
maitre = maitre.merge(df_ml_9, on='ref_compo', how= 'left')
maitre = maitre.merge(df_ml_10, on='ref_compo', how= 'left')
maitre = maitre.merge(df_ml_11, on='ref_compo', how= 'left')
maitre = maitre.merge(df_ml_12, on='ref_compo', how= 'left')

In [162]:
for i in maitre.columns:
    print (i)

type_model
ref_aero
debut_service
age_avion
last_maint
end_maint
ref_compo
categorie
cout
lifespan
usure_2024-01-11
evolution_usure_2024-01-11
etat_voyant_2024-01-11
time_en_air_2024-01-11
taux_usure_2024-01-11
predi_2024-01-11
usure_2024-01-12
evolution_usure_2024-01-12
etat_voyant_2024-01-12
time_en_air_2024-01-12
taux_usure_2024-01-12
predi_2024-01-12
usure_2024-01-13
evolution_usure_2024-01-13
etat_voyant_2024-01-13
time_en_air_2024-01-13
taux_usure_2024-01-13
predi_2024-01-13
usure_2024-01-14
evolution_usure_2024-01-14
etat_voyant_2024-01-14
time_en_air_2024-01-14
taux_usure_2024-01-14
predi_2024-01-14
usure_2024-01-15
evolution_usure_2024-01-15
etat_voyant_2024-01-15
time_en_air_2024-01-15
taux_usure_2024-01-15
predi_2024-01-15
usure_2024-01-16
evolution_usure_2024-01-16
etat_voyant_2024-01-16
time_en_air_2024-01-16
taux_usure_2024-01-16
predi_2024-01-16
usure_2024-01-17
evolution_usure_2024-01-17
etat_voyant_2024-01-17
time_en_air_2024-01-17
taux_usure_2024-01-17
predi_2024-01-1

In [158]:
maitre = maitre.drop(['usure_2024-01-23',
'evolution_usure_2024-01-23',
'etat_voyant_2024-01-23',
'time_en_air_2024-01-23',
'usure_2024-01-24',
'evolution_usure_2024-01-24',
'usure_2024-01-25',
'evolution_usure_2024-01-25',
'usure_2024-01-26',
'evolution_usure_2024-01-26',
'usure_2024-01-27',
'evolution_usure_2024-01-27',
'usure_2024-01-28',
'evolution_usure_2024-01-28',
'usure_2024-01-29',
'evolution_usure_2024-01-29',
'etat_voyant_2024-01-29',
'time_en_air_2024-01-29',
'usure_2024-01-30',
'evolution_usure_2024-01-30',
'etat_voyant_2024-01-30',
'time_en_air_2024-01-30',
'taux_usure_2024-01-30',
'usure_2024-01-31',
'evolution_usure_2024-01-31',
'etat_voyant_2024-01-31',
'time_en_air_2024-01-31',
'taux_usure_2024-01-31',
'usure_2024-02-01',
'evolution_usure_2024-02-01',
'etat_voyant_2024-02-01',
'time_en_air_2024-02-01',
'taux_usure_2024-02-01',
'usure_2024-02-02',
'evolution_usure_2024-02-02',
'etat_voyant_2024-02-02',
'time_en_air_2024-02-02',
'taux_usure_2024-02-02',
'usure_2024-02-03',
'evolution_usure_2024-02-03',
'etat_voyant_2024-02-03',
'time_en_air_2024-02-03',
'taux_usure_2024-02-03',
'usure_2024-02-04',
'evolution_usure_2024-02-04',
'etat_voyant_2024-02-04',
'time_en_air_2024-02-04',
'usure_2024-02-05',
'evolution_usure_2024-02-05',
'etat_voyant_2024-02-05',
'time_en_air_2024-02-05'], axis = 1)

In [160]:
maitre = maitre.fillna(0)

In [161]:
printdf = maitre.to_csv('df_final.csv',index=False)

In [168]:
# Sélectionner les colonnes d'intérêt (y compris les colonnes de prédiction)
predi_columns = [col for col in maitre.columns if col.startswith('predi_')]

In [171]:

# Sélectionner les colonnes d'intérêt (y compris les colonnes de prédiction)

columns_of_interest = ['ref_aero'] + predi_columns

# Créer un DataFrame avec les colonnes d'intérêt
df_filtered = maitre[columns_of_interest]

# Réorganiser les données en utilisant pivot_table
pivot_df = df_filtered.pivot_table(index='ref_aero', aggfunc='max')

# Afficher le pivot avec les valeurs maximales de prédiction par jour pour chaque référence d'aéroport
print(pivot_df)

           predi_2024-01-11  predi_2024-01-12  predi_2024-01-13  \
ref_aero                                                          
A320_0691               0.0               0.0               0.0   
A320_0934               0.0               0.0               0.0   
A320_1963               0.0               0.0               0.0   
A320_1980               2.0               0.0               0.0   
A320_2053               0.0               0.0               0.0   
...                     ...               ...               ...   
E175_4124               0.0               0.0               0.0   
E175_4571               0.0               2.0               0.0   
E175_5930               2.0               0.0               0.0   
E175_6180               0.0               0.0               0.0   
E175_6334               2.0               0.0               0.0   

           predi_2024-01-14  predi_2024-01-15  predi_2024-01-16  \
ref_aero                                                     

In [176]:
pivot_df = pivot_df.astype(int)

In [183]:
pivot_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 230 entries, A320_0691 to E175_6334
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype
---  ------            --------------  -----
 0   predi_2024-01-11  230 non-null    int32
 1   predi_2024-01-12  230 non-null    int32
 2   predi_2024-01-13  230 non-null    int32
 3   predi_2024-01-14  230 non-null    int32
 4   predi_2024-01-15  230 non-null    int32
 5   predi_2024-01-16  230 non-null    int32
 6   predi_2024-01-17  230 non-null    int32
 7   predi_2024-01-18  230 non-null    int32
 8   predi_2024-01-19  230 non-null    int32
 9   predi_2024-01-20  230 non-null    int32
 10  predi_2024-01-21  230 non-null    int32
 11  predi_2024-01-22  230 non-null    int32
 12  economie          230 non-null    int64
dtypes: int32(12), int64(1)
memory usage: 22.5+ KB


In [180]:
pivot_df['economie'] = pivot_df.apply(lambda row: 90000 if (row == 2).any() else 0, axis=1)

# Afficher le DataFrame avec la nouvelle colonne "economie"
print(pivot_df)

           predi_2024-01-11  predi_2024-01-12  predi_2024-01-13  \
ref_aero                                                          
A320_0691                 0                 0                 0   
A320_0934                 0                 0                 0   
A320_1963                 0                 0                 0   
A320_1980                 2                 0                 0   
A320_2053                 0                 0                 0   
...                     ...               ...               ...   
E175_4124                 0                 0                 0   
E175_4571                 0                 2                 0   
E175_5930                 2                 0                 0   
E175_6180                 0                 0                 0   
E175_6334                 2                 0                 0   

           predi_2024-01-14  predi_2024-01-15  predi_2024-01-16  \
ref_aero                                                     

In [181]:
pivot_df['economie'].sum()

18090000

In [182]:
pivot_df['economie'].value_counts()

90000    201
0         29
Name: economie, dtype: int64