In [31]:
import pandas as pd

import mysql.connector

from sqlalchemy import create_engine

from skimpy import skim

from datetime import datetime


# 0- importation des données en df

In [32]:
username = 'postgres'
password = 'admin'
host = '127.0.0.1'  # ou l'adresse IP de votre serveur PostgreSQL
port = '5432'  # le port par défaut pour PostgreSQL est 5432
database = 'preventive_maintenance'

# Créer l'URL de connexion
db_url = f'postgresql+psycopg2://{username}:{password}@{host}:{port}/{database}'

# Créer le moteur SQLAlchemy
engine = create_engine(db_url)

# Se connecter à la base de données
connexion = engine.connect()


In [33]:
df_aero = pd.read_sql_query('''
                  SELECT * 
                  FROM aeronefs
                  ''', connexion)


In [34]:
df_composants = pd.read_sql_query('''
                  SELECT * 
                  FROM composants
                  ''', connexion)

In [35]:
df_degradations = pd.read_sql_query('''
                  SELECT * 
                  FROM degradations
                  ''', connexion)

In [36]:
df_logs_vols = pd.read_sql_query('''
                  SELECT * 
                  FROM logs_vols
                  ''', connexion)

In [37]:
connexion.close()

# 1- merge des dataframes

In [38]:
# merge du degradations et composants en left join avec degradations en left

df_merge_deg_compo = pd.merge(df_degradations, df_composants, how='inner', left_on='compo_concerned', right_on='ref_compo')

In [39]:
df_merge_deg_compo.head(1)

Unnamed: 0,clef,ref_deg,linked_aero,compo_concerned,usure_nouvelle,measure_day,need_replacement,ref_compo,categorie,aero,desc_compo,lifespan,taux_usure_actuel,cout
0,D001982CRJ700_6943SYSCRJ700-CRJ700_6943-292024...,D001982,CRJ700_6943,SYSCRJ700-CRJ700_6943-29,16.941998,2024-06-02,False,SYSCRJ700-CRJ700_6943-29,Composants Majeurs,CRJ700_6943,Système d'extinction d'incendie,11971,16.941998,10109


In [40]:
# faire un group by par categoriee et ref_aero
df_merge_deg_compo_gb = df_merge_deg_compo.groupby(['aero', 'desc_compo', 'measure_day']).agg({'usure_nouvelle': ['max', 'mean']}).reset_index()

# Aplatir les colonnes
df_merge_deg_compo_gb.columns = ['_'.join(col).strip().replace(' ', '_') if col[1] else col[0] for col in df_merge_deg_compo_gb.columns.values]


In [41]:
# faire un pivot pour obtenir une seule ligne par avion
df_pivot = df_merge_deg_compo_gb.pivot_table(index=['aero', 'measure_day'], columns=['desc_compo'], values=['usure_nouvelle_max', 'usure_nouvelle_mean'], aggfunc='first').reset_index()

df_pivot.columns = ['_'.join([str(i) for i in col]).strip().replace(' ', '_')  for col in df_pivot.columns.values]


In [42]:
df_pivot['measure_day_'] = df_pivot['measure_day_'].astype('str')

In [43]:
# ajout d'une clé aero et mesure_day
df_pivot['cle'] = df_pivot.apply(lambda x: x['aero_'] + '_' + x['measure_day_'], axis=1)

In [44]:
df_logs_vols

Unnamed: 0,ref_vol,aero_linked,jour_vol,time_en_air,sensor_data,etat_voyant,temp,temp_unit,pressure,pressure_unit,vibrations,vibrations_unit
0,V06783026,A320_6242,2024-06-04,6.2,"{'temp': '-14.7°C', 'pressure': '1009.1 hPa', ...",1,-14.7,°C,1009.1,hPa,0.015340,m/s²
1,V06810954,A380_5199,2024-06-04,2.1,"{'temp': '2.9°C', 'pressure': '1015.8 hPa', 'v...",0,2.9,°C,1015.8,hPa,2.317764,m/s²
2,V05201226,A350_3122,2024-06-04,7.7,"{'temp': '8.0°C', 'pressure': '934.2 hPa', 'vi...",0,8.0,°C,934.2,hPa,1.999378,m/s²
3,V05582404,B747_3165,2024-06-04,8.3,"{'temp': '7.3°C', 'pressure': '999.9 hPa', 'vi...",0,7.3,°C,999.9,hPa,0.875383,m/s²
4,V01713095,A350_6452,2024-06-04,9.8,"{'temp': '-11.4°C', 'pressure': '946.7 hPa', '...",0,-11.4,°C,946.7,hPa,1.551531,m/s²
...,...,...,...,...,...,...,...,...,...,...,...,...
5237,V08907808,A350_6495,2024-07-25,6.6,"{'temp': '13.8°C', 'pressure': '899.0 hPa', 'v...",0,13.8,°C,899.0,hPa,1.876524,m/s²
5238,V01846975,E175_5414,2024-07-25,2.7,"{'temp': '-24.2°C', 'pressure': '1051.4 hPa', ...",1,-24.2,°C,1051.4,hPa,0.888896,m/s²
5239,V09385402,E175_0620,2024-07-25,5.8,"{'temp': '1.9°C', 'pressure': '907.5 hPa', 'vi...",0,1.9,°C,907.5,hPa,3.648072,m/s²
5240,V04742938,B747_1460,2024-07-25,3.7,"{'temp': '-20.2°C', 'pressure': '1074.7 hPa', ...",1,-20.2,°C,1074.7,hPa,2.406679,m/s²


In [45]:
df_logs_vols['jour_vol'] = df_logs_vols['jour_vol'].astype('str')

In [46]:
# ajout d'une clé aero et mesure_day sur le df_logs_vols
df_logs_vols['cle'] = df_logs_vols.apply(lambda x: x['aero_linked'] + '_' + x['jour_vol'], axis=1)

In [47]:
# merge de df_log_vol et df_pivot

df_merge_pivot_logvol = pd.merge(df_pivot, df_logs_vols, how='inner', left_on='cle', right_on='cle')

In [48]:
# merge de df_aero et df_merge_pivot_logvol

df_merge_tot = pd.merge(df_merge_pivot_logvol, df_aero, how='inner', left_on='aero_linked', right_on='ref_aero')

In [49]:
df_merge_tot.shape

(5216, 109)

In [50]:
del df_aero, df_logs_vols, df_merge_pivot_logvol, df_degradations, df_composants

# 2- Nettoayage du df

In [51]:
# df_merge_tot.columns

In [52]:
# suppression des colonne n'apportant pas d'informations
df_merge_tot = df_merge_tot.drop(columns=[
                            'cle', 'ref_vol',
                            'aero_linked', 'jour_vol', 'sensor_data', 
                            'temp_unit', 'pressure_unit',
                            'vibrations_unit', 'ref_aero', 
                            'end_maint'])

In [53]:
pd.set_option("display.max_columns", None)

In [54]:
# skim(df_merge_total)

In [55]:
# modification du type pour date 
df_merge_tot['debut_service'] = pd.to_datetime(df_merge_tot['debut_service'], format='%Y-%m-%d')
df_merge_tot['last_maint'] = pd.to_datetime(df_merge_tot['last_maint'], format='%Y-%m-%d')
df_merge_tot['measure_day'] = pd.to_datetime(df_merge_tot['measure_day_'], format='%Y-%m-%d')

In [56]:
# modif type category
df_merge_tot['etat_voyant'] = df_merge_tot['etat_voyant'].astype('category')  

In [57]:
# Ajout d'une colonne temps de vol cumulé depuis la derniere maintenance

# Assumons que df_merge_total est le DataFrame fourni et qu'il contient les colonnes 'temps_de_vol', 'last_maint', 'jour_vol', et 'ref_aero'
# Trier le DataFrame par avion et par date de vol
df_merge_total_sort = df_merge_tot.sort_values(by=['aero_', 'measure_day']).reset_index(drop=True)

# Initialiser la colonne 'temps_de_vol_cumule' et 'etat_voyant_suivant' avec des zéros
df_merge_total_sort['temps_de_vol_cumule'] = 0
df_merge_total_sort['etat_voyant_suivant'] = 0

# Parcourir chaque avion
for avion in df_merge_total_sort['aero_'].unique():
    
    # Filtrer les données pour l'avion actuel
    df_avion = df_merge_total_sort[df_merge_total_sort['aero_'] == avion]
    
    # Variable pour stocker le temps de vol cumulé
    temps_de_vol_cumule = 0
    
    # Parcourir chaque ligne du DataFrame filtré pour l'avion actuel
    for i in df_avion.index:
      
        # Si la maintenance a eu lieu (en supposant que 'last_maint' est un booléen ou indicatif)
        if (i != 0 and ((df_merge_total_sort.loc[i, 'last_maint'].date() == df_merge_total_sort.loc[i, 'measure_day'].date()) or
            (df_merge_total_sort.loc[i-1, 'etat_voyant'] != 0))):
            # Réinitialiser le temps de vol cumulé
            temps_de_vol_cumule = 0
        
        # Ajouter le temps de vol actuel au temps de vol cumulé
        temps_de_vol_cumule += df_merge_total_sort.loc[i, 'time_en_air']
        
        # Mettre à jour la colonne 'temps_de_vol_cumule'
        df_merge_total_sort.loc[i, 'temps_de_vol_cumule'] = temps_de_vol_cumule

        if i < len(df_merge_total_sort)-1:
            df_merge_total_sort.loc[i,'etat_voyant_suivant'] = df_merge_total_sort.loc[i+1, 'etat_voyant'] 








  df_merge_total_sort.loc[i, 'temps_de_vol_cumule'] = temps_de_vol_cumule


In [58]:
df_merge_total_sort.head(5)

Unnamed: 0,aero_,measure_day_,usure_nouvelle_max_Ailerons,usure_nouvelle_max_Autopilote,usure_nouvelle_max_Détecteurs_de_fumée,usure_nouvelle_max_Empennage,usure_nouvelle_max_Flaps,usure_nouvelle_max_Freins,usure_nouvelle_max_Gouvernail_de_direction,usure_nouvelle_max_Générateur_électrique,usure_nouvelle_max_Hublots,usure_nouvelle_max_Ordinateur_de_vol,usure_nouvelle_max_Porte_cargo,usure_nouvelle_max_Portes_passagers,usure_nouvelle_max_Radar_météorologique,usure_nouvelle_max_Réacteur_droit,usure_nouvelle_max_Réacteur_gauche,usure_nouvelle_max_Siège_copilote,usure_nouvelle_max_Siège_pilote,usure_nouvelle_max_Spoilers,usure_nouvelle_max_Système_anti-givre,usure_nouvelle_max_Système_d'alerte_de_proximité_au_sol_(GPWS),usure_nouvelle_max_Système_d'annonce_publique,usure_nouvelle_max_Système_d'approvisionnement_en_eau,usure_nouvelle_max_Système_d'atterrissage_automatique,usure_nouvelle_max_Système_d'extinction_d'incendie,usure_nouvelle_max_Système_de_commande_des_gaz,usure_nouvelle_max_Système_de_communication_radio,usure_nouvelle_max_Système_de_conditionnement_d'air,usure_nouvelle_max_Système_de_contrôle_de_vol,usure_nouvelle_max_Système_de_contrôle_des_surfaces_de_vol,usure_nouvelle_max_Système_de_divertissement_en_vol,usure_nouvelle_max_Système_de_gestion_de_carburant,usure_nouvelle_max_Système_de_gestion_électrique,usure_nouvelle_max_Système_de_navigation,usure_nouvelle_max_Système_de_pressurisation,usure_nouvelle_max_Système_de_surveillance_du_trafic_aérien,usure_nouvelle_max_Système_hydraulique_principal,usure_nouvelle_max_Systèmes_d'oxygène_d'urgence,usure_nouvelle_max_Toilettes,usure_nouvelle_max_Train_d'atterrissage_avant,usure_nouvelle_max_Train_d'atterrissage_principal,usure_nouvelle_max_Unité_de_gestion_de_l'air,usure_nouvelle_max_Éclairage_d'urgence,usure_nouvelle_max_Éclairage_de_cabine,usure_nouvelle_max_Éclairage_de_la_cabine,usure_nouvelle_mean_Ailerons,usure_nouvelle_mean_Autopilote,usure_nouvelle_mean_Détecteurs_de_fumée,usure_nouvelle_mean_Empennage,usure_nouvelle_mean_Flaps,usure_nouvelle_mean_Freins,usure_nouvelle_mean_Gouvernail_de_direction,usure_nouvelle_mean_Générateur_électrique,usure_nouvelle_mean_Hublots,usure_nouvelle_mean_Ordinateur_de_vol,usure_nouvelle_mean_Porte_cargo,usure_nouvelle_mean_Portes_passagers,usure_nouvelle_mean_Radar_météorologique,usure_nouvelle_mean_Réacteur_droit,usure_nouvelle_mean_Réacteur_gauche,usure_nouvelle_mean_Siège_copilote,usure_nouvelle_mean_Siège_pilote,usure_nouvelle_mean_Spoilers,usure_nouvelle_mean_Système_anti-givre,usure_nouvelle_mean_Système_d'alerte_de_proximité_au_sol_(GPWS),usure_nouvelle_mean_Système_d'annonce_publique,usure_nouvelle_mean_Système_d'approvisionnement_en_eau,usure_nouvelle_mean_Système_d'atterrissage_automatique,usure_nouvelle_mean_Système_d'extinction_d'incendie,usure_nouvelle_mean_Système_de_commande_des_gaz,usure_nouvelle_mean_Système_de_communication_radio,usure_nouvelle_mean_Système_de_conditionnement_d'air,usure_nouvelle_mean_Système_de_contrôle_de_vol,usure_nouvelle_mean_Système_de_contrôle_des_surfaces_de_vol,usure_nouvelle_mean_Système_de_divertissement_en_vol,usure_nouvelle_mean_Système_de_gestion_de_carburant,usure_nouvelle_mean_Système_de_gestion_électrique,usure_nouvelle_mean_Système_de_navigation,usure_nouvelle_mean_Système_de_pressurisation,usure_nouvelle_mean_Système_de_surveillance_du_trafic_aérien,usure_nouvelle_mean_Système_hydraulique_principal,usure_nouvelle_mean_Systèmes_d'oxygène_d'urgence,usure_nouvelle_mean_Toilettes,usure_nouvelle_mean_Train_d'atterrissage_avant,usure_nouvelle_mean_Train_d'atterrissage_principal,usure_nouvelle_mean_Unité_de_gestion_de_l'air,usure_nouvelle_mean_Éclairage_d'urgence,usure_nouvelle_mean_Éclairage_de_cabine,usure_nouvelle_mean_Éclairage_de_la_cabine,time_en_air,etat_voyant,temp,pressure,vibrations,type_model,debut_service,last_maint,en_maintenance,measure_day,temps_de_vol_cumule,etat_voyant_suivant
0,A320_1884,2024-06-04,24.42432,30.54246,6.84188,36.80248,17.1145,17.5027,40.33574,30.02888,28.25604,36.52938,28.50046,26.18926,35.66614,43.44268,12.1281,45.796,54.04908,28.31668,8.39526,9.42614,29.24768,38.84328,61.12666,10.67478,48.83446,23.0166,60.78174,14.7344,59.5825,26.3673,25.96942,27.94362,15.48738,45.95084,23.06414,19.63598,23.21684,19.2414,43.96352,47.52002,14.45032,59.68462,3.96022,18.20502,24.42432,30.54246,6.84188,36.80248,17.1145,17.5027,40.33574,30.02888,28.25604,36.52938,28.50046,26.18926,35.66614,43.44268,12.1281,45.796,54.04908,28.31668,8.39526,9.42614,29.24768,38.84328,61.12666,10.67478,48.83446,23.0166,47.24674,14.7344,59.5825,26.3673,25.96942,27.94362,15.48738,45.95084,23.06414,19.63598,23.21684,19.2414,43.96352,47.52002,14.45032,59.68462,3.96022,18.20502,9.8,0,4.3,907.3,1.763677,A320,2015-12-26,2024-06-01,False,2024-06-04,9.8,1
1,A320_1884,2024-06-05,25.203096,31.641138,7.660764,37.818944,17.93035,18.31781,41.271322,31.043864,33.360412,37.302014,29.347538,27.159178,36.548442,44.451004,13.09343,46.8468,54.944924,29.310204,9.490978,10.197442,30.274504,39.790184,62.128398,11.560634,49.703738,23.78398,61.547122,15.66532,60.40575,27.40219,26.947626,28.713886,16.267414,46.789852,23.805842,20.731994,24.181652,20.22042,44.996856,48.288806,15.428896,60.747186,4.905866,18.992306,25.203096,31.641138,7.660764,37.818944,17.93035,18.31781,41.271322,31.043864,33.360412,37.302014,29.347538,27.159178,36.548442,44.451004,13.09343,46.8468,54.944924,29.310204,9.490978,10.197442,30.274504,39.790184,62.128398,11.560634,49.703738,23.78398,48.012122,15.66532,60.40575,27.40219,26.947626,28.713886,16.267414,46.789852,23.805842,20.731994,24.181652,20.22042,44.996856,48.288806,15.428896,60.747186,4.905866,18.992306,7.4,1,-24.3,905.9,3.428686,A320,2015-12-26,2024-06-01,False,2024-06-05,17.2,0
2,A320_1884,2024-06-08,25.466196,32.012313,7.937414,38.162344,18.205975,18.593185,41.587397,31.386764,35.084862,37.563039,29.633713,27.486853,36.846517,44.791654,13.419555,47.2018,55.247574,29.645854,9.861153,10.458017,30.621404,40.110084,62.466823,11.859909,49.997413,24.04323,61.805697,15.97982,60.683875,27.751815,27.278101,28.974111,16.530939,47.073302,24.056417,21.102269,24.507602,20.55117,45.345956,48.548531,15.759496,61.106161,5.225341,19.258281,25.466196,32.012313,7.937414,38.162344,18.205975,18.593185,41.587397,31.386764,35.084862,37.563039,29.633713,27.486853,36.846517,44.791654,13.419555,47.2018,55.247574,29.645854,9.861153,10.458017,30.621404,40.110084,62.466823,11.859909,49.997413,24.04323,48.270697,15.97982,60.683875,27.751815,27.278101,28.974111,16.530939,47.073302,24.056417,21.102269,24.507602,20.55117,45.345956,48.548531,15.759496,61.106161,5.225341,19.258281,2.5,0,-8.4,962.5,1.290613,A320,2015-12-26,2024-06-01,False,2024-06-08,2.5,0
3,A320_1884,2024-06-10,25.52934,32.101395,8.00381,38.24476,18.272125,18.659275,41.663255,31.46906,35.49873,37.625685,29.702395,27.565495,36.918055,44.87341,13.497825,47.287,55.32021,29.72641,9.949995,10.520555,30.70466,40.18686,62.548045,11.931735,50.067895,24.10545,61.867755,16.0553,60.750625,27.835725,27.357415,29.036565,16.594185,47.14133,24.116555,21.191135,24.58583,20.63055,45.42974,48.610865,15.83884,61.192315,5.302015,19.322115,25.52934,32.101395,8.00381,38.24476,18.272125,18.659275,41.663255,31.46906,35.49873,37.625685,29.702395,27.565495,36.918055,44.87341,13.497825,47.287,55.32021,29.72641,9.949995,10.520555,30.70466,40.18686,62.548045,11.931735,50.067895,24.10545,48.332755,16.0553,60.750625,27.835725,27.357415,29.036565,16.594185,47.14133,24.116555,21.191135,24.58583,20.63055,45.42974,48.610865,15.83884,61.192315,5.302015,19.322115,0.6,0,17.4,872.1,3.525162,A320,2015-12-26,2024-06-01,False,2024-06-10,3.1,0
4,A320_1884,2024-06-14,25.908204,32.635887,8.402186,38.739256,18.669025,19.055815,42.118403,31.962836,37.981938,38.001561,30.114487,28.037347,37.347283,45.363946,13.967445,47.7982,55.756026,30.209746,10.483047,10.895783,31.204196,40.647516,63.035377,12.362691,50.490787,24.47877,62.240103,16.50818,61.151125,28.339185,27.833299,29.411289,16.973661,47.549498,24.477383,21.724331,25.055198,21.10683,45.932444,48.984869,16.314904,61.709239,5.762059,19.705119,25.908204,32.635887,8.402186,38.739256,18.669025,19.055815,42.118403,31.962836,37.981938,38.001561,30.114487,28.037347,37.347283,45.363946,13.967445,47.7982,55.756026,30.209746,10.483047,10.895783,31.204196,40.647516,63.035377,12.362691,50.490787,24.47877,48.705103,16.50818,61.151125,28.339185,27.833299,29.411289,16.973661,47.549498,24.477383,21.724331,25.055198,21.10683,45.932444,48.984869,16.314904,61.709239,5.762059,19.705119,3.6,0,18.3,986.5,0.973155,A320,2015-12-26,2024-06-01,False,2024-06-14,6.7,0


In [59]:
# skim(df_merge_total)

In [60]:
df_merge_total_sort.to_parquet('data_preclean_avion_full_compo.parquet')