In [13]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_absolute_error, accuracy_score, classification_report
import asyncpg
import asyncio
import joblib
import logging
import requests
import os
from pymongo import MongoClient
from flatten_json import flatten

# MongoDB connection settings are read from the environment so that real
# credentials do not have to live in the notebook. The fallbacks reproduce the
# values that were previously hardcoded here.
# NOTE(review): the fallback username/password are still visible in the
# notebook; remove them once every environment sets MONGO_USERNAME and
# MONGO_PASSWORD.
client_vps = MongoClient(
    host=os.getenv("MONGO_HOST", "mongodb"),
    port=int(os.getenv("MONGO_PORT", "27017")),
    username=os.getenv("MONGO_USERNAME", "datascientest"),
    password=os.getenv("MONGO_PASSWORD", "dst123"),
)
# Second name bound to the same client object.
mongodb_client = client_vps

# Logger used by the error paths below. It was previously never defined, so
# any API failure raised NameError inside the except block instead of being
# logged.
logger = logging.getLogger(__name__)

API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000")  # Use the external IP when configured
endpoint = '/flights-all'

# Every flattened marketing-carrier column starts with this prefix. Selecting
# by prefix replaces the hand-written list of indices 0..11, which raised
# KeyError as soon as the API returned a different number of carriers.
MARKETING_CARRIER_PREFIX = 'MarketingCarrierList_'

# Remaining columns that are removed from the flights frame.
FLIGHT_COLUMNS_TO_DROP = [
    'Departure_Terminal_Name',
    'Departure_Terminal_Gate',
    'Departure_Scheduled_Date',
    'Departure_Scheduled_Time',
    'Departure_Status_Description',
    'Arrival_Terminal_Name',
    'Arrival_Terminal_Gate',
    'Arrival_Scheduled_Date',
    'Arrival_Scheduled_Time',
    'Arrival_Status_Description',
    'Status_Description',
]

try:
    response = requests.get(f"{API_BASE_URL}{endpoint}", timeout=10)
    if response.status_code == 200:
        # Flatten each nested JSON record into a single-level dict whose keys
        # are joined with '_', then load all records into one DataFrame.
        dict_flattened = (flatten(record, '_') for record in response.json())
        df = pd.DataFrame(dict_flattened)

        marketing_carrier_columns = [
            column for column in df.columns
            if str(column).startswith(MARKETING_CARRIER_PREFIX)
        ]
        # errors="ignore": a column absent from this particular payload must
        # not abort the whole cell.
        df_minimized = df.drop(
            columns=marketing_carrier_columns + FLIGHT_COLUMNS_TO_DROP,
            errors="ignore",
        )
        print(df_minimized.count())
    else:
        # Previously a non-200 answer was ignored silently, leaving
        # df_minimized undefined with no hint as to why.
        logger.error(f"Erreur API {endpoint} : HTTP {response.status_code}")
except Exception as e:
    logger.error(f"Erreur API {endpoint} : {e}")
    


_id                              416
Departure_AirportCode            416
Departure_Actual_Date            416
Departure_Actual_Time            416
Departure_Status_Code            416
Arrival_AirportCode              416
Arrival_Actual_Date              416
Arrival_Actual_Time              416
Arrival_Status_Code              416
OperatingCarrier_AirlineID       416
OperatingCarrier_FlightNumber    416
Equipment_AircraftCode           416
Status_Code                      416
dtype: int64


In [14]:
# Data-quality check on the pruned flights frame: missing values per column
# first, then non-null counts per column.
for _column_summary in (df_minimized.isna().sum(), df_minimized.count()):
    print(_column_summary)

_id                              0
Departure_AirportCode            0
Departure_Actual_Date            0
Departure_Actual_Time            0
Departure_Status_Code            0
Arrival_AirportCode              0
Arrival_Actual_Date              0
Arrival_Actual_Time              0
Arrival_Status_Code              0
OperatingCarrier_AirlineID       0
OperatingCarrier_FlightNumber    0
Equipment_AircraftCode           0
Status_Code                      0
dtype: int64
_id                              416
Departure_AirportCode            416
Departure_Actual_Date            416
Departure_Actual_Time            416
Departure_Status_Code            416
Arrival_AirportCode              416
Arrival_Actual_Date              416
Arrival_Actual_Time              416
Arrival_Status_Code              416
OperatingCarrier_AirlineID       416
OperatingCarrier_FlightNumber    416
Equipment_AircraftCode           416
Status_Code                      416
dtype: int64


In [15]:
# Quick look at the distinct dates and status codes present in the extract.
print(df_minimized['Departure_Actual_Date'].unique())
print(df_minimized['Arrival_Actual_Date'].unique())
print(df_minimized['Status_Code'].unique())

# Combine the separate date and time strings into real timestamps.
# The inputs are read from df_minimized itself rather than from `df`: `df` is
# reused for other data elsewhere in this notebook, so depending on it here
# only worked while it happened to still hold the flights frame.
df_minimized['Departure_Actual_Datetime'] = pd.to_datetime(
    df_minimized['Departure_Actual_Date'] + ' ' + df_minimized['Departure_Actual_Time']
)
df_minimized['Arrival_Actual_Datetime'] = pd.to_datetime(
    df_minimized['Arrival_Actual_Date'] + ' ' + df_minimized['Arrival_Actual_Time']
)

# The string date/time columns are redundant once the timestamps exist.
df_w_datetime = df_minimized.drop(
    columns=[
        'Departure_Actual_Date',
        'Departure_Actual_Time',
        'Arrival_Actual_Date',
        'Arrival_Actual_Time',
    ]
)
print(df_w_datetime[['Departure_Actual_Datetime']].drop_duplicates())

['2025-10-03' '2025-10-04' '2025-10-01' '2025-09-30']
['2025-10-04' '2025-10-01']
['LD' 'OT' 'DV']
    Departure_Actual_Datetime
0         2025-10-03 21:13:00
1         2025-10-03 21:24:00
2         2025-10-03 21:21:00
3         2025-10-03 21:26:00
4         2025-10-03 21:27:00
..                        ...
395       2025-10-01 09:44:00
397       2025-10-01 09:04:00
412       2025-10-01 07:52:00
413       2025-10-01 09:19:00
414       2025-10-01 06:40:00

[249 rows x 1 columns]


In [7]:
# NOTE(review): this cell carries an older execution count (In [7]) and its
# saved output lists weather timestamps, so it was apparently run while `df`
# held weather data. In notebook order `df` is the flights frame, which has no
# 'dt_txt' column; the guard keeps Restart & Run All from failing here.
if 'dt_txt' in df.columns:
    print(df['dt_txt'].unique())
else:
    print("Column 'dt_txt' is not present in df; skipping.")

['2025-09-13 15:00:00' '2025-09-13 18:00:00' '2025-09-13 21:00:00'
 '2025-09-14 00:00:00' '2025-09-14 03:00:00' '2025-09-14 06:00:00'
 '2025-09-14 09:00:00' '2025-09-14 12:00:00' '2025-09-14 15:00:00'
 '2025-09-14 18:00:00' '2025-09-14 21:00:00' '2025-09-15 00:00:00'
 '2025-09-15 03:00:00' '2025-09-15 06:00:00' '2025-09-15 09:00:00'
 '2025-09-15 12:00:00' '2025-09-15 15:00:00' '2025-09-15 18:00:00'
 '2025-09-15 21:00:00' '2025-09-16 00:00:00' '2025-09-16 03:00:00'
 '2025-09-16 06:00:00' '2025-09-16 09:00:00' '2025-09-16 12:00:00'
 '2025-09-16 15:00:00' '2025-09-16 18:00:00' '2025-09-16 21:00:00'
 '2025-09-17 00:00:00' '2025-09-17 03:00:00' '2025-09-17 06:00:00'
 '2025-09-17 09:00:00' '2025-09-17 12:00:00' '2025-09-17 15:00:00'
 '2025-09-17 18:00:00' '2025-09-17 21:00:00' '2025-09-18 00:00:00'
 '2025-09-18 03:00:00' '2025-09-18 06:00:00' '2025-09-18 09:00:00'
 '2025-09-18 12:00:00' '2025-10-02 06:00:00' '2025-10-02 09:00:00'
 '2025-10-02 12:00:00' '2025-10-02 15:00:00' '2025-10-02 18:00

In [32]:
import numpy as np

print(df_w_datetime.head())

# Add an integer "ts_<column>" twin for each timestamp column.
# NOTE(review): the integers are presumably epoch counts in the datetime's own
# resolution (nanoseconds for datetime64[ns]) - confirm before comparing them
# with timestamps expressed in another unit.
for _datetime_column in ('Departure_Actual_Datetime', 'Arrival_Actual_Datetime'):
    _raw_values = df_w_datetime[_datetime_column].values
    df_w_datetime['ts_' + _datetime_column] = pd.to_numeric(_raw_values)

print(df_w_datetime.columns)

                        _id Departure_AirportCode Departure_Status_Code  \
0  68e27a8913a58ecb37204024                   OUA                    DP   
1  68e27a8913a58ecb37204025                   DSS                    DP   
2  68e27a8913a58ecb37204026                   NSI                    DP   
3  68e27a8913a58ecb37204027                   ROB                    DP   
4  68e27a8913a58ecb37204028                   FIH                    DP   

  Arrival_AirportCode Arrival_Status_Code OperatingCarrier_AirlineID  \
0                 BRU                  LD                         SN   
1                 BRU                  LD                         SN   
2                 BRU                  LD                         SN   
3                 BRU                  LD                         SN   
4                 BRU                  LD                         SN   

  OperatingCarrier_FlightNumber Equipment_AircraftCode Status_Code  \
0                           256               

In [34]:
# Logger for the error paths below; getLogger returns the same object on every
# call, so re-binding the name here is harmless. Without it the except block
# raised NameError instead of logging.
logger = logging.getLogger(__name__)

endpoint = '/weathers-airport?airport_code=BRU'

try:
    response = requests.get(f"{API_BASE_URL}{endpoint}", timeout=10)
    if response.status_code == 200:
        # Flatten each nested JSON record into a single-level dict whose keys
        # are joined with '_', then load all records into one DataFrame.
        dict_flattened = (flatten(record, '_') for record in response.json())
        weather = pd.DataFrame(dict_flattened)
        print(weather.columns)
    else:
        # Previously a non-200 answer was ignored silently, leaving `weather`
        # undefined with no hint as to why.
        logger.error(f"Erreur API {endpoint} : HTTP {response.status_code}")
except Exception as e:
    logger.error(f"Erreur API {endpoint} : {e}")

Index(['_id', 'dt', 'main_temp', 'main_feels_like', 'main_temp_min',
       'main_temp_max', 'main_pressure', 'main_sea_level', 'main_grnd_level',
       'main_humidity', 'main_temp_kf', 'weather_0_id', 'weather_0_main',
       'weather_0_description', 'weather_0_icon', 'clouds_all', 'wind_speed',
       'wind_deg', 'wind_gust', 'visibility', 'pop', 'rain_3h', 'sys_pod',
       'dt_txt', 'AirportCode'],
      dtype='object')


In [None]:
# Attach to every flight the weather record closest in time at its arrival
# airport.
#
# Fixes compared with the previous version of this cell:
#   * it read from `flight`, a name that is never defined in this notebook;
#     the flights frame is `df_w_datetime`;
#   * the airport key is called 'Arrival_AirportCode' on the flights side but
#     'AirportCode' on the weather side, so a single `by=` cannot work;
#   * the join now uses real datetimes on both sides instead of the integer
#     'ts_Arrival_Actual_Datetime' / 'dt' pair, whose units are not guaranteed
#     to match.
# NOTE(review): both timestamps are treated as timezone-naive; confirm that
# flight times and weather 'dt_txt' are expressed in the same timezone.
flights_sorted = df_w_datetime.sort_values("Arrival_Actual_Datetime")
weather_sorted = (
    weather
    .assign(weather_datetime=pd.to_datetime(weather["dt_txt"]))
    .sort_values("weather_datetime")
)

merged = pd.merge_asof(
    flights_sorted,
    weather_sorted,
    left_on="Arrival_Actual_Datetime",
    right_on="weather_datetime",
    left_by="Arrival_AirportCode",  # airport key on the flights side
    right_by="AirportCode",         # airport key on the weather side
    direction="nearest",            # take the closest weather record in time
)

print(merged)

In [None]:
# Show the weather records for Brussels airport.
# The previous line ended with a dangling `&&`, which is not valid Python (the
# element-wise "and" in pandas is `&`), and no second condition was written.
# NOTE(review): this now filters `weather`, the frame that has an
# 'AirportCode' column, where the original used `df`; confirm that is the
# intended frame.
print(weather.loc[weather['AirportCode'] == 'BRU'])