In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_absolute_error, accuracy_score, classification_report
import asyncpg
import asyncio
import joblib
import logging
import requests
import os
from pymongo import MongoClient
from flatten_json import flatten

client_vps = MongoClient(
    host = "mongodb",
    port = 27017,
    username = "datascientest",
    password = "dst123"
)
mongodb_client = client_vps

API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000")  # Utiliser l'IP externe si configuré
endpoint = '/flights-all'

try:
    response = requests.get(f"{API_BASE_URL}{endpoint}", timeout=10)
    if response.status_code == 200:
        dict_flattened = (flatten(record, '_') for record in response.json())
        df = pd.DataFrame(dict_flattened)
        #print(df.head())
        #print(df.columns)
        df_minimized = df.drop(['MarketingCarrierList_MarketingCarrier_AirlineID',
               'MarketingCarrierList_MarketingCarrier_FlightNumber',
               'MarketingCarrierList_MarketingCarrier_0_AirlineID',
               'MarketingCarrierList_MarketingCarrier_0_FlightNumber',
               'MarketingCarrierList_MarketingCarrier_1_AirlineID',
               'MarketingCarrierList_MarketingCarrier_1_FlightNumber',
               'MarketingCarrierList_MarketingCarrier_2_AirlineID',
               'MarketingCarrierList_MarketingCarrier_2_FlightNumber',
               'MarketingCarrierList_MarketingCarrier_3_AirlineID',
               'MarketingCarrierList_MarketingCarrier_3_FlightNumber',
               'MarketingCarrierList_MarketingCarrier_4_AirlineID',
               'MarketingCarrierList_MarketingCarrier_4_FlightNumber',
               'MarketingCarrierList_MarketingCarrier_5_AirlineID',
               'MarketingCarrierList_MarketingCarrier_5_FlightNumber',
               'MarketingCarrierList_MarketingCarrier_6_AirlineID',
               'MarketingCarrierList_MarketingCarrier_6_FlightNumber',
               'MarketingCarrierList_MarketingCarrier_7_AirlineID',
               'MarketingCarrierList_MarketingCarrier_7_FlightNumber',
               'MarketingCarrierList_MarketingCarrier_8_AirlineID',
               'MarketingCarrierList_MarketingCarrier_8_FlightNumber',
               'MarketingCarrierList_MarketingCarrier_9_AirlineID',
               'MarketingCarrierList_MarketingCarrier_9_FlightNumber',
               'MarketingCarrierList_MarketingCarrier_10_AirlineID',
               'MarketingCarrierList_MarketingCarrier_10_FlightNumber',
               'MarketingCarrierList_MarketingCarrier_11_AirlineID',
               'MarketingCarrierList_MarketingCarrier_11_FlightNumber', 'Departure_Terminal_Gate',
                               'Arrival_Terminal_Name', 'Arrival_Terminal_Gate', 'Arrival_Scheduled_Date',
                               'Departure_Terminal_Name', 'Departure_Scheduled_Date', 'Departure_Scheduled_Time', 'Arrival_Scheduled_Date', 'Arrival_Scheduled_Time',
                               'Status_Description', 'Departure_Status_Description', 'Arrival_Status_Description'], axis=1)
        print(df_minimized.count())
except Exception as e:
    logger.error(f"Erreur API {endpoint} : {e}")
    


_id                                       1140
Departure_AirportCode                     1140
Departure_Actual_Date                     1139
Departure_Actual_Time                     1139
Departure_Status_Code                     1140
Departure_weather_time                    1125
Departure_weather_rain                    1125
Departure_weather_snowfall                1125
Departure_weather_temperature_2m          1125
Departure_weather_relative_humidity_2m    1125
Departure_weather_wind_speed_100m         1125
Departure_weather_cloud_cover             1125
Arrival_AirportCode                       1140
Arrival_Actual_Date                       1140
Arrival_Actual_Time                       1140
Arrival_Status_Code                       1140
Arrival_weather_time                      1107
Arrival_weather_rain                      1107
Arrival_weather_snowfall                  1107
Arrival_weather_temperature_2m            1107
Arrival_weather_relative_humidity_2m      1107
Arrival_weath

In [2]:
print(df_minimized.isna().sum())
print(df_minimized.count())

_id                                          0
Departure_AirportCode                        0
Departure_Actual_Date                        1
Departure_Actual_Time                        1
Departure_Status_Code                        0
Departure_weather_time                      15
Departure_weather_rain                      15
Departure_weather_snowfall                  15
Departure_weather_temperature_2m            15
Departure_weather_relative_humidity_2m      15
Departure_weather_wind_speed_100m           15
Departure_weather_cloud_cover               15
Arrival_AirportCode                          0
Arrival_Actual_Date                          0
Arrival_Actual_Time                          0
Arrival_Status_Code                          0
Arrival_weather_time                        33
Arrival_weather_rain                        33
Arrival_weather_snowfall                    33
Arrival_weather_temperature_2m              33
Arrival_weather_relative_humidity_2m        33
Arrival_weath

In [3]:
print(df_minimized['Departure_Actual_Date'].unique())
print(df_minimized['Arrival_Actual_Date'].unique())
print(df_minimized['Status_Code'].unique())

df_minimized['Departure_Actual_Datetime'] = pd.to_datetime(df['Departure_Actual_Date'] + ' ' + df['Departure_Actual_Time'])
df_minimized['Arrival_Actual_Datetime'] = pd.to_datetime(df['Arrival_Actual_Date'] + ' ' + df['Arrival_Actual_Time'])
#print(df_minimized.head())

df_w_datetime = df_minimized.drop(['Departure_Actual_Date', 'Departure_Actual_Time', 'Arrival_Actual_Date', 'Arrival_Actual_Time'], axis=1)
#print(df_w_datetime.head())
print(df_w_datetime[['Departure_Actual_Datetime']].drop_duplicates())

['2025-10-10' nan '2025-10-09']
['2025-10-10' '2025-10-11' '2025-10-09']
['LD' 'OT' 'FE' 'DL' 'DV']
     Departure_Actual_Datetime
0          2025-10-10 05:56:00
1          2025-10-10 06:11:00
2          2025-10-10 06:26:00
3          2025-10-10 06:33:00
4          2025-10-10 06:53:00
...                        ...
1129       2025-10-09 20:14:00
1131       2025-10-09 21:54:00
1132       2025-10-09 20:52:00
1135       2025-10-09 22:01:00
1136       2025-10-09 19:55:00

[751 rows x 1 columns]


In [6]:
print(df['dt_txt'].unique())

KeyError: 'dt_txt'

In [4]:
import numpy as np

print(df_w_datetime.head())
df_w_datetime['ts_Departure_Actual_Datetime'] = pd.to_numeric(df_w_datetime['Departure_Actual_Datetime'].values)
df_w_datetime['ts_Arrival_Actual_Datetime'] = pd.to_numeric(df_w_datetime['Arrival_Actual_Datetime'].values)

print(df_w_datetime.columns)

                        _id Departure_AirportCode Departure_Status_Code  \
0  68eafd1b5dfd99e30302f5f9                   VIE                    DP   
1  68eafd1b5dfd99e30302f5fa                   VIE                    DP   
2  68eafd1b5dfd99e30302f5fb                   VIE                    DP   
3  68eafd1b5dfd99e30302f5fc                   VIE                    OT   
4  68eafd1b5dfd99e30302f5fd                   VIE                    DP   

  Departure_weather_time  Departure_weather_rain  Departure_weather_snowfall  \
0    2025-10-10T06:00:00                     0.0                         0.0   
1    2025-10-10T06:00:00                     0.0                         0.0   
2    2025-10-10T06:00:00                     0.0                         0.0   
3    2025-10-10T07:00:00                     0.0                         0.0   
4    2025-10-10T07:00:00                     0.0                         0.0   

   Departure_weather_temperature_2m  Departure_weather_relative_humi

In [7]:
endpoint = '/weathers-airport?airport_code=ORY'

try:
    response = requests.get(f"{API_BASE_URL}{endpoint}", timeout=10)
    if response.status_code == 200:
        dict_flattened = (flatten(record, '_') for record in response.json())
        weather = pd.DataFrame(dict_flattened)
        print(weather)
except Exception as e:
    logger.error(f"Erreur API {endpoint} : {e}")

Empty DataFrame
Columns: []
Index: []


In [22]:
print(weather.where(weather['Arrival_AirportCode'] == 'VIE'))

     _id  dt  main_temp  main_feels_like  main_temp_min  main_temp_max  \
0    NaN NaN        NaN              NaN            NaN            NaN   
1    NaN NaN        NaN              NaN            NaN            NaN   
40   NaN NaN        NaN              NaN            NaN            NaN   
80   NaN NaN        NaN              NaN            NaN            NaN   
2    NaN NaN        NaN              NaN            NaN            NaN   
..   ...  ..        ...              ...            ...            ...   
78   NaN NaN        NaN              NaN            NaN            NaN   
39   NaN NaN        NaN              NaN            NaN            NaN   
118  NaN NaN        NaN              NaN            NaN            NaN   
79   NaN NaN        NaN              NaN            NaN            NaN   
119  NaN NaN        NaN              NaN            NaN            NaN   

     main_pressure  main_sea_level  main_grnd_level  main_humidity  ...  \
0              NaN             NaN  

In [21]:
weather = weather.rename(columns={"AirportCode": "Arrival_AirportCode"})
df_w_datetime = df_w_datetime.sort_values("ts_Arrival_Actual_Datetime")
weather = weather.sort_values("dt")

print(weather.where(weather['Arrival_AirportCode'] == 'VIE'))

merged = pd.merge_asof(
    df_w_datetime,
    weather,
    by="Arrival_AirportCode",  # clé d’aéroport
    left_on="ts_Arrival_Actual_Datetime",
    right_on="dt",
    direction="nearest"  # on cherche le weather le plus proche
)

print(merged)
#print(merged.isna().sum())

                        _id_x Departure_AirportCode Departure_Status_Code  \
0    68e27cdc4ad73782ab0dd7b9                   NSI                    DP   
1    68e27cdc4ad73782ab0dd7ba                   DSS                    DP   
2    68e27d0d4ad73782ab0dd874                   GVA                    OT   
3    68e27cd74ad73782ab0dd790                   BKK                    DP   
4    68e27cd74ad73782ab0dd793                   SKP                    DP   
..                        ...                   ...                   ...   
411  68e27aa713a58ecb3720405f                   AMS                    DP   
412  68e27aa213a58ecb37204054                   FCO                    DP   
413  68e27aae13a58ecb37204065                   MAD                    DP   
414  68e27aba13a58ecb37204073                   GVA                    DP   
415  68e27aba13a58ecb3720407b                   GVA                    DP   

    Arrival_AirportCode Arrival_Status_Code OperatingCarrier_AirlineID  \
0

In [30]:
import time
import json
import pandas as pd
from datetime import datetime, timedelta


API_BASE_URL = "http://localhost:8000"  # Utiliser l'IP externe si configuré

airportCode_dep = df_w_datetime['Departure_AirportCode']
print(airportCode_dep[0])
print(df_w_datetime.loc[0])
endpoint = '/airports-by-code?airport_code='+str(airportCode_dep[0])

try:
  response = requests.get(f"{API_BASE_URL}{endpoint}", timeout=10)
  time.sleep(0.5)
  if response.status_code == 200:
    airport = response.json()
    print(airport[0]['Position']['Coordinate'])

    target_time = df_minimized['Departure_Actual_Datetime']

    start_date = (target_time[0] - timedelta(days=1)).strftime("%Y-%m-%d")
    end_date = (target_time[0] + timedelta(days=1)).strftime("%Y-%m-%d")

    # Requête API
    url = (
        "https://archive-api.open-meteo.com/v1/archive"
        f"?latitude={airport[0]['Position']['Coordinate']['Latitude']}&longitude={airport[0]['Position']['Coordinate']['Longitude']}"
        f"&start_date={start_date}&end_date={end_date}"
        "&hourly=rain,snowfall,temperature_2m,relative_humidity_2m,wind_speed_100m,cloud_cover"
    )
    
    response = requests.get(url)
    data = response.json()
    print(data)
    
    # Transformation en DataFrame
    df = pd.DataFrame(data["hourly"])
    df["time"] = pd.to_datetime(df["time"])
    
    # Trouver la ligne dont le timestamp est le plus proche de target_time
    closest = df.iloc[(df["time"] - target_time).abs().argsort()[:1]]
    
    print("Résultat le plus proche :")
    print(closest)
  else:
    print(airport, response.status_code, response.reason, response.text)
except requests.exceptions.HTTPError as err:
    print(response.status_code)
    print(f"Erreur lors de la requête : {err}")
    data = None  # Gérer l'échec

OUA
_id                              68e27a8913a58ecb37204024
Departure_AirportCode                                 OUA
Departure_Status_Code                                  DP
Arrival_AirportCode                                   BRU
Arrival_Status_Code                                    LD
OperatingCarrier_AirlineID                             SN
OperatingCarrier_FlightNumber                         256
Equipment_AircraftCode                                333
Status_Code                                            LD
Departure_Actual_Datetime             2025-10-03 21:13:00
Arrival_Actual_Datetime               2025-10-04 05:31:00
Name: 0, dtype: object
{'Latitude': 12.3533, 'Longitude': -1.5125}
{'latitude': 12.337434, 'longitude': -1.5419312, 'generationtime_ms': 0.20563602447509766, 'utc_offset_seconds': 0, 'timezone': 'GMT', 'timezone_abbreviation': 'GMT', 'elevation': 305.0, 'hourly_units': {'time': 'iso8601', 'rain': 'mm', 'snowfall': 'cm', 'temperature_2m': '°C', 'relative_hu