# IMPORTANT

## This notebook must be run from the <code>acropole</code> conda environment as per https://github.com/DGAC/Acropole

## Do NOT install anything else except <code>conda install -c conda-forge tqdm</code>

In [4]:
import os
import joblib
import warnings
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from copy import deepcopy as copy
from acropole import FuelEstimator
from joblib import Parallel, delayed

# Silence two noisy UserWarnings: any whose message contains "Aircraft" and any
# whose message contains "A worker stopped " (the patterns are regexes).
# NOTE(review): presumably these come from Acropole (unsupported aircraft
# types) and from joblib's worker pool respectively -- confirm.
warnings.filterwarnings("ignore", message=".*Aircraft.*", category=UserWarning)
warnings.filterwarnings("ignore", message=".*A worker stopped .*", category=UserWarning)

# Input locations, resolved against the current working directory.
data_folder = os.path.join(os.getcwd(), "data")
flights_folder = os.path.join(os.getcwd(), "flightDfs")
# Flight tables; later cells read their `flight_id` and `aircraft_type` columns.
cha_df = pd.read_csv(os.path.join(data_folder, "challenge_set.csv"))
final_sub_df = pd.read_csv(os.path.join(data_folder, "final_submission_set.csv"))
# FAA aircraft characteristics workbook, read relative to the working
# directory; later cells use its ICAO_Code, Model_FAA and Model_BADA columns.
aircraft_df = pd.read_excel("FAA-Aircraft-Char-DB-AC-150-5300-13B-App-2023-09-07.xlsx")
# Aircraft parameter table shipped in a local Acropole checkout. The relative
# path assumes that checkout sits three directory levels above this notebook.
acropole_folder = "../../../Acropole/acropole/data/"
acropole_csv_path = os.path.join(acropole_folder, "aircraft_params.csv")
acropole_df = pd.read_csv(acropole_csv_path)

In [10]:
# Build `replace_for_acropole_dict`: for every aircraft type that occurs in the
# challenge / final submission sets but is absent from Acropole's parameter
# table, look for an Acropole-supported type code that appears inside the FAA
# table's model names (Model_FAA or Model_BADA) for that aircraft.
replace_for_acropole_dict = {}
relevant_actypes = set(cha_df.aircraft_type).union(set(final_sub_df.aircraft_type))
missing_acropole_aircraft_list = np.array(
    sorted(relevant_actypes.difference(set(acropole_df.ACFT_ICAO_TYPE)))
)
for missing_acropole_aircraft in missing_acropole_aircraft_list:
    # Iterating a numpy array yields numpy scalars; convert to a plain str so
    # the dict keys are ordinary Python strings.
    missing_acropole_aircraft = missing_acropole_aircraft.item()
    aircraft_row_df = aircraft_df.loc[aircraft_df.ICAO_Code == missing_acropole_aircraft]
    if aircraft_row_df.empty:
        # The FAA table does not know this type: nothing to match against.
        continue
    for available_acropole_actype in acropole_df.ACFT_ICAO_TYPE:
        # `regex=False` treats the type code as literal text and `na=False`
        # makes a missing model name count as "no match" (a NaN result would
        # otherwise be truthy). `.any()` copes with the FAA table holding
        # several rows for the same ICAO code.
        col_1_similar_bool = bool(
            aircraft_row_df.Model_FAA.str.contains(
                available_acropole_actype, regex=False, na=False
            ).any()
        )
        col_2_similar_bool = bool(
            aircraft_row_df.Model_BADA.str.contains(
                available_acropole_actype, regex=False, na=False
            ).any()
        )
        if col_1_similar_bool or col_2_similar_bool:
            # No `break` on purpose: when several supported types match, the
            # last one in `acropole_df` order wins.
            replace_for_acropole_dict[missing_acropole_aircraft] = available_acropole_actype

In [14]:
def process_flight_fuel_batch(flight_ids_batch, replace_for_acropole_dict, meta_flights_df, flights_folder):
    """Estimate the summed fuel flow of every flight in a batch with Acropole.

    Each flight is estimated with its original aircraft type and with the type
    substituted through ``replace_for_acropole_dict``.

    Parameters
    ----------
    flight_ids_batch : iterable
        Flight ids to process. Each flight is loaded with ``joblib.load`` from
        ``flights_folder/<flight_id>``.
    replace_for_acropole_dict : dict
        Maps an aircraft type code to the type code to use instead.
    meta_flights_df : pandas.DataFrame
        Must hold exactly one row per processed flight, with the columns
        ``flight_id`` and ``aircraft_type``.
    flights_folder : str
        Folder containing the per-flight files.

    Returns
    -------
    pandas.DataFrame
        One row per flight with the columns ``flight_id``, ``total_fuel``
        (original type code) and ``total_fuel_replace`` (substituted type
        code). A sum of exactly 0 is reported as NaN.

    Raises
    ------
    ValueError
        If ``meta_flights_df`` has no row, or more than one row, for a flight
        id (raised by ``Series.item()``).

    Notes
    -----
    The totals are the plain sum of the ``fuel_flow`` samples.
    NOTE(review): that only equals a mass in kg if ``fuel_flow`` is in kg/s and
    the samples are one second apart -- confirm against the trajectory data.
    """
    # Building the estimator does not depend on the flight, so do it once per
    # batch instead of twice per flight.
    fuel_estimator = FuelEstimator()

    def _total_fuel(fuel_df):
        # Sum the estimated fuel flow; a sum of exactly 0 is mapped to NaN.
        # NOTE(review): presumably 0 means Acropole produced no estimate
        # (e.g. unsupported aircraft type) -- confirm.
        flight_fuel = fuel_estimator.estimate(fuel_df, timestamp="second")
        total_fuel_kg = flight_fuel.fuel_flow.sum().item()
        return np.nan if total_fuel_kg == 0 else total_fuel_kg

    flight_fuel_data = []
    for flight_id in flight_ids_batch:
        flight_df = joblib.load(os.path.join(flights_folder, str(flight_id)))
        typecode = meta_flights_df.loc[
            meta_flights_df.flight_id == flight_id, "aircraft_type"
        ].item()

        fuel_df = flight_df[
            ["groundspeed", "altitude", "vertical_rate", "TAS"]
        ].copy()
        fuel_df["typecode"] = typecode
        # The index values are handed to the estimator as the time column.
        fuel_df["second"] = fuel_df.index.values.astype(float)
        fuel_df = fuel_df.rename(columns={"TAS": "airspeed"})

        # without replacing
        total_fuel_kg_1 = _total_fuel(fuel_df)

        # with replacing
        replacement_typecode = replace_for_acropole_dict.get(typecode, typecode)
        if replacement_typecode == typecode:
            # The substitution is a no-op for this type, so the estimator
            # input is unchanged: reuse the result instead of re-running it.
            total_fuel_kg_2 = total_fuel_kg_1
        else:
            fuel_df["typecode"] = replacement_typecode
            total_fuel_kg_2 = _total_fuel(fuel_df)

        flight_fuel_data.append([flight_id, total_fuel_kg_1, total_fuel_kg_2])

    flight_fuel_df = pd.DataFrame(
        data=flight_fuel_data,
        columns=["flight_id", "total_fuel", "total_fuel_replace"],
    )
    return flight_fuel_df

In [None]:
# Every entry of `flights_folder` whose name contains no "." is treated as a
# flight file named after its integer flight id.
flight_ids = sorted(int(item) for item in os.listdir(flights_folder) if "." not in item)

# Split the ids into `num_batches` equally sized contiguous batches ...
num_batches = 1000
batch_size = len(flight_ids) // num_batches
flight_ids_batches = [
    flight_ids[i * batch_size:(i + 1) * batch_size]
    for i in range(num_batches)
]
# ... then hand the leftover ids (always fewer than `num_batches`) out one per
# batch, starting with the first batch.
missing_flight_ids = flight_ids[num_batches * batch_size:]
for i, missing_flight_id in enumerate(missing_flight_ids):
    flight_ids_batches[i] = flight_ids_batches[i] + [missing_flight_id]

# NOTE(review): aircraft types are looked up in `cha_df` only. If
# `flights_folder` also holds flights that appear solely in `final_sub_df`,
# the per-flight lookup in the worker raises ValueError -- confirm.
# The arguments are passed as-is (no deepcopy): the worker does not mutate them.
all_flight_fuel_df_list = Parallel(n_jobs=12)(
    delayed(process_flight_fuel_batch)(
        flights_folder=flights_folder,
        flight_ids_batch=flight_ids_batch,
        replace_for_acropole_dict=replace_for_acropole_dict,
        # Ship only this batch's metadata rows to the worker.
        meta_flights_df=cha_df.loc[
            cha_df.flight_id.isin(flight_ids_batch),
            ["flight_id", "aircraft_type"],
        ].copy(),
    )
    for flight_ids_batch in tqdm(flight_ids_batches)
)
flight_fuel_df = pd.concat(all_flight_fuel_df_list, axis=0)
joblib.dump(flight_fuel_df, "flight_fuel_df.pkl")