In [1]:
import sys
from pathlib import Path
from typing import List, Dict

from loguru import logger
from pyspark.sql import SparkSession, DataFrame
from pyspark.sql.functions import col

In [2]:
# Folder holding the parquet files to load.
# NOTE(review): this is an absolute, machine-specific path; adjust it when
# running the notebook on another machine.
LOADING_PATH = '/mnt/shared/repos/metropolis/M2_data_analysis_platform/output'

# Name of each log type. `load_dataframes()` lower-cases these names to build
# the parquet file names and uses them as keys of the returned dictionary.
CONF_LOG_PREFIX = 'CONFLOG'
FLST_LOG_PREFIX = 'FLSTLOG'
GEO_LOG_PREFIX = 'GEOLOG'
LOS_LOG_PREFIX = 'LOSLOG'
REG_LOG_PREFIX = 'REGLOG'

# All the dataframes loaded by this notebook.
DATAFRAMES_NAMES = [
    CONF_LOG_PREFIX,
    FLST_LOG_PREFIX,
    GEO_LOG_PREFIX,
    LOS_LOG_PREFIX,
    REG_LOG_PREFIX,
]

Give access to the constants that define the table attribute names (imported from `schemas.tables_attributes`).

In [3]:
# Make the `platform_code` folder, sibling of the kernel's current working
# directory, importable. This relies on the notebook being run from its own
# folder.
sys.path.append(str(Path.cwd().parent / 'platform_code'))
# The star import is kept on purpose: the cells below use constants that are
# not defined in this notebook (e.g. SCENARIO_NAME, ACID, EFF1), presumably
# provided by this module — TODO confirm and consider explicit imports.
from schemas.tables_attributes import *

In [4]:
def load_dataframes(files_names: List[str], loading_path: str, spark: SparkSession) -> Dict[str, DataFrame]:
    """ Reads one parquet dataset per requested name and returns them by name.

    For every entry of `files_names`, the parquet located at
    `<loading_path>/<name in lower case>.parquet` is read with the given
    spark session. The location is logged before each read.

    :param files_names: names of the files to load, without extension.
    :param loading_path: folder where the parquet files are stored.
    :param spark: spark session used to read the files.
    :return: dictionary with the loaded dataframes, keyed by the name
     exactly as it was passed in `files_names`.
    """
    loaded = {}

    for name in files_names:
        # Only the file name on disk is lower-cased; the dictionary key
        # keeps the original casing.
        parquet_path = Path(loading_path) / f'{name.lower()}.parquet'
        logger.info('Loading dataframe from `{}`.', parquet_path)
        loaded[name] = spark.read.parquet(str(parquet_path))

    return loaded

In [5]:
# Get the active Spark session, or create one named 'Notebook' if none exists.
spark = (
    SparkSession.builder
    .appName('Notebook')
    .getOrCreate()
)

22/03/24 14:08:50 WARN Utils: Your hostname, GRILUX-DEV resolves to a loopback address: 127.0.1.1; using 192.168.2.150 instead (on interface wlp3s0)
22/03/24 14:08:50 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
22/03/24 14:09:01 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [6]:
# Load every log listed in DATAFRAMES_NAMES; the result is keyed by log name.
input_dataframes = load_dataframes(
    files_names=DATAFRAMES_NAMES,
    loading_path=LOADING_PATH,
    spark=spark,
)

2022-03-24 14:09:05.518 | INFO     | __main__:load_dataframes:16 - Loading dataframe from `/mnt/shared/repos/metropolis/M2_data_analysis_platform/output/conflog.parquet`.
2022-03-24 14:09:10.541 | INFO     | __main__:load_dataframes:16 - Loading dataframe from `/mnt/shared/repos/metropolis/M2_data_analysis_platform/output/flstlog.parquet`.
2022-03-24 14:09:10.793 | INFO     | __main__:load_dataframes:16 - Loading dataframe from `/mnt/shared/repos/metropolis/M2_data_analysis_platform/output/geolog.parquet`.
2022-03-24 14:09:10.985 | INFO     | __main__:load_dataframes:16 - Loading dataframe from `/mnt/shared/repos/metropolis/M2_data_analysis_platform/output/loslog.parquet`.
2022-03-24 14:09:11.189 | INFO     | __main__:load_dataframes:16 - Loading dataframe from `/mnt/shared/repos/metropolis/M2_data_analysis_platform/output/reglog.parquet`.


For these metrics we are going to work with the combined FLST log and the flight intentions.

In [7]:
# All the efficiency metrics below are computed from the FLST log dataframe.
dataframe = input_dataframes[FLST_LOG_PREFIX]

In [8]:
# Display the dataframe's repr to inspect its column names and types.
dataframe

DataFrame[Flight_id: bigint, Scenario: string, ACID: string, Origin_LAT: string, Origin_LON: string, Destination_LAT: string, Destination_LON: string, Baseline_departure_time: int, cruising_speed: double, Vertical_Speed: double, Priority: int, loitering: boolean, Baseline_2D_distance: string, Baseline_vertical_distance: double, Baseline_ascending_distance: double, Baseline_3D_distance: double, Baseline_flight_time: double, Baseline_arrival_time: double, Deletion_Time: double, Spawn_Time: double, Flight_time: string, Distance_2D: string, Distance_3D: double, Distance_ALT: double, Deletion_LAT: double, Deletion_LON: double, Deletion_ALT: double, Distance_ascend: double, Work_Done: double]

# EFF-1: Horizontal distance route efficiency
Ratio representing the length of the ideal horizontal route to the actual horizontal route

In [9]:
# EFF-1 = baseline 2D distance / actual 2D distance, per scenario and aircraft.
# The schema displayed above lists both 2D distance columns as `string`, so
# they are cast to double explicitly instead of relying on Spark's implicit
# string coercion in the division.
# The result keeps only the scenario, the ACID and the metric, using the same
# single projection shape as the other EFF cells.
result = dataframe.select(
    SCENARIO_NAME,
    ACID,
    (
        col(BASELINE_2D_DISTANCE).cast('double')
        / col(DISTANCE_2D).cast('double')
    ).alias(EFF1),
)

In [10]:
# Print the first rows of the EFF-1 result as a quick sanity check.
result.show()

+------------------+----+------------------+
|          Scenario|ACID|              EFF1|
+------------------+----+------------------+
|1_very_low_40_8_R2| D10|0.9608191768054934|
|1_very_low_40_8_R2| D54|1.3723764634171112|
|1_very_low_40_8_R2| D71| 0.986615721143255|
|1_very_low_40_8_R2| D86|0.6955052389554486|
|1_very_low_40_8_R2| D35|0.9702935756455379|
|1_very_low_40_8_R2|D151|0.8564444665672379|
|1_very_low_40_8_R2|D103| 0.717308511613168|
|1_very_low_40_8_R2|  D5|1.1128596443377299|
|1_very_low_40_8_R2| D85|0.9506608012506442|
|1_very_low_40_8_R2|D158|0.8863571170429271|
|1_very_low_40_8_R2|  D9|0.7569072761255073|
|1_very_low_40_8_R2| D95| 0.961903398443339|
|1_very_low_40_8_R2| D13|1.4695376745367321|
|1_very_low_40_8_R2|D165|0.7777560566330564|
|1_very_low_40_8_R2| D51|0.6212689771532777|
|1_very_low_40_8_R2|D111|0.9290942686696004|
|1_very_low_40_8_R2| D80| 0.779269642015952|
|1_very_low_40_8_R2|D243| 0.773636817073662|
|1_very_low_40_8_R2|D182|0.8608714171529597|
|1_very_lo

                                                                                

# EFF-2: Vertical distance route efficiency
Ratio representing the length of the ideal vertical route to the actual vertical route

In [11]:
# EFF-2 = baseline vertical distance / actual vertical distance, per scenario
# and aircraft. Only the scenario, the ACID and the metric are kept.
result = dataframe.select(
    SCENARIO_NAME,
    ACID,
    (col(BASELINE_VERTICAL_DISTANCE) / col(DISTANCE_ALT)).alias(EFF2),
)

In [12]:
# Print the first rows of the EFF-2 result as a quick sanity check.
result.show()

+------------------+----+-------------------+
|          Scenario|ACID|               EFF2|
+------------------+----+-------------------+
|1_very_low_40_8_R2| D10| 0.7488896774419824|
|1_very_low_40_8_R2| D54| 0.8649445332694427|
|1_very_low_40_8_R2| D71|  2.342047441293653|
|1_very_low_40_8_R2| D86| 0.2979610377208245|
|1_very_low_40_8_R2| D35|0.25603549266413506|
|1_very_low_40_8_R2|D151| 0.2064746954420814|
|1_very_low_40_8_R2|D103| 0.4010196325176394|
|1_very_low_40_8_R2|  D5|0.39121434301321506|
|1_very_low_40_8_R2| D85|0.15469155908233978|
|1_very_low_40_8_R2|D158| 0.3421364861134499|
|1_very_low_40_8_R2|  D9| 0.3215113433756687|
|1_very_low_40_8_R2| D95| 0.1606758841779672|
|1_very_low_40_8_R2| D13| 0.4214412151667422|
|1_very_low_40_8_R2|D165|0.12044049666368782|
|1_very_low_40_8_R2| D51|0.13773516241650005|
|1_very_low_40_8_R2|D111|0.17076084286527474|
|1_very_low_40_8_R2| D80|0.15023714557836682|
|1_very_low_40_8_R2|D243|0.21215686411787774|
|1_very_low_40_8_R2|D182|0.1844062

# EFF-3: Ascending route efficiency
Ratio representing the length of the ascending distance in the ideal route to the length of the ascending distance of the actual route

In [13]:
# EFF-3 = baseline ascending distance / actual ascending distance, per
# scenario and aircraft. Only the scenario, the ACID and the metric are kept.
result = dataframe.select(
    SCENARIO_NAME,
    ACID,
    (col(BASELINE_ASCENDING_DISTANCE) / col(DISTANCE_ASCEND)).alias(EFF3),
)

In [14]:
# Print the first rows of the EFF-3 result as a quick sanity check.
result.show()

+------------------+----+-------------------+
|          Scenario|ACID|               EFF3|
+------------------+----+-------------------+
|1_very_low_40_8_R2| D10| 0.7488896774419824|
|1_very_low_40_8_R2| D54| 0.8649445332694427|
|1_very_low_40_8_R2| D71|  2.342047441293653|
|1_very_low_40_8_R2| D86| 0.2979610377208245|
|1_very_low_40_8_R2| D35|0.25603549266413506|
|1_very_low_40_8_R2|D151| 0.2064746954420814|
|1_very_low_40_8_R2|D103| 0.4010196325176394|
|1_very_low_40_8_R2|  D5|0.39121434301321506|
|1_very_low_40_8_R2| D85|0.15469155908233978|
|1_very_low_40_8_R2|D158| 0.3421364861134499|
|1_very_low_40_8_R2|  D9| 0.3215113433756687|
|1_very_low_40_8_R2| D95| 0.1606758841779672|
|1_very_low_40_8_R2| D13| 0.4214412151667422|
|1_very_low_40_8_R2|D165|0.12044049666368782|
|1_very_low_40_8_R2| D51|0.13773516241650005|
|1_very_low_40_8_R2|D111|0.17076084286527474|
|1_very_low_40_8_R2| D80|0.15023714557836682|
|1_very_low_40_8_R2|D243|0.21215686411787774|
|1_very_low_40_8_R2|D182|0.1844062

# EFF-4: 3D distance route efficiency
Ratio representing the 3D length of the ideal route to the 3D length of the actual route

In [15]:
# EFF-4 = baseline 3D distance / actual 3D distance, per scenario and
# aircraft. Only the scenario, the ACID and the metric are kept.
result = dataframe.select(
    SCENARIO_NAME,
    ACID,
    (col(BASELINE_3D_DISTANCE) / col(DISTANCE_3D)).alias(EFF4),
)

In [16]:
# Print the first rows of the EFF-4 result as a quick sanity check.
result.show()

+------------------+----+------------------+
|          Scenario|ACID|              EFF4|
+------------------+----+------------------+
|1_very_low_40_8_R2| D10|0.9442787850499214|
|1_very_low_40_8_R2| D54|1.3615898651835148|
|1_very_low_40_8_R2| D71|0.9895395564939459|
|1_very_low_40_8_R2| D86|0.6912133709592109|
|1_very_low_40_8_R2| D35|0.9502777218974685|
|1_very_low_40_8_R2|D151|0.8214195002723466|
|1_very_low_40_8_R2|D103|0.7096781312791104|
|1_very_low_40_8_R2|  D5|1.1002670228183125|
|1_very_low_40_8_R2| D85| 0.923034908493389|
|1_very_low_40_8_R2|D158|0.8691639321328346|
|1_very_low_40_8_R2|  D9| 0.750372462025529|
|1_very_low_40_8_R2| D95|  0.94478907088306|
|1_very_low_40_8_R2| D13|1.4483626924169273|
|1_very_low_40_8_R2|D165|0.7360279670033341|
|1_very_low_40_8_R2| D51|0.6023764442497584|
|1_very_low_40_8_R2|D111|0.9053087706922391|
|1_very_low_40_8_R2| D80|0.7642739947817306|
|1_very_low_40_8_R2|D243|0.7514584613834485|
|1_very_low_40_8_R2|D182|0.8456303479542867|
|1_very_lo

# EFF-5: Route duration efficiency
Ratio representing the time duration of the ideal route to the time duration of the actual route

In [17]:
# EFF-5 = baseline flight time / actual flight time, per scenario and aircraft.
# The schema displayed above lists `Flight_time` as `string`, so it is cast to
# double explicitly instead of relying on Spark's implicit string coercion in
# the division.
# Only the scenario, the ACID and the metric are kept.
result = dataframe.select(
    SCENARIO_NAME,
    ACID,
    (col(BASELINE_FLIGHT_TIME) / col(FLIGHT_TIME).cast('double')).alias(EFF5),
)

In [18]:
# Print the first rows of the EFF-5 result as a quick sanity check.
result.show()

+------------------+----+-------------------+
|          Scenario|ACID|               EFF5|
+------------------+----+-------------------+
|1_very_low_40_8_R2| D10| 0.6686367592067566|
|1_very_low_40_8_R2| D54| 1.0046308421667616|
|1_very_low_40_8_R2| D71| 0.7236735625065744|
|1_very_low_40_8_R2| D86| 0.4377566792578369|
|1_very_low_40_8_R2| D35|  0.694186295648401|
|1_very_low_40_8_R2|D151|0.41192136735883295|
|1_very_low_40_8_R2|D103| 0.4627735822907179|
|1_very_low_40_8_R2|  D5| 0.9924904188378142|
|1_very_low_40_8_R2| D85| 0.5024162423372425|
|1_very_low_40_8_R2|D158| 0.4595177209632804|
|1_very_low_40_8_R2|  D9| 0.7019232154215249|
|1_very_low_40_8_R2| D95| 0.5996442976574768|
|1_very_low_40_8_R2| D13| 1.3448234975739657|
|1_very_low_40_8_R2|D165|0.33477565577859153|
|1_very_low_40_8_R2| D51|0.48363602264578465|
|1_very_low_40_8_R2|D111| 0.6255753200329853|
|1_very_low_40_8_R2| D80| 0.5071576713561103|
|1_very_low_40_8_R2|D243|0.32289081029585354|
|1_very_low_40_8_R2|D182|0.4004472

# EFF-6: Departure delay
Time duration from the planned departure time until the actual departure time of the aircraft

In [23]:
# EFF-6 = actual spawn time - baseline (planned) departure time, per scenario
# and aircraft. Only the scenario, the ACID and the metric are kept.
result = dataframe.select(
    SCENARIO_NAME,
    ACID,
    (col(SPAWN_TIME) - col(BASELINE_DEPARTURE_TIME)).alias(EFF6),
)

In [25]:
# Print the first rows of the EFF-6 result as a quick sanity check.
# NOTE(review): the output saved below lists Spawn_Time,
# Baseline_departure_time and an EFF5 column, which does not match the
# select() of the cell that builds `result`; it looks like it comes from an
# earlier version of that cell — TODO re-run the notebook and confirm.
result.show()

+------------------+----+----------+-----------------------+----+
|          Scenario|ACID|Spawn_Time|Baseline_departure_time|EFF5|
+------------------+----+----------+-----------------------+----+
|1_very_low_40_8_R2| D10|       8.0|                      8| 0.0|
|1_very_low_40_8_R2| D54|      35.0|                     35| 0.0|
|1_very_low_40_8_R2| D71|      45.0|                     45| 0.0|
|1_very_low_40_8_R2| D86|      57.0|                     57| 0.0|
|1_very_low_40_8_R2| D35|      21.0|                     21| 0.0|
|1_very_low_40_8_R2|D151|     104.0|                    104| 0.0|
|1_very_low_40_8_R2|D103|      68.0|                     68| 0.0|
|1_very_low_40_8_R2|  D5|       6.0|                      6| 0.0|
|1_very_low_40_8_R2| D85|      57.0|                     57| 0.0|
|1_very_low_40_8_R2|D158|     110.0|                    110| 0.0|
|1_very_low_40_8_R2|  D9|       8.0|                      8| 0.0|
|1_very_low_40_8_R2| D95|      63.0|                     63| 0.0|
|1_very_lo