In [94]:
import sys
from pathlib import Path
from typing import List, Dict

from loguru import logger
from pyspark.sql import SparkSession, DataFrame
from pyspark.sql.functions import col, mean, length, lit

In [2]:
# Prefixes that identify each kind of log. `load_dataframes()` lower-cases them
# to build the parquet file names (e.g. 'FLSTLOG' -> 'flstlog.parquet').
CONF_LOG_PREFIX = 'CONFLOG'
FLST_LOG_PREFIX = 'FLSTLOG'
GEO_LOG_PREFIX = 'GEOLOG'
LOS_LOG_PREFIX = 'LOSLOG'
REG_LOG_PREFIX = 'REGLOG'
# Folder that holds the parquet files to load.
# NOTE(review): absolute, machine-specific path -- adjust it before running the
# notebook on another machine.
LOADING_PATH = '/mnt/shared/repos/metropolis/M2_data_analysis_platform/output'
# Names of the dataframes that are passed to `load_dataframes()`.
DATAFRAMES_NAMES = [CONF_LOG_PREFIX, FLST_LOG_PREFIX, GEO_LOG_PREFIX, LOS_LOG_PREFIX, REG_LOG_PREFIX]

Give access to the constants that define the table attributes by adding the `platform_code` folder to the Python path.

In [78]:
# Make the `platform_code` folder (a sibling of the current working directory)
# importable. The membership check keeps `sys.path` free of duplicated entries
# when this cell is executed more than once.
platform_code_path = str(Path(Path().absolute().parent, 'platform_code'))
if platform_code_path not in sys.path:
    sys.path.append(platform_code_path)

# Import explicitly the constants used in this notebook instead of `import *`,
# so that it is clear where every name comes from.
from schemas.tables_attributes import (
    BASELINE_ARRIVAL_TIME,
    CAP1,
    CAP2,
    DEL_TIME,
    SAF2,
    SCENARIO_NAME,
)

In [4]:
def load_dataframes(files_names: List[str], loading_path: str, spark: SparkSession) -> Dict[str, DataFrame]:
    """ Reads one parquet file per requested name and returns the resulting
    dataframes keyed by name.

    Each file is read from `<loading_path>/<name in lower case>.parquet`.

    :param files_names: list of the names of the files (without extension).
    :param loading_path: path of the folder where the files are saved.
    :param spark: spark session used to read the files.
    :return: dictionary with the dataframes loaded from the files, with the
     name received in `files_names` as key.
    """
    base_folder = Path(loading_path)
    loaded = {}

    for name in files_names:
        parquet_path = base_folder / f'{name.lower()}.parquet'
        logger.info('Loading dataframe from `{}`.', parquet_path)
        loaded[name] = spark.read.parquet(str(parquet_path))

    return loaded

In [5]:
# Reuse the active Spark session if there is one, otherwise create it.
spark = (
    SparkSession.builder
    .appName('Notebook')
    .getOrCreate()
)

22/04/04 16:46:07 WARN Utils: Your hostname, GRILUX-DEV resolves to a loopback address: 127.0.1.1; using 192.168.2.150 instead (on interface wlp3s0)
22/04/04 16:46:07 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
22/04/04 16:46:18 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [6]:
# Load every log listed in DATAFRAMES_NAMES; the result is keyed by log prefix.
input_dataframes = load_dataframes(
    files_names=DATAFRAMES_NAMES,
    loading_path=LOADING_PATH,
    spark=spark,
)

2022-04-04 16:46:21.893 | INFO     | __main__:load_dataframes:16 - Loading dataframe from `/mnt/shared/repos/metropolis/M2_data_analysis_platform/output/conflog.parquet`.
2022-04-04 16:46:27.201 | INFO     | __main__:load_dataframes:16 - Loading dataframe from `/mnt/shared/repos/metropolis/M2_data_analysis_platform/output/flstlog.parquet`.
2022-04-04 16:46:27.541 | INFO     | __main__:load_dataframes:16 - Loading dataframe from `/mnt/shared/repos/metropolis/M2_data_analysis_platform/output/geolog.parquet`.
2022-04-04 16:46:27.816 | INFO     | __main__:load_dataframes:16 - Loading dataframe from `/mnt/shared/repos/metropolis/M2_data_analysis_platform/output/loslog.parquet`.
2022-04-04 16:46:28.078 | INFO     | __main__:load_dataframes:16 - Loading dataframe from `/mnt/shared/repos/metropolis/M2_data_analysis_platform/output/reglog.parquet`.


For these metrics we are going to work with the combined FLST log and the flight intentions.

# CAP-1: Average demand delay
Although a flight intention delay represents a direct measure of the system efficiency, it can also be used as a proxy for the lack of system capacity, as previously explained. The more flights are delayed, the bigger the capacity problem is. Hence, it can be used for the relative comparison of the concepts.

Average demand delay is computed as the arithmetic mean of the delays of all flight intentions in a scenario.

In [7]:
# CAP-1 is computed from the FLST log only.
dataframe = input_dataframes[FLST_LOG_PREFIX]

In [8]:
# Per-row difference DEL_TIME - BASELINE_ARRIVAL_TIME, averaged per scenario.
flight_delay = col(DEL_TIME) - col(BASELINE_ARRIVAL_TIME)
cap_1_result = (
    dataframe
    .select(SCENARIO_NAME, BASELINE_ARRIVAL_TIME, DEL_TIME)
    .groupby(SCENARIO_NAME)
    .agg(mean(flight_delay).alias(CAP1))
)

In [9]:
# Runs the aggregation and prints the CAP1 value of each scenario.
cap_1_result.show()

[Stage 5:>                                                          (0 + 4) / 4]

+------------------+------------------+
|          Scenario|              CAP1|
+------------------+------------------+
|1_very_low_40_8_R2|173.02193691854063|
|3_very_low_40_8_R2|173.02193691854063|
|2_very_low_40_8_R1|173.02193691854063|
|2_very_low_40_8_W1|173.02193691854063|
|1_very_low_40_8_R1|173.02193691854063|
|3_very_low_40_8_W1|173.02193691854063|
|3_very_low_40_8_R1|173.02193691854063|
|1_very_low_40_8_W1|173.02193691854063|
|2_very_low_40_8_R2|173.02193691854063|
|   1_very_low_40_8|173.02193691854063|
|   2_very_low_40_8|173.02193691854063|
|   3_very_low_40_8|173.02193691854063|
+------------------+------------------+



                                                                                

# CAP-2: Average number of intrusions
It is the ratio of the total number of intrusions to the number of flight intentions in the scenario.

In [10]:
# Logs needed for CAP-2: the LOS log is used to count the intrusions and the
# FLST log to count the flights.
los_log_df, flst_log_df = (
    input_dataframes[LOS_LOG_PREFIX],
    input_dataframes[FLST_LOG_PREFIX],
)

In [11]:
# Number of rows of the LOS log per scenario, stored in the SAF2 column.
saf_2_result = (
    los_log_df
    .groupBy(SCENARIO_NAME)
    .count()
    .withColumnRenamed('count', SAF2)
)

In [12]:
# Prints the number of LOS log rows counted for each scenario.
saf_2_result.show()

+------------------+----+
|          Scenario|SAF2|
+------------------+----+
|2_very_low_40_8_W1| 785|
|3_very_low_40_8_W1| 785|
|2_very_low_40_8_R1| 785|
|3_very_low_40_8_R1| 785|
|1_very_low_40_8_W1| 785|
|1_very_low_40_8_R1| 785|
|1_very_low_40_8_R2| 785|
|3_very_low_40_8_R2| 785|
|2_very_low_40_8_R2| 785|
|   1_very_low_40_8| 785|
|   2_very_low_40_8| 785|
|   3_very_low_40_8| 785|
+------------------+----+



In [13]:
# Number of rows of the FLST log per scenario, stored as 'num_flights'.
number_of_flights = (
    flst_log_df
    .groupby(SCENARIO_NAME)
    .count()
    .withColumnRenamed('count', 'num_flights')
)

In [14]:
# Prints the number of FLST log rows counted for each scenario.
number_of_flights.show()

+------------------+-----------+
|          Scenario|num_flights|
+------------------+-----------+
|1_very_low_40_8_R2|       3760|
|3_very_low_40_8_R2|       3760|
|2_very_low_40_8_R1|       3760|
|2_very_low_40_8_W1|       3760|
|1_very_low_40_8_R1|       3760|
|3_very_low_40_8_W1|       3760|
|3_very_low_40_8_R1|       3760|
|1_very_low_40_8_W1|       3760|
|2_very_low_40_8_R2|       3760|
|   1_very_low_40_8|       3760|
|   2_very_low_40_8|       3760|
|   3_very_low_40_8|       3760|
+------------------+-----------+



In [15]:
# Ratio between the number of intrusions and the number of flights of each
# scenario.
# The join starts from `number_of_flights` and is a left join: a scenario with
# no rows in the LOS log does not appear in `saf_2_result`, and with an inner
# join it would silently disappear from the results instead of getting CAP2 = 0.
cap_2_result = (
    number_of_flights
    .join(saf_2_result, on=SCENARIO_NAME, how='left')
    .fillna(0, subset=[SAF2])
    .withColumn(CAP2, col(SAF2) / col('num_flights'))
    .select(SCENARIO_NAME, CAP2)
)

In [16]:
# Prints the CAP2 value of each scenario.
cap_2_result.show()

+------------------+-------------------+
|          Scenario|               CAP2|
+------------------+-------------------+
|1_very_low_40_8_R2|0.20877659574468085|
|3_very_low_40_8_R2|0.20877659574468085|
|2_very_low_40_8_R1|0.20877659574468085|
|2_very_low_40_8_W1|0.20877659574468085|
|1_very_low_40_8_R1|0.20877659574468085|
|3_very_low_40_8_W1|0.20877659574468085|
|3_very_low_40_8_R1|0.20877659574468085|
|1_very_low_40_8_W1|0.20877659574468085|
|2_very_low_40_8_R2|0.20877659574468085|
|   1_very_low_40_8|0.20877659574468085|
|   2_very_low_40_8|0.20877659574468085|
|   3_very_low_40_8|0.20877659574468085|
+------------------+-------------------+



Join the results

In [85]:
# CAP1 and CAP2 side by side for the scenarios present in both results.
results = cap_1_result.join(cap_2_result, on=SCENARIO_NAME, how='inner')

In [86]:
# Prints CAP1 and CAP2 of each scenario.
results.show()

+------------------+------------------+-------------------+
|          Scenario|              CAP1|               CAP2|
+------------------+------------------+-------------------+
|1_very_low_40_8_R2|173.02193691854063|0.20877659574468085|
|3_very_low_40_8_R2|173.02193691854063|0.20877659574468085|
|2_very_low_40_8_R1|173.02193691854063|0.20877659574468085|
|2_very_low_40_8_W1|173.02193691854063|0.20877659574468085|
|1_very_low_40_8_R1|173.02193691854063|0.20877659574468085|
|3_very_low_40_8_W1|173.02193691854063|0.20877659574468085|
|3_very_low_40_8_R1|173.02193691854063|0.20877659574468085|
|1_very_low_40_8_W1|173.02193691854063|0.20877659574468085|
|2_very_low_40_8_R2|173.02193691854063|0.20877659574468085|
|   1_very_low_40_8|173.02193691854063|0.20877659574468085|
|   2_very_low_40_8|173.02193691854063|0.20877659574468085|
|   3_very_low_40_8|173.02193691854063|0.20877659574468085|
+------------------+------------------+-------------------+



# CAP-3 and CAP-4: Relating rogue scenarios with baseline ones
These metrics use CAP-1 and CAP-2 to compare the baseline scenarios without uncertainties with the scenarios in which a rogue carrier exists; concretely, the ones whose name ends in R1, R2 or R3.

Pick the scenarios with a rogue carrier, i.e. those whose name ends with R1, R2 or R3.

In [87]:
# Keep the scenarios whose name ends in '_R1', '_R2' or '_R3'.
# `rlike` looks for the pattern anywhere in the string, so the `$` anchor is
# what restricts the match to the suffix. The character class holds only the
# digits: a comma inside `[...]` is a literal comma, not a separator.
scenarios_with_rogue = results.where(col(SCENARIO_NAME).rlike('_R[123]$'))

In [88]:
# Prints the scenarios selected as rogue scenarios.
scenarios_with_rogue.show()

+------------------+------------------+-------------------+
|          Scenario|              CAP1|               CAP2|
+------------------+------------------+-------------------+
|1_very_low_40_8_R2|173.02193691854063|0.20877659574468085|
|3_very_low_40_8_R2|173.02193691854063|0.20877659574468085|
|2_very_low_40_8_R1|173.02193691854063|0.20877659574468085|
|1_very_low_40_8_R1|173.02193691854063|0.20877659574468085|
|3_very_low_40_8_R1|173.02193691854063|0.20877659574468085|
|2_very_low_40_8_R2|173.02193691854063|0.20877659574468085|
+------------------+------------------+-------------------+



Pick those scenarios without uncertainty, which are those without rogues or wind.
Rename the column to identify it after join.

In [112]:
# Keep the scenarios whose name does NOT end in a rogue ('_R') or wind ('_W')
# suffix, and prefix their columns with 'Ref_' to identify them after the join.
# The character classes hold only the intended characters: inside `[...]` both
# `|` and `,` are literals, not separators. The `$` anchor restricts the match
# to the end of the name, and `~` negates the condition.
# NOTE: `results` must be the CAP1/CAP2 join at this point; the CAP-3 cells
# re-assign `results`, so this cell has to be executed before them.
has_uncertainty_suffix = col(SCENARIO_NAME).rlike('_[RW][1235]$')
scenarios_without_uncertainty = (
    results
    .where(~has_uncertainty_suffix)
    .select(
        col(SCENARIO_NAME).alias(f'Ref_{SCENARIO_NAME}'),
        col(CAP1).alias(f'Ref_{CAP1}'),
        col(CAP2).alias(f'Ref_{CAP2}'),
    )
)

In [113]:
# Prints the reference scenarios.
# NOTE(review): the stored output of this cell is an empty table. `results` is
# re-assigned by the CAP-3 cells, so presumably this cell was last executed
# after them -- confirm with a "Restart & Run All".
scenarios_without_uncertainty.show()

+------------+--------+--------+
|Ref_Scenario|Ref_CAP1|Ref_CAP2|
+------------+--------+--------+
+------------+--------+--------+



Now, relate both types of scenarios by removing the uncertainty part of the scenario name.
Thus, we remove the trailing '_R1', '_R2', '_R3' from the scenario names of the executions with uncertainty.

In [95]:
# Name of the reference scenario: the scenario name without its trailing
# '_R1' / '_R2' / '_R3', i.e. without its last 3 characters.
# `Column.substr` positions are 1-based, so the prefix starts at position 1,
# and the length is computed inline instead of through a temporary column.
scenarios_with_rogue = scenarios_with_rogue.withColumn(
    f'Ref_{SCENARIO_NAME}',
    col(SCENARIO_NAME).substr(lit(1), length(col(SCENARIO_NAME)) - lit(3)),
)

In [96]:
# Prints the rogue scenarios together with the name of their reference scenario.
scenarios_with_rogue.show()

+------------------+------------------+-------------------+---------------+
|          Scenario|              CAP1|               CAP2|   Ref_Scenario|
+------------------+------------------+-------------------+---------------+
|1_very_low_40_8_R2|173.02193691854063|0.20877659574468085|1_very_low_40_8|
|3_very_low_40_8_R2|173.02193691854063|0.20877659574468085|3_very_low_40_8|
|2_very_low_40_8_R1|173.02193691854063|0.20877659574468085|2_very_low_40_8|
|1_very_low_40_8_R1|173.02193691854063|0.20877659574468085|1_very_low_40_8|
|3_very_low_40_8_R1|173.02193691854063|0.20877659574468085|3_very_low_40_8|
|2_very_low_40_8_R2|173.02193691854063|0.20877659574468085|2_very_low_40_8|
+------------------+------------------+-------------------+---------------+



Join the tables, to have the reference CAP1 and CAP2 metrics

In [97]:
# Attach to every rogue scenario the CAP1 and CAP2 of its reference scenario.
reference_key = f'Ref_{SCENARIO_NAME}'
scenarios = scenarios_with_rogue.join(
    scenarios_without_uncertainty, on=reference_key, how='inner')

In [98]:
# Prints each rogue scenario next to the metrics of its reference scenario.
scenarios.show()

                                                                                

+---------------+------------------+------------------+-------------------+------------------+-------------------+
|   Ref_Scenario|          Scenario|              CAP1|               CAP2|          Ref_CAP1|           Ref_CAP2|
+---------------+------------------+------------------+-------------------+------------------+-------------------+
|1_very_low_40_8|1_very_low_40_8_R2|173.02193691854063|0.20877659574468085|173.02193691854063|0.20877659574468085|
|3_very_low_40_8|3_very_low_40_8_R2|173.02193691854063|0.20877659574468085|173.02193691854063|0.20877659574468085|
|2_very_low_40_8|2_very_low_40_8_R1|173.02193691854063|0.20877659574468085|173.02193691854063|0.20877659574468085|
|1_very_low_40_8|1_very_low_40_8_R1|173.02193691854063|0.20877659574468085|173.02193691854063|0.20877659574468085|
|3_very_low_40_8|3_very_low_40_8_R1|173.02193691854063|0.20877659574468085|173.02193691854063|0.20877659574468085|
|2_very_low_40_8|2_very_low_40_8_R2|173.02193691854063|0.20877659574468085|173.0

# CAP-3: Additional demand delay
Calculates the magnitude of the delay increase (CAP-1) due to the existence of rogue aircraft.

In [103]:
# CAP-3 is the *increase* of CAP-1 caused by the rogue aircraft, so the value
# of the reference scenario is subtracted from the value of the rogue scenario:
# a positive CAP3 means more delay than in the reference scenario.
# NOTE: this re-assigns `results`, which until now held the CAP1/CAP2 join.
results = scenarios.withColumn('CAP3', col(CAP1) - col(f'Ref_{CAP1}'))

In [104]:
# Prints the scenarios with the new CAP3 column.
results.show()

                                                                                

+---------------+------------------+------------------+-------------------+------------------+-------------------+----+
|   Ref_Scenario|          Scenario|              CAP1|               CAP2|          Ref_CAP1|           Ref_CAP2|CAP3|
+---------------+------------------+------------------+-------------------+------------------+-------------------+----+
|1_very_low_40_8|1_very_low_40_8_R2|173.02193691854063|0.20877659574468085|173.02193691854063|0.20877659574468085| 0.0|
|1_very_low_40_8|1_very_low_40_8_R1|173.02193691854063|0.20877659574468085|173.02193691854063|0.20877659574468085| 0.0|
|2_very_low_40_8|2_very_low_40_8_R1|173.02193691854063|0.20877659574468085|173.02193691854063|0.20877659574468085| 0.0|
|2_very_low_40_8|2_very_low_40_8_R2|173.02193691854063|0.20877659574468085|173.02193691854063|0.20877659574468085| 0.0|
|3_very_low_40_8|3_very_low_40_8_R2|173.02193691854063|0.20877659574468085|173.02193691854063|0.20877659574468085| 0.0|
|3_very_low_40_8|3_very_low_40_8_R1|173.

# CAP-4: Additional number of intrusions
Calculates the degradation produced in the intrusion safety indicator when rogue aircraft are introduced.

In [105]:
# CAP-4 is the *additional* number of intrusions per flight caused by the
# rogue aircraft, so the value of the reference scenario is subtracted from the
# value of the rogue scenario: a positive CAP4 means more intrusions than in
# the reference scenario.
results = results.withColumn('CAP4', col(CAP2) - col(f'Ref_{CAP2}'))

In [106]:
# Prints the scenarios with the new CAP4 column.
results.show()

+---------------+------------------+------------------+-------------------+------------------+-------------------+----+----+
|   Ref_Scenario|          Scenario|              CAP1|               CAP2|          Ref_CAP1|           Ref_CAP2|CAP3|CAP4|
+---------------+------------------+------------------+-------------------+------------------+-------------------+----+----+
|1_very_low_40_8|1_very_low_40_8_R2|173.02193691854063|0.20877659574468085|173.02193691854063|0.20877659574468085| 0.0| 0.0|
|1_very_low_40_8|1_very_low_40_8_R1|173.02193691854063|0.20877659574468085|173.02193691854063|0.20877659574468085| 0.0| 0.0|
|2_very_low_40_8|2_very_low_40_8_R1|173.02193691854063|0.20877659574468085|173.02193691854063|0.20877659574468085| 0.0| 0.0|
|2_very_low_40_8|2_very_low_40_8_R2|173.02193691854063|0.20877659574468085|173.02193691854063|0.20877659574468085| 0.0| 0.0|
|3_very_low_40_8|3_very_low_40_8_R2|173.02193691854063|0.20877659574468085|173.02193691854063|0.20877659574468085| 0.0| 0.0|


Remove the unnecessary columns.

In [107]:
# The reference columns were only needed to compute CAP3 and CAP4.
reference_columns = [f'Ref_{name}' for name in (SCENARIO_NAME, CAP1, CAP2)]
results = results.drop(*reference_columns)

In [108]:
# Prints the final table: CAP1 to CAP4 of each rogue scenario.
results.show()

+------------------+------------------+-------------------+----+----+
|          Scenario|              CAP1|               CAP2|CAP3|CAP4|
+------------------+------------------+-------------------+----+----+
|1_very_low_40_8_R2|173.02193691854063|0.20877659574468085| 0.0| 0.0|
|1_very_low_40_8_R1|173.02193691854063|0.20877659574468085| 0.0| 0.0|
|2_very_low_40_8_R1|173.02193691854063|0.20877659574468085| 0.0| 0.0|
|2_very_low_40_8_R2|173.02193691854063|0.20877659574468085| 0.0| 0.0|
|3_very_low_40_8_R2|173.02193691854063|0.20877659574468085| 0.0| 0.0|
|3_very_low_40_8_R1|173.02193691854063|0.20877659574468085| 0.0| 0.0|
+------------------+------------------+-------------------+----+----+

