# Computing fuel, time, spatial, and weather integrals for the corresponding optimal flights


In [None]:
import sys
# This variable should indicate the path from this Jupyter Notebook to the root directory of the repo.
root_path = '../'
# Adds the repo's root to the list of paths
sys.path.append(root_path)

# Package to read yml files
import yaml
# Package to handle file paths
import os
# Package to deal with DataFrames
import pandas as pd
# Package to plot stuff
import matplotlib.pyplot as plt
# Package for numerical and array handling
import numpy as np
# Package to read and write to .sqlite files
import sqlite3
# Package to keep track of time
import datetime

# Function to clear output from jupyter notebook
from IPython.display import clear_output
# Package for compressing dataframes into file
from src.data import compressors
# Package for defining and fitting weather models
from src.models import weather
# Utilities package
from src.common import utils
# Package for interpolating and estimating weather
from src.analysis import weather_interpolator

def sigmoid(x, mu = 0, sig = 1):
    """Evaluate the logistic function 1 / (1 + exp(-(x - mu) / sig)).

    `mu` shifts the curve along x and `sig` scales its width; `x` may be a
    scalar or anything `np.exp` accepts.
    """
    scaled = (x - mu) / sig
    denominator = 1 + np.exp(-scaled)
    return 1 / denominator

# Time Integral
def integrate_time(state_vectors):
    """Return the last value of the 'time' column minus its first value."""
    times = state_vectors['time']
    end_time = times.iloc[-1]
    start_time = times.iloc[0]
    return end_time - start_time

def integrate_fuel(state_vectors):
    """Return the value of the 'used_fuel' column in the last row."""
    used_fuel = state_vectors['used_fuel']
    return used_fuel.iloc[-1]

# Wind Integral
def integrate_wind(state_vectors):
    """Return the sum of the 'sknt' column divided by (666.739 * number of rows).

    NOTE(review): 666.739 is presumably the sea-level speed of sound in knots,
    making the result dimensionless - confirm.
    """
    speed_of_sound = 666.739
    total_wind = np.sum(state_vectors['sknt'])
    normalisation = speed_of_sound * len(state_vectors)
    return total_wind / normalisation

# Air Density Integral
def integrate_air_density(state_vectors):
    """Return the sum of the 'air_density' column divided by (1.204 * number of rows)."""
    sea_level_density = 1.204
    total_density = np.sum(state_vectors['air_density'])
    normalisation = sea_level_density * len(state_vectors)
    return total_density / normalisation

def integrate_air_pressure(state_vectors):
    """Return the sum of the 'air_pressure' column divided by (1013.25 * number of rows)."""
    sea_level_pressure = 1013.25
    total_pressure = np.sum(state_vectors['air_pressure'])
    normalisation = sea_level_pressure * len(state_vectors)
    return total_pressure / normalisation

def integrate_clouds(state_vectors):
    """Return the sum of the 'clouds' column divided by (max_clouds * number of rows)."""
    max_clouds = 1
    total_clouds = np.sum(state_vectors['clouds'])
    normalisation = max_clouds * len(state_vectors)
    return total_clouds / normalisation

def integrate_severity(state_vectors):
    """Return the sum of the 'severity' column divided by (max_severity * number of rows)."""
    max_severity = 1
    total_severity = np.sum(state_vectors['severity'])
    normalisation = max_severity * len(state_vectors)
    return total_severity / normalisation


def integrate_distance(state_vectors):
    """Sum `utils.haversine_distance` over each pair of consecutive rows.

    Every row is paired with its successor using the rows' `lat` and `lon`
    fields. Returns 0 when there are fewer than two rows.
    """
    segment_starts = state_vectors[:-1].itertuples()
    segment_ends = state_vectors[1:].itertuples()
    return sum(
        utils.haversine_distance(start.lat, start.lon, end.lat, end.lon)
        for start, end in zip(segment_starts, segment_ends)
    )

# Location of the config file, relative to the repo root
config_path_from_root = os.path.normpath('config/config.yml')
# Normalised path from this notebook to the repo root
root_path = os.path.normpath(root_path)
# Full path from this notebook to the config file
config_path = os.path.join(root_path, config_path_from_root)

# Parse the YAML config into `config`
with open(config_path, encoding='utf8') as file:
    config = yaml.safe_load(file)

# Defining "clear-output" function to feed into logger
def clear():
    """Clear the notebook cell output; `wait=True` defers clearing until new output is available."""
    clear_output(wait=True)

# Creates an instance of a logger class to log all that happens, optional (but encouraged).
# NOTE(review): `clear_function=None` is passed even though `clear` is defined for this
# purpose - confirm whether leaving the output uncleared is intentional.
logger = utils.Logger(config, clear_function=None)

# SQLite database file opened by the following cells (path relative to this notebook).
flights_database = '../data/flight/KLAX_KSFO_2023-01-01_2023-01-31.sqlite'

In [None]:
# Rebuild the `optimal_flights_integrals` table: one row per flight, holding the
# integrals evaluated along the optimal trajectory matched to that flight's
# aircraft typecode, combined with the weather rows stored for that flight.


def _as_sql_real(value):
    """Convert a scalar to a plain float (None when missing) for binding to a REAL column.

    NumPy integer scalars are not natively supported by sqlite3 and can end up
    stored as BLOBs, so values are normalised before insertion.
    """
    return None if pd.isna(value) else float(value)


conn = sqlite3.connect(flights_database)

# try/finally releases the connection even when a flight fails the length check below.
try:
    cursor = conn.cursor()

    # Start from a clean table on every run.
    cursor.execute("DROP TABLE IF EXISTS optimal_flights_integrals;")

    new_columns = ['time', 'fuel', 'distance', 'wind', 'air_density', 'air_pressure', 'clouds', 'severity']

    # Create the new table
    create_table_query = f'''
        CREATE TABLE optimal_flights_integrals (
            flight_id TEXT PRIMARY KEY,
            {", ".join([f"{col} REAL" for col in new_columns])}
        );
    '''
    cursor.execute(create_table_query)

    # `icao24s` is not used in this cell; it is kept so the notebook global still exists.
    flight_ids, icao24s = pd.read_sql_query("SELECT flight_id, icao24 FROM flights", conn).values.T

    # Loop-invariant SQL, built once. The flight id is bound through a `?` placeholder
    # instead of being interpolated into the statement as a double-quoted literal.
    optimal_state_vectors_query = """
        SELECT sv.*, svf.*
        FROM state_vectors AS sv
        INNER JOIN (
            SELECT of.flight_id
            FROM flights AS fs
            JOIN flights_aircraft AS fa ON fs.icao24 = fa.icao24
            JOIN optimal_flights AS of ON fa.typecode = of.typecode
            WHERE fs.flight_id = ?
        ) AS optimal_flight_id ON sv.flight_id = optimal_flight_id.flight_id
        JOIN state_vector_fuel AS svf ON sv.vector_id = svf.vector_id;
        """

    optimal_weather_query = """
        SELECT svw.*
        FROM optimal_state_vector_weather AS svw
        WHERE svw.flight_id = ?
        """

    # Upsert statement: one placeholder for flight_id plus one per integral column.
    insert_query = f'''
        INSERT INTO optimal_flights_integrals (flight_id, {', '.join(new_columns)})
        VALUES ({', '.join('?' * (len(new_columns) + 1))})
        ON CONFLICT(flight_id) DO UPDATE SET
        {', '.join([f"{col} = excluded.{col}" for col in new_columns])};
    '''

    for i, flight_id in enumerate(flight_ids):
        clear_output(wait=True)
        print(f'{i}/{len(flight_ids)} | {flight_id}')

        optimal_state_vectors = pd.read_sql_query(
            optimal_state_vectors_query, conn, params=(flight_id,))

        optimal_state_vectors_weather = pd.read_sql_query(
            optimal_weather_query, conn, params=(flight_id,))

        # The two frames are glued side by side, so they must line up row for row.
        if len(optimal_state_vectors) != len(optimal_state_vectors_weather):
            raise ValueError("DataFrames must be the same length")

        state_vectors = pd.concat([optimal_state_vectors, optimal_state_vectors_weather], axis=1)

        # Several column names occur more than once (e.g. from `sv.*` and `svf.*`);
        # keep only the last occurrence of each name, preserving column order.
        state_vectors = state_vectors.loc[:, ~state_vectors.columns.duplicated(keep='last')]

        integrals = {'flight_id': flight_id,
                     'time': integrate_time(state_vectors),
                     'fuel': integrate_fuel(state_vectors),
                     'distance': integrate_distance(state_vectors),
                     'wind': integrate_wind(state_vectors),
                     'air_density': integrate_air_density(state_vectors),
                     'air_pressure': integrate_air_pressure(state_vectors),
                     'clouds': integrate_clouds(state_vectors),
                     'severity': integrate_severity(state_vectors)}

        # Bind values in the same order as the column list used in `insert_query`.
        insert_data = (flight_id, *(_as_sql_real(integrals[col]) for col in new_columns))
        cursor.execute(insert_query, insert_data)

        # Commit per flight so rows already computed survive a later failure.
        conn.commit()
finally:
    conn.close()


In [None]:
# Inspect the optimal-flight state vectors (joined with their fuel rows) that are
# matched, via aircraft typecode, to the first flight in `flight_ids`.
conn = sqlite3.connect(flights_database)
try:
    # The flight id is bound as a parameter rather than interpolated into the SQL text.
    optimal_state_vectors = pd.read_sql_query(
        """
        SELECT sv.*, svf.*
        FROM state_vectors AS sv
        INNER JOIN (
            SELECT of.flight_id
            FROM flights AS fs
            JOIN flights_aircraft AS fa ON fs.icao24 = fa.icao24
            JOIN optimal_flights AS of ON fa.typecode = of.typecode
            WHERE fs.flight_id = ?
        ) AS optimal_flight_id ON sv.flight_id = optimal_flight_id.flight_id
        JOIN state_vector_fuel AS svf ON sv.vector_id = svf.vector_id;
        """,
        conn,
        params=(flight_ids[0],))
finally:
    # Close the connection even if the query fails.
    conn.close()
optimal_state_vectors

In [None]:
# Inspect the rows of `optimal_state_vector_weather` stored for one specific flight.
conn = sqlite3.connect(flights_database)
try:
    # Plain string (no f-string needed); the flight id is bound as a parameter
    # instead of relying on SQLite accepting a double-quoted string literal.
    optimal_state_vectors_weather = pd.read_sql_query(
        """
        SELECT *
        FROM optimal_state_vector_weather
        WHERE flight_id = ?
        """,
        conn,
        params=("a8c6e1_1672532770_1672536146_KLAX_KSFO",))
finally:
    # Close the connection even if the query fails.
    conn.close()
optimal_state_vectors_weather

In [None]:
# Show the current value of `flight_id` as the cell output.
flight_id

In [None]:
import sqlite3
import pandas as pd

# Load the `flights_integrals` and `optimal_flights_integrals` tables of the
# KDEN -> KSEA database into DataFrames.
flights_database = '../data/flight/KDEN_KSEA_2023-01-01_2023-01-31.sqlite'

# Not used by the queries below; kept as a notebook global.
flight_id = 'a3649a_1673032170_1673040502_KDEN_KSEA'
conn = sqlite3.connect(flights_database)

try:
    integrals = pd.read_sql_query(
        """
        SELECT fi.*
        FROM flights_integrals AS fi
        """,
        conn)

    optimal_integrals = pd.read_sql_query(
        """
        SELECT ofi.*
        FROM optimal_flights_integrals AS ofi
        """,
        conn)
finally:
    # Close the connection even if one of the tables is missing.
    conn.close()

optimal_integrals

In [None]:
import os
import sqlite3
import pandas as pd

# For every .sqlite file under ../data/flight/, print the row count of each table
# and whether the `state_vector_weather` / `state_vector_fuel` tables exist.
data_dir = '../data/flight/'
files = [data_dir + f for f in os.listdir(data_dir) if f.endswith('.sqlite')]

for file in files:
    print('=========')
    print(file)
    conn = sqlite3.connect(file)
    try:
        cur = conn.cursor()
        cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = [row[0] for row in cur.fetchall()]

        counts = {}
        for table in tables:
            # Table names cannot be bound as parameters, so quote the identifier
            # (doubling any embedded quotes) before placing it in the statement.
            quoted_table = '"' + table.replace('"', '""') + '"'
            cur.execute(f"SELECT COUNT(*) FROM {quoted_table}")
            row_count = cur.fetchone()[0]
            print(f"{table}: {row_count}")
            counts[table] = row_count
    finally:
        # Release the connection even if one of the queries fails.
        conn.close()

    has_weather = 'Yes' if "state_vector_weather" in tables else 'No'
    has_fuel = 'Yes' if "state_vector_fuel" in tables else 'No'
    strout = f'{file} | Weather: {has_weather} | Fuel: {has_fuel}'
    print(strout)

In [None]:
# Show the current value of `integrals` as the cell output.
integrals

In [None]:
# Show the current value of `optimal_integrals` as the cell output.
optimal_integrals

In [None]:
from traffic.core import Flight
# Look up the aircraft typecode for every icao24 in `fuels` by wrapping a
# one-row DataFrame in a `Flight` and reading its `aircraft` entry.
typecodes = []
for icao24 in fuels['icao24']:
    df = pd.DataFrame({'icao24': [icao24], 'timestamp': [0]})
    aircraft = Flight(df).aircraft
    typecodes.append(aircraft['typecode'])


In [None]:
# Store the collected typecodes as a new 'typecode' column of `fuels`.
fuels['typecode'] = typecodes

In [None]:
# Show the `aircraft` attribute of a `Flight` built from the current `df`.
Flight(df).aircraft

In [None]:
# Overlay step histograms of the 'fuel' column for each aircraft typecode that
# occurs at least `thresh` times in `typecodes`.
fig, ax = plt.subplots(figsize=[10, 10/1.62])
aircrafts, counts = np.unique(typecodes, return_counts=True)
thresh = 20
# Keep only the typecodes with enough occurrences to plot.
mask = counts >= thresh
aircrafts = aircrafts[mask]
counts = counts[mask]
# Iterate directly over the typecodes: the enumerate index was never used.
for aircraft_type in aircrafts:
    ax.hist(fuels[fuels['typecode'] == aircraft_type]['fuel'], bins=10, histtype='step', label=aircraft_type)
ax.legend()
ax.set_title(f"Fuel Consumption of different aircraft types ({thresh} flights or more)")
ax.set_xlabel('Fuel Consumption (kg)')
ax.set_ylabel('Count')

In [None]:
# We can get the optimal flight path.
# We need the weather calculation for the optimal path at the time of each of the other flights -> state_vector_weather_optimal
# Integrals for the optimal flights -> flights_integrals_optimal
# Flight time vs. weather
# Distance vs. flight time
# Plane type

In [None]:
import sqlite3
import pandas as pd

# Fetch the distinct state vectors (position, normalised time, mass and fuel flow)
# of one specific KSFO -> KLAX flight.
file = "../data/flight/KSFO_KLAX_2023-01-01_2023-01-31.sqlite"
conn = sqlite3.connect(file)

# Plain string with a `?` placeholder for the flight id. The former
# `LEFT JOIN flights` was dropped: none of its columns were selected or filtered
# on, and with DISTINCT it could not change the result set.
query = """
    SELECT DISTINCT sv.vector_id, sv.time_normalized, sv.lat, sv.lon, svf.mass, svf.fuelflow
    FROM state_vectors AS sv
    JOIN state_vector_fuel svf ON svf.vector_id = sv.vector_id
    WHERE sv.flight_id = ?;"""
try:
    df = pd.read_sql_query(query, conn, params=("a4c0db_1672545968_1672549575_KSFO_KLAX",))
finally:
    # Close the connection even if the query fails.
    conn.close()
df