# Computing Fuel, Time, Spatial, and Weather Integrals


In [1]:
import sys
# This variable should indicate the path from this Jupyter Notebook to the root directory of the repo.
root_path = '../'
# Adds the repo's root to the list of paths
sys.path.append(root_path)

# Package to read yml files
import yaml
# Package to handle file paths
import os
# Package to deal with DataFrames
import pandas as pd
# Package to plot stuff
import matplotlib.pyplot as plt
# Package for numerical and array handling
import numpy as np
# Package to read and write to .sqlite files
import sqlite3
# Package to keep track of time
import datetime

# Function to clear output from jupyter notebook
from IPython.display import clear_output
# Package for compressing dataframes into file
from src.data import compressors
# Package for defining and fitting weather models
from src.models import weather
# Utilities package
from src.common import utils
# Package for interpolating and estimating weather
from src.analysis import weather_interpolator

def sigmoid(x, mu = 0, sig = 1):
    """Logistic function of ``x``, shifted by ``mu`` and scaled by ``sig``.

    Evaluates ``1 / (1 + exp(-(x - mu) / sig))``. Because the exponential is
    taken with ``np.exp``, ``x`` may be a scalar or a NumPy array.

    Args:
        x: Value(s) at which to evaluate the function.
        mu: Midpoint; the result is 0.5 where ``x == mu``.
        sig: Scale dividing the distance from ``mu``.

    Returns:
        The logistic value(s), in the same shape as ``x``.
    """
    exponent = -(x - mu) / sig
    denominator = 1 + np.exp(exponent)
    return 1 / denominator

# Time Integral
def integrate_time(state_vectors):
    """Return the ``time_normalized`` value of the last row.

    Args:
        state_vectors: DataFrame with a ``time_normalized`` column.
            Presumably rows are in chronological order, so the last row is
            the end of the flight -- TODO confirm.

    Returns:
        The final entry of the ``time_normalized`` column.
    """
    time_column = state_vectors['time_normalized']
    return time_column.iloc[-1]

def integrate_fuel(state_vectors):
    """Return the ``used_fuel`` value of the last row.

    Args:
        state_vectors: DataFrame with a ``used_fuel`` column. Presumably the
            column is cumulative, so its last value is the flight's total
            fuel use -- TODO confirm.

    Returns:
        The final entry of the ``used_fuel`` column.
    """
    fuel_column = state_vectors['used_fuel']
    return fuel_column.iloc[-1]

# Wind Integral
def integrate_wind(state_vectors):
    """Average the ``sknt`` column and scale it by a reference speed.

    Computes ``sum(sknt) / (speed_of_sound * number_of_rows)``.

    Args:
        state_vectors: DataFrame with an ``sknt`` column (presumably wind
            speed in knots -- TODO confirm).

    Returns:
        The row-averaged ``sknt`` divided by the reference speed.
    """
    # Reference speed used for normalisation; presumably the speed of sound
    # expressed in knots -- TODO confirm units.
    speed_of_sound = 666.739
    n_rows = len(state_vectors)
    wind_total = np.sum(state_vectors['sknt'])
    return wind_total / (speed_of_sound * n_rows)

# Air Density Integral
def integrate_air_density(state_vectors):
    """Average the ``air_density`` column and scale it by a reference density.

    Computes ``sum(air_density) / (sea_level_density * number_of_rows)``.

    Args:
        state_vectors: DataFrame with an ``air_density`` column.

    Returns:
        The row-averaged air density divided by the reference density.
    """
    # Reference density used for normalisation (presumably kg/m^3 -- TODO confirm).
    sea_level_density = 1.204
    n_rows = len(state_vectors)
    density_total = np.sum(state_vectors['air_density'])
    return density_total / (sea_level_density * n_rows)

def integrate_air_pressure(state_vectors):
    """Average the ``air_pressure`` column and scale it by a reference pressure.

    Computes ``sum(air_pressure) / (sea_level_pressure * number_of_rows)``.

    Args:
        state_vectors: DataFrame with an ``air_pressure`` column.

    Returns:
        The row-averaged air pressure divided by the reference pressure.
    """
    # Reference pressure used for normalisation (presumably hPa -- TODO confirm).
    sea_level_pressure = 1013.25
    n_rows = len(state_vectors)
    pressure_total = np.sum(state_vectors['air_pressure'])
    return pressure_total / (sea_level_pressure * n_rows)

def integrate_clouds(state_vectors):
    """Average the ``clouds`` column, scaled by its maximum value.

    Computes ``sum(clouds) / (max_clouds * number_of_rows)``; with
    ``max_clouds`` set to 1 this is the plain row average.

    Args:
        state_vectors: DataFrame with a ``clouds`` column.

    Returns:
        The row-averaged ``clouds`` value divided by ``max_clouds``.
    """
    max_clouds = 1
    n_rows = len(state_vectors)
    clouds_total = np.sum(state_vectors['clouds'])
    return clouds_total / (max_clouds * n_rows)

def integrate_severity(state_vectors):
    """Average the ``severity`` column, scaled by its maximum value.

    Computes ``sum(severity) / (max_severity * number_of_rows)``; with
    ``max_severity`` set to 1 this is the plain row average.

    Args:
        state_vectors: DataFrame with a ``severity`` column.

    Returns:
        The row-averaged ``severity`` value divided by ``max_severity``.
    """
    max_severity = 1
    n_rows = len(state_vectors)
    severity_total = np.sum(state_vectors['severity'])
    return severity_total / (max_severity * n_rows)


def integrate_distance(state_vectors):
    """Sum the haversine distance between each pair of consecutive rows.

    Every row is paired with the row that follows it, and
    ``utils.haversine_distance`` is applied to their ``lat``/``lon`` values.

    Args:
        state_vectors: DataFrame with ``lat`` and ``lon`` columns, one row
            per position sample.

    Returns:
        The accumulated distance, in whatever unit
        ``utils.haversine_distance`` returns; ``0`` when there are fewer
        than two rows.
    """
    # Rows 0..n-2 walked in lockstep with rows 1..n-1 give consecutive pairs.
    segment_starts = state_vectors[:-1].itertuples()
    segment_ends = state_vectors[1:].itertuples()
    return sum(
        utils.haversine_distance(start.lat, start.lon, end.lat, end.lon)
        for start, end in zip(segment_starts, segment_ends)
    )

# Normalise the notebook-to-root path defined in the imports cell (collapses redundant separators)
root_path = os.path.normpath(root_path)
# Path from the repo root to the config file, normalised for the current OS
config_path_from_root = os.path.normpath('config/config.yml')
# Full path from this notebook to the config file
config_path = os.path.join(root_path, config_path_from_root)

# Parse the YAML config; safe_load is used, so only plain YAML data types are constructed
with open(config_path, 'r',  encoding='utf8') as file:
    config = yaml.safe_load(file)

# Defining "clear-output" function to feed into logger
def clear():
    """Clear the current cell's output via IPython's ``clear_output``.

    ``wait=True`` is passed so the existing output is only removed once new
    output is available to replace it.
    """
    clear_output(wait=True)

# Creates an instance of a logger class to log all that happens, optional (but encouraged).
# NOTE(review): clear_function=None is passed here, so the clear() helper defined in this
# notebook is not handed to the logger -- confirm whether that is intentional.
logger = utils.Logger(config, clear_function=None)

In [4]:

# Every flight database is one .sqlite file inside this folder.
files = ['../data/flight/' + f for f in os.listdir('../data/flight/') if f.endswith('.sqlite')]

# The per-flight query joins both of these tables, so a database lacking either one
# cannot be processed and is skipped (previously only the weather table was checked,
# and a missing fuel table raised in the middle of the run).
required_tables = ['state_vector_weather', 'state_vector_fuel']

# Value columns of flights_integrals, in the order they are inserted.
new_columns = ['time', 'fuel', 'distance', 'wind', 'air_density', 'air_pressure', 'clouds', 'severity']

# The SQL statements below do not depend on the file or the flight, so they are built once.

# Looks up which of the required tables exist in a database.
table_check_query = f'''
    SELECT name FROM sqlite_master
    WHERE type='table' AND name IN ({', '.join('?' * len(required_tables))});
'''

# (Re)creates the results table: one row per flight, one REAL column per integral.
create_table_query = f'''
    CREATE TABLE flights_integrals (
        flight_id TEXT PRIMARY KEY,
        {", ".join([f"{col} REAL" for col in new_columns])}
    );
'''

# Inserts one flight's integrals, overwriting the row if the flight_id already exists.
insert_query = f'''
    INSERT INTO flights_integrals (flight_id, {', '.join(new_columns)})
    VALUES ({', '.join('?' * (len(new_columns) + 1))})
    ON CONFLICT(flight_id) DO UPDATE SET
    {', '.join([f"{col} = excluded.{col}" for col in new_columns])};
'''

# Fetches every state vector of one flight together with its weather and fuel rows.
# The flight id is bound as a parameter instead of being pasted into the SQL text.
state_vectors_query = """
    SELECT DISTINCT sv.*, svw.*, svf.*
    FROM state_vectors sv
    JOIN flights ON flights.flight_id = sv.flight_id
    LEFT JOIN state_vector_weather svw ON svw.vector_id = sv.vector_id
    LEFT JOIN state_vector_fuel svf ON svf.vector_id = sv.vector_id
    WHERE sv.flight_id = ?;
"""

# (file, missing tables) for every database that had to be skipped; reported at the end
# because clear_output() inside the loop would wipe any message printed earlier.
skipped_files = []

for file in files:
    flights_database = file

    conn = sqlite3.connect(flights_database)
    # try/finally guarantees the connection is closed even if a query or an integral raises.
    try:
        cursor = conn.cursor()

        cursor.execute(table_check_query, required_tables)
        existing_tables = {row[0] for row in cursor.fetchall()}
        missing_tables = [t for t in required_tables if t not in existing_tables]
        if missing_tables:
            skipped_files.append((file, missing_tables))
            continue

        # Start from an empty results table on every run.
        cursor.execute("DROP TABLE IF EXISTS flights_integrals;")
        cursor.execute(create_table_query)

        flight_ids = pd.read_sql_query("SELECT flight_id FROM flights", conn)['flight_id'].tolist()

        for i, flight_id in enumerate(flight_ids):
            clear_output(wait=True)
            print(f'{i}/{len(flight_ids)} | {flight_id}')

            state_vectors = pd.read_sql_query(state_vectors_query, conn, params=(flight_id,))

            # A flight without state vectors has nothing to integrate; skipping it avoids
            # an IndexError from .iloc[-1] that would abort the whole run.
            if state_vectors.empty:
                continue

            # The three joined tables share column names (e.g. vector_id). Keep only the
            # last occurrence of each name, preserving the original column order.
            state_vectors = state_vectors.loc[:, ~state_vectors.columns.duplicated(keep='last')]

            integrals = {'flight_id': flight_id,
                         'time': integrate_time(state_vectors),
                         'fuel': integrate_fuel(state_vectors),
                         'distance': integrate_distance(state_vectors),
                         'wind': integrate_wind(state_vectors),
                         'air_density': integrate_air_density(state_vectors),
                         'air_pressure': integrate_air_pressure(state_vectors),
                         'clouds': integrate_clouds(state_vectors),
                         'severity': integrate_severity(state_vectors)}

            # Dict insertion order matches (flight_id, *new_columns), as insert_query expects.
            insert_data = tuple(integrals.values())
            cursor.execute(insert_query, insert_data)

            # Committed per flight so already-computed rows survive a later failure.
            conn.commit()
    finally:
        conn.close()

for skipped_file, skipped_missing in skipped_files:
    print(f"Skipped {skipped_file}: missing table(s) {', '.join(skipped_missing)}")


966/967 | a11380_1690482460_1690501195_KLAX_KJFK


In [18]:
import os
import sqlite3
import pandas as pd

# Every flight database is one .sqlite file inside this folder.
files = ['../data/flight/' + f for f in os.listdir('../data/flight/') if f.endswith('.sqlite')]

# For each database: print the row count of every table, then a one-line summary saying
# whether the weather and fuel tables are present.
for file in files:
    print('=========')
    print(file)
    conn = sqlite3.connect(file)
    # try/finally guarantees the connection is closed even if one of the queries raises.
    try:
        cur = conn.cursor()
        cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = [row[0] for row in cur.fetchall()]

        # Row count per table, in the order the tables were listed.
        counts = {}
        for table in tables:
            # Table names cannot be bound as parameters, so the identifier is quoted
            # (doubling any embedded quote) before being placed into the statement.
            quoted_table = '"' + table.replace('"', '""') + '"'
            cur.execute(f"SELECT COUNT(*) FROM {quoted_table}")
            row_count = cur.fetchone()[0]
            print(f"{table}: {row_count}")
            counts[table] = row_count
    finally:
        conn.close()

    has_weather = 'Yes' if "state_vector_weather" in tables else 'No'
    has_fuel = 'Yes' if "state_vector_fuel" in tables else 'No'
    strout = f'{file} | Weather: {has_weather} | Fuel: {has_fuel}'
    print(strout)
    

../data/flight/KDEN_KSEA_2023-01-01_2023-01-31.sqlite
sqlite_sequence: 0
flights: 649
state_vectors: 2938155
state_vector_weather: 2938155
state_vector_fuel: 2938155
flights_integrals: 335
../data/flight/KLAX_KJFK_2023-01-01_2023-01-31.sqlite
state_vectors: 10616462
sqlite_sequence: 1
flights: 1163
state_vector_weather: 10300542
state_vector_fuel: 10300542
flights_integrals: 595
../data/flight/KSEA_KDEN_2023-07-01_2023-07-31.sqlite
flights: 814
state_vectors: 3160183
sqlite_sequence: 1
state_vector_weather: 0
state_vector_fuel: 3160183
flights_integrals: 416
../data/flight/KLAX_KSFO_2023-01-01_2023-01-31.sqlite
state_vectors: 2789573
sqlite_sequence: 1
flights: 1462
state_vector_weather: 2789573
state_vector_fuel: 2789573
flights_integrals: 754
../data/flight/KJFK_KLAX_2023-07-01_2023-07-31.sqlite
flights: 999
state_vectors: 22724633
sqlite_sequence: 1
state_vector_weather: 9748879
state_vector_fuel: 9748879
flights_integrals: 510
../data/flight/KSFO_KLAX_2023-07-01_2023-07-31.sqlite
f

Unnamed: 0,vector_id,tmpf,air_pressure,air_density,clouds,sknt,severity
0,1,74.552784,1009.697744,1.185180,0.040000,11.069600,0.0
1,2,74.424375,1008.504930,1.183996,0.039833,11.275403,0.0
2,3,74.295967,1007.312115,1.182813,0.039667,11.481206,0.0
3,4,74.167558,1006.119301,1.181629,0.039500,11.687008,0.0
4,5,74.039150,1004.926487,1.180446,0.039333,11.892811,0.0
...,...,...,...,...,...,...,...
95,96,62.353974,896.380369,1.072745,0.024167,30.620865,0.0
96,97,62.225566,895.187555,1.071562,0.024000,30.826668,0.0
97,98,62.097157,893.994740,1.070378,0.023833,31.032471,0.0
98,99,61.968749,892.801926,1.069195,0.023667,31.238274,0.0


In [None]:
for file in files