# Computing Weather for Each Flight Route with Parallelization

This notebook should have everything you need to compute the weather conditions selected for a given flight.

To make this work, make sure you have the following done:
 - Rename the `config/config_template.yml` file as `config/config.yml`
 - Change the `base-configs - root-directory` entry to wherever you downloaded the project repo
 - Change the `base-configs - tag` entry to whatever you'd like; it most likely doesn't matter for this notebook
 - Make sure all the dependencies are installed with `pip install -r requirements.txt`
 - Change the `flights_database` and `weather_database` variables below to the flight database you assigned yourself and the weather database covering the same period
 - Have fun and pray for no bugs!
 - If you need to make a conda environment available as a Jupyter kernel, you can follow [this guide](https://medium.com/@nrk25693/how-to-add-your-conda-environment-to-your-jupyter-notebook-in-just-4-steps-abeab8b8d084). Otherwise, just copy the code into a script and run it that way; it's up to you.

In [15]:
import sys
# Relative path from this Jupyter Notebook to the root directory of the repo.
root_path = '../'
# Adds the repo's root to the module search path so the `src.*` imports below can be resolved.
sys.path.append(root_path)

# Package to read yml files
import yaml
# Package to handle file paths
import os
# Package to deal with DataFrames
import pandas as pd
# Package to plot stuff
import matplotlib.pyplot as plt
# Package for numerical and array handling
import numpy as np
# Package to read and write to .sqlite files
import sqlite3
# Package to keep track of time
import datetime

# Function to clear output from jupyter notebook
from IPython.display import clear_output
# Package for compressing dataframes into file
from src.data import compressors
# Package for defining and fitting weather models
from src.models import weather
# Utilities package
from src.common import utils
# Package for interpolating and estimating weather
from src.analysis import weather_interpolator

# Normalised path from this notebook to the repo root
root_path = os.path.normpath(root_path)
# Location of the config file relative to the repo root
config_path_from_root = os.path.normpath(os.path.join('config', 'config.yml'))
# Location of the config file relative to this notebook
config_path = os.path.join(root_path, config_path_from_root)

# Parse the YAML config file into `config`
with open(config_path, mode='r', encoding='utf8') as file:
    config = yaml.safe_load(file)

# Helper that can be handed to the logger to clear the notebook output.
def clear():
    """Clear the current cell output via IPython's `clear_output(wait=True)`."""
    clear_output(wait=True)

# Logger instance used to record what happens (optional, but encouraged).
# Note: no clear function is handed over here (`clear_function=None`).
logger = utils.Logger(
    config,
    clear_function=None,
)

# Weather interpolator instance, sharing the same config and logger
interpolator = weather_interpolator.WeatherInterpolator(
    config,
    logger=logger,
)

# Defining location of data
flights_database = '../data/flight/KDEN_KSEA_2023-01-01_2023-01-31.sqlite'
weather_database = '../data/weather/Weather-US_2022-12-31_2023-02-01.sqlite'
# weather_database = '../data/weather/Weather-US_2023-06-30_2022-08-01.sqlite'

# Path to file keeping track of already-loaded flights.
# Only the file extension is swapped, so a directory or file name that happens
# to contain the text "sqlite" is left untouched.
tracking_file = os.path.splitext(flights_database)[0] + '.txt'

In [17]:
import concurrent.futures

# Path to file keeping track of already-loaded flights.
# Only the extension is swapped; other occurrences of "sqlite" in the path are kept.
tracking_file = os.path.splitext(flights_database)[0] + '.txt'

# Checking and loading the tracking_file
if os.path.isfile(tracking_file):
    with open(tracking_file, 'r') as f:
        # One flight id per line; blank lines are ignored
        loaded_ids = [line for line in f.read().split('\n') if line != '']
else:
    # No record yet: create an empty tracking file
    with open(tracking_file, 'w') as f:
        pass
    loaded_ids = []

# Loading the time threshold variable, which is the time interval that weather data will be loaded for each calculation.
time_thresh = config['statistics']['interpolation']['weather']['time-thresh']
lat_lon_thresh = config['statistics']['interpolation']['weather']['lat-lon-thresh']

# The list of columns to be added to the new table to be created.
new_columns = ['tmpf', 'air_pressure', 'air_density', 'clouds', 'sknt', 'severity']

flights_connection = sqlite3.connect(flights_database)
try:
    # Declares a cursor to write to the database
    cursor = flights_connection.cursor()

    # Runs a query to identify all the flight_ids available
    flight_ids = pd.read_sql_query("SELECT flight_id FROM flights;", flights_connection).values[:,0]
    # Only care about the ids that have not been loaded yet.
    # The tracking file stores ids as text, so compare on the text form and use a
    # set to keep the membership test cheap.
    already_loaded = set(loaded_ids)
    flight_ids = [f for f in flight_ids if str(f) not in already_loaded]

    # If there is no record of loaded ids, we start from scratch
    if len(loaded_ids) == 0:
        # Drop the table if it exists
        cursor.execute("DROP TABLE IF EXISTS state_vector_weather;")

        # Create the new table, with one REAL column per weather quantity
        create_table_query = f'''
            CREATE TABLE state_vector_weather (
                vector_id INTEGER PRIMARY KEY,
                {", ".join([f"{col} REAL" for col in new_columns])}
            );
        '''
        cursor.execute(create_table_query)

    # Commits change to file
    flights_connection.commit()
finally:
    # Always release the database, even if one of the statements above failed
    flights_connection.close()

# Reference time used for the elapsed-time / ETA printout
t_start_full = datetime.datetime.now()

count = 0

# Number of worker threads to use (None lets the executor pick its default)
num_cores = os.cpu_count()

def main_computation(i, flight_id):
    """Compute the weather quantities for one flight and write them to a CSV file.

    Each call opens its own connections to `flights_database` and
    `weather_database`, loads the weather records inside the flight's
    (threshold-padded) time window, passes them with the flight's state vectors
    to `interpolator.compute_flight_weather_quantities`, and writes the
    `vector_id` and `new_columns` columns of the result to
    `<flights_database without '.sqlite'>_<flight_id>.csv`.

    Args:
        i: Position of the flight in `flight_ids`; only used for the
            progress / ETA printout.
        flight_id: Identifier of the flight to process.

    Returns:
        None. The result is the CSV file; progress is printed to the cell output.
    """
    flights_connection = sqlite3.connect(flights_database)
    weather_connection = sqlite3.connect(weather_database)
    # Bind the id as a text parameter instead of splicing it into the SQL text.
    flight_key = (str(flight_id),)

    try:
        # Finds the time, latitude and longitude extent of the current flight in one query
        bounds = pd.read_sql_query("""
                                    SELECT MIN(time) as min_time, MAX(time) as max_time,
                                           MIN(lat) as min_lat, MAX(lat) as max_lat,
                                           MIN(lon) as min_lon, MAX(lon) as max_lon
                                    FROM state_vectors
                                    JOIN flights ON flights.flight_id = state_vectors.flight_id
                                    WHERE state_vectors.flight_id = ?;
                                   """,
                                   flights_connection,
                                   params=flight_key)
        # `tolist()` yields plain Python numbers, which sqlite3 can bind as parameters
        (min_time, max_time,
         min_latitude, max_latitude,
         min_longitude, max_longitude) = (bounds[column].tolist()[0] for column in bounds.columns)

        # Adjusting time, lat and lon thresholds.
        # Adds time threshold to time limits
        min_time -= time_thresh
        max_time += time_thresh

        # Adds latitude and longitude threshold
        # NOTE(review): these padded lat/lon bounds are computed but are not applied
        # to the weather query below, which filters on time only - confirm whether a
        # spatial filter was intended.
        range_latitude = max_latitude - min_latitude
        range_longitude = max_longitude - min_longitude
        min_latitude -= range_latitude*lat_lon_thresh
        max_latitude += range_latitude*lat_lon_thresh
        min_longitude -= range_longitude*lat_lon_thresh
        max_longitude += range_longitude*lat_lon_thresh

        # Loads the weather data corresponding to the flight's time window
        flight_weather_data = pd.read_sql_query("""
                                    SELECT ws.lat, ws.lon, ws.elevation, ws.sigma, wd.*
                                    FROM weather_data as wd
                                    JOIN weather_stations as ws ON ws.station_id = wd.station_id
                                    WHERE wd.time BETWEEN ? AND ?;
                                   """,
                                   weather_connection,
                                   params=(min_time, max_time))

        # Loads state vectors for the given flight
        state_vectors = pd.read_sql_query("""
                                    SELECT DISTINCT state_vectors.*
                                    FROM state_vectors
                                    JOIN flights ON flights.flight_id = state_vectors.flight_id
                                    WHERE state_vectors.flight_id = ?;
                                   """,
                                   flights_connection,
                                   params=flight_key)

        # Computes the weather values for the current flight
        state_vectors = interpolator.compute_flight_weather_quantities(new_columns, state_vectors, stations_data=flight_weather_data)

        # One CSV per flight, written next to the flights database
        state_vectors[['vector_id'] + new_columns].to_csv(flights_database.replace('.sqlite',f'_{flight_id}.csv'))
    finally:
        # Release both databases even if a query or the interpolation failed
        flights_connection.close()
        weather_connection.close()

    # Progress report: the ETA extrapolates the elapsed time using this flight's index
    time_iteration = datetime.datetime.now()
    time_elapsed = (time_iteration - t_start_full).total_seconds()
    if i == 0:
        # No estimate is possible for index 0 (would divide by zero)
        ETA = np.nan
    else:
        ETA = time_elapsed*len(flight_ids)/i - time_elapsed
    clear_output(wait=True)
    print(f'{i}/{len(flight_ids)}.')
    print(f'Time Elapsed: {utils.format_time(time_elapsed)}.')
    print(f'Estimate time to finish: {utils.format_time(ETA)}.')

# Runs `main_computation` for every pending flight on a pool of worker threads.
with concurrent.futures.ThreadPoolExecutor(max_workers=num_cores) as executor:
    futures = [executor.submit(main_computation, i, flight_id) for i, flight_id in enumerate(flight_ids)]
    # Remember which flight each future belongs to so outcomes can be attributed
    flight_of_future = dict(zip(futures, flight_ids))

    # The tracking file is read at start-up to skip flights that are already done;
    # record each flight here as soon as its computation finishes successfully.
    # Only this (main) thread writes to the file.
    with open(tracking_file, 'a') as tracking:
        for future in concurrent.futures.as_completed(futures):
            flight_id = flight_of_future[future]
            try:
                # Re-raises any exception that happened inside the worker
                future.result()
            except Exception as e:
                # Best effort: report the failing flight and carry on with the others
                print(f"An error occurred for flight {flight_id}: {e}")
            else:
                tracking.write(f'{flight_id}\n')
                # Flush so progress survives an interrupted run
                tracking.flush()

import os
# The per-flight CSV files are written next to the flights database and share its
# file name (without extension) as a prefix, so look for them there.
csv_directory = os.path.dirname(flights_database) or '.'
csv_prefix = os.path.splitext(os.path.basename(flights_database))[0]
csv_files = sorted(
    os.path.join(csv_directory, f)
    for f in os.listdir(csv_directory)
    if f.startswith(csv_prefix) and f.endswith('.csv')
)

# Columns written to the table, in the order used by the insert statement
upsert_columns = ['vector_id'] + new_columns

# Insert a row per state vector; if the vector_id is already present, overwrite
# its weather columns instead. The statement is the same for every row.
insert_query = f'''
    INSERT INTO state_vector_weather ({', '.join(upsert_columns)})
    VALUES ({', '.join('?' * len(upsert_columns))})
    ON CONFLICT(vector_id) DO UPDATE SET
    {', '.join([f"{col} = excluded.{col}" for col in new_columns])};
'''

flights_connection = sqlite3.connect(flights_database)
try:
    cursor = flights_connection.cursor()

    for file in csv_files:
        state_vectors_weather = pd.read_csv(file, index_col=0)
        clear_output(wait=True)
        print(file)
        # Plain tuples of Python scalars, one per row, in `upsert_columns` order
        rows = state_vectors_weather[upsert_columns].itertuples(index=False, name=None)
        cursor.executemany(insert_query, rows)
        # Committing changes to the database after each file
        flights_connection.commit()
finally:
    # Release the database even if one of the files could not be loaded
    flights_connection.close()

../data/flight/KDEN_KSEA_2023-01-01_2023-01-31_a43458_1674787841_1674798355_KDEN_KSEA.csv


## Checking databases, no need to run

In [25]:
import os
import pandas as pd
import sqlite3

def check_table(connection, table_name):
    """Report whether a table exists, has rows, and holds some non-null data.

    Args:
        connection: Open `sqlite3` connection to the database to inspect.
        table_name: Name of the table to look for.

    Returns:
        True when the table is listed in `sqlite_master`, contains at least one
        row, and a random sample of up to 10 rows is not made up entirely of
        nulls. False otherwise, including when the sample cannot be read.
    """
    # Quote the name as an identifier (doubling embedded quotes) so names with
    # spaces or other special characters can be queried.
    quoted_name = '"' + table_name.replace('"', '""') + '"'

    cursor = connection.cursor()
    try:
        # Check if table exists; the name is bound as a value, not spliced into the SQL
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=?;", (table_name,))
        if cursor.fetchone() is None:
            return False

        # Check if table has more than 0 rows
        cursor.execute(f"SELECT COUNT(*) FROM {quoted_name};")
        if cursor.fetchone()[0] == 0:
            return False
    finally:
        cursor.close()

    # Check if 10 random rows are not just 'NaN's
    try:
        df_sample = pd.read_sql_query(f"SELECT * FROM {quoted_name} ORDER BY RANDOM() LIMIT 10;", connection)
        if df_sample.isnull().all().all():
            return False
    except Exception as e:
        # Best effort: a table that cannot be sampled is reported as not usable
        print(f"An error occurred: {e}")
        return False

    return True

# Every .sqlite file found in the flight data folder
db_files = [
    '../data/flight/' + entry
    for entry in os.listdir('../data/flight/')
    if entry.endswith('.sqlite')
]

# Tables whose presence and content are reported for each database
tables = ['flights', 'state_vectors', 'state_vector_weather', 'state_vector_fuel', 'flights_integrals', 'flights_aircraft', 'optimal_flights', 'optimal_state_vector_weather', 'optimal_flights_integrals']

# Width of the table-name column: longest name plus some padding
name_width = max(len(t) for t in tables) + 5
separator = '-----------------------------'

for file in db_files:
    print(separator)
    conn = sqlite3.connect(file)
    print(file)
    for table in tables:
        status = 'True' if check_table(conn, table) else 'False'
        print(f"{table.ljust(name_width)}{status}")

    conn.close()
    print(separator)

-----------------------------
../data/flight/KDEN_KSEA_2023-01-01_2023-01-31.sqlite
flights                          True
state_vectors                    True
state_vector_weather             True
state_vector_fuel                True
flights_integrals                True
flights_aircraft                 True
optimal_flights                  True
optimal_state_vector_weather     True
optimal_flights_integrals        False
-----------------------------
-----------------------------
../data/flight/KLAX_KJFK_2023-01-01_2023-01-31.sqlite
flights                          True
state_vectors                    True
state_vector_weather             True
state_vector_fuel                True
flights_integrals                True
flights_aircraft                 False
optimal_flights                  False
optimal_state_vector_weather     False
optimal_flights_integrals        False
-----------------------------
-----------------------------
../data/flight/KSEA_KDEN_2023-07-01_2023-07-31.sqlite

In [24]:
# Preview the first ten rows of the optimal_state_vector_weather table
preview_query = 'SELECT * FROM optimal_state_vector_weather LIMIT 10;'

conn = sqlite3.connect(flights_database)
o_sv_w = pd.read_sql_query(preview_query, conn)
conn.close()

o_sv_w

Unnamed: 0,flight_id,tmpf,air_pressure,air_density,clouds,sknt,severity
0,a44565_1673808290_1673816496_KDEN_KSEA,34.975499,827.408322,1.048916,0.26,8.4604,0.0
1,a44565_1673808290_1673816496_KDEN_KSEA,34.86313,826.61592,1.048029,0.260433,8.652349,0.0
2,a44565_1673808290_1673816496_KDEN_KSEA,34.75076,825.823517,1.047142,0.260867,8.844297,0.0
3,a44565_1673808290_1673816496_KDEN_KSEA,34.63839,825.031115,1.046255,0.2613,9.036246,0.0
4,a44565_1673808290_1673816496_KDEN_KSEA,34.526021,824.238712,1.045368,0.261733,9.228195,0.0
5,a44565_1673808290_1673816496_KDEN_KSEA,34.413651,823.44631,1.044481,0.262167,9.420143,0.0
6,a44565_1673808290_1673816496_KDEN_KSEA,34.301281,822.653907,1.043593,0.2626,9.612092,0.0
7,a44565_1673808290_1673816496_KDEN_KSEA,34.188911,821.861504,1.042706,0.263033,9.804041,0.0
8,a44565_1673808290_1673816496_KDEN_KSEA,34.076542,821.069102,1.041819,0.263467,9.995989,0.0
9,a44565_1673808290_1673816496_KDEN_KSEA,33.964172,820.276699,1.040932,0.2639,10.187938,0.0


In [23]:
# Displays the path of the flights database currently selected
flights_database

'../data/flight/KDEN_KSEA_2023-01-01_2023-01-31.sqlite'

In [21]:
# Closes the connection held in `conn`.
# NOTE(review): the cells above that open `conn` also close it themselves, so this
# looks like a leftover clean-up cell - confirm before removing.
conn.close()