# Usage of `src.analysis.weather_interpolator` module.

This notebook outlines the basic usage of the `src.analysis.weather_interpolator` module, which is used to interpolate weather conditions at any time, lon, lat, and elevation.
 
**Requirements**
 - A csv with weather station data for the whole country for a given time interval
 
**Helpful Links**


## Basic Setup

In [1]:
import sys
# This variable should indicate the path from this Jupyter Notebook to the root directory of the repo.
root_path = '../'
# Adds the repo's root to the list of paths
sys.path.append(root_path)

# Package to read yml files
import yaml
# Package to handle file paths
import os
# Package to deal with DataFrames
import pandas as pd
# Package to plot stuff
import matplotlib.pyplot as plt
# Package for numerical and array handling
import numpy as np
# Package to read from and write to SQLite databases
import sqlite3

# Function to clear output from jupyter notebook
from IPython.display import clear_output
# Package for compressing dataframes into file
from src.data import compressors
# Package for defining and fitting weather models
from src.models import weather
# Utilities package
from src.common import utils
# Package for interpolating and estimating weather
from src.analysis import weather_interpolator

%load_ext autoreload
%autoreload 2

# Normalise the notebook -> repo-root path defined at the top of this cell.
root_path = os.path.normpath(root_path)
# Location of the tutorial config, expressed relative to the repo root.
config_path_from_root = os.path.normpath('config/config_tutorial.yml')
# Full path from this notebook to the config file.
config_path = os.path.join(root_path, config_path_from_root)

# Parse the YAML config into the `config` dictionary used by the rest of the notebook.
with open(config_path, mode='r', encoding='utf8') as file:
    config = yaml.safe_load(file)

# Defining "clear-output" function to feed into logger
def clear():
    """Clear the output of the current notebook cell.

    Thin wrapper around ``IPython.display.clear_output`` called with
    ``wait=True``, so it can be handed around as a zero-argument callback.
    """
    clear_output(wait=True)

# Creates an instance of a logger class to log all that happens, optional (but encouraged).
# NOTE(review): `clear` is defined just above "to feed into logger", yet None is passed
# here, so the logger receives no clear function. Confirm whether that is intentional.
logger = utils.Logger(config, clear_function=None)

# Creates an instance of the weather interpolator, sharing the config and the logger.
interpolator = weather_interpolator.WeatherInterpolator(config, logger=logger)

# Location of the data, built from `root_path` so the databases are still found
# when `root_path` is changed to match where this notebook lives.
flights_database = os.path.join(root_path, 'data', 'flight', 'KDEN_KSEA_2023-01-01_2023-01-31.sqlite')
weather_database = os.path.join(root_path, 'data', 'weather', '1673827200_1685923200.sqlite')

## Interpolating weather at specific time, lat, lon, and elevation

By default, the interpolator will look for data in the weather out-dir specified in the config file, unless the data is passed to it directly.

The interpolator will also calibrate the weather data, unless an argument specifies that it has already been calibrated.

Weather model calibration happens inside the estimation function.

The target needs to be a dictionary with the necessary parameters (`lon`, `lat`, `time`, `elevation`); in this case we'll use the average values of a randomly selected flight's state vectors.

## Estimating scalar at given target location and time

In [2]:
# Open both databases up front; the try/finally below guarantees they are closed
# even when one of the queries raises.
flights_connection = sqlite3.connect(flights_database)
weather_connection = sqlite3.connect(weather_database)

try:
    # Pick one flight at random to use as the worked example.
    # NOTE: because of RANDOM(), the selected flight (and therefore every output
    # below) changes from run to run.
    flight_id = pd.read_sql_query(
        "SELECT flight_id FROM flights ORDER BY RANDOM() LIMIT 1;",
        flights_connection,
    ).values[0, 0]

    # The id is bound as a query parameter instead of being pasted into the SQL text.
    # Besides avoiding quoting problems, this stops relying on SQLite's legacy
    # acceptance of double-quoted text ("...") as a string literal.
    flight_params = (str(flight_id),)

    # One aggregate query returns every summary value needed for the target point
    # and for the time window of the weather query.
    flight_summary = pd.read_sql_query(
        """
        SELECT AVG(time) AS avg_time,
               MAX(time) AS max_time,
               MIN(time) AS min_time,
               AVG(lat) AS avg_lat,
               AVG(lon) AS avg_lon,
               AVG(geoaltitude) AS avg_geoaltitude
        FROM state_vectors
        JOIN flights ON flights.flight_id = state_vectors.flight_id
        WHERE state_vectors.flight_id = ?;
        """,
        flights_connection,
        params=flight_params,
    ).iloc[0]

    mean_time = flight_summary['avg_time']
    max_time = flight_summary['max_time']
    min_time = flight_summary['min_time']
    mean_latitude = flight_summary['avg_lat']
    mean_longitude = flight_summary['avg_lon']
    mean_geoaltitude = flight_summary['avg_geoaltitude']

    # Point (in space and time) at which the scalars are estimated in the next cells.
    target = {'lon': mean_longitude,
              'lat': mean_latitude,
              'time': mean_time,
              'elevation': mean_geoaltitude,
              }

    # Widen the flight's time span by the configured threshold on both sides.
    time_thresh = config['statistics']['interpolation']['weather']['time-thresh']
    min_time -= time_thresh
    max_time += time_thresh

    # Station records (joined with station metadata) inside the widened window.
    # float() turns the numpy scalars into plain Python numbers that sqlite3 can bind.
    flight_weather_data = pd.read_sql_query(
        """
        SELECT ws.lat, ws.lon, ws.elevation, ws.sigma, wd.*
        FROM weather_data as wd
        JOIN weather_stations as ws ON ws.station_id = wd.station_id
        WHERE wd.time BETWEEN ? AND ?;
        """,
        weather_connection,
        params=(float(min_time), float(max_time)),
    )

    # All (de-duplicated) state vectors of the selected flight.
    state_vectors = pd.read_sql_query(
        """
        SELECT DISTINCT state_vectors.*
        FROM state_vectors
        JOIN flights ON flights.flight_id = state_vectors.flight_id
        WHERE state_vectors.flight_id = ?;
        """,
        flights_connection,
        params=flight_params,
    )
finally:
    flights_connection.close()
    weather_connection.close()

In [3]:
%%time
# Estimate a single scalar ('tmpf') at the target point, using the station records
# loaded for this flight's time window instead of the interpolator's default data.
# NOTE(review): 'tmpf' is presumably air temperature in °F; confirm against the weather schema.
interpolator.estimate_scalars(target, ['tmpf'], stations_data=flight_weather_data)


CPU times: user 1.66 s, sys: 290 ms, total: 1.95 s
Wall time: 2.42 s


array([-62.82083594])

In [4]:
%%time
# Same target and station data as before, now requesting only 'air_pressure'.
# NOTE(review): the unit of the returned pressure is not stated in this notebook; confirm.
interpolator.estimate_scalars(target, ['air_pressure'], stations_data=flight_weather_data)


CPU times: user 121 ms, sys: 7.99 ms, total: 129 ms
Wall time: 130 ms


array([262.95534452])

In [5]:
%%time
# Same target and station data as before, now requesting only 'air_density'.
# NOTE(review): the unit of the returned density is not stated in this notebook; confirm.
interpolator.estimate_scalars(target, ['air_density'], stations_data=flight_weather_data)


CPU times: user 114 ms, sys: 3.29 ms, total: 117 ms
Wall time: 117 ms


array([0.41547248])

In [6]:
%%time
# Same target and station data as before, now requesting only 'clouds'.
# NOTE(review): the meaning/scale of the 'clouds' value is not stated in this notebook; confirm.
interpolator.estimate_scalars(target, ['clouds'], stations_data=flight_weather_data)


CPU times: user 51.2 ms, sys: 2.52 ms, total: 53.7 ms
Wall time: 55.7 ms


array([0.])

In [7]:
%%time
# Several scalars can be requested in one call; the returned array holds one value
# per requested name, in the order the names are given.
interpolator.estimate_scalars(target, ['tmpf', 'air_pressure', 'air_density', 'clouds', 'sknt'], stations_data=flight_weather_data)


CPU times: user 149 ms, sys: 2.23 ms, total: 151 ms
Wall time: 152 ms


array([-62.82083594, 262.95534452,   0.41547248,   0.        ,
       151.6994851 ])

In [8]:
%%time
# Compute the listed weather quantities for every state vector of the flight; the
# displayed frame carries them as additional columns.
# NOTE(review): this rebinds `state_vectors` to the returned frame, so re-running the
# cell feeds the already-augmented frame back in. Reload the state vectors first when
# a clean re-run is needed.
state_vectors = interpolator.compute_flight_weather_quantities(['tmpf', 'air_pressure', 'air_density', 'clouds', 'sknt'], state_vectors, stations_data=flight_weather_data)
state_vectors

CPU times: user 1min 5s, sys: 621 ms, total: 1min 6s
Wall time: 1min 10s


Unnamed: 0,vector_id,flight_id,time,time_normalized,lat,lon,geoaltitude,baroaltitude,heading,velocity,tmpf,air_pressure,air_density,clouds,sknt
0,1372675,abd57e_1673794395_1673802586_KDEN_KSEA,1673794396,0,39.878230,-104.696411,1699.260000,1828.800000,1.645977,89.550206,35.488779,826.091215,1.046165,0.0,3.460374
1,1372676,abd57e_1673794395_1673802586_KDEN_KSEA,1673794397,1,39.879025,-104.696373,1712.457516,1841.997516,1.644831,89.612938,35.377975,825.153133,1.045198,0.0,3.646796
2,1372677,abd57e_1673794395_1673802586_KDEN_KSEA,1673794398,2,39.879820,-104.696335,1725.655032,1855.195032,1.643684,89.675671,35.267170,824.215051,1.044231,0.0,3.833217
3,1372678,abd57e_1673794395_1673802586_KDEN_KSEA,1673794399,3,39.880615,-104.696296,1738.852549,1868.392549,1.642537,89.738404,35.156365,823.276969,1.043265,0.0,4.019639
4,1372679,abd57e_1673794395_1673802586_KDEN_KSEA,1673794400,4,39.881410,-104.696258,1752.050065,1881.590065,1.641390,89.801137,35.045560,822.338887,1.042298,0.0,4.206060
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8185,1380860,abd57e_1673794395_1673802586_KDEN_KSEA,1673802581,8185,47.460104,-122.318077,103.511591,294.011591,180.464005,63.669381,44.721939,991.296702,1.232399,0.0,4.913564
8186,1380861,abd57e_1673794395_1673802586_KDEN_KSEA,1673802582,8186,47.459486,-122.318086,102.398693,292.898693,180.469390,62.928679,44.721939,991.296702,1.232399,0.0,4.913564
8187,1380862,abd57e_1673794395_1673802586_KDEN_KSEA,1673802583,8187,47.458868,-122.318096,101.285796,291.785796,180.474775,62.187976,44.721939,991.296702,1.232399,0.0,4.913564
8188,1380863,abd57e_1673794395_1673802586_KDEN_KSEA,1673802584,8188,47.458250,-122.318106,100.172898,290.672898,180.480161,61.447274,44.721939,991.296702,1.232399,0.0,4.913564


## Looping through flight_ids and computing weather

In [None]:
# Computes the weather quantities for every flight in the flights database and stores
# them, keyed by vector_id, in the `state_vector_weather` table of that same database.
# The cell is safe to re-run: rows are written with INSERT OR REPLACE, so an interrupted
# run can simply be started again.
flights_connection = sqlite3.connect(flights_database)
weather_connection = sqlite3.connect(weather_database)

time_thresh = config['statistics']['interpolation']['weather']['time-thresh']

# Weather quantities computed for each state vector (also the columns of the new table).
new_columns = ['tmpf', 'air_pressure', 'air_density', 'clouds', 'sknt']

# Flights that have no state vectors and were therefore skipped.
skipped_flight_ids = []

try:
    flight_ids = pd.read_sql_query("SELECT flight_id FROM flights;", flights_connection).values[:, 0]

    cursor = flights_connection.cursor()

    # Create the new table if it doesn't exist
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS state_vector_weather (
            vector_id INTEGER PRIMARY KEY,
            tmpf REAL, 
            air_pressure REAL, 
            air_density REAL, 
            clouds REAL, 
            sknt REAL
        );
    ''')
    flights_connection.commit()

    # The schema change does not depend on the flight, so it is attempted once here
    # instead of once per loop iteration.
    # NOTE(review): the computed values are written to `state_vector_weather`, not to
    # these columns of `state_vectors`; confirm the extra columns are still wanted.
    print('Adding new columns')
    for col in new_columns:
        try:
            print(f"Attempting to add column '{col}' to 'state_vectors'.")
            cursor.execute(f"ALTER TABLE state_vectors ADD COLUMN {col} REAL;")
            flights_connection.commit()
            print(f"Column '{col}' added successfully.")
        except sqlite3.OperationalError as e:
            print(f"Error adding column '{col}': {e}")
            # If the error message is not about the column existing, re-raise the exception
            if 'duplicate column name' not in str(e).lower():
                raise
            print(f"Column '{col}' already exists.")

    # INSERT OR REPLACE keeps the write idempotent: a vector_id that is already stored
    # (re-run, or duplicated state vector) is overwritten instead of violating the
    # primary key.
    weather_columns = ['vector_id'] + new_columns
    insert_statement = (
        f"INSERT OR REPLACE INTO state_vector_weather ({', '.join(weather_columns)}) "
        f"VALUES ({', '.join('?' * len(weather_columns))});"
    )

    for i, flight_id in enumerate(flight_ids):
        clear_output(wait=True)
        print(f'{flight_id} | {i}/{len(flight_ids)}')

        # Bound as a parameter rather than interpolated into the SQL text.
        flight_params = (str(flight_id),)

        print('Loading time limits')
        max_time, min_time = pd.read_sql_query(
            """
            SELECT MAX(time) as max_time, MIN(time) as min_time
            FROM state_vectors
            JOIN flights ON flights.flight_id = state_vectors.flight_id
            WHERE state_vectors.flight_id = ?;
            """,
            flights_connection,
            params=flight_params,
        ).values[0]

        # MAX/MIN are NULL when the flight has no state vectors: nothing to compute.
        if pd.isna(max_time) or pd.isna(min_time):
            skipped_flight_ids.append(flight_id)
            continue

        min_time -= time_thresh
        max_time += time_thresh

        print('Loading relevant weather data')
        # float() turns the numpy scalars into plain Python numbers that sqlite3 can bind.
        flight_weather_data = pd.read_sql_query(
            """
            SELECT ws.lat, ws.lon, ws.elevation, ws.sigma, wd.*
            FROM weather_data as wd
            JOIN weather_stations as ws ON ws.station_id = wd.station_id
            WHERE wd.time BETWEEN ? AND ?;
            """,
            weather_connection,
            params=(float(min_time), float(max_time)),
        )

        print('Loading state vectors')
        state_vectors = pd.read_sql_query(
            """
            SELECT DISTINCT state_vectors.*
            FROM state_vectors
            JOIN flights ON flights.flight_id = state_vectors.flight_id
            WHERE state_vectors.flight_id = ?;
            """,
            flights_connection,
            params=flight_params,
        )

        print('Computing weather values')
        state_vectors = interpolator.compute_flight_weather_quantities(
            list(new_columns), state_vectors, stations_data=flight_weather_data
        )

        print("Adding newly calculated values")
        # itertuples(name=None) yields plain tuples of Python scalars, ready for binding.
        cursor.executemany(
            insert_statement,
            state_vectors[weather_columns].itertuples(index=False, name=None),
        )

        flights_connection.commit()  # Commit once per flight so finished flights survive an interruption
finally:
    # Always release both database handles, even if a flight fails midway.
    flights_connection.close()
    weather_connection.close()

if skipped_flight_ids:
    print(f'Skipped {len(skipped_flight_ids)} flight(s) without state vectors: {skipped_flight_ids}')


ada167_1672524657_1672534041_KDEN_KSEA | 0/649
Loading time limits
Loading relevant weather data


In [None]:
# flights_connection.close()
# weather_connection.close()

In [None]:
# # Save database that now has the new columns somehow
# flights_connection.close()
# weather_connection.close()

In [None]:
# %%time
# scalars = ['tmpf', 'air_pressure', 'air_density', 'clouds']
# scalar_values = {scalar: np.repeat(np.nan, len(state_vectors)) for scalar in scalars}
# step = config['statistics']['interpolation']['flights']['step']
# for i, row in state_vectors.iloc[::step].iterrows():
#     clear()
#     print(f'{i}/{len(state_vectors)}')
#     target = {
#         'lon': row['lon'],
#         'lat': row['lat'],
#         'timestamp': row['time'],
#         'elevation': row['geoaltitude'],
#          }
#     values = interpolator.estimate_scalars(target, scalars, stations_data=all_stations_data)
#     for j, scalar in enumerate(scalars):
#         scalar_values[scalar][i] = values[j]
# for scalar in scalars:
#     state_vectors[scalar] = scalar_values[scalar]
#     state_vectors[scalar] = state_vectors[scalar].interpolate(method='linear')

# state_vectors

In [None]:
# compressed = pd.read_csv('tutorial_data/state_vectors_compressed.csv', index_col = 0)
# compressed['geolatitude'].plot()

In [None]:
# import os
# large_df = None
# path = '../data/flight/KDEN_KSEA/state_vectors/'
# files = [path + f for f in os.listdir(path) if f.endswith('.csv')]
# for file in files:
#     temp_df = pd.read_csv(file, index_col = 0)
#     temp_df['icao24'] = [file.split('/')[-1].split('_')[0]]*len(temp_df)
#     if large_df is None:
#         large_df = temp_df.copy()
#     else:
#         large_df = pd.concat([large_df, temp_df.copy()])


In [None]:
# large_df.to_csv('../data/flight/KDEN_KSEA/all_flights.csv')

In [None]:
# state_vectors

In [None]:
# flights_connection = sqlite3.connect(flights_database)
# cursor = flights_connection.cursor()

# # Create a new table with unique entries
# cursor.execute('''
# CREATE TABLE unique_state_vectors AS 
# SELECT * FROM state_vectors 
# GROUP BY vector_id;
# ''')

# flights_connection.commit()

# # Drop the old table
# cursor.execute('DROP TABLE state_vectors;')

# # Rename the new table to the original name
# cursor.execute('ALTER TABLE unique_state_vectors RENAME TO state_vectors;')

# flights_connection.commit()

# flights_connection.close()

In [None]:
# flights_connection.commit()

In [None]:
# flights_connection = sqlite3.connect(flights_database)
# cursor = flights_connection.cursor()

# # SQL to create a new table with unique entries, keeping the most recent row for each vector_id
# cursor.execute('''
# CREATE TABLE unique_state_vectors AS 
# SELECT * 
# FROM state_vectors 
# WHERE rowid IN (
#     SELECT MAX(rowid) 
#     FROM state_vectors 
#     GROUP BY vector_id
# );
# ''')

# flights_connection.commit()

# # Drop the old table
# cursor.execute('DROP TABLE state_vectors;')

# # Rename the new table to the original name
# cursor.execute('ALTER TABLE unique_state_vectors RENAME TO state_vectors;')

# flights_connection.commit()
# flights_connection.close()
