# Usage of `src.analysis.weather_interpolator` module.

This notebook outlines the basic usage of the `src.analysis.weather_interpolator` module, which is used to interpolate weather conditions at any time, longitude, latitude, and elevation.
 
**Requirements**
 - A csv with weather station data for the whole country for a given time interval
 
**Helpful Links**


## Basic Setup

In [None]:
import sys
# This variable should indicate the path from this Jupyter Notebook to the root directory of the repo.
root_path = '../'
# Adds the repo's root to the list of paths
sys.path.append(root_path)

# Package to read yml files
import yaml
# Package to handle file paths
import os
# Package to deal with DataFrames
import pandas as pd
# Package to plot stuff
import matplotlib.pyplot as plt
# Package for numerical and array handling
import numpy as np
#
import sqlite3

# Function to clear output from jupyter notebook
from IPython.display import clear_output
# Package for compressing dataframes into file
from src.data import compressors
# Package for defining and fitting weather models
from src.models import weather
# Utilities package
from src.common import utils
# Package for interpolating and estimating weather
from src.analysis import weather_interpolator

%load_ext autoreload
%autoreload 2

# Normalise the notebook-relative root path set above (e.g. '../' becomes '..') and build the config path from it.
root_path = os.path.normpath(root_path) # Path from this notebook to the root directory
config_path_from_root = os.path.normpath('config/config_tutorial.yml') # Path from root to the desired config file
config_path = os.path.join(root_path, config_path_from_root) # Path from this notebook to the config file

# Loading config file; `config` is later passed to the logger and to the weather interpolator.
with open(config_path, 'r',  encoding='utf8') as file:
    config = yaml.safe_load(file)

# Defining "clear-output" function to feed into logger
def clear() -> None:
    """Clear the current cell's output via IPython's `clear_output(wait=True)`."""
    clear_output(wait=True)

# Creates an instance of a logger class to log all that happens, optional (but encouraged).
# NOTE(review): the `clear` helper defined above is not passed here (clear_function=None) -- confirm this is intended.
logger = utils.Logger(config, clear_function=None)

# Creates an instance of the weather interpolator, sharing the same config and logger
interpolator = weather_interpolator.WeatherInterpolator(config, logger=logger)

# Defining location of data
# These are literal paths relative to the notebook's working directory; they are not derived from `root_path`.
flights_database = '../data/flight/KDEN_KSEA_2023-01-01_2023-01-31.sqlite'
weather_database = '../data/weather/1673827200_1685923200.sqlite'

## Interpolating weather at specific time, lat, lon, and elevation

By default, the interpolator will look for data in the weather out-dir specified in the config file, unless it is given the data directly.

The interpolator will also calibrate the weather data, unless an argument specifies that it has already been calibrated.

Weather model calibration happens inside the estimation function.

Target needs to be a dictionary with the necessary parameters; in this case we'll use the average values from the weather stations info.

## Estimating scalar at given target location and time

In [None]:
# Loads everything the example cells below need for ONE randomly chosen flight:
#   - `target`: the flight's mean lon / lat / time / geoaltitude,
#   - `flight_weather_data`: weather rows inside the flight's padded time and lat/lon window,
#   - `state_vectors`: the flight's distinct state vectors.
flights_connection = sqlite3.connect(flights_database)
weather_connection = sqlite3.connect(weather_database)

# The try/finally guarantees both connections are closed even when a query fails.
try:
    # Pick one random flight to use as the example.
    flight_id = pd.read_sql_query(
        "SELECT flight_id FROM flights ORDER BY RANDOM() LIMIT 1;",
        flights_connection,
    ).values[0, 0]

    # A single pass over the flight's state vectors yields every aggregate used below.
    # The flight id is bound as a text parameter (the same comparison the quoted literal
    # performed) so ids containing quote characters cannot break the statement.
    flight_stats = pd.read_sql_query(
        """
        SELECT AVG(time) as avg_time, MAX(time) as max_time, MIN(time) as min_time,
               AVG(lat) as avg_lat, MAX(lat) as max_lat, MIN(lat) as min_lat,
               AVG(lon) as avg_lon, MAX(lon) as max_lon, MIN(lon) as min_lon,
               AVG(geoaltitude) as avg_geoaltitude
        FROM state_vectors
        JOIN flights ON flights.flight_id = state_vectors.flight_id
        WHERE state_vectors.flight_id = ?;
        """,
        flights_connection,
        params=(str(flight_id),),
    ).values[0]

    (mean_time, max_time, min_time,
     mean_latitude, max_latitude, min_latitude,
     mean_longitude, max_longitude, min_longitude,
     mean_geoaltitude) = flight_stats

    # Interpolation target: the flight's mean position, time and altitude.
    target = {'lon': mean_longitude,
              'lat': mean_latitude,
              'time': mean_time,
              'elevation': mean_geoaltitude,
              }

    # Widen the time window by the configured threshold on both sides.
    time_thresh = config['statistics']['interpolation']['weather']['time-thresh']
    min_time -= time_thresh
    max_time += time_thresh

    # Pad the lat/lon bounding box by 10% of the flight's extent on each side.
    latitude_range = max_latitude - min_latitude
    longitude_range = max_longitude - min_longitude

    max_latitude += latitude_range*0.1
    min_latitude -= latitude_range*0.1
    max_longitude += longitude_range*0.1
    min_longitude -= longitude_range*0.1

    # Weather observations (joined with their station metadata) inside the padded window.
    flight_weather_data = pd.read_sql_query(f"""
                                    SELECT ws.lat, ws.lon, ws.elevation, ws.sigma, wd.*
                                    FROM weather_data as wd
                                    JOIN weather_stations as ws ON ws.station_id = wd.station_id
                                    WHERE wd.time BETWEEN {min_time} AND {max_time}
                                    AND ws.lat BETWEEN {min_latitude} AND {max_latitude}
                                    AND ws.lon BETWEEN {min_longitude} AND {max_longitude};
                                   """,
                                   weather_connection
                                    )

    # Distinct state vectors of the chosen flight.
    state_vectors = pd.read_sql_query(
        """
        SELECT DISTINCT state_vectors.*
        FROM state_vectors
        JOIN flights ON flights.flight_id = state_vectors.flight_id
        WHERE state_vectors.flight_id = ?;
        """,
        flights_connection,
        params=(str(flight_id),),
    )
finally:
    flights_connection.close()
    weather_connection.close()

In [None]:
# Number of weather rows returned by the time / lat / lon window query above.
len(flight_weather_data)

In [None]:
%%time
# Estimate the 'tmpf' scalar at `target`, passing the pre-loaded `flight_weather_data` as the station data.
interpolator.estimate_scalars(target, ['tmpf'], stations_data=flight_weather_data)


In [None]:
%%time
# Estimate the 'air_pressure' scalar at `target`, passing the pre-loaded `flight_weather_data` as the station data.
interpolator.estimate_scalars(target, ['air_pressure'], stations_data=flight_weather_data)


In [None]:
%%time
# Estimate the 'air_density' scalar at `target`, passing the pre-loaded `flight_weather_data` as the station data.
interpolator.estimate_scalars(target, ['air_density'], stations_data=flight_weather_data)


In [None]:
%%time
# Estimate the 'clouds' scalar at `target`, passing the pre-loaded `flight_weather_data` as the station data.
interpolator.estimate_scalars(target, ['clouds'], stations_data=flight_weather_data)


In [None]:
%%time
# Estimate the 'severity' scalar at `target`, passing the pre-loaded `flight_weather_data` as the station data.
interpolator.estimate_scalars(target, ['severity'], stations_data=flight_weather_data)


In [None]:
%%time
target = {
    'lon': state_vectors.iloc[0]['lon'],
    'lat': state_vectors.iloc[0]['lat'],
    'time': state_vectors.iloc[0]['time'],
    'elevation': state_vectors.iloc[0]['geoaltitude']
}
interpolator.estimate_scalars(target, ['tmpf', 'air_pressure', 'air_density', 'clouds', 'sknt', 'severity'], stations_data=flight_weather_data)


In [None]:
%%time
# Compute the six weather quantities for `state_vectors` in a single call, passing the pre-loaded
# `flight_weather_data` as the station data. The returned frame replaces `state_vectors` and is displayed.
state_vectors = interpolator.compute_flight_weather_quantities(['tmpf', 'air_pressure', 'air_density', 'clouds', 'sknt', 'severity'], state_vectors, stations_data=flight_weather_data, debug=False)
state_vectors


In [None]:
# Rough total runtime in hours: number of flights x 84 s (presumably ~1 min 24 s per flight), divided by 3600.
# NOTE: `flight_ids` is only assigned in the looping cell further down; run that cell first or this raises NameError.
len(flight_ids)*(1*60 + 24)/60/60

## Looping through flight_ids and computing weather

This code also creates a new state_vector_weather table to save the values

In [None]:
# For every flight in the flights database: load the weather rows inside the flight's time
# window, compute the weather quantities for its state vectors, and upsert them into the
# `state_vector_weather` table (one row per `vector_id`).
flights_connection = sqlite3.connect(flights_database)
weather_connection = sqlite3.connect(weather_database)

# The try/finally guarantees both connections are closed even if a flight fails midway
# (or the cell is interrupted).
try:
    flight_ids = pd.read_sql_query("SELECT flight_id FROM flights;", flights_connection).values[:,0]
    time_thresh = config['statistics']['interpolation']['weather']['time-thresh']

    new_columns = ['tmpf', 'air_pressure', 'air_density', 'clouds', 'sknt', 'severity']

    cursor = flights_connection.cursor()

    # Create the new table if it doesn't exist.
    # There must be no comma after the last column definition: a trailing comma makes
    # SQLite reject the whole statement with a syntax error.
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS state_vector_weather (
            vector_id INTEGER PRIMARY KEY,
            tmpf REAL,
            air_pressure REAL,
            air_density REAL,
            clouds REAL,
            sknt REAL,
            severity REAL
        );
    ''')
    flights_connection.commit()

    # Adding the columns to `state_vectors` does not depend on the flight, so it is done
    # once here instead of being retried (and failing with "duplicate column") per flight.
    print('Adding new columns')
    for col in new_columns:
        try:
            print(f"Attempting to add column '{col}' to 'state_vectors'.")
            cursor.execute(f"ALTER TABLE state_vectors ADD COLUMN {col} REAL;")
            flights_connection.commit()
            print(f"Column '{col}' added successfully.")
        except sqlite3.OperationalError as e:
            print(f"Error adding column '{col}': {e}")
            # If the error message is not about the column existing, re-raise the exception
            if 'duplicate column name' not in str(e).lower():
                raise
            print(f"Column '{col}' already exists.")

    # Insert a row per vector_id, or overwrite the stored values if the row already exists.
    upsert_query = """
        INSERT INTO state_vector_weather (vector_id, tmpf, air_pressure, air_density, clouds, sknt, severity) 
        VALUES (?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(vector_id) DO UPDATE SET
        tmpf = excluded.tmpf, 
        air_pressure = excluded.air_pressure,
        air_density = excluded.air_density, 
        clouds = excluded.clouds, 
        sknt = excluded.sknt,
        severity = excluded.severity;
    """

    for i, flight_id in enumerate(flight_ids):
        clear_output(wait=True)
        print(f'{flight_id} | {i}/{len(flight_ids)}')

        print('Loading time limits')
        # The flight id is bound as a text parameter (the same comparison the quoted
        # literal performed) so ids containing quote characters cannot break the statement.
        max_time, min_time = pd.read_sql_query(
            """
            SELECT MAX(time) as max_time, MIN(time) as min_time
            FROM state_vectors
            JOIN flights ON flights.flight_id = state_vectors.flight_id
            WHERE state_vectors.flight_id = ?;
            """,
            flights_connection,
            params=(str(flight_id),),
        ).values[0]

        # Widen the time window by the configured threshold on both sides.
        min_time -= time_thresh
        max_time += time_thresh

        print('Loading relevant weather data')
        flight_weather_data = pd.read_sql_query(f"""
                                        SELECT ws.lat, ws.lon, ws.elevation, ws.sigma, wd.*
                                        FROM weather_data as wd
                                        JOIN weather_stations as ws ON ws.station_id = wd.station_id
                                        WHERE wd.time BETWEEN {min_time} AND {max_time};
                                       """,
                                       weather_connection
                                        )

        print('Loading state vectors')
        state_vectors = pd.read_sql_query(
            """
            SELECT DISTINCT state_vectors.*
            FROM state_vectors
            JOIN flights ON flights.flight_id = state_vectors.flight_id
            WHERE state_vectors.flight_id = ?;
            """,
            flights_connection,
            params=(str(flight_id),),
        )

        print('Computing weather values')
        # A fresh copy of the column list is passed on each call, as the original literal was.
        state_vectors = interpolator.compute_flight_weather_quantities(list(new_columns), state_vectors, stations_data=flight_weather_data)

        print("Adding newly calculated values")
        # One batched upsert per flight; itertuples yields plain tuples ordered as
        # (vector_id, *new_columns), matching the placeholders in `upsert_query`.
        weather_rows = state_vectors[['vector_id', *new_columns]].itertuples(index=False, name=None)
        cursor.executemany(upsert_query, weather_rows)
        # Commit once per flight so finished flights are kept if a later one fails.
        flights_connection.commit()
finally:
    flights_connection.close()
    weather_connection.close()
# Save database that now has the new columns somehow


In [None]:
def Integrate_Clouds(state_vectors):
    """Return the summed 'clouds' values divided by the flight's elapsed time.

    The elapsed time is the last 'time' value minus the first one.
    NOTE(review): this is a plain sum over the samples; it is not weighted by the
    spacing between samples -- confirm that is the intended "integral".
    """
    cloud_total = np.sum(state_vectors['clouds'].values)
    timestamps = state_vectors['time'].values
    elapsed = timestamps[-1] - timestamps[0]
    return cloud_total / elapsed

def Integrate_Wind_Speed(state_vectors):
    """Return the summed 'sknt' values divided by the flight's elapsed time.

    The elapsed time is the last 'time' value minus the first one.
    NOTE(review): this is a plain sum over the samples; it is not weighted by the
    spacing between samples -- confirm that is the intended "integral".
    """
    wind_total = np.sum(state_vectors['sknt'].values)
    timestamps = state_vectors['time'].values
    elapsed = timestamps[-1] - timestamps[0]
    return wind_total / elapsed

def Integrate_Path_Lenght(state_vectors):
    """Return the total 3D path length over consecutive rows of `state_vectors`.

    Each row is paired with its successor. The horizontal step comes from
    `utils.haversine_distance` on the two (lat, lon) pairs, the vertical step is the
    absolute 'geoaltitude' difference, and the two are combined with Pythagoras.
    Frames with fewer than two rows produce no pairs, so the sum is 0.0.

    NOTE(review): the horizontal and vertical steps are combined directly, so
    `utils.haversine_distance` must return the same length unit as 'geoaltitude' -- confirm.
    """
    # Align every sample with the one that follows it
    leading = state_vectors[:-1]
    trailing = state_vectors[1:]

    horizontal_steps = []
    vertical_steps = []
    for here, there in zip(leading.itertuples(), trailing.itertuples()):
        horizontal_steps.append(utils.haversine_distance(here.lat, here.lon, there.lat, there.lon))
        vertical_steps.append(np.abs(here.geoaltitude - there.geoaltitude))

    dxy = np.array(horizontal_steps)
    dz = np.array(vertical_steps)

    # Length of each 3D segment, then the total
    segment_lengths = np.sqrt(dxy**2 + dz**2)
    return np.sum(segment_lengths)

def Integrate_Time(state_vectors):
    """Return the elapsed time: the last 'time' value minus the first one."""
    timestamps = state_vectors['time'].values
    last_stamp = timestamps[-1]
    first_stamp = timestamps[0]
    return last_stamp - first_stamp


# Prints the 3D path length of every flight in `flight_ids`.
# The flights database is opened once for the whole loop (instead of once per flight) and
# the finally clause closes it even when a query or the path integration raises.
flights_connection = sqlite3.connect(flights_database)
try:
    for i, flight_id in enumerate(flight_ids):
        # State vectors joined with their stored weather values (if any).
        # The flight id is bound as a text parameter (the same comparison the quoted
        # literal performed) so ids containing quote characters cannot break the statement.
        # dropna(axis='columns') then drops every column that contains a missing value.
        state_vectors = pd.read_sql_query(
            """
            SELECT DISTINCT sv.*, svw.tmpf, svw.air_pressure, svw.air_density, svw.clouds, svw.sknt
            FROM state_vectors AS sv
            JOIN flights ON flights.flight_id = sv.flight_id
            LEFT JOIN state_vector_weather AS svw ON sv.vector_id = svw.vector_id
            WHERE sv.flight_id = ?;
            """,
            flights_connection,
            params=(str(flight_id),),
        ).dropna(axis = 'columns')

        integral = Integrate_Path_Lenght(state_vectors)

        print(f'{flight_id} | {i}/{len(flight_ids)} | Path integral: {integral}')
finally:
    flights_connection.close()

In [None]:
# %%time
# scalars = ['tmpf', 'air_pressure', 'air_density', 'clouds']
# scalar_values = {scalar: np.repeat(np.nan, len(state_vectors)) for scalar in scalars}
# step = config['statistics']['interpolation']['flights']['step']
# for i, row in state_vectors.iloc[::step].iterrows():
#     clear()
#     print(f'{i}/{len(state_vectors)}')
#     target = {
#         'lon': row['lon'],
#         'lat': row['lat'],
#         'timestamp': row['time'],
#         'elevation': row['geoaltitude'],
#          }
#     values = interpolator.estimate_scalars(target, scalars, stations_data=all_stations_data)
#     for j, scalar in enumerate(scalars):
#         scalar_values[scalar][i] = values[j]
# for scalar in scalars:
#     state_vectors[scalar] = scalar_values[scalar]
#     state_vectors[scalar] = state_vectors[scalar].interpolate(method='linear')

# state_vectors

In [None]:
# compressed = pd.read_csv('tutorial_data/state_vectors_compressed.csv', index_col = 0)
# compressed['geolatitude'].plot()

In [None]:
# import os
# large_df = None
# path = '../data/flight/KDEN_KSEA/state_vectors/'
# files = [path + f for f in os.listdir(path) if f.endswith('.csv')]
# for file in files:
#     temp_df = pd.read_csv(file, index_col = 0)
#     temp_df['icao24'] = [file.split('/')[-1].split('_')[0]]*len(temp_df)
#     if large_df is None:
#         large_df = temp_df.copy()
#     else:
#         large_df = pd.concat([large_df, temp_df.copy()])


In [None]:
# large_df.to_csv('../data/flight/KDEN_KSEA/all_flights.csv')

In [None]:
# state_vectors

In [None]:
# flights_connection = sqlite3.connect(flights_database)
# cursor = flights_connection.cursor()

# # Create a new table with unique entries
# cursor.execute('''
# CREATE TABLE unique_state_vectors AS 
# SELECT * FROM state_vectors 
# GROUP BY vector_id;
# ''')

# flights_connection.commit()

# # Drop the old table
# cursor.execute('DROP TABLE state_vectors;')

# # Rename the new table to the original name
# cursor.execute('ALTER TABLE unique_state_vectors RENAME TO state_vectors;')

# flights_connection.commit()

# flights_connection.close()

In [None]:
# flights_connection.commit()

In [None]:
# flights_connection = sqlite3.connect(flights_database)
# cursor = flights_connection.cursor()

# # SQL to create a new table with unique entries, keeping the most recent row for each vector_id
# cursor.execute('''
# CREATE TABLE unique_state_vectors AS 
# SELECT * 
# FROM state_vectors 
# WHERE rowid IN (
#     SELECT MAX(rowid) 
#     FROM state_vectors 
#     GROUP BY vector_id
# );
# ''')

# flights_connection.commit()

# # Drop the old table
# cursor.execute('DROP TABLE state_vectors;')

# # Rename the new table to the original name
# cursor.execute('ALTER TABLE unique_state_vectors RENAME TO state_vectors;')

# flights_connection.commit()
# flights_connection.close()
