In [6]:
import sys
# This variable should indicate the path from this Jupyter Notebook to the root directory of the repo.
root_path = '../'
# Adds the repo's root to the list of paths
sys.path.append(root_path)

# Package to read yml files
import yaml
# Package to handle file paths
import os
# Package to deal with DataFrames
import pandas as pd
# Package to edit sqlite file
import sqlite3
# Function to clear output
from IPython.display import clear_output

# Package for compressing dataframes into file
from src.data import compressors
# Package for defining and fitting weather models
from src.models import weather
# Utilities package
from src.common import utils
# Package for interpolating and estimating weather
from src.analysis import weather_interpolator

# Path from this notebook to the root directory
root_path = os.path.normpath(root_path)
# Path from root to the desired config file
config_path_from_root = os.path.normpath('config/config_tutorial.yml') 
# Defining path from this notebook to config file
config_path = os.path.join(root_path, config_path_from_root) 

# Loading config file
with open(config_path, 'r',  encoding='utf8') as file:
    config = yaml.safe_load(file)

# Defining "clear-output" function to feed into logger
def clear():
    clear_output(wait=True)

# Creates an instance of a logger class to log all that happens, optional (but encouraged).
logger = utils.Logger(config, clear_function=None)

# Creates an isntance of the CsvCompressor
compressor = compressors.CsvCompressor(config, logger=logger)

# Creates an instance of the weather interpolator
interpolator = weather_interpolator.WeatherInterpolator(config, logger=logger)


In [7]:
def create_flights_table(conn):
    create_table_sql = '''
    CREATE TABLE IF NOT EXISTS flights (
        flight_id TEXT PRIMARY KEY,
        icao24 TEXT,
        callsign TEXT,
        departure_airport TEXT,
        arrival_airport TEXT
    );
    '''
    conn.execute(create_table_sql)

def create_state_vectors_table(conn):
    create_table_sql = '''
    CREATE TABLE IF NOT EXISTS state_vectors (
        vector_id INTEGER PRIMARY KEY AUTOINCREMENT,
        flight_id TEXT,
        time INT,
        time_normalized INT,
        lat REAL,
        lon REAL,
        geoaltitude REAL,
        baroaltitude REAL,
        heading REAL,
        velocity REAL,
        FOREIGN KEY (flight_id) REFERENCES flights(flight_id)
    );
    '''
    conn.execute(create_table_sql)

def create_station_data_table(conn):
    create_table_sql = '''
    CREATE TABLE IF NOT EXISTS weather_sations (
        station_id TEXT PRIMARY KEY,
        lon REAL,
        lat REAL,
        elevation REAL,
        sname TEXT,
        time_domain TEXT,
        archive_begin TEXT,
        archive_end TEXT,
        state TEXT,
        country TEXT,
        climate_site TEXT,
        wfo TEXT,
        tzname TEXT,
        ncdc81 TEXT,
        ncei91 TEXT,
        ugc_county TEXT,
        ugc_zone TEXT,
        county TEXT,
        network TEXT,
        online BOOL,
        sigma REAL
    );
    '''
    conn.execute(create_table_sql)
        
def create_weather_data_table(conn):
    create_table_sql = '''
    CREATE TABLE IF NOT EXISTS weather_data (
        weather_id INTEGER PRIMARY KEY AUTOINCREMENT,
        station_id TEXT,
        time INT,
        tmpf REAL,
        relh REAL,
        drct INT,
        sknt REAL,
        sknt_E REAL,
        sknt_N REAL,
        p01i REAL,
        skyc1 TEXT,
        skyc2 TEXT,
        skyc3 TEXT,
        skyc4 TEXT,
        skyl1 INT,
        skyl2 INT,
        skyl3 INT,
        skyl4 INT,
        wxcodes TEXT,
        wx REAL,
        ice_accretion_1hr REAL,
        METAR TEXT,
        FOREIGN KEY (station_id) REFERENCES weather_sations(station_id)
    );
    '''
    conn.execute(create_table_sql)

In [8]:
# print("Table renamed successfully.")

In [20]:
route = 'KSFO_KLAX'
data_file = 'KSFO_KLAX_2023-07-01_2023-07-31.csv'
sqlite_file = 'KSFO_KLAX_2023-07-01_2023-07-31.sqlite'

flights = pd.read_csv(f'../data/flight/{route}/{data_file}')
if 'estdepartureairport' in flights.columns:
    flights['departure_airport'] = flights['estdepartureairport']
    flights['arrival_airport'] = flights['estarrivalairport']
    flights.drop(['estdepartureairport', 'estarrivalairport'], axis='columns')

flights['flight_id'] = [f'{row["icao24"]}_{row["firstseen"]}_{row["lastseen"]}_{row["departure_airport"]}_{row["arrival_airport"]}' for _, row in flights.iterrows()]
    

flight_ids = [f[:-4] for f in os.listdir(f'../data/flight/{route}/state_vectors/') if len(f) == 42]
valid_mask = flights['flight_id'].apply(lambda x: x in flight_ids)
flights = flights[valid_mask]

# Connect to SQLite database
conn = sqlite3.connect(f'../data/flight/{sqlite_file}')
create_flights_table(conn)
flights.to_sql('flights', conn, if_exists='replace', index=True)

# Create state_vectors table
create_state_vectors_table(conn)

for i, flight_id in enumerate(flight_ids):
    clear_output(wait=True)
    print(f'flight_id: {flight_id} | {i}/{len(flight_ids)}')
    file = f'../data/flight/{route}/state_vectors/{flight_id}.csv'
    state_vectors = compressor.decode_to_dataframe_from_file(file)
    state_vectors['flight_id'] = [flight_id]*len(state_vectors)
    state_vectors['time_normalized'] = state_vectors['time'] - state_vectors.iloc[0]['time']
    state_vectors.to_sql('state_vectors', conn, if_exists='append', index=False)

conn.close()


flight_id: a600a6_1688699170_1688702451_KSFO_KLAX | 714/715
2023/11/14 08:27:38 : CSV Decoding data from ../data/flight/KSFO_KLAX/state_vectors/a600a6_1688699170_1688702451_KSFO_KLAX.csv


In [19]:
conn = sqlite3.connect(f'../data/flight/{route}/{sqlite_file}')

query = 'SELECT * FROM state_vectors WHERE flight_id = "a2b562_1688163324_1688171713_KDEN_KSEA"'

flights = pd.read_sql_query(query, conn)

conn.close()
flights

DatabaseError: Execution failed on sql 'SELECT * FROM state_vectors WHERE flight_id = "a2b562_1688163324_1688171713_KDEN_KSEA"': no such table: state_vectors

In [66]:
# sqlite_file = '1673827200_1685923200.sqlite'
# weather_dir = '../data/weather'
# stations_csv = 'stations_database.csv'

# os.system(f'rm {weather_dir}/../{sqlite_file}')
# stations = pd.read_csv(f'{weather_dir}/{stations_csv}', index_col=0)
# stations['station_id'] = stations['id']
# stations = stations.drop('id', axis='columns')
# stations

# # Connect to SQLite database
# conn = sqlite3.connect(f'{weather_dir}/../{sqlite_file}')
# create_station_data_table(conn)
# stations.to_sql('weather_sations', conn, if_exists='replace', index=True)

# # Create state_vectors table
# create_weather_data_table(conn)

# files = [f for f in os.listdir(weather_dir) if len(f) == 25]
# for i, file in enumerate(files):
#     clear_output(wait=True)
#     print(f'file: {file} | {i}/{len(files)}')
#     weather_data = pd.read_csv(f'{weather_dir}/{file}', index_col=0)
#     weather_data['time'] = weather_data['timestamp']
#     weather_data['station_id'] = weather_data['station']
#     weather_data['METAR'] = weather_data['metar']

#     drops = ['timestamp', 'station', 'valid', 'lon', 'lat', 'metar',
#      'temperature_model', 'wind_model_E',
#            'wind_model_N', 'tmpf_sea_level', 'sknt_E_sea_level',
#            'sknt_N_sea_level', 'elevation', 'smps', 'smps_E', 'smps_N', 'tmpc', 'sigma',
#             'tmpf_model', 'sknt_E_model', 'sknt_N_model']
#     for drop in drops:
#         if drop in weather_data.columns:
#             weather_data.drop(drop, axis = 'columns' ,inplace=True)
#     weather_data.to_sql('weather_data', conn, if_exists='append', index=False)
# conn.close()


file: 1674950400_1675036800.csv | 44/45


In [109]:
# import sqlite3
# import pandas as pd

# from src.common import utils

# conn = sqlite3.connect('../data/weather/1673827200_1685923200.sqlite')

# max_min_times_df = pd.read_sql_query("SELECT MAX(time) as max_time, MIN(time) as min_time FROM weather_data", conn)
# max_min_lats_df = pd.read_sql_query("SELECT MAX(lat) as max_lat, MIN(lat) as min_lat FROM weather_stations", conn)
# max_min_lons_df = pd.read_sql_query("SELECT MAX(lon) as max_lon, MIN(lon) as min_lon FROM weather_stations", conn)

# mid_t = np.mean(max_min_times_df.values)
# mid_lat = np.mean(max_min_lats_df.values)
# mid_lon = np.mean(max_min_lons_df.values)

# t_1 = max_min_times_df['min_time'].values[0] - 1
# t_2 = max_min_times_df['min_time'].values[0] + 3600 - 1

# distance_thresh = 10000

In [106]:
# query = f"""
#     SELECT wd.*, ws.lat, ws.lon 
#     FROM weather_data wd
#     JOIN weather_stations ws ON wd.station_id = ws.station_id
#     WHERE wd.time BETWEEN {t_1} AND {t_2}
# """
# weather_data_df = pd.read_sql_query(query, conn)

# # conn.close()

# weather_data_df

Unnamed: 0,weather_id,station_id,time,tmpf,relh,drct,sknt,sknt_E,sknt_N,p01i,...,skyl1,skyl2,skyl3,skyl4,wxcodes,wx,ice_accretion_1hr,METAR,lat,lon
0,24220274,04V,1671840780,30.2,50.470000,280.0,6.0,-5.908847,1.041889,0.000000,...,3600.000000,5000.0,11000.0,,,,,K04V 240013Z AUTO 28006KT 10SM M01/M10 A3021 R...,38.1000,-106.1700
1,24220275,04V,1671841980,30.2,54.590000,300.0,6.0,-5.196152,3.000000,0.000000,...,3600.000000,5000.0,11000.0,,,,,K04V 240033Z AUTO 30006KT 10SM M01/M09 A3021 R...,38.1000,-106.1700
2,24220276,04V,1671843180,30.2,54.590000,310.0,7.0,-5.362311,4.499513,0.000000,...,3600.000000,5000.0,11000.0,,,,,K04V 240053Z AUTO 31007KT 10SM M01/M09 A3022 R...,38.1000,-106.1700
3,24220777,04W,1671840900,1.4,71.070000,310.0,13.0,-9.958578,8.356239,0.000000,...,200.000000,500.0,9000.0,,,,,K04W 240015Z AUTO 31013G20KT 10SM SCT002 M17/M...,46.0229,-92.8952
4,24220778,04W,1671842100,1.4,71.070000,310.0,12.0,-9.192533,7.713451,0.000000,...,166.666667,500.0,9000.0,,,,,K04W 240035Z AUTO 31012G21KT 10SM M17/M21 A299...,46.0229,-92.8952
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16470,27027882,ZZV,1671842400,-2.0,69.969310,250.0,19.0,-17.854160,-6.498383,0.000100,...,2200.000000,4500.0,5000.0,,HZ,,0.02,KZZV 240040Z AUTO 25019KT 2SM HZ OVC022 M19/M2...,39.9444,-81.8921
16471,27027883,ZZV,1671842700,-2.0,70.500345,250.0,11.0,-10.336619,-3.762222,0.000100,...,2200.000000,4500.0,5000.0,,HZ,,0.02,KZZV 240045Z AUTO 25011KT 2 1/2SM HZ OVC022 M1...,39.9444,-81.8921
16472,27027884,ZZV,1671843000,-2.0,71.031379,240.0,11.0,-9.526279,-5.500000,0.000100,...,2200.000000,4500.0,5000.0,,HZ,,0.02,KZZV 240050Z AUTO 24011KT 3SM HZ OVC022 M19/M2...,39.9444,-81.8921
16473,27027885,ZZV,1671843180,-2.0,71.350000,250.0,15.0,-14.095389,-5.130302,0.000100,...,2200.000000,4500.0,5000.0,,HZ,,0.02,KZZV 240053Z AUTO 25015G21KT 3SM HZ OVC022 M19...,39.9444,-81.8921
