# Import Modules

In [None]:
import pandas as pd
import psycopg2
import time

# Input

In [None]:
# Root folder of the thesis project; the CFS input file is read relative to it.
# Alternative location on another machine:
#   r"C:\Users\Christophe\OneDrive\Master\Thesis"
thesis_path = r"C:\Users\cvaka\OneDrive\Master\Thesis"

# Name of the target table.
table = 'rwy_config_new'

# Runway data columns: every prefix below combined with "r1" and "r2",
# in the order plr1, plr2, ptr1, ptr2, alr1, alr2, atr1, atr2.
datacols = [
    prefix + "r" + str(slot)
    for prefix in ("pl", "pt", "al", "at")
    for slot in (1, 2)
]

# Columns loaded from the CSV: the bookkeeping columns followed by the data columns.
usecols = [
    "form_timestamp",
    "start",
    "period",
    "form_id",
    "wfs_id",
    "mas_datetime",
    *datacols,
]

# Connect to PostgreSQL

In [None]:
# Open a connection to the local "thesis" database as user "postgres" and
# create the cursor used for SQL statements.
#
# The password is intentionally NOT written in the notebook. psycopg2 connects
# through libpq, which resolves a missing password from the PGPASSWORD
# environment variable or from the user's pgpass file, so set one of those
# before running this cell.
conn = psycopg2.connect(
    database="thesis",
    user="postgres",
    host="localhost",
)
cur = conn.cursor()

# Read and Process CFS

In [None]:
# --- Read -------------------------------------------------------------------
# Load only the columns listed in `usecols` and parse the two timestamp columns.
cfs = pd.read_csv(thesis_path + "/data/rwy_config/cfs.csv", usecols=usecols)
cfs = cfs.astype({
    "form_timestamp": "datetime64[ns]",
    "start": "datetime64[ns]",
})

# Missing runway entries are stored as '-'. `n_data` counts those placeholders
# per row, so a SMALLER value means a more complete entry.
cfs[datacols] = cfs[datacols].fillna('-')
cfs["n_data"] = (cfs[datacols] == '-').sum(axis=1)

# --- Normalise the free-text period into "HHMM-HHMM" ---------------------------
# Literal clean-ups first: collapse "--", turn "=" into "-", and trim stray
# spaces/dashes at both ends.
cfs['end'] = (
    cfs['period']
    .str.replace("--", "-", regex=False)
    .str.replace("=", "-", regex=False)
    .str.strip(to_strip=' -')
)

# Pattern-based fixes, applied in order (the order matters: the "00-UFN" rules
# must run before the generic "-UFN" rule). Patterns are raw strings and the
# hour class is [0-2]; the previous "[0,1,2]" also matched a literal comma.
replacements = [
    # Missing "-" between the two times, e.g. "12301330" -> "1230-1330".
    (r'([0-2]\d[0-5]\d)([0-2]\d[0-5]\d)',
     lambda m: m.group(1) + "-" + m.group(2)),
    # "until further notice" starting at midnight covers the whole day.
    (r'0000-UFN', '0000-2359'),
    # "HH00-UFN" -> ends one minute before the same time, i.e. "(HH-1)59".
    (r'([0-2]\d)00-UFN',
     lambda m: m.group(1) + "00-" + "{0:0=2d}".format(int(m.group(1)) - 1) + '59'),
    # "HHMM-UFN" (MM != 00 at this point) -> ends one minute earlier, "HH(MM-1)".
    (r'([0-2]\d[0-5]\d)-UFN',
     lambda m: m.group(1) + "-{0:0=4d}".format(int(m.group(1)) - 1)),
]
for pattern, repl in replacements:
    # regex=True is required: callables are only accepted for regex
    # replacement, and newer pandas defaults to regex=False.
    cfs['end'] = cfs['end'].str.replace(pattern, repl, regex=True)

# Rows whose period still is not "HHMM-HHMM" are reported and dropped.
# na=False makes rows with a missing period count as invalid instead of
# breaking the boolean negation below.
is_valid = cfs['end'].str.fullmatch(r'[0-2]\d[0-5]\d-[0-2]\d[0-5]\d', na=False)
print("Ignored due to inconsitent reporting: ", cfs.loc[~is_valid, ["form_id", "period"]])

# .copy() so the assignments below operate on an independent frame.
cfs = cfs[is_valid].copy()

# --- Turn the period into an end timestamp ------------------------------------
# Duration in minutes = end time - start time of the period; a negative value
# means the period wraps past midnight, so one day is added.
hhmm = cfs['end'].str.extract(r'([0-2]\d)([0-5]\d)-([0-2]\d)([0-5]\d)').astype(int)
duration_min = (hhmm[2] * 60 + hhmm[3]) - (hhmm[0] * 60 + hhmm[1])
duration_min = duration_min.where(duration_min >= 0, duration_min + 24 * 60)
cfs['end'] = cfs['start'] + pd.to_timedelta(duration_min, unit='m')

# --- De-duplicate --------------------------------------------------------------
# Sort ascending by wfs_id, then by number of missing values, then by
# mas_datetime, so the preferred entry comes first and keep="first" retains it.
cfs = cfs.sort_values(by=["wfs_id", "n_data", "mas_datetime"])
# Two passes: one row per (publish time, start) and one per (publish time, end).
cfs = cfs.drop_duplicates(subset=['form_timestamp', 'start'], keep="first")
cfs = cfs.drop_duplicates(subset=['form_timestamp', 'end'], keep="first")

# --- Final layout ----------------------------------------------------------------
cfs = cfs.rename(columns={'form_timestamp': 't_publish'})
# Half-open range literal "[start, end)".
cfs['valid_range'] = '[' + cfs['start'].astype(str) + ', ' + cfs['end'].astype(str) + ')'
# Keeps only the output columns; start, end and the helper columns are dropped here.
cfs = cfs.reindex(columns=['t_publish', 'valid_range', 'wfs_id'] + datacols)

# --- Save to CSV -----------------------------------------------------------------
cfs.to_csv("cfs_processed.csv", index=False)
# NOTE(review): this copy is written WITH the DataFrame index as first column,
# unlike the file above - confirm that this difference is intended.
cfs.to_csv("../../results/cfs_processed.csv")
cfs

# Upload CFS to PostgreSQL

In [None]:
# Disabled schema setup: this whole cell is commented out and does nothing.
# If re-enabled, it would drop and recreate the rwy_config table (primary key
# on t_publish + valid_range), create a btree index on t_publish and a GiST
# index on valid_range, and commit. SQL lines prefixed with "--" are
# additionally commented out at SQL level (the older t_start/t_end layout).
# query = """
# DROP TABLE IF EXISTS rwy_config;

# CREATE TABLE rwy_config (
#     t_publish timestamp,
# --    t_start timestamp,
# --    t_end timestamp,
#     valid_range tstzrange,
#     wfs_id int,
#     plr1 text,
#     plr2 text,
#     ptr1 text,
#     ptr2 text,
#     alr1 text,
#     alr2 text,
#     atr1 text,
#     atr2 text,
# --    PRIMARY KEY (t_publish, t_start, t_end)
#     PRIMARY KEY (t_publish, valid_range)
# );

# CREATE INDEX rwy_config_t_publish
#     ON public.rwy_config USING btree
#     (t_publish ASC NULLS LAST)
# ;

# CREATE INDEX rwy_config_valid_range
#     ON public.rwy_config USING gist
#     (valid_range)
# ;

# -- CREATE INDEX rwy_config_t_end
# --     ON public.rwy_config USING btree
# --     (t_end ASC NULLS LAST)
# ;
# """
# cur.execute(query)
# conn.commit()

In [None]:
# Print the shell/psql commands for bulk-loading cfs_processed.csv by hand;
# nothing is executed here.
# - The password is a placeholder: replace it when pasting, never store the
#   real one in the notebook.
# - Backslashes are written as "\\" so the string contains no invalid escape
#   sequences; the printed text still shows single backslashes.
# - The first \copy line lists t_start/t_end columns, the second lists
#   valid_range/wfs_id; use the one that matches the table layout.
print("""
set PGPASSWORD=your_postgres_password
"C:\\Program Files\\PostgreSQL\\13\\bin\\psql.exe" thesis postgres
\\copy public.rwy_config (t_publish, t_start, t_end, plr1, plr2, ptr1, ptr2, alr1, alr2, atr1, atr2) FROM 'C:/Users/cvaka/OneDrive/Master/Thesis/SQL/rwy_config/cfs_processed.csv' DELIMITER ',' CSV HEADER QUOTE '"' ESCAPE '''';
\\copy public.rwy_config (t_publish, valid_range, wfs_id, plr1, plr2, ptr1, ptr2, alr1, alr2, atr1, atr2) FROM 'C:/Users/cvaka/OneDrive/Master/Thesis/SQL/rwy_config/cfs_processed.csv' DELIMITER ',' CSV HEADER QUOTE '"' ESCAPE '''';
""")