In [3]:
"""Implementation of the Recency-weighted historical forecast solution to the Run-way Functions:
Predict Reconfigurations at US Airports challenge.
https://www.drivendata.co/blog/airport-configuration-benchmark/
"""

from datetime import datetime
from pathlib import Path

from loguru import logger
import numpy as np
import pandas as pd
import typer
import copy
from sklearn.metrics import log_loss
from typing import Sequence, Tuple, Dict
import matplotlib as mpl

from src.utils import make_all_predictions, read_airport_configs, make_all_predictions_test

DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S"


#TODO Change HERE
feature_directory = Path("./data")
prediction_path = Path("./test_prediction.csv")


In [4]:
# logger.info("Computing my predictions for {}", prediction_time)

open_train_labels = pd.read_csv(
    feature_directory / "open_train_labels.csv.bz2", parse_dates=["timestamp"]
)
training_samples = (
open_train_labels.groupby(["airport", "lookahead"])
.sample(100, random_state=314)
.set_index(["airport", "timestamp", "lookahead"])
.sort_index()
)
training_labels = (
    open_train_labels.set_index(["airport", "timestamp", "lookahead"])
    .loc[training_samples.index]  # select the sampled timestamp/lookaheads
    .reset_index()
    .sort_values(["airport", "timestamp", "lookahead", "config"])
)
submission_format = training_labels.copy().assign(active=np.nan)

airport_directories = sorted(path for path in feature_directory.glob("k*"))

airport_config_df_map = {}
for airport_directory in sorted(airport_directories):
    airport_code, airport_config_df = read_airport_configs(airport_directory)
    print(airport_code)
    airport_config_df_map[airport_code] = airport_config_df
for airport_code, df in airport_config_df_map.items():
    print(f"{airport_code}: {len(df):>8,}")
#    submission_format = pd.read_csv(
#        feature_directory / "partial_submission_format.csv", parse_dates=["timestamp"]
#    )
print(f"{len(submission_format):,} rows x {len(submission_format.columns)} columns")

submission = submission_format.copy()
submission["active"] = np.nan

make_all_predictions(airport_config_df_map, submission)

katl
kclt
kden
kdfw
kjfk
kmem
kmia
kord
kphx
ksea
katl:   12,431
kclt:   11,141
kden:   12,145
kdfw:   10,997
kjfk:   10,894
kmem:   12,958
kmia:   11,400
kord:   14,355
kphx:    9,050
ksea:   10,776
304,800 rows x 5 columns


100%|██████████| 11918/11918 [01:43<00:00, 114.73it/s]


In [5]:
submission.loc[group.index, "active"]

NameError: name 'group' is not defined

In [6]:
for airport, group in training_labels.groupby(["airport"]):
    break

In [7]:
group

Unnamed: 0,airport,timestamp,lookahead,config,active
0,katl,2020-11-07 01:00:00,120,katl:D_10_8L_A_10_8L,0.0
1,katl,2020-11-07 01:00:00,120,katl:D_10_8R_9L_A_10_8L_9R,0.0
2,katl,2020-11-07 01:00:00,120,katl:D_10_8R_A_10_8R,0.0
3,katl,2020-11-07 01:00:00,120,katl:D_26L_27L_A_26R_27L_28,0.0
4,katl,2020-11-07 01:00:00,120,katl:D_26L_27R_28_A_26R_27L_28,0.0
...,...,...,...,...,...
32395,katl,2021-10-17 00:00:00,300,katl:D_8R_9L_A_8L_9R,0.0
32396,katl,2021-10-17 00:00:00,300,katl:D_8R_9L_A_8R_9L,0.0
32397,katl,2021-10-17 00:00:00,300,katl:D_8R_9R_A_10_8L_9R,0.0
32398,katl,2021-10-17 00:00:00,300,katl:D_9L_A_9R,0.0


In [16]:
submission.loc[group.index, "active"][25:50]

25    0.002179
26    0.026322
27    0.007407
28    0.007407
29    0.007407
30    0.007407
31    0.007407
32    0.007407
33    0.007407
34    0.007407
35    0.007407
36    0.090658
37    0.007407
38    0.007407
39    0.007407
40    0.007407
41    0.007407
42    0.007407
43    0.011388
44    0.007407
45    0.630625
46    0.072084
47    0.012714
48    0.007407
49    0.007407
Name: active, dtype: float64

In [15]:
group["active"][25:50]

25    0.0
26    0.0
27    0.0
28    0.0
29    0.0
30    0.0
31    0.0
32    0.0
33    0.0
34    0.0
35    0.0
36    0.0
37    0.0
38    0.0
39    0.0
40    0.0
41    0.0
42    0.0
43    0.0
44    0.0
45    0.0
46    1.0
47    0.0
48    0.0
49    0.0
Name: active, dtype: float64

In [2]:

from datetime import datetime
from pathlib import Path

from loguru import logger
import numpy as np
import pandas as pd
import typer
import copy
from sklearn.metrics import log_loss
from typing import Sequence, Tuple, Dict
import matplotlib as mpl

from src.utils import make_all_predictions, read_airport_configs, make_all_predictions_test

In [2]:
pd.read_csv("./runtime/data/katl/katl_airport_config.csv.bz2")

Unnamed: 0,airport_config,timestamp
0,D_26L_27R_A_26R_27L_28,2021-10-16T01:00:00
1,D_26L_27R_A_26R_27L_28,2021-10-16T01:20:00
2,D_26L_27R_A_26R_27L_28,2021-10-16T01:40:00
3,D_8R_9L_A_10_8L_9R,2021-10-16T02:00:00
4,D_26L_27R_A_26R_27L_28,2021-10-16T02:20:00
...,...,...
95,D_26L_27R_A_26R_27L_28,2021-10-17T08:40:00
96,D_26L_27R_A_26R_27L_28,2021-10-17T09:00:00
97,D_8R_9L_A_10_8L_9R,2021-10-17T09:20:00
98,D_8R_9L_A_10_8L_9R,2021-10-17T09:40:00


In [5]:
pd.read_csv("./runtime/data/ksea/ksea_lamp.csv.bz2")

Unnamed: 0,forecast_timestamp,temperature,wind_direction,wind_speed,wind_gust,cloud_ceiling,visibility,cloud,lightning_prob,precip,timestamp
0,2020-11-02T01:00:00,54.0,2.0,6.0,0.0,8.0,7.0,CL,N,False,2021-10-16T01:00:00
1,2020-11-01T20:00:00,53.0,22.0,6.0,0.0,8.0,7.0,SC,N,False,2021-10-16T01:20:00
2,2020-11-01T13:00:00,46.0,1.0,4.0,0.0,8.0,7.0,CL,N,False,2021-10-16T01:40:00
3,2020-11-02T03:00:00,54.0,9.0,4.0,0.0,8.0,7.0,CL,N,False,2021-10-16T02:00:00
4,2020-11-02T12:00:00,46.0,9.0,5.0,0.0,8.0,7.0,CL,N,False,2021-10-16T02:20:00
...,...,...,...,...,...,...,...,...,...,...,...
95,2020-11-02T12:00:00,53.0,16.0,3.0,0.0,8.0,7.0,CL,N,False,2021-10-17T08:40:00
96,2020-11-01T13:00:00,50.0,30.0,4.0,0.0,8.0,7.0,CL,N,False,2021-10-17T09:00:00
97,2020-11-03T02:00:00,58.0,9.0,3.0,0.0,8.0,7.0,FW,N,False,2021-10-17T09:20:00
98,2020-11-02T01:00:00,54.0,20.0,5.0,0.0,8.0,7.0,CL,N,False,2021-10-17T09:40:00


In [6]:
pd.read_csv("./data/ksea/ksea_lamp.csv.bz2")

Unnamed: 0,timestamp,forecast_timestamp,temperature,wind_direction,wind_speed,wind_gust,cloud_ceiling,visibility,cloud,lightning_prob,precip
0,2020-11-01T00:30:00,2020-11-01T01:00:00,53.0,36.0,8.0,0.0,8.0,7.0,BK,N,False
1,2020-11-01T00:30:00,2020-11-02T01:00:00,56.0,36.0,3.0,0.0,8.0,7.0,FW,N,False
2,2020-11-01T00:30:00,2020-11-02T00:00:00,58.0,32.0,2.0,0.0,8.0,7.0,FW,N,False
3,2020-11-01T00:30:00,2020-11-01T23:00:00,59.0,34.0,3.0,0.0,8.0,7.0,FW,N,False
4,2020-11-01T00:30:00,2020-11-01T22:00:00,58.0,34.0,4.0,0.0,8.0,7.0,FW,N,False
...,...,...,...,...,...,...,...,...,...,...,...
233220,2021-10-31T23:30:00,2021-11-01T03:00:00,49.0,4.0,7.0,0.0,8.0,7.0,CL,N,False
233221,2021-10-31T23:30:00,2021-11-01T02:00:00,51.0,1.0,7.0,0.0,8.0,7.0,CL,N,False
233222,2021-10-31T23:30:00,2021-11-01T01:00:00,54.0,35.0,6.0,0.0,8.0,7.0,CL,N,False
233223,2021-10-31T23:30:00,2021-11-01T13:00:00,43.0,6.0,5.0,0.0,8.0,7.0,CL,N,False
