In [2]:
import pandas as pd
from sklearn import datasets
from sklearn import linear_model
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split, cross_val_score
import numpy as np

from sklearn.metrics import log_loss, make_scorer, accuracy_score
from sklearn.preprocessing import scale
from sklearn.impute import SimpleImputer
from sklearn.ensemble import GradientBoostingClassifier
import pickle
from pathlib import Path


In [2]:
prediction_path = Path("./prediction.csv")

submission_format = pd.read_csv("../data/partial_submission_format.csv", parse_dates=["timestamp"])
test_data = pd.read_csv("./testing_data.csv")
airports = test_data["airport"].unique().tolist()

In [3]:
air="ksea"
airport_test_data = test_data.copy()
airport_test_data = airport_test_data[airport_test_data["airport"] == air]

possible_labels = pd.read_csv(f"{air}_possibel_config")["0"].values.tolist()

feature_cols = ["temperature", "wind_speed", "wind_gust", "cloud_ceiling", "visibility", \
              "cloud", "lightning_prob", "precip","wind_direction_cos", "wind_direction_sin", "depart1", "deaprt2", "depart3", "depart4", \
                    "arrive1", "arrive2", "arrive3", "arrive4", "lookahead"]
for i in range(len(possible_labels)):
    feature_cols.append('cur_config_hot'+str(i))

X = airport_test_data.loc[:, feature_cols]

imp = SimpleImputer(missing_values=np.nan, strategy='constant', fill_value=-10)
imp = imp.fit(X)
X = imp.transform(X)

model_file = open(f"{air}_trained_model.pkl", "rb")
model = pickle.load(model_file)
model_file.close()

predicted_probabilities = model.predict_proba(X).tolist()
to_add = np.setdiff1d(np.array(range(len(possible_labels))),model.classes_)
cur_classes = model.classes_
for i in range(len(predicted_probabilities)):
    for j in to_add:
        print(f"{air}")
        predicted_probabilities[i].insert(j, 1e-8)


predicted_probabilities = normalize(np.array(predicted_probabilities), axis=1, norm="l1")

# Check this code!
submission_format.loc[submission_format["airport"] == air, "active"] = predicted_probabilities.flatten()

In [4]:
submission_format.loc[submission_format["airport"] == air]["active"].sum()

9720.0

In [5]:
airport_test_data

Unnamed: 0.1,Unnamed: 0,airport,temperature,wind_speed,wind_gust,cloud_ceiling,visibility,cloud,lightning_prob,precip,...,cur_config_hot33,cur_config_hot34,cur_config_hot35,cur_config_hot36,cur_config_hot37,cur_config_hot38,cur_config_hot39,cur_config_hot40,cur_config_hot41,lookahead
87480,2479,ksea,46.0,8.0,0.0,6.0,7.0,10.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.0
87481,2480,ksea,46.0,8.0,0.0,6.0,7.0,10.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0
87482,2481,ksea,46.0,8.0,0.0,6.0,7.0,10.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,90.0
87483,2482,ksea,46.0,8.0,0.0,6.0,7.0,10.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,120.0
87484,2483,ksea,47.0,10.0,0.0,6.0,7.0,10.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,150.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97195,2194,ksea,77.0,7.0,0.0,8.0,7.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,240.0
97196,2195,ksea,77.0,7.0,0.0,8.0,7.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,270.0
97197,2196,ksea,73.0,6.0,0.0,8.0,7.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,300.0
97198,2197,ksea,73.0,6.0,0.0,8.0,7.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,330.0


In [14]:
from sklearn.model_selection import train_test_split
possible_labels = pd.read_csv(f"{air}_possibel_config")["0"].values.tolist()
train = train[train["airport"] == air]
X = train.loc[:, feature_cols]
y = train.actual_label
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.03, random_state=12)
# Some weather data is missing :(
imp = SimpleImputer(missing_values=np.nan, strategy='constant', fill_value=-10)
imp = imp.fit(x_train)
x_train = imp.transform(x_train)
x_test = imp.transform(x_test)

In [15]:
predicted_probabilities = model.predict_proba(x_test).tolist()
to_add = np.setdiff1d(np.array(range(len(possible_labels))),model.classes_)
cur_classes = model.classes_
for i in range(len(predicted_probabilities)):
  for j in to_add:
     predicted_probabilities[i].insert(j, 1e-8)
y_test_onehot = []
for i in y_test:
  temp = [0]*(len(possible_labels))

  temp[int(i)] = 1
  y_test_onehot.append(temp)

predicted_probabilities = normalize(np.array(predicted_probabilities), axis=1, norm="l1")

score = log_loss(np.array(y_test_onehot).flatten(), predicted_probabilities.flatten())

In [16]:
score

0.03988808346905761

In [13]:
for i in y_test:
    print(i)

4.0
9.0
2.0
8.0
9.0
20.0
10.0
6.0
10.0
19.0
12.0
2.0
11.0
0.0
41.0
9.0
7.0
10.0
3.0
5.0
4.0
0.0
10.0
9.0
41.0
15.0
20.0
9.0
26.0
18.0
28.0
5.0
7.0
5.0
12.0
9.0
15.0
16.0
5.0
24.0
13.0
0.0
7.0
9.0
20.0
4.0
5.0
25.0
9.0
9.0
12.0
27.0
0.0
9.0
9.0
19.0
10.0
18.0
3.0
0.0
0.0
6.0
1.0
18.0
4.0
16.0
4.0
10.0
9.0
13.0
6.0
30.0
22.0
6.0
4.0
16.0
5.0
0.0
12.0
4.0
23.0
26.0
1.0
37.0
18.0
5.0
5.0
6.0
2.0
22.0
0.0
15.0
19.0
24.0
7.0
23.0
13.0
5.0
26.0
5.0
2.0
21.0
30.0
16.0
0.0
10.0
27.0
36.0
6.0
21.0
9.0
5.0
5.0
6.0
15.0
0.0
1.0
20.0
24.0
6.0
23.0
4.0
9.0
1.0
6.0
22.0
5.0
32.0
2.0
15.0
26.0
18.0
7.0
19.0
18.0
10.0
41.0
41.0
9.0
5.0
14.0
1.0
3.0
6.0
5.0
10.0
7.0
4.0
36.0
19.0
16.0
11.0
10.0
18.0
20.0
9.0
15.0
15.0
8.0
9.0
6.0
4.0
30.0
23.0
6.0
18.0
2.0
15.0
5.0
9.0
6.0
16.0
27.0
2.0
24.0
7.0
4.0
5.0
1.0
12.0
6.0
30.0
6.0
12.0
0.0
6.0
0.0
5.0
9.0
11.0
5.0
27.0
19.0
6.0
3.0
15.0
15.0
7.0
39.0
5.0
18.0
9.0
21.0
19.0
5.0
4.0
12.0
9.0
2.0
20.0
7.0
4.0
17.0
8.0
9.0
12.0
4.0
7.0
5.0
5.0
26.0
20.0
40.0
33.0

9.0
24.0
8.0
19.0
1.0
5.0
3.0
12.0
22.0
3.0
9.0
18.0
1.0
5.0
4.0
6.0
20.0
9.0
20.0
0.0
21.0
6.0
0.0
41.0
6.0
7.0
20.0
6.0
18.0
12.0
26.0
24.0
22.0
4.0
7.0
5.0
34.0
9.0
27.0
30.0
18.0
6.0
15.0
5.0
5.0
2.0
9.0
24.0
16.0
5.0
4.0
17.0
2.0
6.0
10.0
27.0
9.0
5.0
0.0
6.0
1.0
10.0
19.0
5.0
5.0
5.0
0.0
18.0
26.0
11.0
6.0
37.0
9.0
5.0
31.0
36.0
4.0
13.0
24.0
22.0
5.0
5.0
27.0
25.0
0.0
17.0
9.0
12.0
9.0
32.0
11.0
4.0
0.0
32.0
1.0
3.0
21.0
4.0
22.0
27.0
0.0
6.0
17.0
6.0
19.0
6.0
27.0
8.0
27.0
16.0
18.0
9.0
2.0
9.0
6.0
7.0
9.0
11.0
5.0
3.0
5.0
20.0
4.0
9.0
9.0
10.0
20.0
6.0
4.0
30.0
41.0
9.0
15.0
5.0
18.0
1.0
9.0
5.0
10.0
18.0
20.0
4.0
16.0
12.0
4.0
11.0
5.0
27.0
41.0
8.0
18.0
6.0
22.0
25.0
9.0
11.0
8.0
1.0
5.0
0.0
10.0
1.0
20.0
24.0
5.0
7.0
23.0
3.0
10.0
1.0
13.0
22.0
7.0
5.0
5.0
5.0
20.0
6.0
20.0
29.0
18.0
14.0
15.0
30.0
1.0
5.0
4.0
2.0
13.0
2.0
9.0
21.0
9.0
6.0
24.0
22.0
15.0
8.0
19.0
5.0
20.0
8.0
7.0
5.0
10.0
2.0
3.0
23.0
0.0
3.0
27.0
6.0
5.0
5.0
8.0
7.0
33.0
5.0
7.0
4.0
9.0
17.0
9.0
10.0
30.0


5.0
5.0
13.0
3.0
41.0
28.0
6.0
3.0
6.0
25.0
12.0
8.0
5.0
14.0
15.0
5.0
27.0
37.0
5.0
5.0
0.0
6.0
0.0
20.0
26.0
5.0
1.0
16.0
9.0
17.0
9.0
0.0
0.0
1.0
5.0
13.0
2.0
9.0
5.0
8.0
2.0
6.0
6.0
0.0
2.0
15.0
19.0
0.0
19.0
9.0
5.0
5.0
7.0
18.0
1.0
23.0
5.0
23.0
2.0
2.0
4.0
21.0
19.0
16.0
9.0
0.0
5.0
5.0
24.0
1.0
26.0
27.0
6.0
6.0
30.0
26.0
3.0
33.0
20.0
11.0
0.0
6.0
15.0
34.0
5.0
10.0
5.0
10.0
6.0
0.0
6.0
3.0
9.0
22.0
20.0
16.0
6.0
18.0
19.0
22.0
17.0
9.0
15.0
19.0
20.0
5.0
24.0
22.0
11.0
9.0
6.0
26.0
5.0
1.0
4.0
9.0
37.0
12.0
5.0
12.0
30.0
26.0
8.0
10.0
5.0
30.0
4.0
1.0
1.0
7.0
3.0
9.0
16.0
24.0
15.0
5.0
5.0
15.0
5.0
17.0
9.0
24.0
15.0
0.0
9.0
6.0
9.0
7.0
6.0
17.0
4.0
12.0
8.0
12.0
6.0
9.0
9.0
20.0
24.0
18.0
5.0
18.0
1.0
9.0
10.0
18.0
15.0
5.0
5.0
0.0
5.0
41.0
8.0
3.0
26.0
4.0
26.0
6.0
23.0
10.0
6.0
5.0
20.0
5.0
4.0
6.0
16.0
5.0
5.0
13.0
5.0
13.0
9.0
2.0
22.0
5.0
0.0
5.0
20.0
12.0
0.0
23.0
30.0
5.0
4.0
10.0
9.0
5.0
10.0
12.0
5.0
9.0
15.0
27.0
32.0
15.0
7.0
2.0
5.0
9.0
10.0
11.0
5.0
7.0
10.0
10.

9.0
5.0
5.0
18.0
6.0
9.0
18.0
26.0
19.0
9.0
7.0
32.0
0.0
4.0
8.0
5.0
1.0
27.0
27.0
14.0
5.0
2.0
6.0
8.0
19.0
9.0
2.0
0.0
5.0
17.0
0.0
18.0
11.0
5.0
10.0
6.0
5.0
5.0
34.0
1.0
8.0
30.0
11.0
20.0
23.0
19.0
4.0
6.0
10.0
13.0
5.0
20.0
7.0
0.0
6.0
5.0
6.0
10.0
5.0
5.0
5.0
2.0
41.0
2.0
9.0
9.0
3.0
9.0
22.0
5.0
1.0
15.0
5.0
6.0
8.0
27.0
3.0
26.0
9.0
9.0
12.0
29.0
9.0
7.0
1.0
9.0
5.0
18.0
19.0
2.0
15.0
0.0
19.0
18.0
5.0
5.0
8.0
10.0
15.0
6.0
18.0
5.0
16.0
6.0
1.0
37.0
6.0
30.0
16.0
5.0
2.0
9.0
6.0
30.0
3.0
6.0
16.0
11.0
1.0
7.0
9.0
6.0
5.0
23.0
9.0
6.0
2.0
6.0
19.0
2.0
8.0
4.0
9.0
9.0
5.0
6.0
15.0
5.0
16.0
16.0
31.0
5.0
26.0
0.0
3.0
4.0
9.0
29.0
9.0
37.0
3.0
5.0
9.0
9.0
18.0
9.0
9.0
5.0
9.0
12.0
37.0
8.0
9.0
22.0
5.0
18.0
5.0
26.0
8.0
9.0
5.0
9.0
6.0
19.0
20.0
2.0
0.0
5.0
1.0
20.0
18.0
0.0
24.0
28.0
5.0
0.0
0.0
5.0
1.0
0.0
7.0
10.0
10.0
6.0
4.0
0.0
20.0
1.0
9.0
5.0
17.0
13.0
5.0
5.0
1.0
32.0
9.0
1.0
4.0
5.0
4.0
30.0
6.0
5.0
5.0
5.0
9.0
15.0
20.0
6.0
5.0
5.0
5.0
5.0
3.0
5.0
5.0
19.0
9.0
2.0
9.0


22.0
11.0
22.0
6.0
25.0
12.0
9.0
19.0
27.0
5.0
10.0
11.0
5.0
0.0
0.0
5.0
19.0
0.0
5.0
23.0
5.0
19.0
2.0
14.0
4.0
7.0
27.0
5.0
4.0
29.0
4.0
3.0
7.0
5.0
5.0
5.0
9.0
7.0
5.0
9.0
27.0
10.0
23.0
9.0
16.0
6.0
15.0
0.0
9.0
0.0
23.0
36.0
1.0
1.0
12.0
9.0
7.0
6.0
10.0
13.0
9.0
4.0
15.0
2.0
12.0
24.0
0.0
5.0
5.0
16.0
5.0
5.0
33.0
9.0
18.0
16.0
9.0
5.0
1.0
5.0
2.0
20.0
9.0
5.0
9.0
6.0
20.0
18.0
0.0
2.0
5.0
10.0
12.0
4.0
19.0
1.0
5.0
8.0
12.0
9.0
4.0
5.0
9.0
9.0
15.0
4.0
9.0
10.0
8.0
30.0
4.0
3.0
2.0
20.0
22.0
0.0
27.0
20.0
10.0
16.0
15.0
5.0
5.0
7.0
6.0
4.0
23.0
9.0
1.0
21.0
18.0
24.0
27.0
31.0
27.0
2.0
17.0
3.0
3.0
3.0
9.0
20.0
5.0
6.0
5.0
26.0
18.0
5.0
14.0
1.0
8.0
16.0
9.0
2.0
23.0
26.0
41.0
29.0
22.0
37.0
22.0
0.0
26.0
19.0
9.0
26.0
22.0
19.0
5.0
5.0
26.0
0.0
23.0
0.0
19.0
5.0
8.0
5.0
41.0
0.0
26.0
9.0
8.0
19.0
12.0
6.0
9.0
0.0
20.0
30.0
10.0
38.0
2.0
23.0
5.0
11.0
4.0
9.0
19.0
9.0
6.0
2.0
5.0
16.0
9.0
4.0
15.0
19.0
19.0
9.0
6.0
0.0
0.0
5.0
5.0
19.0
13.0
0.0
20.0
37.0
3.0
18.0
27.0
8.0
37.0
9

12.0
0.0
5.0
20.0
1.0
9.0
0.0
8.0
37.0
9.0
4.0
12.0
3.0
9.0
18.0
5.0
3.0
35.0
29.0
9.0
3.0
14.0
5.0
9.0
25.0
8.0
23.0
3.0
10.0
6.0
21.0
9.0
0.0
5.0
9.0
6.0
1.0
5.0
5.0
7.0
3.0
12.0
22.0
3.0
20.0
15.0
18.0
10.0
18.0
12.0
6.0
10.0
2.0
19.0
4.0
23.0
1.0
21.0
6.0
6.0
20.0
14.0
10.0
6.0
9.0
4.0
7.0
18.0
6.0
15.0
13.0
9.0
15.0
6.0
35.0
9.0
18.0
2.0
2.0
7.0
5.0
0.0
6.0
5.0
5.0
6.0
18.0
1.0
9.0
6.0
17.0
26.0
30.0
9.0
26.0
1.0
35.0
22.0
9.0
10.0
25.0
5.0
8.0
11.0
5.0
9.0
8.0
16.0
1.0
0.0
9.0
41.0
1.0
0.0
18.0
15.0
14.0
5.0
5.0
4.0
20.0
0.0
9.0
5.0
5.0
23.0
1.0
5.0
19.0
23.0
19.0
2.0
12.0
3.0
0.0
26.0
8.0
41.0
5.0
6.0
10.0
9.0
15.0
37.0
4.0
23.0
7.0
14.0
9.0
14.0
5.0
5.0
10.0
5.0
20.0
5.0
20.0
14.0
7.0
5.0
9.0
9.0
6.0
9.0
5.0
6.0
26.0
16.0
21.0
0.0
20.0
7.0
9.0
5.0
21.0
5.0
10.0
6.0
17.0
5.0
19.0
13.0
9.0
9.0
9.0
20.0
5.0
0.0
9.0
12.0
4.0
1.0
19.0
30.0
1.0
10.0
7.0
21.0
2.0
4.0
26.0
5.0
26.0
9.0
19.0
12.0
6.0
19.0
4.0
15.0
6.0
1.0
6.0
5.0
20.0
5.0
6.0
5.0
0.0
8.0
22.0
5.0
2.0
16.0
24.0
4.0
9.0
5

22.0
41.0
5.0
28.0
8.0
2.0
11.0
15.0
1.0
37.0
5.0
10.0
17.0
0.0
23.0
23.0
41.0
15.0
17.0
4.0
26.0
1.0
9.0
16.0
11.0
5.0
24.0
3.0
18.0
20.0
9.0
5.0
18.0
24.0
5.0
2.0
17.0
5.0
4.0
24.0
6.0
26.0
12.0
1.0
10.0
30.0
8.0
10.0
5.0
17.0
8.0
34.0
5.0
5.0
6.0
7.0
24.0
22.0
0.0
26.0
17.0
5.0
1.0
15.0
12.0
16.0
26.0
22.0
5.0
12.0
11.0
0.0
5.0
9.0
6.0
9.0
18.0
15.0
5.0
10.0
9.0
3.0
6.0
9.0
2.0
7.0
19.0
5.0
23.0
0.0
26.0
9.0
19.0
2.0
19.0
24.0
37.0
20.0
12.0
1.0
5.0
2.0
2.0
17.0
4.0
17.0
6.0
15.0
9.0
32.0
6.0
1.0
26.0
5.0
11.0
4.0
5.0
21.0
27.0
36.0
7.0
3.0
26.0
26.0
9.0
3.0
31.0
13.0
3.0
9.0
9.0
1.0
11.0
5.0
5.0
17.0
26.0
10.0
15.0
25.0
0.0
8.0
5.0
9.0
19.0
7.0
3.0
16.0
18.0
20.0
19.0
7.0
5.0
5.0
4.0
5.0
1.0
2.0
2.0
6.0
6.0
24.0
27.0
2.0
41.0
28.0
6.0
38.0
6.0
19.0
7.0
13.0
28.0
6.0
6.0
6.0
0.0
5.0
6.0
5.0
4.0
7.0
5.0
10.0
30.0
30.0
26.0
4.0
20.0
8.0
5.0
5.0
26.0
28.0
9.0
6.0
26.0
8.0
10.0
2.0
15.0
9.0
15.0
15.0
20.0
5.0
25.0
15.0
7.0
13.0
26.0
6.0
18.0
32.0
7.0
18.0
18.0
3.0
1.0
26.0
24.0
15.0
15.

8.0
0.0
6.0
18.0
0.0
6.0
2.0
15.0
0.0
11.0
4.0
20.0
0.0
20.0
10.0
5.0
0.0
5.0
18.0
41.0
1.0
9.0
0.0
37.0
12.0
5.0
19.0
15.0
18.0
0.0
20.0
9.0
5.0
37.0
10.0
18.0
5.0
2.0
9.0
4.0
8.0
4.0
6.0
5.0
5.0
9.0
19.0
24.0
24.0
18.0
37.0
9.0
41.0
0.0
1.0
0.0
6.0
25.0
9.0
7.0
15.0
18.0
30.0
5.0
5.0
22.0
8.0
7.0
13.0
15.0
32.0
5.0
0.0
18.0
19.0
11.0
6.0
9.0
8.0
9.0
5.0
6.0
13.0
0.0
5.0
8.0
6.0
9.0
6.0
6.0
5.0
17.0
0.0
4.0
12.0
5.0
9.0
6.0
26.0
10.0
0.0
25.0
11.0
19.0
22.0
9.0
20.0
6.0
3.0
6.0
6.0
6.0
15.0
28.0
5.0
5.0
13.0
16.0
15.0
9.0
1.0
6.0
15.0
1.0
20.0
5.0
9.0
6.0
23.0
37.0
20.0
1.0
5.0
0.0
37.0
29.0
4.0
6.0
5.0
41.0
8.0
5.0
3.0
10.0
7.0
7.0
6.0
33.0
10.0
24.0
7.0
6.0
29.0
37.0
22.0
0.0
29.0
9.0
9.0
15.0
5.0
8.0
5.0
15.0
13.0
10.0
22.0
9.0
41.0
19.0
5.0
30.0
26.0
18.0
5.0
33.0
19.0
29.0
15.0
14.0
5.0
20.0
21.0
1.0
1.0
18.0
0.0
24.0
18.0
7.0
7.0
10.0
20.0
6.0
0.0
23.0
2.0
1.0
5.0
9.0
5.0
18.0
37.0
11.0
23.0
0.0
32.0
5.0
10.0
24.0
0.0
4.0
37.0
5.0
13.0
6.0
5.0
5.0
1.0
41.0
3.0
19.0
0.0
27.0
6.0


3.0
19.0
27.0
37.0
30.0
26.0
19.0
15.0
27.0
11.0
20.0
6.0
5.0
5.0
2.0
4.0
5.0
20.0
21.0
6.0
19.0
5.0
5.0
19.0
5.0
5.0
27.0
7.0
9.0
16.0
16.0
15.0
21.0
0.0
12.0
5.0
6.0
5.0
5.0
9.0
4.0
5.0
26.0
12.0
10.0
9.0
2.0
1.0
9.0
16.0
9.0
2.0
9.0
20.0
18.0
9.0
29.0
7.0
19.0
9.0
11.0
26.0
9.0
15.0
26.0
15.0
26.0
15.0
21.0
17.0
15.0
6.0
12.0
20.0
10.0
1.0
9.0
10.0
4.0
0.0
9.0
9.0
41.0
9.0
5.0
5.0
24.0
1.0
7.0
0.0
18.0
7.0
4.0
15.0
41.0
9.0
5.0
21.0
5.0
2.0
6.0
20.0
24.0
12.0
12.0
19.0
1.0
19.0
0.0
1.0
18.0
4.0
1.0
9.0
16.0
5.0
7.0
18.0
27.0
5.0
26.0
35.0
10.0
18.0
12.0
26.0
16.0
9.0
1.0
37.0
26.0
5.0
7.0
23.0
10.0
15.0
30.0
14.0
27.0
20.0
9.0
5.0
41.0
5.0
5.0
5.0
9.0
4.0
13.0
12.0
15.0
22.0
21.0
4.0
19.0
6.0
22.0
5.0
37.0
5.0
35.0
22.0
6.0
2.0
5.0
3.0
9.0
5.0
13.0
5.0
9.0
9.0
10.0
9.0
5.0
5.0
2.0
9.0
12.0
9.0
15.0
41.0
19.0
5.0
9.0
16.0
5.0
23.0
12.0
24.0
9.0
5.0
0.0
15.0
41.0
0.0
5.0
15.0
4.0
14.0
6.0
7.0
0.0
5.0
6.0
5.0
3.0
5.0
25.0
8.0
6.0
12.0
0.0
9.0
24.0
4.0
5.0
26.0
7.0
15.0
9.0
9.0
4.0
19.0

32.0
41.0
15.0
5.0
5.0
18.0
6.0
26.0
26.0
3.0
19.0
15.0
16.0
8.0
27.0
41.0
32.0
41.0
0.0
25.0
26.0
15.0
12.0
5.0
5.0
24.0
3.0
11.0
5.0
29.0
5.0
5.0
15.0
5.0
9.0
4.0
21.0
5.0
30.0
32.0
9.0
35.0
19.0
5.0
6.0
0.0
41.0
23.0
16.0
35.0
28.0
5.0
11.0
3.0
5.0
32.0
16.0
9.0
12.0
22.0
5.0
1.0
23.0
1.0
5.0
9.0
6.0
9.0
10.0
37.0
41.0
12.0
26.0
1.0
5.0
2.0
25.0
1.0
17.0
10.0
30.0
6.0
19.0
9.0
1.0
9.0
12.0
8.0
12.0
20.0
15.0
0.0
22.0
9.0
37.0
4.0
30.0
5.0
1.0
0.0
23.0
2.0
6.0
8.0
9.0
22.0
17.0
5.0
0.0
16.0
16.0
20.0
26.0
5.0
25.0
9.0
27.0
5.0
28.0
9.0
10.0
5.0
23.0
24.0
5.0
5.0
12.0
5.0
9.0
3.0
5.0
6.0
22.0
6.0
20.0
2.0
18.0
16.0
8.0
3.0
15.0
24.0
12.0
5.0
8.0
1.0
15.0
5.0
9.0
5.0
9.0
10.0
6.0
20.0
9.0
5.0
9.0
6.0
5.0
5.0
2.0
1.0
6.0
18.0
7.0
18.0
31.0
15.0
5.0
5.0
0.0
9.0
19.0
1.0
5.0
5.0
4.0
5.0
6.0
10.0
20.0
10.0
5.0
9.0
9.0
17.0
6.0
2.0
2.0
24.0
9.0
5.0
10.0
10.0
9.0
19.0
5.0
6.0
17.0
6.0
30.0
25.0
5.0
6.0
20.0
9.0
20.0
4.0
5.0
21.0
9.0
6.0
30.0
9.0
5.0
19.0
2.0
3.0
5.0
30.0
36.0
10.0
12.0
5.0
3

22.0
5.0
10.0
19.0
5.0
36.0
5.0
5.0
8.0
6.0
6.0
23.0
0.0
9.0
11.0
6.0
17.0
2.0
32.0
10.0
17.0
19.0
18.0
5.0
23.0
9.0
10.0
5.0
22.0
24.0
4.0
12.0
9.0
19.0
5.0
5.0
5.0
1.0
5.0
30.0
2.0
8.0
26.0
5.0
24.0
22.0
6.0
1.0
5.0
2.0
14.0
26.0
24.0
13.0
9.0
0.0
5.0
0.0
8.0
30.0
25.0
20.0
10.0
9.0
9.0
19.0
22.0
2.0
0.0
9.0
10.0
24.0
5.0
12.0
10.0
0.0
4.0
1.0
6.0
2.0
5.0
1.0
5.0
6.0
1.0
1.0
15.0
5.0
6.0
7.0
5.0
5.0
21.0
4.0
5.0
3.0
7.0
5.0
22.0
3.0
10.0
18.0
0.0
5.0
5.0
6.0
5.0
5.0
5.0
5.0
10.0
20.0
18.0
19.0
41.0
6.0
9.0
9.0
2.0
6.0
18.0
9.0
5.0
2.0
7.0
2.0
13.0
0.0
5.0
0.0
5.0
0.0
5.0
1.0
9.0
32.0
6.0
16.0
12.0
4.0
5.0
19.0
1.0
5.0
5.0
10.0
22.0
7.0
4.0
19.0
5.0
5.0
4.0
1.0
20.0
4.0
6.0
10.0
27.0
5.0
26.0
5.0
26.0
11.0
1.0
0.0
4.0
22.0
12.0
5.0
10.0
5.0
10.0
5.0
7.0
33.0
41.0
6.0
6.0
10.0
2.0
5.0
5.0
5.0
37.0
24.0
4.0
9.0
9.0
1.0
32.0
6.0
4.0
5.0
0.0
6.0
16.0
0.0
6.0
9.0
6.0
26.0
10.0
5.0
5.0
19.0
9.0
18.0
5.0
30.0
3.0
24.0
5.0
30.0
27.0
5.0
3.0
21.0
24.0
5.0
18.0
5.0
19.0
20.0
5.0
7.0
21.0
12.0
9

20.0
33.0
16.0
5.0
5.0
26.0
6.0
40.0
8.0
2.0
32.0
15.0
4.0
5.0
5.0
30.0
19.0
5.0
18.0
10.0
20.0
15.0
22.0
26.0
38.0
11.0
2.0
9.0
16.0
27.0
19.0
5.0
30.0
2.0
5.0
5.0
9.0
41.0
30.0
9.0
27.0
14.0
9.0
6.0
22.0
19.0
8.0
26.0
19.0
19.0
6.0
21.0
9.0
6.0
10.0
0.0
30.0
10.0
19.0
26.0
18.0
9.0
17.0
5.0
5.0
8.0
20.0
9.0
14.0
20.0
19.0
5.0
9.0
26.0
7.0
27.0
6.0
36.0
19.0
5.0
19.0
19.0
5.0
1.0
20.0
4.0
5.0
20.0
27.0
10.0
27.0
1.0
8.0
27.0
10.0
9.0
5.0
4.0
12.0
10.0
5.0
23.0
5.0
10.0
25.0
32.0
7.0
36.0
5.0
4.0
9.0
5.0
15.0
37.0
10.0
27.0
1.0
6.0
20.0
1.0
10.0
26.0
12.0
11.0
15.0
8.0
41.0
9.0
41.0
12.0
5.0
8.0
5.0
19.0
16.0
1.0
5.0
5.0
9.0
9.0
4.0
2.0
19.0
5.0
1.0
0.0
2.0
5.0
19.0
5.0
4.0
20.0
16.0
22.0
9.0
11.0
5.0
15.0
1.0
26.0
8.0
12.0
20.0
16.0
9.0
5.0
6.0
7.0
1.0
5.0
4.0
5.0
19.0
10.0
5.0
37.0
5.0
10.0
5.0
9.0
5.0
12.0
2.0
6.0
0.0
9.0
30.0
1.0
5.0
5.0
30.0
8.0
5.0
41.0
6.0
7.0
19.0
9.0
28.0
4.0
6.0
2.0
15.0
11.0
6.0
26.0
15.0
9.0
11.0
2.0
5.0
7.0
11.0
9.0
26.0
5.0
6.0
2.0
9.0
24.0
2.0
13.0
37.0


In [16]:
train = pd.read_csv("../LR_training/training_data.csv")
for air in airports:
    train_port = train[train["airport"] == air]
    possible_labels = pd.read_csv(f"{air}_possibel_config")["0"].values.tolist()
    for i in range(len(possible_labels)):
        print(air+" "+str(i)+" "+ str(train_port[f"cur_config_hot{i}"].sum()))

katl 0 492.0
katl 1 528.0
katl 2 768.0
katl 3 600.0
katl 4 1104.0
katl 5 1092.0
katl 6 240.0
katl 7 564.0
katl 8 768.0
katl 9 38688.0
katl 10 780.0
katl 11 432.0
katl 12 588.0
katl 13 1032.0
katl 14 264.0
katl 15 408.0
katl 16 720.0
katl 17 1056.0
katl 18 228.0
katl 19 27348.0
katl 20 336.0
katl 21 456.0
katl 22 528.0
katl 23 408.0
katl 24 996.0
katl 25 0.0
katl 26 8712.0
kclt 0 3276.0
kclt 1 24804.0
kclt 2 1896.0
kclt 3 804.0
kclt 4 3096.0
kclt 5 1008.0
kclt 6 40284.0
kclt 7 5556.0
kclt 8 1152.0
kclt 9 816.0
kclt 10 1212.0
kclt 11 3924.0
kclt 12 1308.0
kden 0 1488.0
kden 1 3108.0
kden 2 1296.0
kden 3 396.0
kden 4 780.0
kden 5 540.0
kden 6 7908.0
kden 7 3348.0
kden 8 480.0
kden 9 2940.0
kden 10 4212.0
kden 11 1584.0
kden 12 468.0
kden 13 1728.0
kden 14 432.0
kden 15 1656.0
kden 16 444.0
kden 17 2352.0
kden 18 1884.0
kden 19 1668.0
kden 20 1536.0
kden 21 492.0
kden 22 372.0
kden 23 1200.0
kden 24 7620.0
kden 25 3900.0
kden 26 2328.0
kden 27 6612.0
kden 28 2064.0
kden 29 888.0
kden 30 33

In [19]:
config = pd.read_csv("../data/katl/katl_airport_config.csv.bz2")

In [22]:
config[config["airport_config"]=="D_9L_A_9R"]

Unnamed: 0,timestamp,airport_config
12347,2021-10-28T03:53:00,D_9L_A_9R


In [23]:
opened = pd.read_csv("../data/partial_submission_format.csv", parse_dates=["timestamp"])

In [24]:
start_time = opened["timestamp"].min()

In [25]:
end_time = opened["timestamp"].max()

In [26]:
start_time

Timestamp('2021-01-04 06:00:00')

In [27]:
end_time

Timestamp('2021-06-20 23:00:00')

In [28]:
end_time-start_time

Timedelta('167 days 17:00:00')

In [30]:
(167*24+17)*60/30

8050.0

In [31]:
possible_labels

['ksea:D_16C_A_16C_16R',
 'ksea:D_16C_A_16L_16R',
 'ksea:D_16L_A_16C',
 'ksea:D_16L_A_16C_16L',
 'ksea:D_16L_A_16C_16R',
 'ksea:D_16L_A_16L_16R',
 'ksea:D_16L_A_16R',
 'ksea:D_34C_A_34C_34L',
 'ksea:D_34R_A_34C',
 'ksea:D_34R_A_34C_34R',
 'ksea:D_34R_A_34L_34R',
 'ksea:other']

In [33]:
airports

['katl',
 'kclt',
 'kden',
 'kdfw',
 'kjfk',
 'kmem',
 'kmia',
 'kord',
 'kphx',
 'ksea']

In [40]:
for air in airports:
    possible_labels = pd.read_csv(f"{air}_possibel_config")["0"].values.tolist()
    actual_possible_labels=submission_format[submission_format["airport"]==air]["config"].unique().tolist()
    print(possible_labels == actual_possible_labels)

True
True
True
True
True
True
True
True
True
True


In [43]:
submission_format

Unnamed: 0,airport,timestamp,lookahead,config,active
0,katl,2021-01-04 06:00:00,30,katl:D_10_8L_A_10_8L,3.703704e-02
1,katl,2021-01-04 06:00:00,30,katl:D_10_8R_9L_A_10_8L_9R,3.703704e-02
2,katl,2021-01-04 06:00:00,30,katl:D_10_8R_A_10_8R,3.703704e-02
3,katl,2021-01-04 06:00:00,30,katl:D_26L_27L_A_26R_27L_28,3.703704e-02
4,katl,2021-01-04 06:00:00,30,katl:D_26L_27R_28_A_26R_27L_28,3.703704e-02
...,...,...,...,...,...
2468875,ksea,2021-06-20 23:00:00,360,ksea:D_34C_A_34C_34L,1.213033e-07
2468876,ksea,2021-06-20 23:00:00,360,ksea:D_34R_A_34C,5.491238e-09
2468877,ksea,2021-06-20 23:00:00,360,ksea:D_34R_A_34C_34R,9.114326e-05
2468878,ksea,2021-06-20 23:00:00,360,ksea:D_34R_A_34L_34R,9.928730e-01


In [4]:
cur_config = pd.read_csv(f"../data/katl/katl_airport_config.csv.bz2", parse_dates=["timestamp"])

In [5]:
cur_config

Unnamed: 0,timestamp,airport_config
0,2020-11-01 01:11:00,D_8R_9L_A_10_8L_9R
1,2020-11-01 01:57:00,D_8R_9L_A_10_8L_9R
2,2020-11-01 02:53:00,D_8R_9L_A_10_8L_9R
3,2020-11-01 03:54:00,D_8R_9L_A_10_8L_9R
4,2020-11-01 04:52:00,D_8R_9L_A_10_8L_9R
...,...,...
12426,2021-10-31 20:56:00,D_26L_27R_A_26R_27L_28
12427,2021-10-31 21:53:00,D_26L_27R_A_26R_27L_28
12428,2021-10-31 22:04:00,D_26L_27R_A_26R_27L_28
12429,2021-10-31 22:54:00,D_26L_27R_A_26R_27L_28


In [6]:
cur_config[cur_config["airport_config"]=="D_9L_A_9R"]

Unnamed: 0,timestamp,airport_config
12347,2021-10-28 03:53:00,D_9L_A_9R


In [7]:
cur_config.loc[12340:12350]

Unnamed: 0,timestamp,airport_config
12340,2021-10-27 22:52:00,D_26L_27R_8R_9L_A_10_8L_9R
12341,2021-10-27 23:30:00,D_8R_9L_A_10_8L_9R
12342,2021-10-27 23:53:00,D_8R_9L_A_10_8L_9R
12343,2021-10-28 00:52:00,D_8R_9L_A_10_8L_9R
12344,2021-10-28 01:52:00,D_8R_9L_A_10_8L_9R
12345,2021-10-28 03:01:00,D_8R_9L_A_10_8L_9R
12346,2021-10-28 03:27:00,D_8R_9L_A_10_8L_9R
12347,2021-10-28 03:53:00,D_9L_A_9R
12348,2021-10-29 16:53:00,D_26L_27R_A_26R_27L_28
12349,2021-10-29 17:40:00,D_26L_27R_A_26R_27L_28


In [8]:
1/12347

8.099133392726978e-05

In [9]:
cur_config.value_counts().sort_values(ascending=False)

timestamp            airport_config        
2020-11-01 01:11:00  D_8R_9L_A_10_8L_9R        1
2020-11-02 08:56:00  D_26L_27R_A_26R_27L_28    1
2020-11-01 16:31:00  D_26L_27R_A_26R_27L_28    1
2020-11-01 09:06:00  D_8L_9L_A_10_8L_9R        1
2020-11-01 02:53:00  D_8R_9L_A_10_8L_9R        1
                                              ..
2021-10-31 19:59:00  D_26L_27R_A_26R_27L_28    1
2021-10-31 20:56:00  D_26L_27R_A_26R_27L_28    1
2021-10-31 21:53:00  D_26L_27R_A_26R_27L_28    1
2021-10-31 22:04:00  D_26L_27R_A_26R_27L_28    1
2021-10-31 23:53:00  D_26L_27R_A_26R_27L_28    1
Length: 12431, dtype: int64

In [11]:
config_timecourse = (
    cur_config.set_index("timestamp")
    .airport_config.resample("15min")
    .ffill()
    .dropna()
)
config_timecourse.value_counts().sort_values(ascending=False)

D_26L_27R_A_26R_27L_28       15761
D_8R_9L_A_10_8L_9R           10317
D_26L_27R_28_A_26R_27L_28      464
D_26L_27R_A_26L_27L_28         409
D_26L_28_A_26L_28              403
                             ...  
D_27L_27R_A_27L_28               1
D_10_9R_A_10_8L_9R               1
D_26L_27R_28_A_26R_27R_28        1
D_27R_8R_9L_A_10_9R              1
D_8R_A_10_8L                     1
Name: airport_config, Length: 156, dtype: int64

In [16]:
config_timecourse.value_counts()["D_9L_A_9R"]

148

In [17]:
config_timecourse.value_counts().sum()

35035

In [18]:
148/35035

0.004224347081489939

In [24]:
labeled_submission = pd.read_csv("../data/prescreened_train_labels.csv.bz2")

In [25]:
labeled_submission[labeled_submission["airport"]=="katl"]

Unnamed: 0,airport,timestamp,lookahead,config,active
0,katl,2020-11-06T23:00:00,30,katl:D_10_8L_A_10_8L,0.0
1,katl,2020-11-06T23:00:00,30,katl:D_10_8R_9L_A_10_8L_9R,0.0
2,katl,2020-11-06T23:00:00,30,katl:D_10_8R_A_10_8R,0.0
3,katl,2020-11-06T23:00:00,30,katl:D_26L_27L_A_26R_27L_28,0.0
4,katl,2020-11-06T23:00:00,30,katl:D_26L_27R_28_A_26R_27L_28,0.0
...,...,...,...,...,...
2678827,katl,2021-10-17T10:00:00,360,katl:D_8R_9L_A_8L_9R,0.0
2678828,katl,2021-10-17T10:00:00,360,katl:D_8R_9L_A_8R_9L,0.0
2678829,katl,2021-10-17T10:00:00,360,katl:D_8R_9R_A_10_8L_9R,0.0
2678830,katl,2021-10-17T10:00:00,360,katl:D_9L_A_9R,0.0


In [23]:
labeled_submission[labeled_submission["airport"]=="katl"].iloc[10000:10020]

Unnamed: 0,airport,timestamp,lookahead,config,active
10000,katl,2021-01-05T12:00:00,330,katl:D_26L_27R_A_26R_27R_28,0.037037
10001,katl,2021-01-05T12:00:00,330,katl:D_26L_27R_A_26R_28,0.037037
10002,katl,2021-01-05T12:00:00,330,katl:D_26L_27R_A_27L_28,0.037037
10003,katl,2021-01-05T12:00:00,330,katl:D_26L_28_A_26L_28,0.037037
10004,katl,2021-01-05T12:00:00,330,katl:D_26L_28_A_26R_27L_28,0.037037
10005,katl,2021-01-05T12:00:00,330,katl:D_26L_28_A_26R_28,0.037037
10006,katl,2021-01-05T12:00:00,330,katl:D_26R_27R_A_26R_27L_28,0.037037
10007,katl,2021-01-05T12:00:00,330,katl:D_26R_28_A_26R_28,0.037037
10008,katl,2021-01-05T12:00:00,330,katl:D_8L_9L_A_10_8L_9R,0.037037
10009,katl,2021-01-05T12:00:00,330,katl:D_8R_9L_A_10_8L_9R,0.037037


In [29]:
(0*4+(148/35035)*5)/9

0.002346859489716633

In [32]:
2/365

0.005479452054794521

148/35035