In [1]:
import pandas as pd
from sklearn import datasets
from sklearn import linear_model
import datetime as dt
import numpy as np

def nearest(items, pivot):
    return min(items, key=lambda x: abs(x - pivot))


open_training_labels = pd.read_csv("../data/open_train_labels.csv.bz2", parse_dates=["timestamp"], compression = "bz2")
airports = open_training_labels["airport"].unique().tolist()
start_time = open_training_labels["timestamp"].min()
max_labels = 0
needed_reports = dict()

In [7]:
for air in airports:
    possible_labels = open_training_labels[open_training_labels["airport"] == air]["config"].unique()
    if max_labels < len(possible_labels.tolist()):
        max_labels = len(possible_labels.tolist())
    possible_config = possible_labels.copy()
    counter = 0
    for i in possible_labels:
        possible_config[counter] = i.split(":")[1]
        counter += 1
    cur_config = pd.read_csv(f"../data/{air}/{air}_airport_config.csv.bz2", parse_dates=["timestamp"])
    possible_config = cur_config["airport_config"].unique()
    
    arrivals = pd.read_csv(f"./{air}_landing_dist.csv")
    departures = pd.read_csv(f"./{air}_take_off_dist.csv")
    dpTimeStamps = []
    arTimeStamps = []
    for i, r in arrivals.iterrows():
        arTimeStamps.append(start_time+dt.timedelta(minutes=i*30))
    for i, r in departures.iterrows():
        dpTimeStamps.append(start_time+dt.timedelta(minutes=i*30))
    arrivals["timestamp"] = arTimeStamps
    departures["timestamp"] = dpTimeStamps

    weather_file = f"../data/{air}/{air}_lamp.csv.bz2"
    weather_data = pd.read_csv(weather_file, parse_dates=["timestamp", "forecast_timestamp"])

    weather_data["cloud"] = 0*(weather_data["cloud"]=="CL")+ \
                                    1*(weather_data["cloud"]=="FW") + \
                                    5*(weather_data["cloud"]=="SC") + \
                                        9*(weather_data["cloud"]=="BK")+ \
                                            10*(weather_data["cloud"]=="OV")
    weather_data["lightning_prob"] = 1*(weather_data["lightning_prob"]=="L") + \
                                                    2*(weather_data["lightning_prob"]=="M") + \
                                                        3*(weather_data["lightning_prob"]=="H")
    weather_data["precip"] = (weather_data["precip"])*1
    weather_data["wind_direction_cos"] = np.cos(weather_data["wind_direction"]*10*np.pi/360)
    weather_data["wind_direction_sin"] = np.sin(weather_data["wind_direction"]*10*np.pi/360)
    weather_data = weather_data.drop('wind_direction', 1)
    pd.DataFrame(possible_labels).to_csv(f"{air}_possibel_config")
    needed_reports[air] = (weather_data, departures, arrivals,cur_config, possible_config, possible_labels)
    print("Preprocess stage 2: "+str(air))



Preprocess stage 2: katl
Preprocess stage 2: kclt
Preprocess stage 2: kden
Preprocess stage 2: kdfw
Preprocess stage 2: kjfk
Preprocess stage 2: kmem
Preprocess stage 2: kmia
Preprocess stage 2: kord
Preprocess stage 2: kphx
Preprocess stage 2: ksea


In [8]:
possible_labels

Unnamed: 0,timestamp,forecast_timestamp,temperature,wind_speed,wind_gust,cloud_ceiling,visibility,cloud,lightning_prob,precip,wind_direction_cos,wind_direction_sin
0,2020-11-01 00:30:00,2020-11-01 01:00:00,53.0,8.0,0.0,8.0,7.0,9,0,0,-1.000000,1.224647e-16
1,2020-11-01 00:30:00,2020-11-02 01:00:00,56.0,3.0,0.0,8.0,7.0,1,0,0,-1.000000,1.224647e-16
2,2020-11-01 00:30:00,2020-11-02 00:00:00,58.0,2.0,0.0,8.0,7.0,1,0,0,-0.939693,3.420201e-01
3,2020-11-01 00:30:00,2020-11-01 23:00:00,59.0,3.0,0.0,8.0,7.0,1,0,0,-0.984808,1.736482e-01
4,2020-11-01 00:30:00,2020-11-01 22:00:00,58.0,4.0,0.0,8.0,7.0,1,0,0,-0.984808,1.736482e-01
...,...,...,...,...,...,...,...,...,...,...,...,...
233220,2021-10-31 23:30:00,2021-11-01 03:00:00,49.0,7.0,0.0,8.0,7.0,0,0,0,0.939693,3.420201e-01
233221,2021-10-31 23:30:00,2021-11-01 02:00:00,51.0,7.0,0.0,8.0,7.0,0,0,0,0.996195,8.715574e-02
233222,2021-10-31 23:30:00,2021-11-01 01:00:00,54.0,6.0,0.0,8.0,7.0,0,0,0,-0.996195,8.715574e-02
233223,2021-10-31 23:30:00,2021-11-01 13:00:00,43.0,5.0,0.0,8.0,7.0,0,0,0,0.866025,5.000000e-01


In [9]:

training_file = pd.DataFrame({"airport": [], "temperature": [], "wind_speed": [], "wind_gust": [], "cloud_ceiling": [], "visibility": [], \
                                "cloud": [], "lightning_prob": [], "precip": [],"wind_direction_cos": [], "wind_direction_sin": [], "depart1": [], "deaprt2": [], "depart3": [], "depart4": [], \
                                    "arrive1": [], "arrive2": [], "arrive3": [], "arrive4": [], "lookahead": [], "actual_label": []})
for i in range(max_labels):
    training_file.insert(18+i, 'cur_config_hot'+str(i), [])


In [5]:
training_file.shape

3.141592653589793

In [10]:

opened = opened[opened["active"]==1]

training_file.to_csv("training_data.csv")

counter = 0
for i, r in opened.iterrows():

    label = r["config"]
    timestamp = r["timestamp"]
    lookahead = r["lookahead"]

    


    # Remove for "testing" code
    mask = needed_reports[r["airport"]][0]["timestamp"] <= timestamp
    masked_weather_data = needed_reports[r["airport"]][0][mask]

    # Get weather data
    latest_weather_intercept = masked_weather_data[masked_weather_data["timestamp"] == masked_weather_data["timestamp"].max()]

    get_nearest = latest_weather_intercept[latest_weather_intercept["forecast_timestamp"] == nearest(latest_weather_intercept["forecast_timestamp"], timestamp + dt.timedelta(minutes=lookahead))]
    weather_features = get_nearest.values.tolist()[0][2:]

    # Get latest take-off and landing estimates
    departure_reports = needed_reports[r["airport"]][1]
    mask = departure_reports["timestamp"] <= timestamp
    masked_departure = departure_reports[mask]
    latest_departure_report = masked_departure[masked_departure["timestamp"] == masked_departure["timestamp"].max()]
    departure_features = latest_departure_report.values.tolist()[0][int(lookahead/30-1):int(lookahead/30-1)+4]
    arrival_reports = needed_reports[r["airport"]][2]
    mask = arrival_reports["timestamp"] <= timestamp
    masked_arrival = arrival_reports[mask]
    latest_arrival_report = masked_arrival[masked_arrival["timestamp"] == masked_arrival["timestamp"].max()]
    arrival_features = latest_arrival_report.values.tolist()[0][int(lookahead/30-1):int(lookahead/30-1)+4]
    # Get current configiration
    cur_config = needed_reports[r["airport"]][3]
    mask = cur_config["timestamp"] <= timestamp
    masked_cur_config = cur_config[mask]
    latest_cur_config = masked_cur_config[masked_cur_config["timestamp"] == masked_cur_config["timestamp"].max()]
    cur_config_feature = [0]*max_labels
    cur_config_index = np.where(needed_reports[r["airport"]][4]==latest_cur_config.values.tolist()[0][1])
    cur_config_index = cur_config_index[0][0] if len(cur_config_index[0])>0 else r["airport"][4].size-1
    cur_config_feature[cur_config_index] = 1

    total_features = [r["airport"]] + weather_features + departure_features + arrival_features + cur_config_feature + [r["lookahead"], np.where(needed_reports[r["airport"]][5]==r["config"])[0][0]]
    training_file.loc[len(training_file)] = (total_features)
    if counter % 50 == 0:
        print(str(counter) + " out of " + str(opened.shape[0]))
        training_file.to_csv('training_data.csv', mode='a', index=True, header=False)
        training_file = pd.DataFrame({"airport": [], "temperature": [], "wind_direction": [], "wind_speed": [], "wind_gust": [], "cloud_ceiling": [], "visibility": [], \
                                "cloud": [], "lightning_prob": [], "precip": [], "depart1": [], "deaprt2": [], "depart3": [], "depart4": [], \
                                    "arrive1": [], "arrive2": [], "arrive3": [], "arrive4": [], "lookahead": [], "actual_label": []})
        for k in range(max_labels):
            training_file.insert(18+k, 'cur_config_hot'+str(k), [])
    counter +=1
    if counter % 200 == 0:
        break
training_file.to_csv('training_data.csv', mode='a', index=True, header=False)

NameError: name 'opened' is not defined

In [89]:
total_features

['katl',
 67.0,
 8.0,
 10.0,
 16.0,
 6.0,
 7.0,
 10,
 0,
 0,
 26,
 24,
 5,
 19,
 24,
 24,
 29,
 38,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 240,
 19]

In [None]:
(training_file).to_csv("training_data.csv")

In [6]:
import pandas as pd
train = pd.read_csv("training_data.csv")

In [91]:
pd.read_csv('training_data.csv')

Unnamed: 0.1,Unnamed: 0,airport,temperature,wind_direction,wind_speed,wind_gust,cloud_ceiling,visibility,cloud,lightning_prob,...,cur_config_hot34,cur_config_hot35,cur_config_hot36,cur_config_hot37,cur_config_hot38,cur_config_hot39,cur_config_hot40,cur_config_hot41,lookahead,actual_label
0,0,katl,66.0,8.0,13.0,0.0,4.0,7.0,10.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,240.0,19.0
1,1,katl,66.0,8.0,13.0,0.0,4.0,7.0,10.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,270.0,19.0
2,2,katl,68.0,8.0,12.0,19.0,4.0,7.0,10.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,300.0,19.0
3,3,katl,68.0,8.0,12.0,19.0,4.0,7.0,10.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,330.0,19.0
4,4,katl,69.0,8.0,12.0,18.0,4.0,7.0,10.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,360.0,19.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
293,44,katl,66.0,8.0,11.0,0.0,5.0,7.0,10.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,120.0,19.0
294,45,katl,66.0,8.0,11.0,0.0,5.0,7.0,10.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,150.0,19.0
295,46,katl,66.0,8.0,11.0,17.0,5.0,7.0,10.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,180.0,19.0
296,47,katl,67.0,8.0,10.0,16.0,6.0,7.0,10.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,210.0,19.0


In [4]:
import pandas as pd
my_data = pd.read_csv("training_data.csv")
open_data = pd.read_csv("../data/open_train_labels.csv.bz2", parse_dates=["timestamp"])
open_data

Unnamed: 0,airport,timestamp,lookahead,config,active
0,katl,2020-11-06 23:00:00,30,katl:D_10_8L_A_10_8L,0.0
1,katl,2020-11-06 23:00:00,30,katl:D_10_8R_9L_A_10_8L_9R,0.0
2,katl,2020-11-06 23:00:00,30,katl:D_10_8R_A_10_8R,0.0
3,katl,2020-11-06 23:00:00,30,katl:D_26L_27L_A_26R_27L_28,0.0
4,katl,2020-11-06 23:00:00,30,katl:D_26L_27R_28_A_26R_27L_28,0.0
...,...,...,...,...,...
22640539,ksea,2021-10-17 10:00:00,360,ksea:D_34C_A_34C_34L,0.0
22640540,ksea,2021-10-17 10:00:00,360,ksea:D_34R_A_34C,0.0
22640541,ksea,2021-10-17 10:00:00,360,ksea:D_34R_A_34C_34R,0.0
22640542,ksea,2021-10-17 10:00:00,360,ksea:D_34R_A_34L_34R,0.0


In [5]:
open_data = open_data[open_data["active"] == 1]

In [6]:
open_data.shape

(891360, 5)

In [7]:
my_data.shape

(891360, 21)

In [12]:
my_data = pd.read_csv("training_data.csv")
my_data.insert(0, "airport", open_data["airport"].tolist())

In [13]:
my_data

Unnamed: 0.1,airport,Unnamed: 0,temperature,wind_direction,wind_speed,wind_gust,cloud_ceiling,visibility,cloud,lightning_prob,...,deaprt2,depart3,depart4,arrive1,arrive2,arrive3,arrive4,current_config,lookahead,actual_label
0,katl,0,69.0,7.0,8.0,0.0,8.0,7.0,5.0,0.0,...,10.0,39.0,47.0,0.0,30.0,43.0,61.0,D_8R_9L_A_10_8L_9R,30.0,katl:D_8R_9L_A_10_8L_9R
1,katl,1,66.0,7.0,9.0,0.0,8.0,7.0,5.0,0.0,...,39.0,47.0,13.0,30.0,43.0,61.0,43.0,D_8R_9L_A_10_8L_9R,60.0,katl:D_8R_9L_A_10_8L_9R
2,katl,2,64.0,7.0,9.0,0.0,8.0,7.0,1.0,0.0,...,47.0,13.0,31.0,43.0,61.0,43.0,54.0,D_8R_9L_A_10_8L_9R,90.0,katl:D_8R_9L_A_10_8L_9R
3,katl,3,64.0,7.0,9.0,0.0,8.0,7.0,1.0,0.0,...,13.0,31.0,21.0,61.0,43.0,54.0,40.0,D_8R_9L_A_10_8L_9R,120.0,katl:D_8R_9L_A_10_8L_9R
4,katl,4,64.0,7.0,9.0,0.0,8.0,7.0,1.0,0.0,...,31.0,21.0,12.0,43.0,54.0,40.0,28.0,D_8R_9L_A_10_8L_9R,150.0,katl:D_8R_9L_A_10_8L_9R
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
891355,ksea,891355,52.0,16.0,5.0,0.0,7.0,7.0,10.0,0.0,...,1.0,6.0,13.0,3.0,1.0,10.0,14.0,D_16L_A_16L_16R,240.0,ksea:D_16L_A_16L_16R
891356,ksea,891356,53.0,17.0,5.0,0.0,7.0,7.0,10.0,0.0,...,6.0,13.0,10.0,1.0,10.0,14.0,14.0,D_16L_A_16L_16R,270.0,ksea:D_16L_A_16L_16R
891357,ksea,891357,53.0,17.0,5.0,0.0,7.0,7.0,10.0,0.0,...,13.0,10.0,28.0,10.0,14.0,14.0,8.0,D_16L_A_16L_16R,300.0,ksea:D_16L_A_16L_16R
891358,ksea,891358,54.0,15.0,4.0,0.0,6.0,7.0,10.0,0.0,...,10.0,28.0,14.0,14.0,14.0,8.0,14.0,D_16L_A_16L_16R,330.0,ksea:D_16L_A_16L_16R


In [10]:
open_data["airport"]

19          katl
46          katl
73          katl
100         katl
127         katl
            ... 
22640489    ksea
22640501    ksea
22640513    ksea
22640525    ksea
22640537    ksea
Name: airport, Length: 891360, dtype: object

In [14]:
my_data.to_csv("training_data.csv")

# New stuff

In [3]:
import pandas as pd
from sklearn import datasets
from sklearn import linear_model
import datetime as dt
import numpy as np

def nearest(items, pivot):
    return min(items, key=lambda x: abs(x - pivot))


open_training_labels = pd.read_csv("../data/open_train_labels.csv.bz2", parse_dates=["timestamp"], compression = "bz2")
airports = open_training_labels["airport"].unique().tolist()
start_time = open_training_labels["timestamp"].min()
max_labels = 0
needed_reports = dict()



In [4]:
for air in airports:
    possible_labels = open_training_labels[open_training_labels["airport"] == air]["config"].unique()
    if max_labels < len(possible_labels.tolist()):
        max_labels = len(possible_labels.tolist())
    possible_config = possible_labels.copy()
    counter = 0
    for i in possible_labels:
        possible_config[counter] = i.split(":")[1]
        counter += 1
    cur_config = pd.read_csv(f"../data/{air}/{air}_airport_config.csv.bz2", parse_dates=["timestamp"])
    
    arrivals = pd.read_csv(f"./{air}_landing_dist.csv")
    departures = pd.read_csv(f"./{air}_take_off_dist.csv")
    dpTimeStamps = []
    arTimeStamps = []
    for i, r in arrivals.iterrows():
        arTimeStamps.append(start_time+dt.timedelta(minutes=i*30))
    for i, r in departures.iterrows():
        dpTimeStamps.append(start_time+dt.timedelta(minutes=i*30))
    arrivals["timestamp"] = arTimeStamps
    departures["timestamp"] = dpTimeStamps

    weather_file = f"../data/{air}/{air}_lamp.csv.bz2"
    weather_data = pd.read_csv(weather_file, parse_dates=["timestamp", "forecast_timestamp"])

    weather_data["cloud"] = 0*(weather_data["cloud"]=="CL")+ \
                                    1*(weather_data["cloud"]=="FW") + \
                                    5*(weather_data["cloud"]=="SC") + \
                                        9*(weather_data["cloud"]=="BK")+ \
                                            10*(weather_data["cloud"]=="OV")
    weather_data["lightning_prob"] = 1*(weather_data["lightning_prob"]=="L") + \
                                                    2*(weather_data["lightning_prob"]=="M") + \
                                                        3*(weather_data["lightning_prob"]=="H")
    weather_data["precip"] = (weather_data["precip"])*1
    pd.DataFrame(possible_labels).to_csv(f"{air}_possibel_config")
    needed_reports[air] = (weather_data, departures, arrivals,cur_config, possible_config, possible_labels)
    print("Preprocess stage 2: "+str(air))

opened = open_training_labels.copy()



Preprocess stage 2: katl
Preprocess stage 2: kclt
Preprocess stage 2: kden
Preprocess stage 2: kdfw
Preprocess stage 2: kjfk
Preprocess stage 2: kmem
Preprocess stage 2: kmia
Preprocess stage 2: kord
Preprocess stage 2: kphx
Preprocess stage 2: ksea


In [3]:
possible_labels

array(['ksea:D_16C_A_16C_16R', 'ksea:D_16C_A_16L_16R', 'ksea:D_16L_A_16C',
       'ksea:D_16L_A_16C_16L', 'ksea:D_16L_A_16C_16R',
       'ksea:D_16L_A_16L_16R', 'ksea:D_16L_A_16R', 'ksea:D_34C_A_34C_34L',
       'ksea:D_34R_A_34C', 'ksea:D_34R_A_34C_34R', 'ksea:D_34R_A_34L_34R',
       'ksea:other'], dtype=object)

In [22]:
training_file = pd.DataFrame({"airport": [], "temperature": [], "wind_direction": [], "wind_speed": [], "wind_gust": [], "cloud_ceiling": [], "visibility": [], \
                                "cloud": [], "lightning_prob": [], "precip": [], "depart1": [], "deaprt2": [], "depart3": [], "depart4": [], \
                                    "arrive1": [], "arrive2": [], "arrive3": [], "arrive4": [], "lookahead": [], "actual_label": []})
for i in range(max_labels):
    training_file.insert(18+i, 'cur_config_hot'+str(i), [])

training_file.to_csv("training_data.csv")
counter = 0
opened = opened[opened["active"]==1]
for i, r in opened.iterrows():

    label = r["config"]
    timestamp = r["timestamp"]
    lookahead = r["lookahead"]

    


    # Remove for "testing" code
    mask = needed_reports[r["airport"]][0]["timestamp"] <= timestamp
    masked_weather_data = needed_reports[r["airport"]][0][mask]

    # Get weather data
    latest_weather_intercept = masked_weather_data[masked_weather_data["timestamp"] == masked_weather_data["timestamp"].max()]

    get_nearest = latest_weather_intercept[latest_weather_intercept["forecast_timestamp"] == nearest(latest_weather_intercept["forecast_timestamp"], timestamp + dt.timedelta(minutes=lookahead))]
    weather_features = get_nearest.values.tolist()[0][2:]

    # Get latest take-off and landing estimates
    departure_reports = needed_reports[r["airport"]][1]
    mask = departure_reports["timestamp"] <= timestamp
    masked_departure = departure_reports[mask]
    latest_departure_report = masked_departure[masked_departure["timestamp"] == masked_departure["timestamp"].max()]
    departure_features = latest_departure_report.values.tolist()[0][int(lookahead/30-1):int(lookahead/30-1)+4]
    arrival_reports = needed_reports[r["airport"]][2]
    mask = arrival_reports["timestamp"] <= timestamp
    masked_arrival = arrival_reports[mask]
    latest_arrival_report = masked_arrival[masked_arrival["timestamp"] == masked_arrival["timestamp"].max()]
    arrival_features = latest_arrival_report.values.tolist()[0][int(lookahead/30-1):int(lookahead/30-1)+4]
    # Get current configiration
    cur_config = needed_reports[r["airport"]][3]
    mask = cur_config["timestamp"] <= timestamp
    masked_cur_config = cur_config[mask]
    latest_cur_config = masked_cur_config[masked_cur_config["timestamp"] == masked_cur_config["timestamp"].max()]
    cur_config_feature = [0]*max_labels
    cur_config_index = np.where(needed_reports[r["airport"]][4]==latest_cur_config.values.tolist()[0][1])
    cur_config_index = cur_config_index[0][0] if len(cur_config_index[0])>0 else needed_reports[r["airport"]][5].size-1
    cur_config_feature[cur_config_index] = 1

    total_features = [r["airport"]] + weather_features + departure_features + arrival_features + cur_config_feature + [r["lookahead"], np.where(needed_reports[r["airport"]][5]==r["config"])[0][0]]
    training_file.loc[len(training_file)] = (total_features)
    print(counter)
    if counter % 13 == 0:
        print(str(counter) + " out of " + str(opened.shape[0]))
        training_file.to_csv('training_data.csv', mode='a', index=True, header=False)
        training_file = pd.DataFrame({"airport": [], "temperature": [], "wind_direction": [], "wind_speed": [], "wind_gust": [], "cloud_ceiling": [], "visibility": [], \
                                "cloud": [], "lightning_prob": [], "precip": [], "depart1": [], "deaprt2": [], "depart3": [], "depart4": [], \
                                    "arrive1": [], "arrive2": [], "arrive3": [], "arrive4": [], "lookahead": [], "actual_label": []})
        for k in range(max_labels):
            training_file.insert(18+k, 'cur_config_hot'+str(k), [])
    elif counter % 20 == 0:
        break
    
    counter +=1
print(training_file)
training_file.to_csv('training_data.csv', mode='a', index=True, header=False)

0
0 out of 891360
1
2
3
4
5
6
7
8
9
10
11
12
13
13 out of 891360
14
15
16
17
18
19
20
  airport  temperature  wind_direction  wind_speed  wind_gust  cloud_ceiling  \
0    katl         63.0             6.0         9.0        0.0            8.0   
1    katl         63.0             6.0         9.0        0.0            8.0   
2    katl         63.0             6.0         9.0        0.0            8.0   
3    katl         61.0             6.0         9.0        0.0            8.0   
4    katl         61.0             6.0         9.0        0.0            8.0   
5    katl         61.0             6.0         9.0        0.0            8.0   
6    katl         60.0             6.0         9.0        0.0            8.0   

   visibility  cloud  lightning_prob  precip  ...  cur_config_hot34  \
0         7.0    1.0             0.0     0.0  ...               0.0   
1         7.0    1.0             0.0     0.0  ...               0.0   
2         7.0    1.0             0.0     0.0  ...           

In [24]:
print(pd.read_csv('training_data.csv'))

    Unnamed: 0 airport  temperature  wind_direction  wind_speed  wind_gust  \
0            0    katl         69.0             7.0         8.0        0.0   
1            0    katl         66.0             7.0         9.0        0.0   
2            1    katl         64.0             7.0         9.0        0.0   
3            2    katl         64.0             7.0         9.0        0.0   
4            3    katl         64.0             7.0         9.0        0.0   
5            4    katl         63.0             6.0        10.0        0.0   
6            5    katl         62.0             7.0         9.0        0.0   
7            6    katl         62.0             7.0         9.0        0.0   
8            7    katl         62.0             6.0        10.0        0.0   
9            8    katl         62.0             6.0        10.0        0.0   
10           9    katl         61.0             6.0        10.0        0.0   
11          10    katl         61.0             6.0        10.0 

In [15]:
needed_reports

{'katl': (                 timestamp  forecast_timestamp  temperature  wind_direction  \
  0      2020-11-01 00:30:00 2020-11-01 06:00:00         55.0             9.0   
  1      2020-11-01 00:30:00 2020-11-02 01:00:00         54.0            32.0   
  2      2020-11-01 00:30:00 2020-11-02 00:00:00         56.0            32.0   
  3      2020-11-01 00:30:00 2020-11-01 23:00:00         60.0            32.0   
  4      2020-11-01 00:30:00 2020-11-01 22:00:00         64.0            32.0   
  ...                    ...                 ...          ...             ...   
  234745 2021-10-31 23:30:00 2021-11-01 04:00:00         51.0            32.0   
  234746 2021-10-31 23:30:00 2021-11-01 00:00:00         56.0            32.0   
  234747 2021-10-31 23:30:00 2021-11-01 01:00:00         54.0            32.0   
  234748 2021-10-31 23:30:00 2021-11-01 16:00:00         61.0             6.0   
  234749 2021-10-31 23:30:00 2021-11-01 13:00:00         50.0             2.0   
  
          wind_sp