# Scope of this workbook

Here, we want to actually model the whole thing for different datasets. 

In [23]:
# Imports
import pandas as pd 
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt

import datetime

from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


#Settings
pd.set_option("display.max_rows", 70)
pd.set_option("display.max_columns", 101)

# full_df_unedited

In [24]:
full_df_unedited = pd.read_pickle('../data/processed/full_df_unedited.pkl')

In [25]:
full_df_unedited.head()

Unnamed: 0,dep_ap_sched,arr_ap_sched,dep_sched_date,dep_sched_time,arr_sched_date,arr_sched_time,m_offblockdt,m_onblockdt,ac_registration_x,dep_delay,Ac Type Code,trans_time,sched_trans_time,Crew Group,TLC_trans,crew_type_change,Sched Groundtime,Act Groundtime,mingt,arr_leg_outbound,catering_duration,pax_boarding_duration,block_time,flt_event_number
4,East Melissaberg,East Carmen,2019-06-01,2019-06-01 02:30:00,2019-06-01,2019-06-01 04:15:00,2019-06-01 03:02:00,2019-06-01 04:43:00,ECLGNX,32.0,DH4,0,0,Start,"['Renee Fisher_nan_nan_nan_ca', 'Rebecca Castr...",[],35.0,21.0,35.0,Keithberg,26.0,26.0,101.0,1
7,East Latashaview,East Carmen,2019-06-01,2019-06-01 03:15:00,2019-06-01,2019-06-01 07:05:00,2019-06-01 03:30:00,2019-06-01 07:07:00,ECLBIX,15.0,320,0,0,Start,"['Nicholas Evans_nan_nan_nan_ca', 'Jessica Her...",[],45.0,61.0,40.0,Juliemouth,27.0,15.0,217.0,1
9,New Jessica,East Carmen,2019-06-01,2019-06-01 03:25:00,2019-06-01,2019-06-01 06:45:00,2019-06-01 03:50:00,2019-06-01 07:01:00,ECLBAX,25.0,320,0,0,Start,"['Sean Weeks_nan_nan_nan_ca', 'Tony Lloyd_nan_...",[],95.0,94.0,45.0,South Nathaniel,27.0,25.0,191.0,1
13,East Allisontown,East Carmen,2019-06-01,2019-06-01 04:00:00,2019-06-01,2019-06-01 04:35:00,2019-06-01 04:04:00,2019-06-01 04:36:00,ECLWFX,4.0,E95,0,0,Start,"['Frederick Ramirez_nan_nan_nan_cp', 'Ariel Wi...",[],55.0,120.0,40.0,Yoderburgh,18.0,11.0,32.0,1
15,Port Courtneytown,East Carmen,2019-06-01,2019-06-01 04:00:00,2019-06-01,2019-06-01 04:35:00,2019-06-01 04:14:00,2019-06-01 04:57:00,ECLGBX,14.0,DH4,0,0,Start,"['Heather Ryan_nan_nan_nan_ca', 'Jeff Hays_nan...",[],35.0,23.0,30.0,West Ana,15.0,12.0,43.0,1


In [26]:
# For the moment, let's in this step drop all variables we will not use. ToDo: Check which step is the best step to do that
columns_to_drop = [
    'TLC_trans',
    'catering_duration',
    'dep_sched_date', # the date itself has no real information. ToDo: Maybe we should extract the day of week here
    'dep_sched_time', # the time has a value but is dropped for the moment due to formatting. 
    'arr_sched_date',
    'arr_sched_time',
    'm_offblockdt',
    'm_onblockdt'
]

full_df_unedited = full_df_unedited.drop(columns_to_drop, axis = 1)

full_df_unedited = full_df_unedited[full_df_unedited['Act Groundtime']<180]

In [27]:
# Creating dummy variables for all categorical variables

# Note: Onehotencoder is the better solution, however for simplicity let's use pandas for the moment
#full_df_unedited = OneHotEncoder().fit_transform(full_df_unedited)
#full_df_unedited

# Get object columns
full_df_unedited_objectcolumns = full_df_unedited.select_dtypes(include = 'object')
varlist = full_df_unedited_objectcolumns.columns.values.tolist()

#get dummies
full_df_unedited_encoded = pd.get_dummies(full_df_unedited, columns = varlist, drop_first = True)

In [28]:
full_df_unedited_encoded.shape

(3837, 304)

In [29]:
# We split the forecast in two different forecasts - one for ground time and one for block time

X_train_block, X_test_block, y_train_block, y_test_block = train_test_split(full_df_unedited_encoded.drop(['block_time'], axis = 1), full_df_unedited_encoded['block_time'], test_size=0.33, random_state=42)
X_train_ground, X_test_ground, y_train_ground, y_test_ground = train_test_split(full_df_unedited_encoded.drop(['Act Groundtime'], axis = 1), full_df_unedited_encoded['Act Groundtime'], test_size=0.33, random_state=42)

In [30]:
# Gradientboosting as base classifier

# Blocktime
base_model_block = GradientBoostingRegressor()

base_model_block.fit(X_train_block, y_train_block)
base_model_block.predict(X_test_block)


# Groundtime

base_model_ground = GradientBoostingRegressor()

base_model_ground.fit(X_train_ground, y_train_ground)
base_model_ground.predict(X_test_ground)


array([  14.98141688,   90.31005136,   61.83242638, ...,   62.03673736,
       -208.98147572,   78.65529136])

In [31]:
# Blocktime
r2_block = base_model_block.score(X_test_block, y_test_block)
rmse_block = np.sqrt(mean_squared_error(y_test_block, base_model_block.predict(X_test_block)))
print("The r^2 for Block time is " + str(round(r2_block,4)))
print("The RMSE for Block time is " + str(round(rmse_block,2)) + " minutes.")
print("\n")

# Groundtime
r2_ground = base_model_ground.score(X_test_ground, y_test_ground)
rmse_ground = np.sqrt(mean_squared_error(y_test_ground, base_model_ground.predict(X_test_ground)))
print("The r^2 for Ground time is " + str(round(r2_ground,4)))
print("The RMSE for Ground time is " + str(round(rmse_ground,2)) + " minutes.")

The r^2 for Block time is 0.7943
The RMSE for Block time is 18.25 minutes.


The r^2 for Ground time is 0.8256
The RMSE for Ground time is 23.76 minutes.


# full_df_unedited with delay prediction

In [32]:
full_df = pd.read_pickle('../data/processed/full_df_unedited_wdelay.pkl')
full_df.head()

Unnamed: 0,dep_ap_sched,arr_ap_sched,dep_sched_date,dep_sched_time,arr_sched_date,arr_sched_time,m_offblockdt,m_onblockdt,ac_registration_x,dep_delay,Ac Type Code,trans_time,sched_trans_time,Crew Group,TLC_trans,crew_type_change,Sched Groundtime,Act Groundtime,mingt,arr_leg_outbound,catering_duration,pax_boarding_duration,block_time,flt_event_number,block_delay,ground_delay
4,East Melissaberg,East Carmen,2019-06-01,2019-06-01 02:30:00,2019-06-01,2019-06-01 04:15:00,2019-06-01 03:02:00,2019-06-01 04:43:00,ECLGNX,32.0,DH4,0,0,Start,"['Renee Fisher_nan_nan_nan_ca', 'Rebecca Castr...",[],35.0,21.0,35.0,Keithberg,26.0,26.0,101.0,1,-4.0,32.0
7,East Latashaview,East Carmen,2019-06-01,2019-06-01 03:15:00,2019-06-01,2019-06-01 07:05:00,2019-06-01 03:30:00,2019-06-01 07:07:00,ECLBIX,15.0,320,0,0,Start,"['Nicholas Evans_nan_nan_nan_ca', 'Jessica Her...",[],45.0,61.0,40.0,Juliemouth,27.0,15.0,217.0,1,-13.0,15.0
9,New Jessica,East Carmen,2019-06-01,2019-06-01 03:25:00,2019-06-01,2019-06-01 06:45:00,2019-06-01 03:50:00,2019-06-01 07:01:00,ECLBAX,25.0,320,0,0,Start,"['Sean Weeks_nan_nan_nan_ca', 'Tony Lloyd_nan_...",[],95.0,94.0,45.0,South Nathaniel,27.0,25.0,191.0,1,-9.0,25.0
13,East Allisontown,East Carmen,2019-06-01,2019-06-01 04:00:00,2019-06-01,2019-06-01 04:35:00,2019-06-01 04:04:00,2019-06-01 04:36:00,ECLWFX,4.0,E95,0,0,Start,"['Frederick Ramirez_nan_nan_nan_cp', 'Ariel Wi...",[],55.0,120.0,40.0,Yoderburgh,18.0,11.0,32.0,1,-3.0,4.0
15,Port Courtneytown,East Carmen,2019-06-01,2019-06-01 04:00:00,2019-06-01,2019-06-01 04:35:00,2019-06-01 04:14:00,2019-06-01 04:57:00,ECLGBX,14.0,DH4,0,0,Start,"['Heather Ryan_nan_nan_nan_ca', 'Jeff Hays_nan...",[],35.0,23.0,30.0,West Ana,15.0,12.0,43.0,1,8.0,14.0


We repeat the basic experiment, but this time we do delay prediction instead of ground/block time prediction

In [33]:
# For the moment, let's in this step drop all variables we will not use. ToDo: Check which step is the best step to do that
columns_to_drop_delaypred = [
    'TLC_trans',
    'catering_duration',
    'dep_sched_date', # the date itself has no real information. ToDo: Maybe we should extract the day of week here
    'dep_sched_time', # the time has a value but is dropped for the moment due to formatting. 
    'arr_sched_date',
    'arr_sched_time',
    'm_offblockdt',
    'm_onblockdt',
    # to ensure proper comparison in between ground/block time and ground/block delay prediction, we drop the targets of ground/block time prediction. 
    'Act Groundtime',
    #'block_time'
]

full_df = full_df[full_df['Act Groundtime']<180]


full_df = full_df.drop(columns_to_drop_delaypred, axis = 1)

In [34]:
# Creating dummy variables for all categorical variables
# Note: Onehotencoder is the better solution, however for simplicity let's use pandas for the moment

# Get object columns
full_df_objectcolumns = full_df.select_dtypes(include = 'object')
varlist = full_df_objectcolumns.columns.values.tolist()

# get dummies
full_df_encoded = pd.get_dummies(full_df, columns = varlist, drop_first = True)

In [35]:
# We split the forecast in two different forecasts - one for ground delay and one for block delay

X_train_blockdelay, X_test_blockdelay, y_train_blockdelay, y_test_blockdelay = train_test_split(
    full_df_encoded.drop(['block_delay'], axis = 1), full_df_encoded['block_delay'], test_size=0.33, random_state=42)
    
X_train_grounddelay, X_test_grounddelay, y_train_grounddelay, y_test_grounddelay = train_test_split(
    full_df_encoded.drop(['ground_delay'], axis = 1), full_df_encoded['ground_delay'], test_size=0.33, random_state=42)


In [36]:
# Gradientboosting as base classifier

# Blocktime
base_model_blockdelay = GradientBoostingRegressor()

base_model_blockdelay.fit(X_train_blockdelay, y_train_blockdelay)
base_model_blockdelay.predict(X_test_blockdelay)


# Groundtime

base_model_grounddelay = GradientBoostingRegressor()

base_model_grounddelay.fit(X_train_grounddelay, y_train_grounddelay)
base_model_grounddelay.predict(X_test_grounddelay)


array([1.15792438e+02, 1.44612806e-02, 6.59940396e+01, ...,
       3.39997679e+01, 1.99985403e+01, 1.09975256e+01])

In [37]:
# Blocktime
r2_blockdelay = base_model_blockdelay.score(X_test_blockdelay, y_test_blockdelay)
rmse_blockdelay = np.sqrt(mean_squared_error(y_test_blockdelay, base_model_blockdelay.predict(X_test_blockdelay)))
print("The r^2 for Block delay is " + str(round(r2_blockdelay,4)))
print("The RMSE for Block delay is " + str(round(rmse_blockdelay,2)) + " minutes.")
print("\n")

# Groundtime
r2_grounddelay = base_model_grounddelay.score(X_test_grounddelay, y_test_grounddelay)
rmse_grounddelay = np.sqrt(mean_squared_error(y_test_grounddelay, base_model_grounddelay.predict(X_test_grounddelay)))
print("The r^2 for Ground delay is " + str(round(r2_grounddelay,4)))
print("The RMSE for Ground delay is " + str(round(rmse_grounddelay,2)) + " minutes.")

The r^2 for Block delay is 0.3359
The RMSE for Block delay is 5.52 minutes.


The r^2 for Ground delay is 0.9343
The RMSE for Ground delay is 7.79 minutes.


# Processed Dataset

In [56]:
full_df = pd.read_pickle('../data/finalized/full_df.pkl')

#columns_to_drop_full_df = [
#    #'TLC_trans',
#    'catering_duration',
#    'dep_sched_date', # the date itself has no real information. ToDo: Maybe we should extract the day of week here
#    'dep_sched_time', # the time has a value but is dropped for the moment due to formatting. 
#    'arr_sched_date',
#    'arr_sched_time',
#    'm_offblockdt',
#    'm_onblockdt',
#    # to ensure proper comparison in between ground/block time and ground/block delay prediction, we drop the targets of ground/block time prediction. 
#    'Act Groundtime',
#    #'block_time'
#]

#full_df = full_df.drop(columns_to_drop_full_df, axis = 1)
#full_df = full_df.select_dtypes(exclude=['datetime64'])


#*******************************temp************************************
#test ob es was bringt, de facto keine ground columns mehr zu haben
full_df = full_df.drop(['catering_duration','arr_leg_outbound','mingt','sched_turnaround'], axis = 1) # maybe improve and impute sched_turnaround
#***********************************************************************

full_df['Route'] = full_df['dep_ap_sched'] + "-" + full_df['arr_ap_sched']
full_df = full_df.drop(['dep_ap_sched','arr_ap_sched'], axis = 1)

full_df = full_df.dropna(how = 'any')
full_df

Unnamed: 0,ac_registration_x,Ac Type Code,trans_time,sched_trans_time,Sched Groundtime,Act Groundtime,block_time,flt_event_number,block_delay,ground_delay,rows_to_drop_grounddelay,Route
0,ECLBAX,320,0,0,95.0,94.0,191.0,1,-9.0,25.0,0,New Jessica-East Carmen
1,ECLBAX,320,60,60,75.0,120.0,126.0,2,-9.0,15.0,0,East Carmen-South Nathaniel
2,ECLBAX,320,68,75,80.0,86.0,131.0,3,-14.0,51.0,0,South Nathaniel-East Carmen
3,ECLBAX,320,64,70,50.0,32.0,74.0,4,-11.0,43.0,0,East Carmen-Joneshaven
5,ECLBAX,320,73,45,45.0,38.0,82.0,1,2.0,20.0,0,East Carmen-Joneshaven
...,...,...,...,...,...,...,...,...,...,...,...,...
12179,ECLXEX,320,0,0,45.0,39.0,84.0,2,-11.0,21.0,0,East Carmen-Lisamouth
12180,ECLXEX,320,37,45,50.0,85.0,91.0,3,-9.0,4.0,0,Lisamouth-East Carmen
12181,ECLXEX,320,0,0,45.0,54.0,66.0,4,-9.0,30.0,0,East Carmen-West Jason
12182,ECLXEX,320,41,45,45.0,57.0,70.0,5,-10.0,30.0,0,West Jason-East Carmen


## Train/Test Split unencoded

In [57]:
## We split the forecast in two different forecasts - one for ground delay and one for block delay#
#
#X_train_blockdelay, X_test_blockdelay, y_train_blockdelay, y_test_blockdelay = train_test_split(
#    full_df.drop(['block_delay'], axis = 1), full_df['block_delay'], test_size=0.33, random_state=42)
#
## Filtering out rows which are skewing ground delay prediction
#full_df_grounddelay = full_df[full_df['rows_to_drop_grounddelay']<1]
#
#X_train_grounddelay, X_test_grounddelay, y_train_grounddelay, y_test_grounddelay = train_test_split(
#    full_df_grounddelay.drop(['ground_delay'], axis = 1), full_df_grounddelay['ground_delay'], test_size=0.33, random_state=42)

## Train/Test Split encoded

In [58]:
# Creating dummy variables for all categorical variables
# Note: Onehotencoder is the better solution, however for simplicity let's use pandas for the moment

# Get object columns
full_df_objectcolumns = full_df.select_dtypes(include = 'object')
varlist = full_df_objectcolumns.columns.values.tolist()

# get dummies
full_df_encoded = pd.get_dummies(full_df, columns = varlist, drop_first = True)

In [59]:
# We split the forecast in two different forecasts - one for ground delay and one for block delay

# dropping the ground delay filter row
full_df_encoded_blockdelay = full_df_encoded.drop(['rows_to_drop_grounddelay'], axis = 1)
X_train_blockdelay_encoded, X_test_blockdelay_encoded, y_train_blockdelay_encoded, y_test_blockdelay_encoded = train_test_split(
    full_df_encoded_blockdelay.drop(['block_delay'], axis = 1), full_df_encoded_blockdelay['block_delay'], test_size=0.33, random_state=42)

# Filtering out rows which are skewing ground delay prediction
full_df_encoded_grounddelay = full_df_encoded[full_df_encoded['rows_to_drop_grounddelay']<1]

X_train_grounddelay_encoded, X_test_grounddelay_encoded, y_train_grounddelay_encoded, y_test_grounddelay_encoded = train_test_split(
    full_df_encoded_grounddelay.drop(['ground_delay'], axis = 1), full_df_encoded_grounddelay['ground_delay'], test_size=0.33, random_state=42)


## Linear Regression


In [60]:
# Blocktime
lr_block = LinearRegression()

lr_block.fit(X_train_blockdelay_encoded, y_train_blockdelay_encoded)
lr_block.predict(X_test_blockdelay_encoded)

# Groundtime
lr_ground = LinearRegression()

lr_ground.fit(X_train_grounddelay_encoded, y_train_grounddelay_encoded)
lr_ground.predict(X_test_grounddelay_encoded)

array([22.57450771,  5.45970654, 27.53950381, ..., 50.48270297,
       61.97063732, 59.24753523])

In [61]:
# Blocktime
r2_blockdelay = lr_block.score(X_test_blockdelay_encoded, y_test_blockdelay_encoded)
rmse_blockdelay = np.sqrt(mean_squared_error(y_test_blockdelay_encoded, lr_block.predict(X_test_blockdelay_encoded)))
print("The r^2 for Block delay is " + str(round(r2_blockdelay,4)))
print("The RMSE for Block delay is " + str(round(rmse_blockdelay,2)) + " minutes.")
print("\n")

# Groundtime
r2_grounddelay = lr_ground.score(X_test_grounddelay_encoded, y_test_grounddelay_encoded)
rmse_grounddelay = np.sqrt(mean_squared_error(y_test_grounddelay_encoded, lr_ground.predict(X_test_grounddelay_encoded)))
print("The r^2 for Ground delay is " + str(round(r2_grounddelay,4)))
print("The RMSE for Ground delay is " + str(round(rmse_grounddelay,2)) + " minutes.")

The r^2 for Block delay is -774717649220668.2
The RMSE for Block delay is 204409879.88 minutes.


The r^2 for Ground delay is -10873046045002.713
The RMSE for Ground delay is 76104863.49 minutes.


## Setup of Modelling

In [62]:
# Imports 
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV


In [63]:
# Initialize estimators

#reg1 = LinearRegression()
reg2 = Lasso()
#reg3 = Ridge()
reg4 = GradientBoostingRegressor()
#reg5 = SVR()

# Initialize hyperparameters for each dictionary
#param1 = {}

param2 = {}
param2['regressor__alpha'] = [x for x in np.linspace(0.0000001,2,num = 10)]
param2['regressor'] = [reg2]

param4 = {}
param4['regressor__n_estimators'] = [300] #[10,50,100,150,250]#[10,50,100,150,200,250]
param4['regressor__max_depth'] = [15,18]#,22] #[2,3,6,8,10,12,18] #[2,3,4,6,8,10,12,15,18]
param4['regressor__max_leaf_nodes'] = [16,18] #[6,8,12,None] #[6,8,12,15,19,25,None]
param4['regressor'] = [reg4]


# Create Pipeline
pipeline = Pipeline([('regressor', reg2)])
params = [param2, param4]

# Train grid search model
gs_block = GridSearchCV(pipeline, params, cv=3, n_jobs=-1, scoring='neg_root_mean_squared_error').fit(X_train_blockdelay_encoded,y_train_blockdelay_encoded)
gs_ground = GridSearchCV(pipeline, params, cv=3, n_jobs=-1, scoring='neg_root_mean_squared_error').fit(X_train_grounddelay_encoded,y_train_grounddelay_encoded)
print(gs_block.best_estimator_, gs_block.best_params_)
print(gs_ground.best_estimator_, gs_ground.best_params_)

#Evaluation
# Blocktime
r2_blockdelay = gs_block.best_estimator_.score(X_test_blockdelay_encoded, y_test_blockdelay_encoded)
rmse_blockdelay = np.sqrt(mean_squared_error(y_test_blockdelay_encoded, gs_block.best_estimator_.predict(X_test_blockdelay_encoded)))
print("The r^2 for Block delay is " + str(round(r2_blockdelay,4)))
print("The RMSE for Block delay is " + str(round(rmse_blockdelay,2)) + " minutes.")
print("\n")

# Groundtime
r2_grounddelay = gs_ground.best_estimator_.score(X_test_grounddelay_encoded, y_test_grounddelay_encoded)
rmse_grounddelay = np.sqrt(mean_squared_error(y_test_grounddelay_encoded, gs_ground.best_estimator_.predict(X_test_grounddelay_encoded)))
print("The r^2 for Ground delay is " + str(round(r2_grounddelay,4)))
print("The RMSE for Ground delay is " + str(round(rmse_grounddelay,2)) + " minutes.")



Pipeline(steps=[('regressor',
                 GradientBoostingRegressor(max_depth=18, max_leaf_nodes=18,
                                           n_estimators=300))]) {'regressor': GradientBoostingRegressor(max_depth=15, max_leaf_nodes=18, n_estimators=300), 'regressor__max_depth': 18, 'regressor__max_leaf_nodes': 18, 'regressor__n_estimators': 300}
Pipeline(steps=[('regressor',
                 GradientBoostingRegressor(max_depth=15, max_leaf_nodes=18,
                                           n_estimators=300))]) {'regressor': GradientBoostingRegressor(max_depth=15, max_leaf_nodes=18, n_estimators=300), 'regressor__max_depth': 15, 'regressor__max_leaf_nodes': 18, 'regressor__n_estimators': 300}
The r^2 for Block delay is 0.6609
The RMSE for Block delay is 4.28 minutes.


The r^2 for Ground delay is 0.6851
The RMSE for Ground delay is 12.95 minutes.


In [46]:
# filtering out all columns does not help apparently. 

## Gradient Boosted Tree

In [47]:
# Gradientboosting as base classifier

# Blocktime
base_model_blockdelay = GradientBoostingRegressor()

base_model_blockdelay.fit(X_train_blockdelay_encoded, y_train_blockdelay_encoded)
base_model_blockdelay.predict(X_test_blockdelay_encoded)


# Groundtime

base_model_grounddelay = GradientBoostingRegressor()

base_model_grounddelay.fit(X_train_grounddelay_encoded, y_train_grounddelay_encoded)
base_model_grounddelay.predict(X_test_grounddelay_encoded)


array([12.1289374 , 40.48937364,  9.6042367 , ...,  4.33293307,
        8.50829022, 17.45423131])

In [None]:
# Blocktime
r2_blockdelay = base_model_blockdelay.score(X_test_blockdelay_encoded, y_test_blockdelay_encoded)
rmse_blockdelay = np.sqrt(mean_squared_error(y_test_blockdelay_encoded, base_model_blockdelay.predict(X_test_blockdelay_encoded)))
print("The r^2 for Block delay is " + str(round(r2_blockdelay,4)))
print("The RMSE for Block delay is " + str(round(rmse_blockdelay,2)) + " minutes.")
print("\n")

# Groundtime
r2_grounddelay = base_model_grounddelay.score(X_test_grounddelay_encoded, y_test_grounddelay_encoded)
rmse_grounddelay = np.sqrt(mean_squared_error(y_test_grounddelay_encoded, base_model_grounddelay.predict(X_test_grounddelay_encoded)))
print("The r^2 for Ground delay is " + str(round(r2_grounddelay,4)))
print("The RMSE for Ground delay is " + str(round(rmse_grounddelay,2)) + " minutes.")

The r^2 for Block delay is 0.4503
The RMSE for Block delay is 5.43 minutes.


The r^2 for Ground delay is 0.6768
The RMSE for Ground delay is 13.98 minutes.
