# Setting up a baseline model for benchmarking
The model uses two intermediate models to predict the onblock and offblock delays, which are then added to the scheduled times: `next_landing_time = last_landing_time + onblock_sched + onblock_delay + offblock_sched + offblock_delay`

Two simple baseline models will be established to predict the consecutive landing times of a given connection chain:
1. Predicting each delay as the mean of the observed delays
2. Random forest regression

In [28]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
import random

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.dummy import DummyRegressor
from sklearn.ensemble import RandomForestRegressor


In [15]:
# Adjust settings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)
%matplotlib inline

# to make this notebook's output stable across runs
np.random.seed(42)

In [16]:
# Load preprocessed dataset
data_path = '../data/interim/'
cleaned_pickle = os.path.join(data_path, 'cleaned.pkl')
# NOTE: unpickling can execute arbitrary code -- only read files you trust.
df = pd.read_pickle(cleaned_pickle)

In [17]:
# Drop columns which will not be used
# TODO: is this the best place to do so?
unused_columns = [
    'flt_leg',
    'flt_TLC_trans',
    'flt_crewt_change',
    'flt_offblock',
    'flt_onblock',
    'flt_sched_dep',
    'flt_sched_arr',
]
df = df.drop(columns=unused_columns)

In [18]:
# One-hot encode all categorical variables (first level of each dropped),
# then discard every row that still contains a missing value
df_one_hot = pd.get_dummies(df, drop_first=True).dropna(axis=0, how='any')

In [19]:
# Preview the first five rows of the encoded frame
df_one_hot.head(n=5)

Unnamed: 0,flt_dep_delay,flt_tt,flt_sched_tt,flt_act_gt,gnd_sched_tat,block_delay,routing,sched_gt,act_gt,flt_dep_airpt_Barnesmouth,flt_dep_airpt_Castroville,flt_dep_airpt_Christophershire,flt_dep_airpt_Coleland,flt_dep_airpt_Collinsshire,flt_dep_airpt_Coryborough,flt_dep_airpt_Davidtown,flt_dep_airpt_Dawsonhaven,flt_dep_airpt_Desireeton,flt_dep_airpt_Dudleyton,flt_dep_airpt_East Allisontown,flt_dep_airpt_East Carmen,flt_dep_airpt_East Dakota,flt_dep_airpt_East Elizabethside,flt_dep_airpt_East Holly,flt_dep_airpt_East Jasonmouth,flt_dep_airpt_East Latashaview,flt_dep_airpt_East Lisafurt,flt_dep_airpt_East Melindachester,flt_dep_airpt_East Melissaberg,flt_dep_airpt_East Michael,flt_dep_airpt_East Toddchester,flt_dep_airpt_Ellisside,flt_dep_airpt_Erichaven,flt_dep_airpt_Fisherhaven,flt_dep_airpt_Garrettland,flt_dep_airpt_Gilbertland,flt_dep_airpt_Haynesside,flt_dep_airpt_Jamesview,flt_dep_airpt_Jenkinsside,flt_dep_airpt_Johnathanberg,flt_dep_airpt_Joneshaven,flt_dep_airpt_Juliemouth,flt_dep_airpt_Kaylashire,flt_dep_airpt_Keithberg,flt_dep_airpt_Kennethfort,flt_dep_airpt_Kristashire,flt_dep_airpt_Lake Danielburgh,flt_dep_airpt_Lake David,flt_dep_airpt_Lake Gerald,flt_dep_airpt_Lake Gregory,flt_dep_airpt_Lake Joelbury,flt_dep_airpt_Lake Kevin,flt_dep_airpt_Lake Lawrencechester,flt_dep_airpt_Lake Richardberg,flt_dep_airpt_Lake Sarahview,flt_dep_airpt_Lake Williammouth,flt_dep_airpt_Lambertport,flt_dep_airpt_Lisamouth,flt_dep_airpt_Lloydtown,flt_dep_airpt_Marioborough,flt_dep_airpt_Maryland,flt_dep_airpt_Matthewland,flt_dep_airpt_Mezaport,flt_dep_airpt_Michaelfurt,flt_dep_airpt_Millerborough,flt_dep_airpt_Mooretown,flt_dep_airpt_Navarroville,flt_dep_airpt_New Amy,flt_dep_airpt_New Bobby,flt_dep_airpt_New Harrytown,flt_dep_airpt_New Jessica,flt_dep_airpt_New Johnhaven,flt_dep_airpt_New Stacyburgh,flt_dep_airpt_North Aaron,flt_dep_airpt_North Amanda,flt_dep_airpt_North Anamouth,flt_dep_airpt_North Anna,flt_dep_airpt_North Christopherburgh,flt_dep_airpt_North 
Davidfurt,flt_dep_airpt_North Frankshire,flt_dep_airpt_North Gregory,flt_dep_airpt_North Jeffrey,flt_dep_airpt_North Jeremy,flt_dep_airpt_North Richard,flt_dep_airpt_North Stephenville,flt_dep_airpt_Patriciaborough,flt_dep_airpt_Paulbury,flt_dep_airpt_Philliphaven,flt_dep_airpt_Port Amberfort,flt_dep_airpt_Port Bobby,flt_dep_airpt_Port Courtneytown,flt_dep_airpt_Port Julieview,flt_dep_airpt_Port Maryside,flt_dep_airpt_Port Roberthaven,flt_dep_airpt_Port Ryan,flt_dep_airpt_Racheltown,flt_dep_airpt_Randyview,flt_dep_airpt_Reneemouth,flt_dep_airpt_Richardsonburgh,flt_dep_airpt_Rowefurt,flt_dep_airpt_Russellchester,flt_dep_airpt_Ryanland,flt_dep_airpt_South Cory,flt_dep_airpt_South Haileyberg,flt_dep_airpt_South Loriview,flt_dep_airpt_South Nathaniel,flt_dep_airpt_South Victoria,flt_dep_airpt_Stephanieland,flt_dep_airpt_Stephenstad,flt_dep_airpt_Susanmouth,flt_dep_airpt_Vaughnberg,flt_dep_airpt_Weeksbury,flt_dep_airpt_West Ana,flt_dep_airpt_West Bryanstad,flt_dep_airpt_West Danielport,flt_dep_airpt_West Jason,flt_dep_airpt_West Jennifer,flt_dep_airpt_West Ruth,flt_dep_airpt_Whiteland,flt_dep_airpt_Yoderburgh,flt_dep_airpt_Youngland,flt_arr_airpt_Barnesmouth,flt_arr_airpt_Castroville,flt_arr_airpt_Christophershire,flt_arr_airpt_Coleland,flt_arr_airpt_Collinsshire,flt_arr_airpt_Coryborough,flt_arr_airpt_Davidtown,flt_arr_airpt_Dawsonhaven,flt_arr_airpt_Desireeton,flt_arr_airpt_Dudleyton,flt_arr_airpt_East Allisontown,flt_arr_airpt_East Carmen,flt_arr_airpt_East Dakota,flt_arr_airpt_East Elizabethside,flt_arr_airpt_East Holly,flt_arr_airpt_East Jasonmouth,flt_arr_airpt_East Latashaview,flt_arr_airpt_East Lisafurt,flt_arr_airpt_East Melindachester,flt_arr_airpt_East Melissaberg,flt_arr_airpt_East Michael,flt_arr_airpt_East 
Toddchester,flt_arr_airpt_Ellisside,flt_arr_airpt_Erichaven,flt_arr_airpt_Fisherhaven,flt_arr_airpt_Garrettland,flt_arr_airpt_Gilbertland,flt_arr_airpt_Haynesside,flt_arr_airpt_Jamesview,flt_arr_airpt_Jenkinsside,flt_arr_airpt_Johnathanberg,flt_arr_airpt_Joneshaven,flt_arr_airpt_Juliemouth,flt_arr_airpt_Kaylashire,flt_arr_airpt_Keithberg,flt_arr_airpt_Kennethfort,flt_arr_airpt_Kristashire,flt_arr_airpt_Lake Danielburgh,flt_arr_airpt_Lake David,flt_arr_airpt_Lake Gerald,flt_arr_airpt_Lake Gregory,flt_arr_airpt_Lake Kevin,flt_arr_airpt_Lake Lawrencechester,flt_arr_airpt_Lake Richardberg,flt_arr_airpt_Lake Sarahview,flt_arr_airpt_Lake Williammouth,flt_arr_airpt_Lambertport,flt_arr_airpt_Lisamouth,flt_arr_airpt_Lloydtown,flt_arr_airpt_Marioborough,flt_arr_airpt_Maryland,flt_arr_airpt_Matthewland,flt_arr_airpt_Mezaport,flt_arr_airpt_Michaelfurt,flt_arr_airpt_Millerborough,flt_arr_airpt_Mooretown,flt_arr_airpt_Navarroville,flt_arr_airpt_New Amy,flt_arr_airpt_New Bobby,flt_arr_airpt_New Harrytown,flt_arr_airpt_New Jessica,flt_arr_airpt_New Johnhaven,flt_arr_airpt_New Stacyburgh,flt_arr_airpt_North Amanda,flt_arr_airpt_North Anamouth,flt_arr_airpt_North Anna,flt_arr_airpt_North Christopherburgh,flt_arr_airpt_North Davidfurt,flt_arr_airpt_North Frankshire,flt_arr_airpt_North Gregory,flt_arr_airpt_North Jeffrey,flt_arr_airpt_North Jeremy,flt_arr_airpt_North Richard,flt_arr_airpt_North Stephenville,flt_arr_airpt_Patriciaborough,flt_arr_airpt_Paulbury,flt_arr_airpt_Philliphaven,flt_arr_airpt_Port Amberfort,flt_arr_airpt_Port Bobby,flt_arr_airpt_Port Courtneytown,flt_arr_airpt_Port Julieview,flt_arr_airpt_Port Maryside,flt_arr_airpt_Port Roberthaven,flt_arr_airpt_Port Ryan,flt_arr_airpt_Racheltown,flt_arr_airpt_Randyview,flt_arr_airpt_Reneemouth,flt_arr_airpt_Richardsonburgh,flt_arr_airpt_Rowefurt,flt_arr_airpt_Russellchester,flt_arr_airpt_Ryanland,flt_arr_airpt_South Cory,flt_arr_airpt_South Haileyberg,flt_arr_airpt_South Loriview,flt_arr_airpt_South 
Nathaniel,flt_arr_airpt_South Victoria,flt_arr_airpt_Stephanieland,flt_arr_airpt_Stephenstad,flt_arr_airpt_Susanmouth,flt_arr_airpt_Vaughnberg,flt_arr_airpt_Weeksbury,flt_arr_airpt_West Ana,flt_arr_airpt_West Bryanstad,flt_arr_airpt_West Danielport,flt_arr_airpt_West Jason,flt_arr_airpt_West Jennifer,flt_arr_airpt_West Ruth,flt_arr_airpt_Whiteland,flt_arr_airpt_Yoderburgh,flt_arr_airpt_Youngland,flt_ac_reg_ECLBBX,flt_ac_reg_ECLBCX,flt_ac_reg_ECLBDX,flt_ac_reg_ECLBEX,flt_ac_reg_ECLBFX,flt_ac_reg_ECLBIX,flt_ac_reg_ECLBJX,flt_ac_reg_ECLBKX,flt_ac_reg_ECLBLX,flt_ac_reg_ECLBMX,flt_ac_reg_ECLBNX,flt_ac_reg_ECLBOX,flt_ac_reg_ECLBPX,flt_ac_reg_ECLBQX,flt_ac_reg_ECLBRX,flt_ac_reg_ECLBSX,flt_ac_reg_ECLBTX,flt_ac_reg_ECLBUX,flt_ac_reg_ECLBVX,flt_ac_reg_ECLBWX,flt_ac_reg_ECLBXX,flt_ac_reg_ECLBYX,flt_ac_reg_ECLBZX,flt_ac_reg_ECLDAX,flt_ac_reg_ECLDBX,flt_ac_reg_ECLDCX,flt_ac_reg_ECLDDX,flt_ac_reg_ECLDEX,flt_ac_reg_ECLDFX,flt_ac_reg_ECLDGX,flt_ac_reg_ECLGAX,flt_ac_reg_ECLGBX,flt_ac_reg_ECLGCX,flt_ac_reg_ECLGEX,flt_ac_reg_ECLGFX,flt_ac_reg_ECLGGX,flt_ac_reg_ECLGHX,flt_ac_reg_ECLGIX,flt_ac_reg_ECLGJX,flt_ac_reg_ECLGKX,flt_ac_reg_ECLGLX,flt_ac_reg_ECLGMX,flt_ac_reg_ECLGNX,flt_ac_reg_ECLGOX,flt_ac_reg_ECLGPX,flt_ac_reg_ECLGQX,flt_ac_reg_ECLGRX,flt_ac_reg_ECLWAX,flt_ac_reg_ECLWBX,flt_ac_reg_ECLWCX,flt_ac_reg_ECLWDX,flt_ac_reg_ECLWEX,flt_ac_reg_ECLWFX,flt_ac_reg_ECLWGX,flt_ac_reg_ECLWHX,flt_ac_reg_ECLWIX,flt_ac_reg_ECLWJX,flt_ac_reg_ECLWKX,flt_ac_reg_ECLWLX,flt_ac_reg_ECLWMX,flt_ac_reg_ECLWNX,flt_ac_reg_ECLWOX,flt_ac_reg_ECLWPX,flt_ac_reg_ECLWQX,flt_ac_reg_ECLXAX,flt_ac_reg_ECLXBX,flt_ac_reg_ECLXCX,flt_ac_reg_ECLXDX,flt_ac_reg_ECLXEX,flt_change_code_other problem,flt_change_code_rotational problem,flt_change_code_technical 
problem,flt_ac_type_DH4,flt_ac_type_E95,flt_crewg_B,flt_crewg_B2,flt_crewg_C,flt_crewg_Start,sched_dep_d_2019-06-02,sched_dep_d_2019-06-03,sched_dep_d_2019-06-04,sched_dep_d_2019-06-05,sched_dep_d_2019-06-06,sched_dep_d_2019-06-07,sched_dep_d_2019-06-08,sched_dep_d_2019-06-09,sched_dep_d_2019-06-10,sched_dep_d_2019-06-11,sched_dep_d_2019-06-12,sched_dep_d_2019-06-13,sched_dep_d_2019-06-14,sched_dep_d_2019-06-15,sched_dep_d_2019-06-16,sched_dep_d_2019-06-17,sched_dep_d_2019-06-18,sched_dep_d_2019-06-19,sched_dep_d_2019-06-20,sched_dep_d_2019-06-21,sched_dep_d_2019-06-22,sched_dep_d_2019-06-23,sched_dep_d_2019-06-24,sched_dep_d_2019-06-25,sched_dep_d_2019-06-26,sched_dep_d_2019-06-27,sched_dep_d_2019-06-28,sched_dep_d_2019-06-29,sched_dep_d_2019-06-30
1,15.0,60.0,60.0,120.0,95.0,6.0,True,75.0,120.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,43.0,64.0,70.0,32.0,80.0,32.0,True,50.0,32.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,23.0,0.0,0.0,94.0,50.0,8.0,True,45.0,94.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,51.0,0.0,0.0,5.0,45.0,49.0,True,50.0,5.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
13,23.0,90.0,95.0,75.0,95.0,22.0,True,60.0,75.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [20]:
# Generate train/test splits for both intermediate models.
# Both splits use the same frame, test_size and random_state.
# NOTE(review): each feature matrix still contains the other model's target
# (flt_dep_delay / block_delay) as well as columns such as flt_tt, flt_act_gt
# and act_gt -- confirm these are all known at prediction time (possible leakage).

# Offblock model: target is 'block_delay'
features_offblock = df_one_hot.drop(columns=['block_delay'])
target_offblock = df_one_hot['block_delay']
X_train_offblock, X_test_offblock, y_train_offblock, y_test_offblock = train_test_split(
    features_offblock,
    target_offblock,
    test_size=0.33,
    random_state=42,
)

# Onblock model: target is 'flt_dep_delay'
features_onblock = df_one_hot.drop(columns=['flt_dep_delay'])
target_onblock = df_one_hot['flt_dep_delay']
X_train_onblock, X_test_onblock, y_train_onblock, y_test_onblock = train_test_split(
    features_onblock,
    target_onblock,
    test_size=0.33,
    random_state=42,
)

In [21]:
# Implement first baseline model as mean of delays
dummy_regr_off = DummyRegressor(strategy='mean')
dummy_regr_off.fit(X_train_offblock, y_train_offblock)
dummy_regr_off.predict(X_test_offblock)

dummy_regr_on = DummyRegressor(strategy='mean')
dummy_regr_on.fit(X_train_onblock, y_train_onblock)
dummy_regr_on.predict(X_test_onblock)

array([38.90579369, 38.90579369, 38.90579369, ..., 38.90579369,
       38.90579369, 38.90579369])

In [22]:
# Report r^2 and RMSE of the mean baseline on the held-out test sets

# --- offblock model ---
dummy_pred_off = dummy_regr_off.predict(X_test_offblock)
rmse_offblock = np.sqrt(mean_squared_error(y_test_offblock, dummy_pred_off))
r2_offblock = dummy_regr_off.score(X_test_offblock, y_test_offblock)
print('The r^2 for offblock time is ', round(r2_offblock, 4), sep='')
print('The RMSE for offblock time is ', round(rmse_offblock, 2), 'minutes.', sep='')
print('\n')

# --- onblock model ---
dummy_pred_on = dummy_regr_on.predict(X_test_onblock)
rmse_onblock = np.sqrt(mean_squared_error(y_test_onblock, dummy_pred_on))
r2_onblock = dummy_regr_on.score(X_test_onblock, y_test_onblock)
print('The r^2 for onblock time is ', round(r2_onblock, 4), sep='')
print('The RMSE for onblock time is ', round(rmse_onblock, 2), 'minutes.', sep='')
print('\n')

The r^2 for offblock time is -0.0014
The RMSE for offblock time is 25.36minutes.


The r^2 for onblock time is -0.0003
The RMSE for onblock time is 24.12minutes.




In [25]:
# Implement second baseline model as random forest regression
rand_for_off = RandomForestRegressor(n_estimators = 10, random_state=42)
rand_for_off.fit(X_train_offblock, y_train_offblock)
Y_predict_off = rand_for_off.predict(X_test_offblock)

rand_for_on = RandomForestRegressor(n_estimators = 10, random_state=42)
rand_for_on.fit(X_train_onblock, y_train_onblock)
rand_for_on.predict(X_test_onblock)

array([17.4, 15.7, 36.2, ..., 47.9, 16.7, 19.8])

In [29]:
# Report r^2 and RMSE of the random forest baseline on the held-out test sets.
#
# RMSE is computed as sqrt(MSE) instead of mean_squared_error(..., squared=False):
# the `squared` argument was deprecated in scikit-learn 1.4 and removed in 1.6,
# so the old call raises a TypeError on current versions. sqrt(MSE) works on
# every version and matches how the mean-baseline RMSE is computed.

# --- offblock model (predictions were stored when the forest was fitted) ---
r2_offblock = r2_score(y_test_offblock, Y_predict_off)
rmse_offblock = np.sqrt(mean_squared_error(y_test_offblock, Y_predict_off))
print('The r^2 for offblock time is ' + str(round(r2_offblock, 4)))
print('The RMSE for offblock time is ' + str(round(rmse_offblock, 2)) + 'minutes.')
print('\n')

# --- onblock model ---
# Predict once and reuse the result for both metrics; r2_score on these
# predictions is the same quantity the regressor's .score() returns.
Y_predict_on = rand_for_on.predict(X_test_onblock)
r2_onblock = r2_score(y_test_onblock, Y_predict_on)
rmse_onblock = np.sqrt(mean_squared_error(y_test_onblock, Y_predict_on))
print('The r^2 for onblock time is ' + str(round(r2_onblock, 4)))
print('The RMSE for onblock time is ' + str(round(rmse_onblock, 2)) + 'minutes.')
print('\n')

The r^2 for offblock time is 0.9354
The RMSE for offblock time is 6.44minutes.


The r^2 for onblock time is 0.9573
The RMSE for onblock time is 4.98minutes.


