# Building a model for predicting airline flight delays 

## Boilerplate code for notebook initialization 

In [1]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")
    
%matplotlib inline

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import pandas as pd 
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import scale

## Load Data

In [2]:
df = pd.read_csv('./1912_bts_flights.csv')

## Numeric Features

In [3]:
# Define data

y = df['DepDel15']

x = df[['CarrierDelay','WeatherDelay','NASDelay','SecurityDelay','LateAircraftDelay', 'ArrDel15', 'DistanceGroup', 'DayofMonth', 'DayOfWeek']]

col_num = [col for col in x.columns if x[col].dtype in ['int64', 'float64']]
x = x[col_num]

# Split features into train and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)

# Replace NaN values with mean
x_train = x_train.fillna(x_train.mean())
x_test = x_test.fillna(x_train.mean())
y_train = y_train.fillna(y_train.mean())
y_test = y_test.fillna(y_train.mean())

## Deep Neural Network

In [4]:
%%time

from sklearn.neural_network import MLPRegressor

model_nn = MLPRegressor(hidden_layer_sizes=(100,100)).fit(x_train, y_train)
y_test_pred = model_nn.predict(x_test)


CPU times: user 6min 46s, sys: 15min 28s, total: 22min 15s
Wall time: 2min


In [5]:
print(''.join(['R^2 Score: ', str(model_nn.score(x_test, y_test))]))

R^2 Score: 0.7014087677291687


## Conclusions

The model produced an R^2 score of 0.70. While R^2 does not provide all of the information regarding how effective a model is, it does provide a good basis that this model will produce decent predictions.