# Building a model for predicting airline flight delays 

## Boilerplate code for notebook initialization 

In [28]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")
    
%matplotlib inline

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import pandas as pd 
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import scale

## Load Data

df = pd.read_csv('./1912_bts_flights.csv')

## Numeric Features

In [30]:
# Define data

y = df['DepDel15']

x = df[['CarrierDelay','WeatherDelay','NASDelay','SecurityDelay','LateAircraftDelay']]

col_num = [col for col in x.columns if x[col].dtype in ['int64', 'float64']]
x = x[col_num]

# Split features into train and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)

# Replace NaN values with mean
x_train = x_train.fillna(x_train.mean())
x_test = x_test.fillna(x_train.mean())
y_train = y_train.fillna(y_train.mean())
y_test = y_test.fillna(y_train.mean())

## Deep Neural Network

In [31]:
%%time

from sklearn.neural_network import MLPRegressor

model_nn = MLPRegressor(hidden_layer_sizes=(100,100)).fit(x_train, y_train)
y_test_pred = model_nn.predict(x_test)


Wall time: 3min 35s


In [36]:
print(''.join(['R^2 Score: ', str(model_nn.score(x_test, y_test))]))

R^2 Score: 0.6980763091666291


## Conclusions

The model produced an R^2 score of .698

In [None]:
%%time

from sklearn.preprocessing import MinMaxScaler

scalar = MinMaxScaler().fit(x)
x_train, x_test, y_train, y_test = train_test_split(scalar.transform(x), y, test_size=.2, random_state=1)

from sklearn.naive_bayes import MultinomialNB


model_nb = MultinomialNB().fit(x_train, y_train)
y_test_predict = model_nb.predict(x_test)


print('Results from (Naive Bayes - Multinomial):')
print('Classification Report:')

print(classification_report(y_test, y_test_predict))
print('AUC Score:')
print(roc_auc_score(y_test, y_test_predict))