## Importing the libraries

In [99]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

In [100]:
tf.__version__

'2.9.1'

## Importing the dataset

In [101]:
dataset=pd.read_csv("Air.csv")
X=dataset.iloc[:,:-1].values
y=dataset.iloc[:,-1].values

In [102]:
dataset

Unnamed: 0,Flight,AirportFrom,AirportTo,DayOfWeek,Time,Length,Delay
0,269,SFO,IAH,3,15,205,1
1,1558,PHX,CLT,3,15,222,1
2,2400,LAX,DFW,3,20,165,1
3,2466,SFO,DFW,3,20,195,1
4,108,ANC,SEA,3,30,202,0
...,...,...,...,...,...,...,...
539378,178,OGG,SNA,5,1439,326,0
539379,398,SEA,ATL,5,1439,305,0
539380,609,SFO,MKE,5,1439,255,0
539381,78,HNL,SFO,5,1439,313,1


In [103]:
dataset.shape

(539383, 7)

In [104]:
y=y.reshape(len(y),1)
y

array([[1],
       [1],
       [1],
       ...,
       [0],
       [1],
       [1]], dtype=int64)

## Taking care of missing values

In [105]:
dataset.isnull().sum()

Flight         0
AirportFrom    0
AirportTo      0
DayOfWeek      0
Time           0
Length         0
Delay          0
dtype: int64

In [106]:
X

array([[269, 'SFO', 'IAH', 3, 15, 205],
       [1558, 'PHX', 'CLT', 3, 15, 222],
       [2400, 'LAX', 'DFW', 3, 20, 165],
       ...,
       [609, 'SFO', 'MKE', 5, 1439, 255],
       [78, 'HNL', 'SFO', 5, 1439, 313],
       [1442, 'LAX', 'PHL', 5, 1439, 301]], dtype=object)

## Encoding the Airline Airfrom and Airto using Label encoder

In [107]:
from sklearn.preprocessing import LabelEncoder
encoder=LabelEncoder()
X[:,1]=encoder.fit_transform(X[:,1])
X[:,2]=encoder.fit_transform(X[:,2])

In [108]:
X[:,1]

array([253, 217, 154, ..., 253, 128, 154], dtype=object)

In [109]:
X[:,2]

array([135, 60, 80, ..., 184, 253, 216], dtype=object)

## Using Backward elimination for feature selection

In [90]:
import statsmodels
# import statsmodels.formula.api as sm
import statsmodels.api as sm
X=np.append(arr=np.ones((539383,1)).astype(int),values=X,axis=1)
X_opt=np.array(X[:,[0,1,2,3,4,5]],dtype=int)
regressor_OLS = sm.OLS(endog = y, exog = X_opt).fit()
regressor_OLS.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.028
Model:,OLS,Adj. R-squared:,0.028
Method:,Least Squares,F-statistic:,3069.0
Date:,"Fri, 29 Jul 2022",Prob (F-statistic):,0.0
Time:,22:40:30,Log-Likelihood:,-380680.0
No. Observations:,539383,AIC:,761400.0
Df Residuals:,539377,BIC:,761400.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.2258,0.003,72.798,0.000,0.220,0.232
x1,-1.055e-05,3.23e-07,-32.680,0.000,-1.12e-05,-9.92e-06
x2,0.0001,8.41e-06,16.136,0.000,0.000,0.000
x3,0.0003,8.42e-06,31.416,0.000,0.000,0.000
x4,-0.0069,0.000,-19.822,0.000,-0.008,-0.006
x5,0.0003,2.4e-06,111.331,0.000,0.000,0.000

0,1,2,3
Omnibus:,2033993.312,Durbin-Watson:,1.75
Prob(Omnibus):,0.0,Jarque-Bera (JB):,80307.564
Skew:,0.205,Prob(JB):,0.0
Kurtosis:,1.155,Cond. No.,15100.0


## As all the features are statistically significant over here for the prediction of dependent variable as per the above result so therefore we are considering all of them

## Splitting the dataset into training set and testing set

In [110]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=0)

## Feature Scaling

In [111]:
from sklearn.preprocessing import MinMaxScaler
sc=MinMaxScaler(feature_range=(0,1))
X_train=sc.fit_transform(X_train)
X_test=sc.transform(X_test)

## Initializing the ANN

In [112]:
ann=tf.keras.models.Sequential()

## Adding the first input layer over here

In [113]:
ann.add(tf.keras.layers.Dense(units=7,activation="relu"))

## Adding the hidden input layer over here

In [114]:
ann.add(tf.keras.layers.Dense(units=7,activation="relu"))

## Adding the output layer over here

In [115]:
ann.add(tf.keras.layers.Dense(units=1,activation="sigmoid"))

## Compiling 
As this one is classification model i mean to say as we want to predict the binary outcome over here 

In [116]:
ann.compile(optimizer="adam",loss="binary_crossentropy",metrics=['accuracy'])

## Training the model 


In [121]:
ann.fit(X_train, y_train, batch_size = 32, epochs = 25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x2a51863eda0>

## Predicting the test set results

In [122]:
y_pred=ann.predict(X_test)
print(np.concatenate((y_pred.reshape(len(y_pred),1),y_test.reshape(len(y_test),1)),1))

[[0.52603334 0.        ]
 [0.44992423 0.        ]
 [0.65929472 0.        ]
 ...
 [0.59524494 0.        ]
 [0.44992423 0.        ]
 [0.37641177 0.        ]]


## Making the single prediction

THERE IS A FLIGHT HAVING FOLLOWING FEATURES PREDICT WHETHER IT WILL DELAY OR NOT
### FLIGHT IS 270 
### AirportFrom =LAX
### AirportTo=DFW
### DayOfWeek=3
### Time=22
### Length=207

# Observations

### Now the flight having above features will not delay and over here we are setting the threshold of 50% if the probability is more than 50% then it means flight will delay rather flight will not delay and as we have only 29% chance for the delay of flight so therefore over here we will assume that flight will not delay 

In [128]:
print(ann.predict(sc.transform([[270,154,80,3,22,207]]))) 
print(ann.predict(sc.transform([[270,154,80,3,22,207]]))>0.5) 

[[0.2911813]]
[[False]]
