In [0]:
from warnings import simplefilter

simplefilter(action='ignore', category=FutureWarning)

In [0]:
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [0]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import pickle as pkl
import math
import csv
import sklearn
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn import preprocessing
from sklearn.metrics import classification_report
from sklearn.ensemble import ExtraTreesClassifier
from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import SMOTE 
from imblearn.under_sampling import RandomUnderSampler
from imblearn.under_sampling import NearMiss
from collections import Counter

In [None]:
#flight_data = pd.read_csv(r'C:\Users\badri\Desktop\ML-Project\Flight Data\Flight_Data.csv')
from pathlib import Path

# 1) Define your project root (where you launched the script / notebook)
BASE_DIR = Path().resolve()      # or: Path(__file__).parent if in a .py module

# 2) Point at your data folder
DATA_DIR = BASE_DIR / 'data'

# 3) Build each file’s path dynamically
flight_csv   = DATA_DIR / 'filtered_flights.csv'
weather_csv  = DATA_DIR / 'Weather' / 'Weather_Data.csv'
final_csv   = DATA_DIR / 'Flight_Weather.csv'

# 4) Read them with pandas
flight_data  = pd.read_csv(flight_csv)
weather_data = pd.read_csv(weather_csv)
flight_weather_data = pd.read_csv(final_csv)

In [0]:
link = 'https://drive.google.com/open?id=17DbBIxtBcBM9Q4MWbeF46ml8WNv_ATKl'

fluff, id = link.split('=')

In [0]:
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [0]:
downloaded = drive.CreateFile({'id':id}) 

downloaded.GetContentFile('Flight_Weather.csv')

dataframe = pd.read_csv('Flight_Weather.csv')

In [0]:
data_label_encoder = preprocessing.LabelEncoder()

encoded_list = data_label_encoder.fit_transform(dataframe['Airport'])

dataframe['EncodedAirport'] = encoded_list

In [0]:
y = dataframe['ArrDel15']

dataframe = dataframe.drop(['ArrDel15','FlightDate','Date','Airport','ArrDelayMinutes'],axis = 1)

X = dataframe

In [0]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)

In [10]:
print(" Original DataSet Shape : ",Counter(y_train))

ros = RandomOverSampler(random_state = 42)

X_ros, y_ros = ros.fit_resample(X_train, y_train)

print(" RandomOverSampled DataSet Shape : ",Counter(y_ros))

smote = SMOTE(random_state = 42)

X_smote, y_smote = smote.fit_resample(X_train, y_train)

print(" SMOTE DataSet Shape : ",Counter(y_smote))

 Original DataSet Shape :  Counter({0.0: 1097424, 1.0: 291150})
 RandomOverSampled DataSet Shape :  Counter({1.0: 1097424, 0.0: 1097424})
 SMOTE DataSet Shape :  Counter({1.0: 1097424, 0.0: 1097424})


In [11]:
print(" Original DataSet Shape : ",Counter(y_train))

rus = RandomUnderSampler(random_state = 42)

X_rus, y_rus = rus.fit_resample(X_train,y_train)

print(" RandomUnderampled DataSet Shape : ",Counter(y_rus))

nm = NearMiss()

X_nm, y_nm = nm.fit_resample(X_train, y_train)

print(" NearMiss DataSet Shape : ",Counter(y_nm))

 Original DataSet Shape :  Counter({0.0: 1097424, 1.0: 291150})
 RandomUnderampled DataSet Shape :  Counter({0.0: 291150, 1.0: 291150})
 NearMiss DataSet Shape :  Counter({0.0: 291150, 1.0: 291150})


In [0]:
clf = ExtraTreesClassifier(n_estimators = 100)

In [13]:
clf.fit(X_train,y_train)

y_pred = clf.predict(X_test)

filename = "ETC_Normal.pkl"

pkl.dump(clf,open(filename,'wb'))

print(" Classification Report ")

print("\n\n")

print(metrics.classification_report(y_test,y_pred))

 Classification Report 



              precision    recall  f1-score   support

         0.0       0.94      0.97      0.95    365951
         1.0       0.87      0.75      0.81     96908

    accuracy                           0.93    462859
   macro avg       0.90      0.86      0.88    462859
weighted avg       0.92      0.93      0.92    462859



In [14]:
clf.fit(X_ros,y_ros)

y_pred_ros = clf.predict(X_test)

filename = "ETC_ROS.pkl"

pkl.dump(clf,open(filename,'wb'))

print(" Classification Report ")

print("\n\n")

print(metrics.classification_report(y_test,y_pred_ros))

 Classification Report 



              precision    recall  f1-score   support

         0.0       0.94      0.97      0.95    365951
         1.0       0.88      0.75      0.81     96908

    accuracy                           0.92    462859
   macro avg       0.91      0.86      0.88    462859
weighted avg       0.92      0.92      0.92    462859



In [15]:
clf.fit(X_smote,y_smote)

y_pred_smote = clf.predict(X_test)

filename = "ETC_SMOTE.pkl"

pkl.dump(clf,open(filename,'wb'))

print(" Classification Report")

print("\n\n")

print(metrics.classification_report(y_test,y_pred_smote))

 Classification Report



              precision    recall  f1-score   support

         0.0       0.94      0.96      0.95    365951
         1.0       0.83      0.76      0.80     96908

    accuracy                           0.92    462859
   macro avg       0.89      0.86      0.87    462859
weighted avg       0.92      0.92      0.92    462859



In [16]:
clf.fit(X_rus,y_rus)

y_pred_rus = clf.predict(X_test)

filename = "ETC_RUS.pkl"

pkl.dump(clf,open(filename,'wb'))

print(" Classification Report")

print("\n\n")

print(metrics.classification_report(y_test,y_pred_rus))

 Classification Report



              precision    recall  f1-score   support

         0.0       0.96      0.92      0.94    365951
         1.0       0.73      0.84      0.78     96908

    accuracy                           0.90    462859
   macro avg       0.84      0.88      0.86    462859
weighted avg       0.91      0.90      0.90    462859



In [17]:
clf.fit(X_nm,y_nm)

y_pred_nm = clf.predict(X_test)

filename = "ETC_NM.pkl"

pkl.dump(clf,open(filename,'wb'))

print(" Classification Report")

print("\n\n")

print(metrics.classification_report(y_test,y_pred_nm))

 Classification Report



              precision    recall  f1-score   support

         0.0       0.95      0.79      0.86    365951
         1.0       0.52      0.84      0.64     96908

    accuracy                           0.80    462859
   macro avg       0.73      0.82      0.75    462859
weighted avg       0.86      0.80      0.82    462859

