In [29]:
import csv
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
import datetime
import numpy as np

In [35]:
file_path = "ORIE_4741_data_selection/2016_to_2020_flight_reduced_columns_with_weather_top_10.csv"
data = pd.read_csv(file_path, index_col = False)
data = data.drop('Unnamed: 0', axis = 1)

In [36]:
data.columns

Index(['Year', 'Quarter', 'Month', 'DayofMonth', 'DayOfWeek', 'FlightDate',
       'Reporting_Airline', 'DOT_ID_Reporting_Airline',
       'Flight_Number_Reporting_Airline', 'Origin', 'OriginCityName',
       'OriginState', 'Dest', 'DestCityName', 'DestState', 'CRSDepTime',
       'DepTime', 'DepDelay', 'TaxiOut', 'WheelsOff', 'WheelsOn', 'TaxiIn',
       'CRSArrTime', 'ArrTime', 'ArrDelay', 'CRSElapsedTime', 'Cancelled',
       'CancellationCode', 'Diverted', 'Flights', 'Distance', 'DivArrDelay',
       'DivActualElapsedTime', 'DepTimeLocal', 'DepTimeUTC', 'Severe-Cold',
       'Fog', 'Hail', 'Rain', 'Snow', 'Storm', 'Other Precipitation',
       'Severe-Cold_Severity', 'Fog_Severity', 'Hail_Severity',
       'Rain_Severity', 'Snow_Severity', 'Storm_Severity',
       'Other Precipitation_Severity'],
      dtype='object')

In [37]:
# Use to generate top 10-to-10 only
list_of_airports = set(['ATL', 'DFW', 'DEN', 'ORD', 'LAX', 'CLT', 'LAS', 'PHX', 'MCO', 'SEA'])
data = data[data['Dest'].isin(list_of_airports)]

In [38]:
data.head(10)

Unnamed: 0,Year,Quarter,Month,DayofMonth,DayOfWeek,FlightDate,Reporting_Airline,DOT_ID_Reporting_Airline,Flight_Number_Reporting_Airline,Origin,...,Snow,Storm,Other Precipitation,Severe-Cold_Severity,Fog_Severity,Hail_Severity,Rain_Severity,Snow_Severity,Storm_Severity,Other Precipitation_Severity
88,2016,1,1,1,5,2016-01-01,AA,19805,48,PHX,...,False,False,False,,,,,,,
89,2016,1,1,2,6,2016-01-02,AA,19805,48,PHX,...,False,False,False,,,,,,,
90,2016,1,1,3,7,2016-01-03,AA,19805,48,PHX,...,False,False,False,,,,,,,
91,2016,1,1,4,1,2016-01-04,AA,19805,48,PHX,...,False,False,False,,,,,,,
92,2016,1,1,5,2,2016-01-05,AA,19805,48,PHX,...,False,False,False,,,,Light,,,
93,2016,1,1,6,3,2016-01-06,AA,19805,48,PHX,...,False,False,False,,,,,,,
94,2016,1,1,7,4,2016-01-07,AA,19805,48,PHX,...,False,False,False,,,,Light,,,
95,2016,1,1,8,5,2016-01-08,AA,19805,48,PHX,...,False,False,False,,,,,,,
96,2016,1,1,9,6,2016-01-09,AA,19805,48,PHX,...,False,False,False,,,,,,,
97,2016,1,1,10,7,2016-01-10,AA,19805,48,PHX,...,False,False,False,,,,,,,


## Missing/Corrupted Data

In [39]:
# Remove cancelled flights
df = data[data.Cancelled == 0]
df.drop(columns = ['Cancelled'], inplace = True)

In [40]:
# Remove data without ArrTime
df = df[~df.ArrTime.isnull()]
# Reset index
df.reset_index(inplace = True, drop = True)
# Drop column CancellationCode, Flights
# Reporting_Airline and DOT_ID_Reporting_Airline the same info, keep DOT_ID_Reporting_Airline
df.drop(columns = ['CancellationCode','Flights'], inplace = True)

In [41]:
missing_stats = df.isnull().sum()
missing_stats = missing_stats[missing_stats != 0]

In [42]:
missing_stats

ArrDelay                           3722
DivArrDelay                     1965362
DivActualElapsedTime            1965362
Severe-Cold_Severity            1969084
Fog_Severity                    1918068
Hail_Severity                   1968984
Rain_Severity                   1864646
Snow_Severity                   1951725
Storm_Severity                  1968797
Other Precipitation_Severity    1969084
dtype: int64

In [43]:
# Fill DepDelay null with 0
df.DepDelay.fillna(0,inplace=True)

In [44]:
df2 = df[(df.Diverted == 0) & (np.isnan(df.ArrDelay))]
df2['ArrDelay'].fillna(0, inplace = True)
df[(df.Diverted == 0) & (np.isnan(df.ArrDelay))] = df2

In [45]:
diverted = df[df.Diverted == 1]
def fill_arrDelay_for_diverted(x):
    x['ArrDelay'] = x['DivArrDelay']
    return x
diverted = diverted.apply(fill_arrDelay_for_diverted, axis = 1)
df[df['ArrDelay'].isnull()] = diverted
df.drop(columns = ['DivArrDelay','DivActualElapsedTime','Diverted'],inplace = True)

In [46]:
missing_stats = df.isnull().sum()
missing_stats = missing_stats[missing_stats != 0]
missing_stats

Severe-Cold_Severity            1969084
Fog_Severity                    1918068
Hail_Severity                   1968984
Rain_Severity                   1864646
Snow_Severity                   1951725
Storm_Severity                  1968797
Other Precipitation_Severity    1969084
dtype: int64

In [14]:
# Fill DivArrDelay with 0
# df.DivArrDelay.fillna(0,inplace=True)
# Fill DivActualElapsedTime with 0
# df.DivActualElapsedTime.fillna(0,inplace=True)
#Concatenating Reporting_Airline with Flight_Number_Reporting_Airline to form flight number
# df.Flight_Number_Reporting_Airline = df.Reporting_Airline.astype(str) + df.Flight_Number_Reporting_Airline.astype(str)
# df.drop(columns = ['Reporting_Airline'], inplace = True)
# # Change dtype of FlightDate into datetime.date
# df['FlightDate'] = df['FlightDate'].apply(lambda x: datetime.date.fromisoformat(x))

In [15]:
# Parse CRSDepTime, DepTime, WheelsOff, WheelsOn, CRSArrTime, ArrTime into datetime
# for col in ['CRSDepTime', 'DepTime', 'WheelsOff', 'WheelsOn', 'CRSArrTime', 'ArrTime']:
#     df[col] = df[col].apply(lambda x: parse_int_to_time(x))

In [16]:
def fill_ArrDelay(x):
    if np.isnan(x['ArrDelay']):
        start_time = x['CRSArrTime']
        stop_time = x['ArrTime']
        date = datetime.date(1, 1, 1)
        datetime1 = datetime.datetime.combine(date, start_time)
        datetime2 = datetime.datetime.combine(date, stop_time)
        time_elapsed = datetime1 - datetime2
        x['ArrDelay'] = time_elapsed.seconds//60
        return x

## Feature engineering & Encoding

### Weather

In [57]:
def weather_real_encoding(sev):
    """ Convert severity into real_encoding, from light 1 to severe 4
    nan as 0
    """
    if sev == 'Light':
        return 1
    elif sev == 'Moderate':
        return 2
    elif sev == 'Heavy':
        return 3
    elif sev == 'Severe':
        return 4
    else:
        return 0

In [58]:
# real_encoding for weather severity
for col in ['Severe-Cold_Severity','Fog_Severity','Hail_Severity','Rain_Severity',
            'Snow_Severity','Storm_Severity','Other Precipitation_Severity']:
    df[col] = df[col].apply(lambda x: weather_real_encoding(x))

In [59]:
# Drop weather columns
weather_col = ['Severe-Cold', 'Fog', 'Hail', 'Rain','Snow', 'Storm', 'Other Precipitation']
df.drop(columns = weather_col, inplace = True)
df.columns

Index(['Year', 'Quarter', 'Month', 'DayofMonth', 'DayOfWeek', 'FlightDate',
       'Reporting_Airline', 'DOT_ID_Reporting_Airline',
       'Flight_Number_Reporting_Airline', 'Origin', 'OriginCityName',
       'OriginState', 'Dest', 'DestCityName', 'DestState', 'CRSDepTime',
       'DepTime', 'DepDelay', 'TaxiOut', 'WheelsOff', 'WheelsOn', 'TaxiIn',
       'CRSArrTime', 'ArrTime', 'ArrDelay', 'CRSElapsedTime', 'Distance',
       'DepTimeLocal', 'DepTimeUTC', 'Severe-Cold_Severity', 'Fog_Severity',
       'Hail_Severity', 'Rain_Severity', 'Snow_Severity', 'Storm_Severity',
       'Other Precipitation_Severity'],
      dtype='object')

### Time related

In [60]:
def parse_int_to_time(num):
    try:
        num = int(num)
        if num == 2400:
            num = 2359
        m = num%100
        minute = m%60
        h = num//100
        return datetime.time(hour = h, minute = m)
    except ValueError:
        print(num)

In [61]:
#Parse CRSDepTime, CRSArrTime into datetime
for col in ['CRSDepTime', 'CRSArrTime']:
    df[col] = df[col].apply(lambda x: parse_int_to_time(x))

In [62]:
time_col = ['DepTime', 'ArrTime', 'TaxiOut', 'WheelsOff', 'WheelsOn', 'TaxiIn',
            'DepTimeLocal','DepTimeUTC','FlightDate']
df.drop(columns = time_col, inplace = True)
df.columns

Index(['Year', 'Quarter', 'Month', 'DayofMonth', 'DayOfWeek',
       'Reporting_Airline', 'DOT_ID_Reporting_Airline',
       'Flight_Number_Reporting_Airline', 'Origin', 'OriginCityName',
       'OriginState', 'Dest', 'DestCityName', 'DestState', 'CRSDepTime',
       'DepDelay', 'CRSArrTime', 'ArrDelay', 'CRSElapsedTime', 'Distance',
       'Severe-Cold_Severity', 'Fog_Severity', 'Hail_Severity',
       'Rain_Severity', 'Snow_Severity', 'Storm_Severity',
       'Other Precipitation_Severity'],
      dtype='object')

In [63]:
def time_cat_encoding(t):
    """Turns time into categorical with 4 values: morning, afternoon, night, midnight
    """
    mor = datetime.time(6,0)
    aft = datetime.time(12,0)
    ni = datetime.time(18,0)
    if t < mor:
        return "midnight"
    elif t < aft:
        return "morning"
    elif t < ni:
        return "afternoon"
    else:
        return "night"

In [64]:
df.CRSDepTime = df.CRSDepTime.apply(time_cat_encoding)
df.CRSArrTime = df.CRSArrTime.apply(time_cat_encoding)

In [65]:
# Do one hot encoding for CRSDepTime and CRSArrTime
one_hot_CRSDepTime = pd.get_dummies(df['CRSDepTime'])
one_hot_CRSArrTime = pd.get_dummies(df['CRSArrTime'])
one_hot_CRSDepTime.rename(columns = {'afternoon':'CRSDep_afternoon','midnight':'CRSDep_midnight',
                                     'morning':'CRSDep_morning','night':'CRSDep_night'}, inplace = True)
one_hot_CRSArrTime.rename(columns = {'afternoon':'CRSArr_afternoon','midnight':'CRSArr_midnight',
                                     'morning':'CRSArr_morning','night':'CRSArr_night'}, inplace = True)
df.drop(columns = ['CRSDepTime','CRSArrTime'], inplace = True)
df = df.join(one_hot_CRSDepTime)
df = df.join(one_hot_CRSArrTime)

In [66]:
df.Quarter.unique()

array([1, 4, 2, 3], dtype=int64)

In [67]:
# do one-hot for Quarter & DayOfWeek
one_hot_quarter = pd.get_dummies(df['Quarter'])
one_hot_day = pd.get_dummies(df['DayOfWeek'])
one_hot_quarter.rename(columns ={1:'Q1',2:'Q2',3:'Q3',4:'Q4'}, inplace = True)
one_hot_day.rename(columns ={1:'Mon',2:'Tue',3:'Wed',4:'Thu',5:'Fri',6:'Sat',7:'Sun'}, inplace = True)
df.drop(columns = ['Quarter','DayOfWeek'], inplace = True)
df = df.join(one_hot_quarter)
df = df.join(one_hot_day)

In [68]:
df.columns

Index(['Year', 'Month', 'DayofMonth', 'Reporting_Airline',
       'DOT_ID_Reporting_Airline', 'Flight_Number_Reporting_Airline', 'Origin',
       'OriginCityName', 'OriginState', 'Dest', 'DestCityName', 'DestState',
       'DepDelay', 'ArrDelay', 'CRSElapsedTime', 'Distance',
       'Severe-Cold_Severity', 'Fog_Severity', 'Hail_Severity',
       'Rain_Severity', 'Snow_Severity', 'Storm_Severity',
       'Other Precipitation_Severity', 'CRSDep_afternoon', 'CRSDep_midnight',
       'CRSDep_morning', 'CRSDep_night', 'CRSArr_afternoon', 'CRSArr_midnight',
       'CRSArr_morning', 'CRSArr_night', 'Q1', 'Q2', 'Q3', 'Q4', 'Mon', 'Tue',
       'Wed', 'Thu', 'Fri', 'Sat', 'Sun'],
      dtype='object')

### Airline related

In [69]:
airline_lst = list(df.Reporting_Airline.unique())
airline_lst_rename = ['Airline_'+item for item in airline_lst]
one_hot_airline = pd.get_dummies(df['Reporting_Airline'])
for i in range(len(airline_lst)):
    one_hot_airline.rename(columns = {airline_lst[i]:airline_lst_rename[i]},inplace = True)
df.drop(columns = ['Reporting_Airline','DOT_ID_Reporting_Airline',
                   'Flight_Number_Reporting_Airline'], inplace = True)
df = df.join(one_hot_airline)

### location related

In [16]:
# origin_lst = list(df.Origin.unique())
# origin_lst_rename = ['Origin_'+item for item in origin_lst]
# one_hot_origin = pd.get_dummies(df['Origin'])
# for i in range(len(origin_lst)):
#     one_hot_origin.rename(columns = {origin_lst[i]:origin_lst_rename[i]},inplace = True)
# df = df.join(one_hot_origin)
# df.drop(columns = ['Origin','OriginCityName','OriginState'], inplace = True)

In [17]:
# df.columns

Index(['Year', 'Quarter', 'Month', 'DayofMonth', 'DayOfWeek', 'FlightDate',
       'Reporting_Airline', 'DOT_ID_Reporting_Airline',
       'Flight_Number_Reporting_Airline', 'Dest', 'DestCityName', 'DestState',
       'CRSDepTime', 'DepTime', 'DepDelay', 'TaxiOut', 'WheelsOff', 'WheelsOn',
       'TaxiIn', 'CRSArrTime', 'ArrTime', 'ArrDelay', 'CRSElapsedTime',
       'Distance', 'DepTimeLocal', 'DepTimeUTC', 'Severe-Cold', 'Fog', 'Hail',
       'Rain', 'Snow', 'Storm', 'Other Precipitation', 'Severe-Cold_Severity',
       'Fog_Severity', 'Hail_Severity', 'Rain_Severity', 'Snow_Severity',
       'Storm_Severity', 'Other Precipitation_Severity', 'Origin_ATL',
       'Origin_CLT', 'Origin_DEN', 'Origin_DFW', 'Origin_LAS', 'Origin_LAX',
       'Origin_MCO', 'Origin_ORD', 'Origin_PHX', 'Origin_SEA'],
      dtype='object')

#### Plan 1: use destState for destination category

In [18]:
# destState_lst = list(df.DestState.unique())
# destState_lst_rename = ['DestState_'+item for item in destState_lst]
# one_hot_destState = pd.get_dummies(df['DestState'])
# for i in range(len(destState_lst)):
#     one_hot_destState.rename(columns = {destState_lst[i]:destState_lst_rename[i]},inplace = True)
# df1 = df.join(one_hot_destState)
# df1.drop(columns = ['Dest', 'DestCityName', 'DestState'], inplace = True)

In [19]:
# df1.shape

(1969084, 57)

In [20]:
# df1.to_csv("2016_to_2020_flight_feature_eng_w_DestState.csv")

#### Plan 2: use dest for destination category

In [21]:
# dest_lst = list(df.Dest.unique())
# dest_lst_rename = ['Dest_'+item for item in dest_lst]
# one_hot_dest = pd.get_dummies(df['Dest'])
# for i in range(len(dest_lst)):
#     one_hot_dest.rename(columns = {dest_lst[i]:dest_lst_rename[i]},inplace = True)
# df2 = df.join(one_hot_dest)
# df2.drop(columns = ['Dest', 'DestCityName', 'DestState'], inplace = True)

In [22]:
# df2.shape

(1969084, 57)

In [23]:
# # df2.to_csv("2016_to_2020_flight_feature_eng_w_Dest.csv")
# df2.to_csv("2016_to_2020_flight_feature_eng_w_Dest_10_10.csv")

### change origin to longitude / latitude

In [70]:
df.head()

Unnamed: 0,Year,Month,DayofMonth,Origin,OriginCityName,OriginState,Dest,DestCityName,DestState,DepDelay,...,Airline_G4,Airline_MQ,Airline_NK,Airline_OH,Airline_OO,Airline_UA,Airline_VX,Airline_WN,Airline_YV,Airline_YX
0,2016,1,1,PHX,"Phoenix, AZ",AZ,DFW,"Dallas/Fort Worth, TX",TX,0.0,...,0,0,0,0,0,0,0,0,0,0
1,2016,1,2,PHX,"Phoenix, AZ",AZ,DFW,"Dallas/Fort Worth, TX",TX,11.0,...,0,0,0,0,0,0,0,0,0,0
2,2016,1,3,PHX,"Phoenix, AZ",AZ,DFW,"Dallas/Fort Worth, TX",TX,11.0,...,0,0,0,0,0,0,0,0,0,0
3,2016,1,4,PHX,"Phoenix, AZ",AZ,DFW,"Dallas/Fort Worth, TX",TX,2.0,...,0,0,0,0,0,0,0,0,0,0
4,2016,1,5,PHX,"Phoenix, AZ",AZ,DFW,"Dallas/Fort Worth, TX",TX,558.0,...,0,0,0,0,0,0,0,0,0,0


In [71]:
list_of_airports = set(['ATL', 'DFW', 'DEN', 'ORD', 'LAX', 'CLT', 'LAS', 'PHX', 'MCO', 'SEA'])
list_of_airports

{'ATL', 'CLT', 'DEN', 'DFW', 'LAS', 'LAX', 'MCO', 'ORD', 'PHX', 'SEA'}

In [72]:
c = dict.fromkeys(list_of_airports, 0)
c

{'LAS': 0,
 'PHX': 0,
 'DEN': 0,
 'DFW': 0,
 'ORD': 0,
 'LAX': 0,
 'ATL': 0,
 'SEA': 0,
 'CLT': 0,
 'MCO': 0}

In [73]:
c['ATL'] = ['33.6407° N', '84.4277° W']
c['DFW'] = ['32.8998° N', '97.0403° W']
c['DEN'] = ['39.8561° N', '104.6737° W']
c['ORD'] = ['41.9803° N', '87.9090° W']
c['LAX'] = ['33.9416° N', '118.4085° W']
c['CLT'] = ['35.2144° N', '80.9473° W']
c['LAS'] = ['36.0840° N', '115.1537° W']
c['PHX'] = ['33.4352° N', '112.0101° W']
c['MCO'] = ['28.4179° N', '81.3041° W']
c['SEA'] = ['47.4502° N', '122.3088° W']
c

{'LAS': ['36.0840° N', '115.1537° W'],
 'PHX': ['33.4352° N', '112.0101° W'],
 'DEN': ['39.8561° N', '104.6737° W'],
 'DFW': ['32.8998° N', '97.0403° W'],
 'ORD': ['41.9803° N', '87.9090° W'],
 'LAX': ['33.9416° N', '118.4085° W'],
 'ATL': ['33.6407° N', '84.4277° W'],
 'SEA': ['47.4502° N', '122.3088° W'],
 'CLT': ['35.2144° N', '80.9473° W'],
 'MCO': ['28.4179° N', '81.3041° W']}

In [96]:
df_origin = df['Origin']
df_Origin_Lat = pd.Series([c[i][0] for i in df_origin])
df_Origin_Long = pd.Series([c[i][1] for i in df_origin])

In [99]:
df1 = df.copy()
df1 = df1.assign(Origin_Lat=df_Origin_Lat.values)
df1 = df1.assign(Origin_Long=df_Origin_Long.values)
df1.head()

Unnamed: 0,Year,Month,DayofMonth,Origin,OriginCityName,OriginState,Dest,DestCityName,DestState,DepDelay,...,Airline_NK,Airline_OH,Airline_OO,Airline_UA,Airline_VX,Airline_WN,Airline_YV,Airline_YX,Origin_Lat,Origin_Long
0,2016,1,1,PHX,"Phoenix, AZ",AZ,DFW,"Dallas/Fort Worth, TX",TX,0.0,...,0,0,0,0,0,0,0,0,33.4352° N,112.0101° W
1,2016,1,2,PHX,"Phoenix, AZ",AZ,DFW,"Dallas/Fort Worth, TX",TX,11.0,...,0,0,0,0,0,0,0,0,33.4352° N,112.0101° W
2,2016,1,3,PHX,"Phoenix, AZ",AZ,DFW,"Dallas/Fort Worth, TX",TX,11.0,...,0,0,0,0,0,0,0,0,33.4352° N,112.0101° W
3,2016,1,4,PHX,"Phoenix, AZ",AZ,DFW,"Dallas/Fort Worth, TX",TX,2.0,...,0,0,0,0,0,0,0,0,33.4352° N,112.0101° W
4,2016,1,5,PHX,"Phoenix, AZ",AZ,DFW,"Dallas/Fort Worth, TX",TX,558.0,...,0,0,0,0,0,0,0,0,33.4352° N,112.0101° W


In [74]:
for i in range(len(df)):
    origin = df.loc[i]['Origin']
    df.loc[i]['Origin_Lat'] = c[origin][0]
    df.loc[i]['Origin_Long'] = c[origin][1]
df.head()

Unnamed: 0,Year,Month,DayofMonth,Origin,OriginCityName,OriginState,Dest,DestCityName,DestState,DepDelay,...,Airline_G4,Airline_MQ,Airline_NK,Airline_OH,Airline_OO,Airline_UA,Airline_VX,Airline_WN,Airline_YV,Airline_YX
0,2016,1,1,PHX,"Phoenix, AZ",AZ,DFW,"Dallas/Fort Worth, TX",TX,0.0,...,0,0,0,0,0,0,0,0,0,0
1,2016,1,2,PHX,"Phoenix, AZ",AZ,DFW,"Dallas/Fort Worth, TX",TX,11.0,...,0,0,0,0,0,0,0,0,0,0
2,2016,1,3,PHX,"Phoenix, AZ",AZ,DFW,"Dallas/Fort Worth, TX",TX,11.0,...,0,0,0,0,0,0,0,0,0,0
3,2016,1,4,PHX,"Phoenix, AZ",AZ,DFW,"Dallas/Fort Worth, TX",TX,2.0,...,0,0,0,0,0,0,0,0,0,0
4,2016,1,5,PHX,"Phoenix, AZ",AZ,DFW,"Dallas/Fort Worth, TX",TX,558.0,...,0,0,0,0,0,0,0,0,0,0


In [100]:
df1.to_csv("2016_to_2020_flight_feature_eng_w_Origin_Lat_Long.csv")

In [101]:
dfn = pd.read_csv("2016_to_2020_flight_feature_eng_w_Origin_Lat_Long.csv")
dfn.columns

Index(['Unnamed: 0', 'Year', 'Month', 'DayofMonth', 'Origin', 'OriginCityName',
       'OriginState', 'Dest', 'DestCityName', 'DestState', 'DepDelay',
       'ArrDelay', 'CRSElapsedTime', 'Distance', 'Severe-Cold_Severity',
       'Fog_Severity', 'Hail_Severity', 'Rain_Severity', 'Snow_Severity',
       'Storm_Severity', 'Other Precipitation_Severity', 'CRSDep_afternoon',
       'CRSDep_midnight', 'CRSDep_morning', 'CRSDep_night', 'CRSArr_afternoon',
       'CRSArr_midnight', 'CRSArr_morning', 'CRSArr_night', 'Q1', 'Q2', 'Q3',
       'Q4', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun', 'Airline_9E',
       'Airline_AA', 'Airline_AS', 'Airline_B6', 'Airline_DL', 'Airline_EV',
       'Airline_F9', 'Airline_G4', 'Airline_MQ', 'Airline_NK', 'Airline_OH',
       'Airline_OO', 'Airline_UA', 'Airline_VX', 'Airline_WN', 'Airline_YV',
       'Airline_YX', 'Origin_Lat', 'Origin_Long'],
      dtype='object')