In [1]:
# Import our dependencies
import pandas as pd
from sqlalchemy import create_engine
from sklearn.preprocessing import OneHotEncoder
#from config import db_password

# Display settings: lift pandas' limits on how many columns and rows are rendered
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)


# Data paths
flights_cleaned_path = "resources/flights_ml_depart_cleaned.csv"
flights_cleaned_no_null_path = 'resources/flights_cleaned_no_null.csv'

In [2]:
# Read the cleaned flights CSV into a DataFrame and preview the first rows.
# low_memory=False has pandas parse the file in one pass instead of in chunks.

df = pd.read_csv(
    flights_cleaned_path,
    low_memory=False,
)
df.head()

Unnamed: 0.1,Unnamed: 0,MONTH,DAY,DAY_OF_WEEK,AIRLINE,FLIGHT_NUMBER,TAIL_NUMBER,ORIGIN_AIRPORT,DESTINATION_AIRPORT,SCHEDULED_DEPARTURE,DEPARTURE_DELAY,ELAPSED_TIME,AIR_TIME,DISTANCE,WHEELS_ON,TAXI_IN,SCHEDULED_ARRIVAL,ARRIVAL_TIME,ARRIVAL_DELAY,AIR_SYSTEM_DELAY,SECURITY_DELAY,AIRLINE_DELAY,LATE_AIRCRAFT_DELAY,WEATHER_DELAY
0,0,1,1,4,AS,98,N407AS,ANC,SEA,5,-11.0,194.0,169.0,1448,404.0,4.0,430,408.0,-22.0,,,,,
1,1,1,1,4,AA,2336,N3KUAA,LAX,PBI,10,-8.0,279.0,263.0,2330,737.0,4.0,750,741.0,-9.0,,,,,
2,2,1,1,4,US,840,N171US,SFO,CLT,20,-2.0,293.0,266.0,2296,800.0,11.0,806,811.0,5.0,,,,,
3,3,1,1,4,AA,258,N3HYAA,LAX,MIA,20,-5.0,281.0,258.0,2342,748.0,8.0,805,756.0,-9.0,,,,,
4,4,1,1,4,AS,135,N527AS,SEA,ANC,25,-1.0,215.0,199.0,1448,254.0,5.0,320,259.0,-21.0,,,,,


In [3]:
# Working copy for the first ML exploration: DEPARTURE_DELAY together with
# the calendar, carrier, route, schedule and distance columns


ml_columns = [
    'DEPARTURE_DELAY',
    'MONTH',
    'DAY_OF_WEEK',
    'AIRLINE',
    'ORIGIN_AIRPORT',
    'DESTINATION_AIRPORT',
    'SCHEDULED_DEPARTURE',
    'DISTANCE',
]
df_ml = df.loc[:, ml_columns].copy()
df_ml.head()

Unnamed: 0,DEPARTURE_DELAY,MONTH,DAY_OF_WEEK,AIRLINE,ORIGIN_AIRPORT,DESTINATION_AIRPORT,SCHEDULED_DEPARTURE,DISTANCE
0,-11.0,1,4,AS,ANC,SEA,5,1448
1,-8.0,1,4,AA,LAX,PBI,10,2330
2,-2.0,1,4,US,SFO,CLT,20,2296
3,-5.0,1,4,AA,LAX,MIA,20,2342
4,-1.0,1,4,AS,SEA,ANC,25,1448


In [4]:
# Categorical columns, and how many distinct values each one holds

categorical_data = [
    'AIRLINE',
    'ORIGIN_AIRPORT',
    'DESTINATION_AIRPORT',
]
df_ml.loc[:, categorical_data].nunique()

AIRLINE                 14
ORIGIN_AIRPORT         628
DESTINATION_AIRPORT    629
dtype: int64

In [5]:
# Row counts per distinct origin / destination airport value
orig_counts = df_ml['ORIGIN_AIRPORT'].value_counts()
dest_counts = df_ml['DESTINATION_AIRPORT'].value_counts()

In [6]:
# Bucket origin / destination airports that appear in fewer than 5000 rows
# into a single 'Other' category.
# Encoding the dataset ran into memory issues, so the bucket cutoff was
# raised until the encoded frame fit in memory.

# Airports whose row count falls below the cutoff
to_replace = orig_counts[orig_counts < 5000].index
to_replace_2 = dest_counts[dest_counts < 5000].index

# Relabel every rare airport in one vectorized pass per column.
# (Calling Series.replace once per airport scans the whole column hundreds
# of times; isin + where yields the same labels in a single scan.)
# Missing values are not in the count indexes, so they are left untouched.
df_ml['ORIGIN_AIRPORT'] = df_ml['ORIGIN_AIRPORT'].where(
    ~df_ml['ORIGIN_AIRPORT'].isin(to_replace), 'Other'
)

df_ml['DESTINATION_AIRPORT'] = df_ml['DESTINATION_AIRPORT'].where(
    ~df_ml['DESTINATION_AIRPORT'].isin(to_replace_2), 'Other'
)

In [7]:
# One-hot encode the categorical columns into a dense DataFrame

# The dense-output flag was renamed from `sparse` to `sparse_output` in newer
# scikit-learn releases and the old name was later removed; try the new name
# first and fall back to the old one on releases that do not know it.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list.
# Reuse df_ml's index so the index-based merge that follows lines up row for
# row even if df_ml does not carry a default 0..n-1 index.
encode_df = pd.DataFrame(
    enc.fit_transform(df_ml[categorical_data]),
    index=df_ml.index,
)

# Add the encoded variable names to the DataFrame.
# get_feature_names was superseded by get_feature_names_out and later removed;
# use whichever this scikit-learn release provides.
if hasattr(enc, "get_feature_names_out"):
    encode_df.columns = enc.get_feature_names_out(categorical_data)
else:
    encode_df.columns = enc.get_feature_names(categorical_data)

In [8]:
# Swap the raw categorical columns for their one-hot encoded counterparts,
# matching rows on the index

df_ml = (
    df_ml
    .drop(columns=categorical_data)
    .merge(encode_df, left_index=True, right_index=True)
)
df_ml.head()

Unnamed: 0,DEPARTURE_DELAY,MONTH,DAY_OF_WEEK,SCHEDULED_DEPARTURE,DISTANCE,AIRLINE_AA,AIRLINE_AS,AIRLINE_B6,AIRLINE_DL,AIRLINE_EV,AIRLINE_F9,AIRLINE_HA,AIRLINE_MQ,AIRLINE_NK,AIRLINE_OO,AIRLINE_UA,AIRLINE_US,AIRLINE_VX,AIRLINE_WN,ORIGIN_AIRPORT_10397,ORIGIN_AIRPORT_10721,ORIGIN_AIRPORT_10821,ORIGIN_AIRPORT_11057,ORIGIN_AIRPORT_11259,ORIGIN_AIRPORT_11278,ORIGIN_AIRPORT_11292,ORIGIN_AIRPORT_11298,ORIGIN_AIRPORT_11433,ORIGIN_AIRPORT_11618,ORIGIN_AIRPORT_11697,ORIGIN_AIRPORT_12266,ORIGIN_AIRPORT_12478,ORIGIN_AIRPORT_12889,ORIGIN_AIRPORT_12892,ORIGIN_AIRPORT_12953,ORIGIN_AIRPORT_13204,ORIGIN_AIRPORT_13232,ORIGIN_AIRPORT_13303,ORIGIN_AIRPORT_13487,ORIGIN_AIRPORT_13930,ORIGIN_AIRPORT_14100,ORIGIN_AIRPORT_14107,ORIGIN_AIRPORT_14679,ORIGIN_AIRPORT_14747,ORIGIN_AIRPORT_14771,ORIGIN_AIRPORT_14869,ORIGIN_AIRPORT_15304,ORIGIN_AIRPORT_ABQ,ORIGIN_AIRPORT_ALB,ORIGIN_AIRPORT_ANC,ORIGIN_AIRPORT_ATL,ORIGIN_AIRPORT_AUS,ORIGIN_AIRPORT_BDL,ORIGIN_AIRPORT_BHM,ORIGIN_AIRPORT_BNA,ORIGIN_AIRPORT_BOI,ORIGIN_AIRPORT_BOS,ORIGIN_AIRPORT_BTR,ORIGIN_AIRPORT_BUF,ORIGIN_AIRPORT_BUR,ORIGIN_AIRPORT_BWI,ORIGIN_AIRPORT_CAE,ORIGIN_AIRPORT_CAK,ORIGIN_AIRPORT_CHS,ORIGIN_AIRPORT_CID,ORIGIN_AIRPORT_CLE,ORIGIN_AIRPORT_CLT,ORIGIN_AIRPORT_CMH,ORIGIN_AIRPORT_COS,ORIGIN_AIRPORT_CVG,ORIGIN_AIRPORT_DAL,ORIGIN_AIRPORT_DAY,ORIGIN_AIRPORT_DCA,ORIGIN_AIRPORT_DEN,ORIGIN_AIRPORT_DFW,ORIGIN_AIRPORT_DSM,ORIGIN_AIRPORT_DTW,ORIGIN_AIRPORT_ELP,ORIGIN_AIRPORT_EWR,ORIGIN_AIRPORT_FAR,ORIGIN_AIRPORT_FAT,ORIGIN_AIRPORT_FLL,ORIGIN_AIRPORT_FSD,ORIGIN_AIRPORT_FWA,ORIGIN_AIRPORT_GEG,ORIGIN_AIRPORT_GRR,ORIGIN_AIRPORT_GSO,ORIGIN_AIRPORT_GSP,ORIGIN_AIRPORT_HNL,ORIGIN_AIRPORT_HOU,ORIGIN_AIRPORT_HPN,ORIGIN_AIRPORT_IAD,ORIGIN_AIRPORT_IAH,ORIGIN_AIRPORT_ICT,ORIGIN_AIRPORT_IND,ORIGIN_AIRPORT_ITO,ORIGIN_AIRPORT_JAN,ORIGIN_AIRPORT_JAX,ORIGIN_AIRPORT_JFK,ORIGIN_AIRPORT_KOA,ORIGIN_AIRPORT_LAS,ORIGIN_AIRPORT_LAX,ORIGIN_AIRPORT_LEX,ORIGIN_AIRPORT_LFT,ORIGIN_AIRPORT_LGA,ORIGIN_AIRPORT_LGB,ORIGIN_AIRPORT_LIH,ORIGIN_AIRPORT_LIT,ORIGIN_AIRPORT_MAF,ORIGIN
_AIRPORT_MCI,ORIGIN_AIRPORT_MCO,ORIGIN_AIRPORT_MDW,ORIGIN_AIRPORT_MEM,ORIGIN_AIRPORT_MHT,ORIGIN_AIRPORT_MIA,ORIGIN_AIRPORT_MKE,ORIGIN_AIRPORT_MSN,ORIGIN_AIRPORT_MSP,ORIGIN_AIRPORT_MSY,ORIGIN_AIRPORT_OAK,ORIGIN_AIRPORT_OGG,ORIGIN_AIRPORT_OKC,ORIGIN_AIRPORT_OMA,ORIGIN_AIRPORT_ONT,ORIGIN_AIRPORT_ORD,ORIGIN_AIRPORT_ORF,ORIGIN_AIRPORT_Other,ORIGIN_AIRPORT_PBI,ORIGIN_AIRPORT_PDX,ORIGIN_AIRPORT_PHL,ORIGIN_AIRPORT_PHX,ORIGIN_AIRPORT_PIT,ORIGIN_AIRPORT_PNS,ORIGIN_AIRPORT_PSP,ORIGIN_AIRPORT_PVD,ORIGIN_AIRPORT_RDU,ORIGIN_AIRPORT_RIC,ORIGIN_AIRPORT_RNO,ORIGIN_AIRPORT_ROC,ORIGIN_AIRPORT_RSW,ORIGIN_AIRPORT_SAN,ORIGIN_AIRPORT_SAT,ORIGIN_AIRPORT_SAV,ORIGIN_AIRPORT_SBA,ORIGIN_AIRPORT_SDF,ORIGIN_AIRPORT_SEA,ORIGIN_AIRPORT_SFO,ORIGIN_AIRPORT_SGF,ORIGIN_AIRPORT_SHV,ORIGIN_AIRPORT_SJC,ORIGIN_AIRPORT_SJU,ORIGIN_AIRPORT_SLC,ORIGIN_AIRPORT_SMF,ORIGIN_AIRPORT_SNA,ORIGIN_AIRPORT_STL,ORIGIN_AIRPORT_SYR,ORIGIN_AIRPORT_TPA,ORIGIN_AIRPORT_TUL,ORIGIN_AIRPORT_TUS,ORIGIN_AIRPORT_TYS,ORIGIN_AIRPORT_XNA,DESTINATION_AIRPORT_10397,DESTINATION_AIRPORT_10721,DESTINATION_AIRPORT_10821,DESTINATION_AIRPORT_11057,DESTINATION_AIRPORT_11259,DESTINATION_AIRPORT_11278,DESTINATION_AIRPORT_11292,DESTINATION_AIRPORT_11298,DESTINATION_AIRPORT_11433,DESTINATION_AIRPORT_11618,DESTINATION_AIRPORT_11697,DESTINATION_AIRPORT_12266,DESTINATION_AIRPORT_12478,DESTINATION_AIRPORT_12889,DESTINATION_AIRPORT_12892,DESTINATION_AIRPORT_12953,DESTINATION_AIRPORT_13204,DESTINATION_AIRPORT_13232,DESTINATION_AIRPORT_13303,DESTINATION_AIRPORT_13487,DESTINATION_AIRPORT_13930,DESTINATION_AIRPORT_14100,DESTINATION_AIRPORT_14107,DESTINATION_AIRPORT_14679,DESTINATION_AIRPORT_14747,DESTINATION_AIRPORT_14771,DESTINATION_AIRPORT_14869,DESTINATION_AIRPORT_15304,DESTINATION_AIRPORT_ABQ,DESTINATION_AIRPORT_ALB,DESTINATION_AIRPORT_ANC,DESTINATION_AIRPORT_ATL,DESTINATION_AIRPORT_AUS,DESTINATION_AIRPORT_BDL,DESTINATION_AIRPORT_BHM,DESTINATION_AIRPORT_BNA,DESTINATION_AIRPORT_BOI,DESTINATION_AIRPORT_BOS,DESTINATION_AIRPORT_BTR,DESTINATION_AIRPORT_BUF,
DESTINATION_AIRPORT_BUR,DESTINATION_AIRPORT_BWI,DESTINATION_AIRPORT_CAE,DESTINATION_AIRPORT_CAK,DESTINATION_AIRPORT_CHS,DESTINATION_AIRPORT_CID,DESTINATION_AIRPORT_CLE,DESTINATION_AIRPORT_CLT,DESTINATION_AIRPORT_CMH,DESTINATION_AIRPORT_COS,DESTINATION_AIRPORT_CVG,DESTINATION_AIRPORT_DAL,DESTINATION_AIRPORT_DAY,DESTINATION_AIRPORT_DCA,DESTINATION_AIRPORT_DEN,DESTINATION_AIRPORT_DFW,DESTINATION_AIRPORT_DSM,DESTINATION_AIRPORT_DTW,DESTINATION_AIRPORT_ELP,DESTINATION_AIRPORT_EWR,DESTINATION_AIRPORT_FAR,DESTINATION_AIRPORT_FAT,DESTINATION_AIRPORT_FLL,DESTINATION_AIRPORT_FSD,DESTINATION_AIRPORT_FWA,DESTINATION_AIRPORT_GEG,DESTINATION_AIRPORT_GRR,DESTINATION_AIRPORT_GSO,DESTINATION_AIRPORT_GSP,DESTINATION_AIRPORT_HNL,DESTINATION_AIRPORT_HOU,DESTINATION_AIRPORT_HPN,DESTINATION_AIRPORT_IAD,DESTINATION_AIRPORT_IAH,DESTINATION_AIRPORT_ICT,DESTINATION_AIRPORT_IND,DESTINATION_AIRPORT_ITO,DESTINATION_AIRPORT_JAN,DESTINATION_AIRPORT_JAX,DESTINATION_AIRPORT_JFK,DESTINATION_AIRPORT_KOA,DESTINATION_AIRPORT_LAS,DESTINATION_AIRPORT_LAX,DESTINATION_AIRPORT_LEX,DESTINATION_AIRPORT_LFT,DESTINATION_AIRPORT_LGA,DESTINATION_AIRPORT_LGB,DESTINATION_AIRPORT_LIH,DESTINATION_AIRPORT_LIT,DESTINATION_AIRPORT_MAF,DESTINATION_AIRPORT_MCI,DESTINATION_AIRPORT_MCO,DESTINATION_AIRPORT_MDW,DESTINATION_AIRPORT_MEM,DESTINATION_AIRPORT_MHT,DESTINATION_AIRPORT_MIA,DESTINATION_AIRPORT_MKE,DESTINATION_AIRPORT_MSN,DESTINATION_AIRPORT_MSP,DESTINATION_AIRPORT_MSY,DESTINATION_AIRPORT_OAK,DESTINATION_AIRPORT_OGG,DESTINATION_AIRPORT_OKC,DESTINATION_AIRPORT_OMA,DESTINATION_AIRPORT_ONT,DESTINATION_AIRPORT_ORD,DESTINATION_AIRPORT_ORF,DESTINATION_AIRPORT_Other,DESTINATION_AIRPORT_PBI,DESTINATION_AIRPORT_PDX,DESTINATION_AIRPORT_PHL,DESTINATION_AIRPORT_PHX,DESTINATION_AIRPORT_PIT,DESTINATION_AIRPORT_PNS,DESTINATION_AIRPORT_PSP,DESTINATION_AIRPORT_PVD,DESTINATION_AIRPORT_RDU,DESTINATION_AIRPORT_RIC,DESTINATION_AIRPORT_RNO,DESTINATION_AIRPORT_ROC,DESTINATION_AIRPORT_RSW,DESTINATION_AIRPORT_SAN,DESTINATION_AIRPORT_SAT,DESTIN
ATION_AIRPORT_SAV,DESTINATION_AIRPORT_SBA,DESTINATION_AIRPORT_SDF,DESTINATION_AIRPORT_SEA,DESTINATION_AIRPORT_SFO,DESTINATION_AIRPORT_SGF,DESTINATION_AIRPORT_SHV,DESTINATION_AIRPORT_SJC,DESTINATION_AIRPORT_SJU,DESTINATION_AIRPORT_SLC,DESTINATION_AIRPORT_SMF,DESTINATION_AIRPORT_SNA,DESTINATION_AIRPORT_STL,DESTINATION_AIRPORT_SYR,DESTINATION_AIRPORT_TPA,DESTINATION_AIRPORT_TUL,DESTINATION_AIRPORT_TUS,DESTINATION_AIRPORT_TYS,DESTINATION_AIRPORT_XNA
0,-11.0,1,4,5,1448,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-8.0,1,4,10,2330,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-2.0,1,4,20,2296,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,-5.0,1,4,20,2342,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-1.0,1,4,25,1448,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# Export the encoded frame to CSV (the index is written as the first column,
# which is the to_csv default).
# This will take a very long time; if you run into errors, try adding
# chunksize=1000000. The resulting CSV file is about 6.8 GB.

encoded_csv_path = 'resources/flights_ml_depart_encoded.csv'
df_ml.to_csv(encoded_csv_path)

In [11]:
# Report the (rows, columns) dimensions of the encoded frame
encoded_shape = df_ml.shape
print(encoded_shape)

(5714008, 303)
