In [23]:
# Generic inputs for most ML tasks
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# This is new
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor

pd.options.display.float_format = '{:,.2f}'.format

# setup interactive notebook mode
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from IPython.display import display, HTML

### Fetching flight data

In [24]:
pred_data = pd.read_csv('pred_data\CIS_662 _FINAL_Predictions.csv')

In [25]:
pred_data.head()

Unnamed: 0,DATE,DAY,FLIGHT NUMBER,ORIGIN,DEPARTURE TIME,ARRIVAL TIME,ARRIVAL STATUS,ARRIVAL STATUS_Prev_flight_early,ARRIVAL STATUS_Prev_flight_ontime,ARRIVAL STATUS_Prev_flight_late,dep_order
0,4/19/2024,FRIDAY,UA 1400,ORD,6:52 PM,9:47 PM,,,,,early
1,4/19/2024,FRIDAY,AA 3402,ORD,7:59 PM,10:52 PM,,,,,latter
2,4/19/2024,FRIDAY,B6 116,JFK,1:34 PM,2:51 PM,,,,,early
3,4/19/2024,FRIDAY,DL 5182,JFK,2:55 PM,4:21 PM,,,,,latter
4,4/19/2024,FRIDAY,WN 5285,MCO,11:35 AM,2:20 PM,,,,,early


In [26]:
pred_data.dtypes

DATE                                  object
DAY                                   object
FLIGHT NUMBER                         object
ORIGIN                                object
DEPARTURE TIME                        object
ARRIVAL TIME                          object
ARRIVAL STATUS                       float64
ARRIVAL STATUS_Prev_flight_early     float64
ARRIVAL STATUS_Prev_flight_ontime    float64
ARRIVAL STATUS_Prev_flight_late      float64
dep_order                             object
dtype: object

In [27]:
pred_data['DATE'] = pd.to_datetime( pred_data['DATE'],format ="%m/%d/%Y")
pred_data['DAY'] = pred_data['DATE'].dt.day_of_week.astype('object')
pred_data['Carrier_Code'] = pred_data['FLIGHT NUMBER'].str[0:2].astype('object')
pred_data['DEPARTURE TIME'] = pd.to_datetime(pred_data['DEPARTURE TIME'], format = '%I:%M %p')
pred_data['dep_hour']= pred_data['DEPARTURE TIME'].dt.hour.astype('object')
pred_data['dep_min'] = pred_data['DEPARTURE TIME'].dt.minute.astype('object')
pred_data['ARRIVAL TIME'] = pd.to_datetime(pred_data['ARRIVAL TIME'], format = '%I:%M %p')
pred_data['arr_hour']= pred_data['ARRIVAL TIME'].dt.hour.astype('object')
pred_data['arr_min'] = pred_data['ARRIVAL TIME'].dt.minute.astype('object')
pred_data.rename(columns = {'ORIGIN' : 'Origin_Airport'},inplace =True)
pred_data.head()

Unnamed: 0,DATE,DAY,FLIGHT NUMBER,Origin_Airport,DEPARTURE TIME,ARRIVAL TIME,ARRIVAL STATUS,ARRIVAL STATUS_Prev_flight_early,ARRIVAL STATUS_Prev_flight_ontime,ARRIVAL STATUS_Prev_flight_late,dep_order,Carrier_Code,dep_hour,dep_min,arr_hour,arr_min
0,2024-04-19,4,UA 1400,ORD,1900-01-01 18:52:00,1900-01-01 21:47:00,,,,,early,UA,18,52,21,47
1,2024-04-19,4,AA 3402,ORD,1900-01-01 19:59:00,1900-01-01 22:52:00,,,,,latter,AA,19,59,22,52
2,2024-04-19,4,B6 116,JFK,1900-01-01 13:34:00,1900-01-01 14:51:00,,,,,early,B6,13,34,14,51
3,2024-04-19,4,DL 5182,JFK,1900-01-01 14:55:00,1900-01-01 16:21:00,,,,,latter,DL,14,55,16,21
4,2024-04-19,4,WN 5285,MCO,1900-01-01 11:35:00,1900-01-01 14:20:00,,,,,early,WN,11,35,14,20


In [28]:
pred_data.dtypes

DATE                                 datetime64[ns]
DAY                                          object
FLIGHT NUMBER                                object
Origin_Airport                               object
DEPARTURE TIME                       datetime64[ns]
ARRIVAL TIME                         datetime64[ns]
ARRIVAL STATUS                              float64
ARRIVAL STATUS_Prev_flight_early            float64
ARRIVAL STATUS_Prev_flight_ontime           float64
ARRIVAL STATUS_Prev_flight_late             float64
dep_order                                    object
Carrier_Code                                 object
dep_hour                                     object
dep_min                                      object
arr_hour                                     object
arr_min                                      object
dtype: object

In [30]:
pred_data.head(30)

Unnamed: 0,DATE,DAY,FLIGHT NUMBER,Origin_Airport,DEPARTURE TIME,ARRIVAL TIME,ARRIVAL STATUS,ARRIVAL STATUS_Prev_flight_early,ARRIVAL STATUS_Prev_flight_ontime,ARRIVAL STATUS_Prev_flight_late,dep_order,Carrier_Code,dep_hour,dep_min,arr_hour,arr_min
0,2024-04-19,4,UA 1400,ORD,1900-01-01 18:52:00,1900-01-01 21:47:00,,,,,early,UA,18,52,21,47
1,2024-04-19,4,AA 3402,ORD,1900-01-01 19:59:00,1900-01-01 22:52:00,,,,,latter,AA,19,59,22,52
2,2024-04-19,4,B6 116,JFK,1900-01-01 13:34:00,1900-01-01 14:51:00,,,,,early,B6,13,34,14,51
3,2024-04-19,4,DL 5182,JFK,1900-01-01 14:55:00,1900-01-01 16:21:00,,,,,latter,DL,14,55,16,21
4,2024-04-19,4,WN 5285,MCO,1900-01-01 11:35:00,1900-01-01 14:20:00,,,,,early,WN,11,35,14,20
5,2024-04-19,4,B6 656,MCO,1900-01-01 13:35:00,1900-01-01 16:25:00,,,,,latter,B6,13,35,16,25
6,2024-04-20,5,UA 1400,ORD,1900-01-01 18:52:00,1900-01-01 21:47:00,,,,,early,UA,18,52,21,47
7,2024-04-20,5,AA 3402,ORD,1900-01-01 19:59:00,1900-01-01 22:52:00,,,,,latter,AA,19,59,22,52
8,2024-04-20,5,B6 116,JFK,1900-01-01 13:25:00,1900-01-01 14:41:00,,,,,early,B6,13,25,14,41
9,2024-04-20,5,DL 5182,JFK,1900-01-01 14:55:00,1900-01-01 16:21:00,,,,,latter,DL,14,55,16,21


In [31]:
pred_data['dep_minutes'] = 0
pred_data['dep_minutes'] = pred_data['dep_minutes'].astype('object')
pred_data['dep_hours'] = pred_data['dep_hour'].astype('object')
pred_data['arr_minutes'] = 0
pred_data['arr_minutes'] = pred_data['arr_minutes'].astype('object')
pred_data['arr_hours'] = pred_data['arr_hour'].astype('object')
pred_data.rename(columns ={'DATE':'Date'},inplace=True)
pred_data.head()


Unnamed: 0,Date,DAY,FLIGHT NUMBER,Origin_Airport,DEPARTURE TIME,ARRIVAL TIME,ARRIVAL STATUS,ARRIVAL STATUS_Prev_flight_early,ARRIVAL STATUS_Prev_flight_ontime,ARRIVAL STATUS_Prev_flight_late,dep_order,Carrier_Code,dep_hour,dep_min,arr_hour,arr_min,dep_minutes,dep_hours,arr_minutes,arr_hours
0,2024-04-19,4,UA 1400,ORD,1900-01-01 18:52:00,1900-01-01 21:47:00,,,,,early,UA,18,52,21,47,0,18,0,21
1,2024-04-19,4,AA 3402,ORD,1900-01-01 19:59:00,1900-01-01 22:52:00,,,,,latter,AA,19,59,22,52,0,19,0,22
2,2024-04-19,4,B6 116,JFK,1900-01-01 13:34:00,1900-01-01 14:51:00,,,,,early,B6,13,34,14,51,0,13,0,14
3,2024-04-19,4,DL 5182,JFK,1900-01-01 14:55:00,1900-01-01 16:21:00,,,,,latter,DL,14,55,16,21,0,14,0,16
4,2024-04-19,4,WN 5285,MCO,1900-01-01 11:35:00,1900-01-01 14:20:00,,,,,early,WN,11,35,14,20,0,11,0,14


In [32]:
pred_data['Date'] = pred_data['Date'].dt.strftime('%m/%d/%Y')
pred_data.dtypes
len(pred_data)
pred_data.head(30)

Date                                         object
DAY                                          object
FLIGHT NUMBER                                object
Origin_Airport                               object
DEPARTURE TIME                       datetime64[ns]
ARRIVAL TIME                         datetime64[ns]
ARRIVAL STATUS                              float64
ARRIVAL STATUS_Prev_flight_early            float64
ARRIVAL STATUS_Prev_flight_ontime           float64
ARRIVAL STATUS_Prev_flight_late             float64
dep_order                                    object
Carrier_Code                                 object
dep_hour                                     object
dep_min                                      object
arr_hour                                     object
arr_min                                      object
dep_minutes                                  object
dep_hours                                    object
arr_minutes                                  object
arr_hours   

23

Unnamed: 0,Date,DAY,FLIGHT NUMBER,Origin_Airport,DEPARTURE TIME,ARRIVAL TIME,ARRIVAL STATUS,ARRIVAL STATUS_Prev_flight_early,ARRIVAL STATUS_Prev_flight_ontime,ARRIVAL STATUS_Prev_flight_late,dep_order,Carrier_Code,dep_hour,dep_min,arr_hour,arr_min,dep_minutes,dep_hours,arr_minutes,arr_hours
0,04/19/2024,4,UA 1400,ORD,1900-01-01 18:52:00,1900-01-01 21:47:00,,,,,early,UA,18,52,21,47,0,18,0,21
1,04/19/2024,4,AA 3402,ORD,1900-01-01 19:59:00,1900-01-01 22:52:00,,,,,latter,AA,19,59,22,52,0,19,0,22
2,04/19/2024,4,B6 116,JFK,1900-01-01 13:34:00,1900-01-01 14:51:00,,,,,early,B6,13,34,14,51,0,13,0,14
3,04/19/2024,4,DL 5182,JFK,1900-01-01 14:55:00,1900-01-01 16:21:00,,,,,latter,DL,14,55,16,21,0,14,0,16
4,04/19/2024,4,WN 5285,MCO,1900-01-01 11:35:00,1900-01-01 14:20:00,,,,,early,WN,11,35,14,20,0,11,0,14
5,04/19/2024,4,B6 656,MCO,1900-01-01 13:35:00,1900-01-01 16:25:00,,,,,latter,B6,13,35,16,25,0,13,0,16
6,04/20/2024,5,UA 1400,ORD,1900-01-01 18:52:00,1900-01-01 21:47:00,,,,,early,UA,18,52,21,47,0,18,0,21
7,04/20/2024,5,AA 3402,ORD,1900-01-01 19:59:00,1900-01-01 22:52:00,,,,,latter,AA,19,59,22,52,0,19,0,22
8,04/20/2024,5,B6 116,JFK,1900-01-01 13:25:00,1900-01-01 14:41:00,,,,,early,B6,13,25,14,41,0,13,0,14
9,04/20/2024,5,DL 5182,JFK,1900-01-01 14:55:00,1900-01-01 16:21:00,,,,,latter,DL,14,55,16,21,0,14,0,16


### Fetching weather data and merging with flight data

In [33]:
# Read and process weather data files for each airport
jfk_weather_data = pd.read_csv('weather_data/JFK_weather_data_forecast_processed.csv')
syr_weather_data = pd.read_csv('weather_data/SYR_weather_data_forecast_processed.csv')
ord_weather_data = pd.read_csv('weather_data/ORD_weather_data_forecast_processed.csv')
mco_weather_data = pd.read_csv('weather_data/MCO_weather_data_forecast_processed.csv')

# Combine weather data for all airports
weather_dfs = [jfk_weather_data, ord_weather_data, mco_weather_data]
weather_data = pd.concat(weather_dfs, axis=0)
#weather_data['dep_minutes'] = weather_data['dep_minutes'].astype('object')
weather_data['dep_hours'] = weather_data['dep_hours'].astype('object')
#syr_weather_data['arr_minutes'] = syr_weather_data['arr_minutes'].astype('object')
syr_weather_data['arr_hours'] = syr_weather_data['arr_hours'].astype('object')
weather_data.head(50)
len(weather_data)
len(syr_weather_data)

# Define merging logic based on airport code
pred_data = pd.merge(pred_data, syr_weather_data, how='left', on=['Date', 'arr_hours'])
len(pred_data)
pred_data.to_csv("first.csv")

pred_data = pd.merge(pred_data, weather_data, how='left', on=['Origin_Airport', 'Date', 'dep_hours'])
pred_data.to_csv("sec.csv")
len(pred_data)


#weather_data.head()


Unnamed: 0,dep_clouds,dep_clouds_hi,dep_clouds_low,dep_clouds_mid,dep_dewpt,dep_ozone,dep_pop,dep_precip,dep_pres,dep_rh,...,dep_weather.description,dep_weather.code,dep_wind_cdir,dep_wind_cdir_full,dep_wind_dir,dep_wind_gust_spd,dep_wind_spd,Date,dep_hours,Origin_Airport
0,92,0,100,0,4.3,382.5,0,0.0,1019.5,74,...,Overcast clouds,804,NE,northeast,50,9.1,6.0,04/18/2024,18,JFK
1,95,0,100,0,4.5,375.8,0,0.0,1020.5,77,...,Overcast clouds,804,NE,northeast,50,7.7,5.2,04/18/2024,19,JFK
2,95,0,100,0,3.4,369.5,0,0.0,1021.0,72,...,Overcast clouds,804,NE,northeast,50,7.6,5.2,04/18/2024,20,JFK
3,83,48,100,0,3.8,365.3,0,0.0,1020.5,75,...,Overcast clouds,804,NE,northeast,50,7.6,5.2,04/18/2024,21,JFK
4,76,87,99,0,3.8,362.5,0,0.0,1020.5,74,...,Overcast clouds,804,ENE,east-northeast,60,6.8,4.8,04/18/2024,22,JFK
5,80,100,100,0,3.3,354.8,0,0.0,1021.0,72,...,Overcast clouds,804,ENE,east-northeast,60,6.8,4.4,04/18/2024,23,JFK
6,73,100,98,0,3.7,354.8,0,0.0,1020.0,74,...,Overcast clouds,804,ENE,east-northeast,60,6.4,4.4,04/19/2024,0,JFK
7,67,54,34,10,3.6,351.5,0,0.0,1020.0,75,...,Broken clouds,803,ENE,east-northeast,60,6.0,4.0,04/19/2024,1,JFK
8,61,35,12,100,3.8,347.5,0,0.0,1020.5,76,...,Broken clouds,803,ENE,east-northeast,60,6.0,4.0,04/19/2024,2,JFK
9,57,1,5,73,3.6,346.0,0,0.0,1020.0,76,...,Broken clouds,803,ENE,east-northeast,60,6.0,4.0,04/19/2024,3,JFK


504

168

23

23

In [34]:
pred_data.to_csv('pred_mid.csv')

In [35]:
pred_data['arr_day'] = pred_data['DAY']
pred_data['dep_day'] = pred_data['DAY']
pred_data.drop(columns = ['DAY','DEPARTURE TIME','ARRIVAL TIME','ARRIVAL STATUS_Prev_flight_early',
                         'ARRIVAL STATUS_Prev_flight_ontime','ARRIVAL STATUS_Prev_flight_late',
                         'FLIGHT NUMBER','Date','ARRIVAL STATUS'],inplace = True)



In [36]:
pred_data.dtypes

Origin_Airport              object
dep_order                   object
Carrier_Code                object
dep_hour                    object
dep_min                     object
arr_hour                    object
arr_min                     object
dep_minutes                 object
dep_hours                   object
arr_minutes                 object
arr_hours                   object
arr_clouds                   int64
arr_clouds_hi                int64
arr_clouds_low               int64
arr_clouds_mid               int64
arr_dewpt                  float64
arr_ozone                  float64
arr_pop                      int64
arr_precip                 float64
arr_pres                   float64
arr_rh                       int64
arr_snow                     int64
arr_snow_depth               int64
arr_temp                   float64
arr_vis                    float64
arr_weather.description     object
arr_weather.code             int64
arr_wind_cdir               object
arr_wind_cdir_full  

In [37]:
pred_data['dep_hour'] = pd.Categorical(pred_data['dep_hour'], categories=[i for i in range(24)])
pred_data['dep_day'] = pd.Categorical(pred_data['dep_day'], categories=[i for i in range(7)])
pred_data['dep_min'] = pd.Categorical(pred_data['dep_min'], categories=[i for i in range(60)])
pred_data['arr_hour'] = pd.Categorical(pred_data['arr_hour'], categories=[i for i in range(24)])
pred_data['arr_day'] = pd.Categorical(pred_data['arr_day'], categories=[i for i in range(7)])
pred_data['arr_min'] = pd.Categorical(pred_data['arr_min'], categories=[i for i in range(60)])
pred_data['Carrier_Code'] = pd.Categorical(pred_data['Carrier_Code'], categories=['AA', 'UA', 'DL', 'B6', 'WN'])
pred_data['Origin_Airport'] = pd.Categorical(pred_data['Origin_Airport'], categories=['ORD', 'JFK', 'MCO'])
pred_data=pred_data[['dep_hour', 'dep_day', 'Origin_Airport', 'arr_hour', 'arr_day',
       'dep_min', 'arr_min', 'arr_clouds', 'arr_dewpt',
       'arr_precip', 'arr_pres', 'arr_rh', 'arr_snow', 'arr_temp', 'arr_vis',
       'arr_weather.code', 'arr_wind_dir', 'arr_wind_gust_spd', 'arr_wind_spd',
       'dep_clouds', 'dep_dewpt', 'dep_precip', 'dep_pres', 'dep_rh',
       'dep_snow', 'dep_temp', 'dep_vis', 'dep_weather.code', 'dep_wind_dir',
       'dep_wind_gust_spd', 'dep_wind_spd','dep_order']]
pred_data.head()
pred_data.columns

Unnamed: 0,dep_hour,dep_day,Origin_Airport,arr_hour,arr_day,dep_min,arr_min,arr_clouds,arr_dewpt,arr_precip,...,dep_pres,dep_rh,dep_snow,dep_temp,dep_vis,dep_weather.code,dep_wind_dir,dep_wind_gust_spd,dep_wind_spd,dep_order
0,18,4,ORD,21,4,52,47,69,8.0,0.5,...,996.5,35,0,12.7,24.0,801,280,12.5,8.4,early
1,19,4,ORD,22,4,59,52,66,7.2,0.25,...,996.5,37,0,12.1,24.0,803,280,11.1,7.6,latter
2,13,4,JFK,14,4,34,51,84,7.2,0.5,...,1019.5,58,0,12.1,24.0,804,110,7.2,5.2,early
3,14,4,JFK,16,4,55,21,86,7.9,0.76,...,1019.0,59,0,12.3,24.0,804,120,7.3,5.6,latter
4,11,4,MCO,14,4,35,20,84,7.2,0.5,...,1015.0,58,0,28.1,24.0,801,280,2.4,2.0,early


Index(['dep_hour', 'dep_day', 'Origin_Airport', 'arr_hour', 'arr_day',
       'dep_min', 'arr_min', 'arr_clouds', 'arr_dewpt', 'arr_precip',
       'arr_pres', 'arr_rh', 'arr_snow', 'arr_temp', 'arr_vis',
       'arr_weather.code', 'arr_wind_dir', 'arr_wind_gust_spd', 'arr_wind_spd',
       'dep_clouds', 'dep_dewpt', 'dep_precip', 'dep_pres', 'dep_rh',
       'dep_snow', 'dep_temp', 'dep_vis', 'dep_weather.code', 'dep_wind_dir',
       'dep_wind_gust_spd', 'dep_wind_spd', 'dep_order'],
      dtype='object')

### Spliting data into latter and early flights

In [38]:
pred_data1 = pred_data[pred_data['dep_order'] == 'early']
pred_data2 = pred_data[pred_data['dep_order'] == 'latter']


In [39]:
pred_data1.drop(columns=['dep_order'],inplace = True)
pred_data2.drop(columns=['dep_order'],inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pred_data1.drop(columns=['dep_order'],inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pred_data2.drop(columns=['dep_order'],inplace = True)


In [40]:
pred_data1.head(22)

Unnamed: 0,dep_hour,dep_day,Origin_Airport,arr_hour,arr_day,dep_min,arr_min,arr_clouds,arr_dewpt,arr_precip,...,dep_precip,dep_pres,dep_rh,dep_snow,dep_temp,dep_vis,dep_weather.code,dep_wind_dir,dep_wind_gust_spd,dep_wind_spd
0,18,4,ORD,21,4,52,47,69,8.0,0.5,...,0.0,996.5,35,0,12.7,24.0,801,280,12.5,8.4
2,13,4,JFK,14,4,34,51,84,7.2,0.5,...,0.0,1019.5,58,0,12.1,24.0,804,110,7.2,5.2
4,11,4,MCO,14,4,35,20,84,7.2,0.5,...,0.0,1015.0,58,0,28.1,24.0,801,280,2.4,2.0
6,18,5,ORD,21,5,52,47,23,-2.4,0.0,...,0.0,994.0,40,0,9.7,24.13,803,296,6.66,4.4
8,13,5,JFK,14,5,25,41,71,-1.2,0.0,...,0.0,1014.5,46,0,15.8,24.0,803,283,5.73,4.13
10,13,5,MCO,16,5,35,25,58,-1.6,0.0,...,0.0,1014.0,42,0,30.6,24.0,801,270,4.8,3.46
11,18,6,ORD,21,6,52,47,9,-2.9,0.0,...,0.0,990.5,35,0,13.7,24.13,801,273,9.03,6.0
13,13,6,JFK,14,6,35,51,44,-3.1,0.0,...,0.0,1013.5,38,0,12.8,24.13,803,256,7.93,5.2
15,11,6,MCO,13,6,5,50,48,-2.8,0.0,...,0.0,1013.0,60,0,27.2,24.13,802,230,5.2,3.6
17,18,0,ORD,21,0,52,47,10,-4.7,0.0,...,0.0,987.0,36,0,16.0,24.13,804,203,8.68,5.77


In [41]:
pred_data2.head(20)

Unnamed: 0,dep_hour,dep_day,Origin_Airport,arr_hour,arr_day,dep_min,arr_min,arr_clouds,arr_dewpt,arr_precip,...,dep_precip,dep_pres,dep_rh,dep_snow,dep_temp,dep_vis,dep_weather.code,dep_wind_dir,dep_wind_gust_spd,dep_wind_spd
1,19,4,ORD,22,4,59,52,66,7.2,0.25,...,0.0,996.5,37,0,12.1,24.0,803,280,11.1,7.6
3,14,4,JFK,16,4,55,21,86,7.9,0.76,...,0.0,1019.0,59,0,12.3,24.0,804,120,7.3,5.6
5,13,4,MCO,16,4,35,25,86,7.9,0.76,...,0.0,1013.5,45,0,30.9,24.0,801,270,3.2,2.4
7,19,5,ORD,22,5,59,52,23,-2.4,0.0,...,0.0,994.5,41,0,9.5,24.13,804,300,6.0,4.0
9,14,5,JFK,16,5,55,21,58,-1.6,0.0,...,0.0,1014.0,42,0,16.5,24.0,803,270,6.0,4.4
12,19,6,ORD,22,6,59,52,9,-2.9,0.0,...,0.0,990.5,36,0,13.4,24.13,801,270,9.1,6.0
14,14,6,JFK,16,6,55,21,32,-3.2,0.0,...,0.0,1012.5,37,0,13.4,24.13,803,250,8.7,5.6
16,13,6,MCO,16,6,35,25,32,-3.2,0.0,...,0.0,1012.0,49,0,30.0,24.13,802,236,8.33,5.46
18,19,0,ORD,22,0,59,52,13,-4.3,0.0,...,0.0,986.5,36,0,15.9,24.13,804,200,8.8,5.87
20,14,0,JFK,16,0,55,21,9,-4.6,0.0,...,0.0,1012.0,36,0,15.7,24.13,801,270,6.1,4.1


In [42]:
pred_data1.to_csv('pred_data1.csv')

In [43]:
len(pred_data1)

12

In [44]:
pred_data1.head(20)

Unnamed: 0,dep_hour,dep_day,Origin_Airport,arr_hour,arr_day,dep_min,arr_min,arr_clouds,arr_dewpt,arr_precip,...,dep_precip,dep_pres,dep_rh,dep_snow,dep_temp,dep_vis,dep_weather.code,dep_wind_dir,dep_wind_gust_spd,dep_wind_spd
0,18,4,ORD,21,4,52,47,69,8.0,0.5,...,0.0,996.5,35,0,12.7,24.0,801,280,12.5,8.4
2,13,4,JFK,14,4,34,51,84,7.2,0.5,...,0.0,1019.5,58,0,12.1,24.0,804,110,7.2,5.2
4,11,4,MCO,14,4,35,20,84,7.2,0.5,...,0.0,1015.0,58,0,28.1,24.0,801,280,2.4,2.0
6,18,5,ORD,21,5,52,47,23,-2.4,0.0,...,0.0,994.0,40,0,9.7,24.13,803,296,6.66,4.4
8,13,5,JFK,14,5,25,41,71,-1.2,0.0,...,0.0,1014.5,46,0,15.8,24.0,803,283,5.73,4.13
10,13,5,MCO,16,5,35,25,58,-1.6,0.0,...,0.0,1014.0,42,0,30.6,24.0,801,270,4.8,3.46
11,18,6,ORD,21,6,52,47,9,-2.9,0.0,...,0.0,990.5,35,0,13.7,24.13,801,273,9.03,6.0
13,13,6,JFK,14,6,35,51,44,-3.1,0.0,...,0.0,1013.5,38,0,12.8,24.13,803,256,7.93,5.2
15,11,6,MCO,13,6,5,50,48,-2.8,0.0,...,0.0,1013.0,60,0,27.2,24.13,802,230,5.2,3.6
17,18,0,ORD,21,0,52,47,10,-4.7,0.0,...,0.0,987.0,36,0,16.0,24.13,804,203,8.68,5.77


In [45]:
pred_data2.to_csv('pred_data2.csv')

In [46]:
len(pred_data2)

11