In [112]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.preprocessing import OneHotEncoder
import seaborn as sns
import os

from sklearn.ensemble import RandomForestRegressor

#Evaluation:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn import preprocessing
from sklearn.impute import SimpleImputer
import ast
from geopy import distance
from sklearn.model_selection import RandomizedSearchCV
import datetime as dt

import utils
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [94]:
def process_fixes(df):
    df['decoded_fixes'] = df['decoded_fixes'].apply(lambda x: ast.literal_eval(x))
    
    df.loc[:, 'lat_start'] = df['decoded_fixes'].map(lambda x: x[0][0])
    df.loc[:, 'lng_start'] = df['decoded_fixes'].map(lambda x: x[0][1])
    
    df.loc[:, 'lat_end'] = df['decoded_fixes'].map(lambda x: x[-1][0])
    df.loc[:, 'lng_end'] = df['decoded_fixes'].map(lambda x: x[-1][1])
    
    df.drop(columns=['decoded_fixes'], inplace=True)

In [122]:
df_train = pd.read_csv('data/flight_plans_train.csv')
df_train.drop(columns=['id', 'departure_airport', 'arrival_airport', 'fixes'], inplace=True)
df_train['Distance'] = df_train.apply(lambda x: utils.dist_in_miles_from_spherical_path(ast.literal_eval(x['decoded_fixes'])), axis=1)
process_fixes(df_train)
df_train['DistanceStraight'] = df_train.apply(lambda x: distance.distance((x['lat_start'], x['lng_start']), (x['lat_end'], x['lng_end'])).m, axis=1)
df_train['DsitancesProduct'] = df_train['Distance'] * df_train['DistanceStraight']
df_train['TimeFromDistanceStraight'] = df_train['DistanceStraight'] / df_train['requested_airspeed']
df_train['TimeFromDistance'] = df_train['Distance'] / df_train['requested_airspeed']

In [123]:
def get_hour(row):
    row['departure_time'] = dt.datetime.strptime(row['departure_time'][: -4], "%Y-%m-%d %H:%M:%S")
    minutes = row['departure_time'].minute
    hour = row['departure_time'].hour
    
    if minutes >= 30:
        row['departure_time'] = (hour + 1) % 24
    else:
        row['departure_time'] = hour
        
    return row

In [124]:
df_train = df_train.apply(get_hour, axis=1)
one_hot = pd.get_dummies(df_train['departure_time'])
df_train.drop(columns=['departure_time'], inplace=True)
df_train = pd.concat([df_train, one_hot], axis=1, sort=False)

In [96]:
df_train.head()

Unnamed: 0,assigned_altitude,requested_airspeed,Distance,lat_start,lng_start,lat_end,lng_end,DistanceStraight,DsitancesProduct,TimeFromDistanceStraight,TimeFromDistance
0,29000,469.0,351.472012,44.881972,-93.221778,41.974522,-87.906597,537924.0,189065200.0,1146.959569,0.749407
1,34000,461.0,1762.13059,41.974522,-87.906597,33.942494,-118.40805,2807409.0,4947021000.0,6089.824493,3.822409
2,31000,438.0,571.232884,46.920639,-96.81575,41.974522,-87.906597,896267.7,511977600.0,2046.273249,1.304185
3,32000,452.0,541.254583,41.974522,-87.906597,36.281581,-94.307767,839607.4,454441400.0,1857.538561,1.197466
4,32000,467.0,465.057792,41.974522,-87.906597,39.297611,-94.713889,647791.3,301260400.0,1387.13333,0.995841


In [125]:
    wheathers = pd.read_csv('weather_features.csv')
    df_train = pd.concat([df_train, wheathers], axis=1, sort=False)
    df_train = df_train.replace(-999, 0)
    #df_train = df_train.fillna(0.0)

In [98]:
df_train.head()

Unnamed: 0,assigned_altitude,requested_airspeed,Distance,lat_start,lng_start,lat_end,lng_end,DistanceStraight,DsitancesProduct,TimeFromDistanceStraight,TimeFromDistance,cloud_echo_tops_0_0,vertically_integrated_liquid_water_0_0,flight_level_25000_u_0_0,flight_level_25000_v_0_0,flight_level_30000_u_0_0,flight_level_30000_v_0_0,flight_level_50000_u_0_0,flight_level_50000_v_0_0,cloud_echo_tops_1_0,vertically_integrated_liquid_water_1_0,flight_level_25000_u_1_0,flight_level_25000_v_1_0,flight_level_30000_u_1_0,flight_level_30000_v_1_0,flight_level_50000_u_1_0,flight_level_50000_v_1_0,cloud_echo_tops_2_0,vertically_integrated_liquid_water_2_0,flight_level_25000_u_2_0,flight_level_25000_v_2_0,flight_level_30000_u_2_0,flight_level_30000_v_2_0,flight_level_50000_u_2_0,flight_level_50000_v_2_0,cloud_echo_tops_3_0,vertically_integrated_liquid_water_3_0,flight_level_25000_u_3_0,flight_level_25000_v_3_0,flight_level_30000_u_3_0,flight_level_30000_v_3_0,flight_level_50000_u_3_0,flight_level_50000_v_3_0,cloud_echo_tops_4_0,vertically_integrated_liquid_water_4_0,flight_level_25000_u_4_0,flight_level_25000_v_4_0,flight_level_30000_u_4_0,flight_level_30000_v_4_0,flight_level_50000_u_4_0,flight_level_50000_v_4_0,cloud_echo_tops_0_1,vertically_integrated_liquid_water_0_1,flight_level_25000_u_0_1,flight_level_25000_v_0_1,flight_level_30000_u_0_1,flight_level_30000_v_0_1,flight_level_50000_u_0_1,flight_level_50000_v_0_1,cloud_echo_tops_1_1,vertically_integrated_liquid_water_1_1,flight_level_25000_u_1_1,flight_level_25000_v_1_1,flight_level_30000_u_1_1,flight_level_30000_v_1_1,flight_level_50000_u_1_1,flight_level_50000_v_1_1,cloud_echo_tops_2_1,vertically_integrated_liquid_water_2_1,flight_level_25000_u_2_1,flight_level_25000_v_2_1,flight_level_30000_u_2_1,flight_level_30000_v_2_1,flight_level_50000_u_2_1,flight_level_50000_v_2_1,cloud_echo_tops_3_1,vertically_integrated_liquid_water_3_1,flight_level_25000_u_3_1,flight_level_25000_v_3_1,flight_level_30000_u_3_1,flight_level_30000_v_3_1,flight_level_50000_u_3_1,flight_level_50000_v_3_1,cloud_echo_tops_4_1,vertically_integrated_liquid_water_4_1,flight_level_25000_u_4_1,flight_level_25000_v_4_1,flight_level_30000_u_4_1,flight_level_30000_v_4_1,flight_level_50000_u_4_1,flight_level_50000_v_4_1,cloud_echo_tops_0_2,vertically_integrated_liquid_water_0_2,flight_level_25000_u_0_2,flight_level_25000_v_0_2,flight_level_30000_u_0_2,flight_level_30000_v_0_2,flight_level_50000_u_0_2,flight_level_50000_v_0_2,cloud_echo_tops_1_2,vertically_integrated_liquid_water_1_2,flight_level_25000_u_1_2,flight_level_25000_v_1_2,flight_level_30000_u_1_2,flight_level_30000_v_1_2,flight_level_50000_u_1_2,flight_level_50000_v_1_2,cloud_echo_tops_2_2,vertically_integrated_liquid_water_2_2,flight_level_25000_u_2_2,flight_level_25000_v_2_2,flight_level_30000_u_2_2,flight_level_30000_v_2_2,flight_level_50000_u_2_2,flight_level_50000_v_2_2,cloud_echo_tops_3_2,vertically_integrated_liquid_water_3_2,flight_level_25000_u_3_2,flight_level_25000_v_3_2,flight_level_30000_u_3_2,flight_level_30000_v_3_2,flight_level_50000_u_3_2,flight_level_50000_v_3_2,cloud_echo_tops_4_2,vertically_integrated_liquid_water_4_2,flight_level_25000_u_4_2,flight_level_25000_v_4_2,flight_level_30000_u_4_2,flight_level_30000_v_4_2,flight_level_50000_u_4_2,flight_level_50000_v_4_2,cloud_echo_tops_0_3,vertically_integrated_liquid_water_0_3,flight_level_25000_u_0_3,flight_level_25000_v_0_3,flight_level_30000_u_0_3,flight_level_30000_v_0_3,flight_level_50000_u_0_3,flight_level_50000_v_0_3,cloud_echo_tops_1_3,vertically_integrated_liquid_water_1_3,flight_level_25000_u_1_3,flight_level_25000_v_1_3,flight_level_30000_u_1_3,flight_level_30000_v_1_3,flight_level_50000_u_1_3,flight_level_50000_v_1_3,cloud_echo_tops_2_3,vertically_integrated_liquid_water_2_3,flight_level_25000_u_2_3,flight_level_25000_v_2_3,flight_level_30000_u_2_3,flight_level_30000_v_2_3,flight_level_50000_u_2_3,flight_level_50000_v_2_3,cloud_echo_tops_3_3,vertically_integrated_liquid_water_3_3,flight_level_25000_u_3_3,flight_level_25000_v_3_3,flight_level_30000_u_3_3,flight_level_30000_v_3_3,flight_level_50000_u_3_3,flight_level_50000_v_3_3,cloud_echo_tops_4_3,vertically_integrated_liquid_water_4_3,flight_level_25000_u_4_3,flight_level_25000_v_4_3,flight_level_30000_u_4_3,flight_level_30000_v_4_3,flight_level_50000_u_4_3,flight_level_50000_v_4_3,cloud_echo_tops_0_4,vertically_integrated_liquid_water_0_4,flight_level_25000_u_0_4,flight_level_25000_v_0_4,flight_level_30000_u_0_4,flight_level_30000_v_0_4,flight_level_50000_u_0_4,flight_level_50000_v_0_4,cloud_echo_tops_1_4,vertically_integrated_liquid_water_1_4,flight_level_25000_u_1_4,flight_level_25000_v_1_4,flight_level_30000_u_1_4,flight_level_30000_v_1_4,flight_level_50000_u_1_4,flight_level_50000_v_1_4,cloud_echo_tops_2_4,vertically_integrated_liquid_water_2_4,flight_level_25000_u_2_4,flight_level_25000_v_2_4,flight_level_30000_u_2_4,flight_level_30000_v_2_4,flight_level_50000_u_2_4,flight_level_50000_v_2_4,cloud_echo_tops_3_4,vertically_integrated_liquid_water_3_4,flight_level_25000_u_3_4,flight_level_25000_v_3_4,flight_level_30000_u_3_4,flight_level_30000_v_3_4,flight_level_50000_u_3_4,flight_level_50000_v_3_4,cloud_echo_tops_4_4,vertically_integrated_liquid_water_4_4,flight_level_25000_u_4_4,flight_level_25000_v_4_4,flight_level_30000_u_4_4,flight_level_30000_v_4_4,flight_level_50000_u_4_4,flight_level_50000_v_4_4,cloud_echo_tops_0_5,vertically_integrated_liquid_water_0_5,flight_level_25000_u_0_5,flight_level_25000_v_0_5,flight_level_30000_u_0_5,flight_level_30000_v_0_5,flight_level_50000_u_0_5,flight_level_50000_v_0_5,cloud_echo_tops_1_5,vertically_integrated_liquid_water_1_5,flight_level_25000_u_1_5,flight_level_25000_v_1_5,flight_level_30000_u_1_5,flight_level_30000_v_1_5,flight_level_50000_u_1_5,flight_level_50000_v_1_5,cloud_echo_tops_2_5,vertically_integrated_liquid_water_2_5,flight_level_25000_u_2_5,flight_level_25000_v_2_5,flight_level_30000_u_2_5,flight_level_30000_v_2_5,flight_level_50000_u_2_5,flight_level_50000_v_2_5,cloud_echo_tops_3_5,vertically_integrated_liquid_water_3_5,flight_level_25000_u_3_5,flight_level_25000_v_3_5,flight_level_30000_u_3_5,flight_level_30000_v_3_5,flight_level_50000_u_3_5,flight_level_50000_v_3_5,cloud_echo_tops_4_5,vertically_integrated_liquid_water_4_5,flight_level_25000_u_4_5,flight_level_25000_v_4_5,flight_level_30000_u_4_5,flight_level_30000_v_4_5,flight_level_50000_u_4_5,flight_level_50000_v_4_5
0,29000,469.0,351.472012,44.881972,-93.221778,41.974522,-87.906597,537924.0,189065200.0,1146.959569,0.749407,0.0,0.0,36.700581,-21.120432,33.406174,-17.554939,21.383408,-10.993797,0.0,0.0,36.198544,-19.8634,32.810867,-16.318703,21.006134,-10.74353,0.0,0.0,37.043873,-18.680557,33.794468,-16.461224,20.665981,-10.98576,0.0,0.0,37.035473,-17.69508,34.72393,-16.434917,20.351036,-10.533554,0.0,0.0,37.169048,-16.901413,35.269001,-17.444832,19.801605,-9.648087,0.0,0.004,33.075581,-24.495432,29.406174,-20.179939,20.070908,-11.056297,0.0,0.0,34.636044,-21.3009,31.248367,-19.131203,19.068634,-9.99353,0.0,0.0,35.918873,-19.368057,33.231968,-18.336224,18.853481,-10.04826,0.0,0.0,37.035473,-17.75758,35.09893,-18.122417,18.538536,-10.408554,0.0,0.0,36.669048,-16.588913,35.644001,-18.507332,16.926605,-10.023087,0.0,0.0,29.888081,-24.057932,26.531174,-19.617439,19.633408,-12.493797,0.0,0.012,31.636044,-22.1759,28.123367,-19.193703,19.193634,-10.93103,0.0,0.0,32.793873,-19.993057,29.856968,-18.461224,18.353481,-10.48576,0.0,0.0,35.597973,-17.32008,32.47393,-19.372417,17.351036,-10.783554,0.0,0.0,38.106548,-15.588913,32.144001,-21.382332,16.989105,-8.585587,0.0,0.0,29.700581,-24.870432,26.656174,-18.429939,19.195908,-12.306297,0.0,0.0,30.011044,-24.4259,27.435867,-19.506203,19.131134,-12.99353,0.0,0.0,31.856373,-22.305557,28.481968,-19.461224,18.415981,-11.61076,0.0,0.0,33.785473,-20.19508,30.59893,-18.559917,17.663536,-10.346054,0.0,0.0,35.419048,-18.213913,31.644001,-19.194832,16.801605,-10.398087,0.0,0.0,29.763081,-22.932932,26.656174,-17.179939,19.195908,-9.431297,0.0,0.0,29.136044,-24.6759,26.560867,-17.631203,18.631134,-11.05603,0.0,0.0,29.043873,-23.493057,26.544468,-18.836224,17.728481,-11.54826,0.0,0.0,31.097973,-21.25758,27.84893,-19.059917,17.663536,-10.971054,0.0,0.0,32.794048,-19.463913,29.331501,-18.194832,17.551605,-10.085587,0.0,0.0,28.888081,-22.307932,25.718674,-14.867439,19.820908,-8.743797,0.0,0.0,28.886044,-22.8634,26.248367,-15.943703,19.506134,-8.30603,0.0,0.0,27.981373,-23.680557,25.419468,-16.211224,18.290981,-9.17326,0.0,0.0,28.160473,-22.13258,25.41143,-17.184917,17.226036,-9.721054,0.0,0.0,30.481548,-19.713913,26.644001,-17.319832,16.739105,-9.460587
1,34000,461.0,1762.13059,41.974522,-87.906597,33.942494,-118.40805,2807409.0,4947021000.0,6089.824493,3.822409,0.0,0.0,28.848738,-22.93425,26.412453,-16.836464,18.347626,-8.622734,0.0,0.0,28.899439,-22.153114,26.894402,-16.69688,17.10306,-7.800468,0.0,0.0,29.224174,-19.365187,26.933887,-15.834385,16.86721,-8.438286,0.0,0.0,29.965363,-17.575111,27.185909,-15.901188,16.920479,-8.703072,0.0,0.0,30.95224,-17.193932,27.077488,-16.565845,17.045975,-9.107807,0.0,0.0,16.848738,-16.49675,12.162453,-14.461464,10.910126,-9.247734,0.0,0.0,17.086939,-10.215614,14.081902,-12.75938,11.85306,-8.175468,0.0,0.0,17.099174,-11.802687,16.808887,-5.209385,12.17971,-7.688286,0.0,0.0,17.027863,-9.387611,16.560909,-8.026188,13.545479,-6.328072,0.0,0.0,19.88974,-9.006432,17.952488,-8.128345,11.983475,-7.482807,0.0,0.0,6.911238,-6.93425,5.537453,-1.023964,1.285126,-7.935234,0.0,0.0,6.086939,-7.153114,5.394402,-1.19688,1.54056,-7.987968,0.0,0.0,6.099174,-6.927687,5.433887,-1.959385,1.42971,-7.000786,0.0,0.0,5.902863,-6.700111,4.998409,-1.963688,0.982979,-6.578072,0.0,0.0,5.38974,-6.506432,5.139988,-1.128345,1.295975,-6.982807,0.0,0.0,3.348738,-7.12175,3.849953,-6.086464,0.785126,-0.372734,0.0,0.0,2.086939,-7.340614,1.894402,-5.94688,0.60306,-1.425468,0.0,0.0,1.161674,-7.365187,0.683887,-5.646885,0.99221,-1.625786,0.0,0.0,1.402863,-7.075111,-0.189091,-5.713688,0.795479,-2.203072,0.0,0.0,1.88974,-6.193932,-0.422512,-4.940845,0.045975,-2.357807,0.0,0.0,3.473738,0.25325,2.849953,2.788536,-7.027374,12.377266,0.0,0.0,4.086939,0.596886,1.894402,2.92812,-7.27194,11.762032,0.0,0.0,6.474174,0.759813,1.183887,4.165615,-7.44529,11.499214,0.0,0.0,5.840363,0.862389,1.060909,5.036312,-6.579521,10.671928,0.0,0.0,5.13974,1.618568,1.639988,6.496655,-6.266525,9.454693,0.0,0.0,9.411238,10.25325,3.162453,10.663536,-1.339874,1.314766,0.0,0.0,8.211939,10.034386,2.706902,10.49062,-1.52194,1.074532,0.0,0.0,8.911674,9.634813,3.371387,10.165615,-2.88279,0.624214,0.0,0.0,8.777863,9.987389,4.248409,10.661312,-3.829521,0.234428,0.0,0.0,7.95224,10.243568,4.577488,10.996655,-3.766525,0.642193
2,31000,438.0,571.232884,46.920639,-96.81575,41.974522,-87.906597,896267.7,511977600.0,2046.273249,1.304185,0.0,0.0,21.350555,-28.169518,22.242762,-24.508022,20.505642,-17.929863,0.0,0.0,23.488636,-28.590858,23.381252,-28.663582,19.459661,-19.465168,0.0,0.0,24.984402,-28.095795,24.941517,-29.106495,20.627991,-19.1842,0.0,0.012,25.785969,-27.84547,25.407738,-28.794876,22.356674,-17.808037,0.0,0.0,27.265701,-27.375549,27.092598,-29.581364,22.992924,-18.801111,0.0,0.0,25.913055,-15.482018,30.992762,-9.320522,28.068142,-9.679863,0.0,0.0,24.051136,-21.215858,26.506252,-17.476082,24.147161,-12.402668,0.0,0.0,23.796902,-24.783295,24.066517,-22.481495,21.065491,-13.6842,0.0,0.0,24.660969,-24.65797,24.720238,-24.919876,20.606674,-15.745537,0.0,0.0,25.953201,-25.000549,25.842598,-24.331364,21.492924,-15.551111,0.0,0.0,45.538055,4.455482,46.80526,10.054478,38.505642,-6.679863,0.0,0.0,37.488636,5.596642,37.693752,4.148918,37.522163,-4.652668,0.0,0.0,29.046902,-6.720795,29.441517,-9.793995,36.002991,-3.3092,0.0,0.0,25.160969,-15.65797,27.845238,-17.919876,29.419174,-6.683037,0.0,0.0,24.890701,-18.125549,25.155098,-18.643864,25.055424,-11.363611,0.0,0.0,37.725555,0.017982,34.74276,1.179478,36.193142,4.132637,0.0,0.0,40.863636,2.784142,35.006252,4.523918,33.584663,6.534832,8458.600586,0.2,39.921902,5.779205,41.129017,5.518505,33.440491,-3.4967,0.0,0.004,38.973469,6.59203,36.220238,10.142624,39.981674,-5.183037,0.0,0.0,31.953201,-0.375549,28.717598,-1.456364,41.055424,-4.238611,956.200012,0.028,27.038055,6.330482,29.367762,5.804478,26.318142,0.007637,0.0,0.0,29.488636,6.784142,29.381252,6.586418,31.459661,0.784832,0.0,0.004,32.359402,7.341705,31.941517,4.643505,33.690491,1.8158,0.0,0.0,39.223469,7.59203,36.282738,3.142624,31.231674,4.941963,0.0,0.068,37.078201,11.061951,32.967598,3.418636,40.492924,-1.301111,0.0,0.08,22.725555,10.517982,25.805262,5.366978,23.443142,-12.054863,0.0,0.088,28.613636,10.284142,28.131252,8.023918,21.272161,-8.402668,0.0,0.0,25.484402,10.216705,29.004017,6.706005,24.440491,-0.5592,0.0,0.0,28.785969,7.21703,31.407738,6.392624,27.981674,0.129463,0.0,0.0,32.203201,6.311951,33.217598,5.543636,29.930424,0.386389
3,32000,452.0,541.254583,41.974522,-87.906597,36.281581,-94.307767,839607.4,454441400.0,1857.538561,1.197466,8337.799805,0.152,25.405596,-6.794083,27.708584,-5.320858,20.328999,1.19516,11072.600586,1.8704,26.396305,-11.21381,20.108475,-6.622665,24.293549,5.016014,0.0,0.128,22.986959,4.985058,25.080383,3.105629,23.641697,-11.04493,0.0,0.0,26.997473,8.192825,27.298241,3.320118,21.59952,-9.341125,0.0,0.0,25.640125,6.658295,28.325962,4.049805,21.34967,-6.269291,0.0,0.0,14.155596,-8.606583,17.396084,-5.820858,19.766499,-1.49234,0.0,0.0,12.333805,-4.52631,17.045975,-3.810165,21.293549,-5.671486,0.0,0.0,15.799459,0.797558,17.392883,-0.144371,18.516697,-10.60743,0.0,0.0,19.184973,3.067825,20.798241,1.945118,18.97452,-9.653625,0.0,0.0,22.265125,0.908295,22.763462,1.674805,17.22467,-9.144291,0.0,0.0,16.343096,-3.419083,14.271084,-3.258358,10.203999,-7.49234,0.0,0.0,17.583805,-2.02631,14.420975,-2.372665,11.668549,-7.421486,0.0,0.0,19.174459,-1.639942,15.080383,-1.769371,12.079197,-7.48243,0.0,0.0,19.747473,-2.494675,16.110741,-2.742382,12.53702,-7.403625,0.0,0.0,18.890125,-2.904205,16.138462,-2.512695,12.28717,-7.206791,0.0,0.0,10.718096,-3.856583,11.708584,-3.008358,3.078999,-5.36734,0.0,0.0,11.396305,-3.40131,11.233475,-3.310165,4.043549,-5.171486,0.0,0.0,12.924459,-2.952442,11.142883,-2.831871,4.141697,-4.85743,0.0,0.0,13.997473,-3.244675,11.423241,-2.554882,4.34952,-4.966125,0.0,0.0,15.515125,-3.716705,11.388462,-1.887695,4.59967,-4.581791,0.0,0.0,7.655596,-2.106583,7.208584,-3.758358,3.453999,-1.42984,0.0,0.0,7.896305,-1.27631,7.233475,-2.685165,3.731049,-1.483986,0.0,0.0,8.111959,-2.389942,6.455383,-1.894371,3.641697,-1.66993,0.0,0.0,8.247473,-3.994675,6.048241,-1.992382,3.78702,-1.403625,0.0,0.0,8.452625,-4.966705,5.888462,-1.637695,4.09967,-1.456791,0.0,0.0,4.655596,-2.169083,2.458584,-3.383358,0.016499,-0.55484,0.0,0.0,4.458805,-1.46381,2.170975,-2.747665,0.231049,-0.483986,0.0,0.0,5.236959,-1.264942,1.767883,-1.456871,0.391697,-0.73243,0.0,0.0,6.309973,-1.932175,2.298241,-0.804882,0.78702,-0.841125,0.0,0.0,6.702625,-2.779205,3.138462,-0.512695,0.84967,-0.644291
4,32000,467.0,465.057792,41.974522,-87.906597,39.297611,-94.713889,647791.3,301260400.0,1387.13333,0.995841,8337.799805,0.152,25.405596,-6.794083,27.708584,-5.320858,20.328999,1.19516,11072.600586,1.8704,26.396305,-11.21381,20.108475,-6.622665,24.293549,5.016014,0.0,0.128,22.986959,4.985058,25.080383,3.105629,23.641697,-11.04493,0.0,0.0,26.997473,8.192825,27.298241,3.320118,21.59952,-9.341125,0.0,0.0,25.640125,6.658295,28.325962,4.049805,21.34967,-6.269291,0.0,0.0,14.343096,-9.981583,17.583584,-6.070858,20.516499,1.75766,0.0,0.0224,15.083805,-5.15131,17.983475,-4.310165,22.856049,-7.233986,0.0,0.008,15.736959,1.735058,18.392883,0.480629,20.016697,-10.04493,0.0,0.0,18.434973,3.255325,20.735741,2.570118,20.59952,-9.091125,0.0,0.0,21.452625,2.095795,23.200962,1.924805,19.09967,-8.831791,0.0,0.0,16.593096,-4.044083,15.458584,-3.195858,14.703999,-5.92984,0.0,0.0,16.833805,-1.71381,16.483475,-1.060165,16.043549,-7.671486,0.0,0.0,18.924459,-1.014942,17.892883,-0.269371,16.329197,-8.91993,0.0,0.0,19.434973,-2.869675,18.298241,-1.242382,16.47452,-8.903625,0.0,0.0,20.515125,-2.654205,18.825962,-1.075195,15.91217,-9.394291,0.0,0.0,17.780596,-1.669083,14.521084,-3.008358,12.828999,-6.99234,0.0,0.0,19.208805,-1.77631,16.108475,-2.872665,13.668549,-7.296486,0.0,0.0,21.049459,-3.702442,17.642883,-2.394371,13.829197,-6.60743,0.0,0.0,21.872473,-3.744675,18.048241,-2.367382,14.53702,-5.403625,0.0,0.0,21.765125,-5.216705,18.263462,-1.887695,14.66217,-5.081791,0.0,0.0,18.468096,-3.169083,14.583584,-3.695858,8.516499,-4.74234,0.0,0.0,20.146305,-4.33881,15.608475,-2.997665,9.543549,-5.171486,0.0,0.0,20.486959,-5.139942,17.205383,-4.019371,10.204197,-3.98243,0.0,0.0,20.372473,-3.869675,16.798241,-3.992382,9.47452,-3.528625,0.0,0.002,20.952625,-5.216705,17.450962,-3.075195,10.09967,-2.956791,0.0,0.0,18.468096,-3.544083,12.646084,-3.133358,5.141499,-6.17984,0.0,0.0,18.458805,-4.52631,13.608475,-3.685165,4.606049,-5.546486,0.0,0.012,17.799459,-5.577442,14.455383,-3.894371,5.016697,-4.48243,0.0,0.0,18.122473,-6.619675,13.985741,-5.179882,5.66202,-2.091125,0.0,0.0,17.827625,-6.841705,14.825962,-5.575195,6.53717,-1.456791


In [126]:
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
imp.fit(df_train)

SimpleImputer(add_indicator=False, copy=True, fill_value=None,
              missing_values=nan, strategy='mean', verbose=0)

In [135]:
    Y = pd.read_csv('data/flight_times_labels_train.csv')['flight_time_s'].astype(float).values.reshape(-1)
    X = df_train.values
    X = imp.transform(X)
    
    #X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=5)
    X_train = X
    y_train = Y

    min_max_scaler = preprocessing.MinMaxScaler()
    X_train = min_max_scaler.fit_transform(X_train)
    X_test = min_max_scaler.transform(X_test)

In [144]:
model = RandomForestRegressor(n_estimators=300)

In [139]:
X_train.shape

(26838, 275)

In [145]:
model.fit(X_train, y_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=300, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

In [138]:
    pred = model.predict(X_test).flatten()
    
    outcomes = pd.DataFrame({'Prediction': pred.tolist(), 'Ground_Truth': y_test.tolist(), 'Difference': (y_test - pred).tolist()})
    print(outcomes.describe())

Prediction  Ground_Truth    Difference
count  5368.000000   5368.000000   5368.000000
mean   3254.614402   6136.449329   2881.834927
std      26.349303   3213.165030   3212.187019
min    2224.750000   1505.000000  -1750.880000
25%    3253.760000   3708.000000    459.772500
50%    3255.270000   5484.000000   2231.025000
75%    3255.270000   7270.250000   4018.092500
max    3317.360000  16610.000000  13313.710000


In [103]:
model = pickle.load(open('finalized_model500.sav', 'rb'))

In [146]:
    test_data = pd.read_csv('data/flight_plans_test.csv')
    test_ids = test_data[['id']].squeeze()
    test_data.drop(columns=['id', 'departure_airport', 'arrival_airport', 'fixes'], inplace=True)
    test_data['Distance'] = test_data.apply(lambda x: utils.dist_in_miles_from_spherical_path(ast.literal_eval(x['decoded_fixes'])), axis=1)
    process_fixes(test_data)
    test_data['DistanceStraight'] = test_data.apply(lambda x: distance.distance((x['lat_start'], x['lng_start']), (x['lat_end'], x['lng_end'])).m, axis=1)
    test_data['DsitancesProduct'] = test_data['Distance'] * test_data['DistanceStraight']
    test_data['TimeFromDistanceStraight'] = test_data['DistanceStraight'] / test_data['requested_airspeed']
    test_data['TimeFromDistance'] = test_data['Distance'] / test_data['requested_airspeed']
    
    test_data = test_data.apply(get_hour, axis=1)
    one_hot = pd.get_dummies(test_data['departure_time'])
    test_data.drop(columns=['departure_time'], inplace=True)
    test_data = pd.concat([test_data, one_hot], axis=1, sort=False)


    wheathers_test = pd.read_csv('weather_features_test.csv')
    test_data = pd.concat([test_data, wheathers_test], axis=1, sort=False)
    test_data = test_data.replace(-999, 0)


In [147]:
X_test_data = test_data.values
X_test_data = imp.transform(X_test_data)
X_test_data = min_max_scaler.transform(X_test_data)
pred = model.predict(X_test_data).flatten()
    
test_outcome = pd.DataFrame({'id': list(test_ids), 'flight_time_s': list(pred.astype(int))})

In [148]:
test_outcome.to_csv('results7.csv', index=False)

In [149]:
import pickle
# save the classifier
filename = 'finalized_modelFull300.sav'
pickle.dump(model, open(filename, 'wb'))    

In [15]:
loaded_model = pickle.load(open(filename, 'rb'))

NameError: name 'Y_test' is not defined

In [16]:
loaded_model

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=30, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=500, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)