<h1>Prediction Using Current Best Random Forest Estimator</h1>

In [1]:
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json

import warnings
warnings.filterwarnings('ignore')

<h3>A - Import Training Data</h3>

In [2]:
# Columns fed to the model, in the order they are selected from the encoded CSVs.
features_names = [
    # Identifier and calendar columns
    'flight_id', 'month-day', 'month', 'day_in_month',
    # Departure / destination columns
    'adep', 'country_code_adep', 'ades', 'country_code_ades',
    # Aircraft and operator columns
    'aircraft_type', 'wtc', 'airline',
    # Flight-level durations and distance
    'flight_duration', 'taxiout_time', 'flown_distance',
    # Off-block and arrival clock columns
    'actual_offblock_time_hour', 'actual_offblock_time_minute', 'actual_offblock_time_hour_minute',
    'arrival_time_hour', 'arrival_time_minute', 'arrival_time_hour_minute',
    # Per-flight aggregates (median / mean columns)
    'latitude_median', 'longitude_median',
    'altitude_mean', 'groundspeed_mean', 'track_mean', 'vertical_rate_mean', 'track_unwrapped_mean',
    'u_component_of_wind_mean', 'v_component_of_wind_mean',
    'temperature_mean', 'specific_humidity_mean',
]

In [3]:
# Read the encoded challenge set; it supplies both the model inputs and the target.
encoded_challenge_set = pd.read_csv('../../data/encoded_challenge_set.csv')

# Model inputs: exactly the columns listed in features_names, in that order.
# NOTE(review): "enccoded" is misspelled, but the training cell refers to this
# name, so it is kept as-is here.
enccoded_training_features = encoded_challenge_set.loc[:, features_names]

# Regression target: the 'tow' column.
target = encoded_challenge_set.loc[:, 'tow']

<h3>B - Train a RandomForestRegressor</h3>

In [4]:
# A fixed random_state makes the forest reproducible: the estimator is
# randomised, so without a seed every "Restart & Run All" would train a
# different model and write different submission files.
rf = RandomForestRegressor(
    n_estimators=11, # From elbow graph in CV Grid Search 
    random_state=42,
)

rf.fit(enccoded_training_features, target)

<h3>C - Predict for Submission</h3>

In [5]:
def save_dataframe(path: str, df: pd.DataFrame) -> None:
    """Write ``df`` to ``path`` as CSV (index omitted) and print a confirmation.

    Args:
        path: Destination file path handed to ``DataFrame.to_csv``.
        df: Frame to write.
    """
    confirmation = f"{path} is saved!"
    df.to_csv(path, index=False)
    print(confirmation)

In [6]:
# Read the encoded submission set (the rows to predict for).
encoded_submission_set = pd.read_csv('../../data/encoded_submission_set.csv')

# Same feature columns, same order, as used for training.
encoded_submission_features = encoded_submission_set.loc[:, features_names]

predicted_tow = rf.predict(encoded_submission_features)

# Features plus the prediction; astype("int64") drops the fractional part of
# the predicted take-off weight.
predicted_submission_set = encoded_submission_features.assign(
    flight_id=encoded_submission_set['flight_id'],
    tow=predicted_tow.astype("int64"),
)
display(predicted_submission_set)

# Only the two columns written to the submission file.
submission_file_content_1 = predicted_submission_set.loc[:, ['flight_id', 'tow']]
display(submission_file_content_1)

Unnamed: 0,flight_id,month-day,month,day_in_month,adep,country_code_adep,ades,country_code_ades,aircraft_type,wtc,...,altitude_mean,groundspeed_mean,track_mean,vertical_rate_mean,track_unwrapped_mean,u_component_of_wind_mean,v_component_of_wind_mean,temperature_mean,specific_humidity_mean,tow
0,248753821,0,1,1,384,96,216,26,14,1,...,33188.313883,420.959804,282.808153,175.331666,282.808153,2.832943,-24.641557,219.596603,0.000152,68106
1,248753822,0,1,1,17,11,162,78,7,0,...,32583.842964,396.773194,260.807718,-16.987908,260.807718,34.618718,11.034940,225.815537,0.000547,205780
2,248754498,0,1,1,190,99,58,27,20,0,...,22116.996137,430.803681,88.140047,-20.213588,49.203442,21.846468,25.232590,244.716348,0.001122,217274
3,248757623,0,1,1,61,37,189,24,11,1,...,30159.995122,358.478244,173.338410,-78.997854,173.338410,16.119310,11.120445,226.034554,0.000542,62799
4,248763603,0,1,1,92,44,216,26,4,1,...,28515.134491,401.131767,140.981972,254.485529,134.567263,24.612074,24.603707,231.581341,0.000491,63817
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105954,258066302,364,12,31,384,96,85,20,11,1,...,33796.791745,374.838499,300.444819,-48.212533,323.220053,21.671881,-16.838615,216.294734,0.000183,69420
105955,258068609,364,12,31,321,5,160,78,17,0,...,28865.208298,377.312504,278.020583,-14.281511,278.020583,41.461469,16.649501,228.145188,0.000676,179622
105956,258068876,364,12,31,385,96,309,14,5,1,...,32501.214079,387.655081,287.267163,-112.362177,287.267163,9.114682,-17.039006,219.970819,0.000444,75793
105957,258064675,364,12,31,85,73,18,19,4,1,...,11400.762440,323.355484,170.146884,-14.588067,170.146884,31.063791,8.720314,262.451712,0.003364,61801


Unnamed: 0,flight_id,tow
0,248753821,68106
1,248753822,205780
2,248754498,217274
3,248757623,62799
4,248763603,63817
...,...,...
105954,258066302,69420
105955,258068609,179622
105956,258068876,75793
105957,258064675,61801


In [7]:
# Persist the unclipped predictions as the first submission file.
save_dataframe(
    path="../../data/my_submission_1.csv",
    df=submission_file_content_1,
)

../../data/my_submission_1.csv is saved!


<h3>D - Predict, Then Clamp to Operating Empty Weight (OEW) and Maximum Take-Off Weight (MTOW) Limits</h3>

In [8]:
def predict_with_weight_limitation(encoded_submission_features: pd.DataFrame) -> pd.DataFrame:
    """Predict TOW with the fitted forest and clamp it to per-aircraft weight limits.

    Predictions come from the notebook-level ``rf`` model and are cast to
    ``int64``. Each row's encoded ``aircraft_type`` is translated to a type
    name via ``aircraft_types_encoding.json``. When that name has an entry in
    ``aircraft_types_weight_limitations.json``, a prediction below the
    operating empty weight is raised to it, otherwise a prediction above the
    maximum take-off weight is lowered to it. Rows whose aircraft type is
    missing from either file keep the raw prediction.

    Args:
        encoded_submission_features: Feature frame accepted by ``rf.predict``.
            Must contain an ``aircraft_type`` column. If it has no
            ``flight_id`` column, the ids are taken from the notebook-level
            ``encoded_submission_set`` (aligned on index).

    Returns:
        A new DataFrame with integer columns ``flight_id`` and ``tow``, one
        row per input row, in input order, with a fresh default index.
    """
    predicted_tow = rf.predict(encoded_submission_features)

    predicted_submission_set = encoded_submission_features.copy()
    # Prefer the ids that travel with the frame being predicted; only fall back
    # to the notebook-level submission set when the column is absent.
    if 'flight_id' not in predicted_submission_set.columns:
        predicted_submission_set['flight_id'] = encoded_submission_set['flight_id']
    predicted_submission_set['tow'] = predicted_tow.astype("int64")

    with open('../../data/aircraft_types_encoding.json', 'r') as file:
        aircraft_types_encoding = json.load(file)

    with open('../../data/aircraft_types_weight_limitations.json', 'r') as file:
        weight_limitations = json.load(file)

    # JSON object keys are strings, so the encoded type is looked up as e.g. "14".
    aircraft_codes = predicted_submission_set['aircraft_type'].astype("int64").astype(str)

    # Resolve the limits once per distinct aircraft code that actually occurs,
    # instead of once per row.
    oew_by_code, mtow_by_code = {}, {}
    for code in aircraft_codes.unique():
        aircraft_type = aircraft_types_encoding.get(code)
        if aircraft_type not in weight_limitations:
            continue
        limitations = weight_limitations[aircraft_type]
        oew_by_code[code] = int(limitations.get('operating_empty_weight'))
        mtow_by_code[code] = int(limitations.get('maximum_takeoff_weight'))

    tow = predicted_submission_set['tow'].to_numpy()
    # Codes without limits map to NaN; comparisons against NaN are False, so
    # those rows fall through to the unmodified prediction below.
    oew = aircraft_codes.map(oew_by_code).astype("float64").to_numpy()
    mtow = aircraft_codes.map(mtow_by_code).astype("float64").to_numpy()

    # First matching condition wins: the OEW floor is checked before the MTOW
    # ceiling, exactly like an if / elif chain.
    limited_tow = np.select([tow < oew, tow > mtow], [oew, mtow], default=tow)

    submission_file_content = pd.DataFrame({
        'flight_id': predicted_submission_set['flight_id'].astype("int64").to_numpy(),
        'tow': limited_tow.astype("int64"),
    })

    return submission_file_content

In [9]:
# Re-run the prediction with the OEW / MTOW limits applied and show the result.
submission_file_content_2 = predict_with_weight_limitation(
    encoded_submission_features=encoded_submission_features,
)
display(submission_file_content_2)

Unnamed: 0,flight_id,tow
0,248753821,68106
1,248753822,205780
2,248754498,217274
3,248757623,62799
4,248763603,63817
...,...,...
105954,258066302,69420
105955,258068609,158800
105956,258068876,75793
105957,258064675,61801


In [10]:
# Persist the weight-limited predictions as the second submission file.
save_dataframe(
    path="../../data/my_submission_2.csv",
    df=submission_file_content_2,
)

../../data/my_submission_2.csv is saved!
