<h1>Prediction Using Current Best XGBoost Estimator</h1>

In [1]:
import json
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xgboost as xgb

warnings.filterwarnings('ignore')

<h3>A - Import Training Data</h3>

In [2]:
# Columns picked out of the encoded datasets as model inputs. The same list
# is used for both the training frame and the submission frame, so the order
# here is the column order the regressor is fitted and queried with.
top_important_features = [
    'wtc', 'aircraft_type', 'flown_distance', 'airline',
    'ades', 'flight_duration', 'adep',
    'country_code_adep', 'country_code_ades',
    'longitude_median', 'groundspeed_mean', 'month',
    'latitude_median', 'altitude_mean', 'month-day',
    'taxiout_time',
    'actual_offblock_time_hour_minute', 'actual_offblock_time_hour',
    'arrival_time_hour_minute',
]

In [3]:
# Read the encoded challenge (training) set from disk.
encoded_challenge_set = pd.read_csv('../../data/encoded_challenge_set.csv')

# Model inputs: every row, restricted to the selected feature columns.
encoded_training_features = encoded_challenge_set.loc[:, top_important_features]

# Regression target: the 'tow' column of the same frame.
target = encoded_challenge_set.loc[:, 'tow']

<h3>B - Train an XGBoost Regressor (XGBRegressor)</h3>

In [4]:
# Hyperparameters of the regressor, gathered in one mapping so they can be
# read (and tuned) at a glance before the estimator is built.
xgb_hyperparameters = {
    'n_estimators': 91,
    'max_depth': 10,
    'learning_rate': 0.1,
    'subsample': 1.0,
    'colsample_bytree': 1.0,
    'objective': 'reg:squarederror',
    # 'hist' (or 'approx') is the tree method that supports categorical data.
    'tree_method': 'hist',
    # Categorical handling needs xgboost 1.5.0+.
    # NOTE(review): the features are read from CSV without explicit dtypes, so
    # confirm whether any column actually reaches the model as categorical.
    'enable_categorical': True,
    'random_state': 42,
}
model = xgb.XGBRegressor(**xgb_hyperparameters)

# Fit on the selected training features; as the last expression of the cell
# the fitted estimator is also displayed.
model.fit(encoded_training_features, target)

<h3>C - Predict for Submission</h3>

In [5]:
def save_dataframe(path: str, df: pd.DataFrame) -> None:
    """Write ``df`` to ``path`` as CSV (index omitted) and print a confirmation.

    Parameters
    ----------
    path : str
        Destination file path. Any missing parent directories are created
        first, so saving into a fresh ``submissions`` folder does not fail.
    df : pd.DataFrame
        Frame to write.

    Returns
    -------
    None
    """
    # DataFrame.to_csv does not create directories; make sure the target
    # folder exists before writing.
    Path(path).parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(path, index=False)
    print(f"{path} is saved!")

In [6]:
# Read the encoded submission set from disk.
encoded_submission_set = pd.read_csv('../../data/encoded_submission_set.csv')

# Same feature columns, in the same order, as used for training.
encoded_submission_features = encoded_submission_set[top_important_features]

# Raw model output for every submission row.
predicted_tow = model.predict(encoded_submission_features)

# Features plus the flight id and the prediction cast to int64
# (the cast drops the fractional part of each predicted value).
predicted_submission_set = encoded_submission_features.assign(
    flight_id=encoded_submission_set['flight_id'],
    tow=predicted_tow.astype("int64"),
)
display(predicted_submission_set)

# Submission layout: only the flight id and its predicted take-off weight.
submission_columns = ['flight_id', 'tow']
my_submission_XGBoost_no_weight_limit = predicted_submission_set[submission_columns]
display(my_submission_XGBoost_no_weight_limit)

Unnamed: 0,wtc,aircraft_type,flown_distance,airline,ades,flight_duration,adep,country_code_adep,country_code_ades,longitude_median,...,month,latitude_median,altitude_mean,month-day,taxiout_time,actual_offblock_time_hour_minute,actual_offblock_time_hour,arrival_time_hour_minute,flight_id,tow
0,1,14,1122,14,216,170,384,96,26,18.956270,...,1,43.887268,33188.313883,0,15,584,9,768,248753821,67769
1,0,7,3205,22,162,470,17,11,78,-14.595004,...,1,51.137805,32583.842964,0,15,585,9,1069,248753822,213182
2,0,20,3965,10,58,473,190,99,27,-4.965820,...,1,50.163016,22116.996137,0,10,112,1,595,248754498,220604
3,1,11,986,6,189,156,61,37,24,-0.125950,...,1,44.952610,30159.995122,0,10,500,8,666,248757623,65778
4,1,4,686,20,216,105,92,44,26,-1.162992,...,1,51.250214,28515.134491,0,15,661,11,780,248763603,64018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105954,1,11,1199,14,85,201,384,96,20,20.683242,...,12,49.135483,33796.791745,364,15,576,9,792,258066302,68535
105955,0,17,3937,13,160,575,321,5,78,1.683897,...,12,51.939402,28865.208298,364,14,589,9,1178,258068609,175016
105956,1,5,988,14,309,154,385,96,14,16.480591,...,12,45.919364,32501.214079,364,25,565,9,744,258068876,74476
105957,1,4,240,27,18,42,85,73,19,8.431790,...,12,50.482132,11400.762440,364,9,604,10,655,258064675,60944


Unnamed: 0,flight_id,tow
0,248753821,67769
1,248753822,213182
2,248754498,220604
3,248757623,65778
4,248763603,64018
...,...,...
105954,258066302,68535
105955,258068609,175016
105956,258068876,74476
105957,258064675,60944


In [7]:
# Version number embedded in the submission file name.
current_version = 7

# Write the unclamped predictions as this version's submission file.
submission_path = f"../../data/submissions/my_submission_v{current_version}.csv"
save_dataframe(path=submission_path, df=my_submission_XGBoost_no_weight_limit)

../../data/submissions/my_submission_v7.csv is saved!


<h3>D - Predict and Clamp to the Operating Empty Weight (OEW) and Maximum Take-Off Weight (MTOW) Limits</h3>

In [8]:
def predict_with_weight_limitation(
    encoded_submission_features: pd.DataFrame,
    flight_ids=None,
    estimator=None,
    encoding_path: str = '../../data/aircraft_types_encoding.json',
    limitations_path: str = '../../data/aircraft_types_weight_limitations.json',
) -> pd.DataFrame:
    """Predict take-off weight and clamp it to each aircraft type's weight limits.

    Every prediction is cast to int64 and then, when limits are known for the
    row's aircraft type, raised to the operating empty weight if it is below
    it, or lowered to the maximum take-off weight if it is above it.
    Rows whose encoded aircraft type is missing from the encoding file, or
    whose decoded type has no entry in the limitations file, keep the
    unmodified prediction.

    Parameters
    ----------
    encoded_submission_features : pd.DataFrame
        Feature frame passed to the estimator. Must contain an
        ``aircraft_type`` column holding the encoded aircraft type.
    flight_ids : Series or array-like, optional
        Flight identifiers for the rows of ``encoded_submission_features``.
        A Series is aligned on the feature frame's index; a list/array is
        taken positionally. Defaults to the notebook-level
        ``encoded_submission_set['flight_id']``.
    estimator : object with ``predict``, optional
        Fitted regressor. Defaults to the notebook-level ``model``.
    encoding_path : str
        JSON file mapping encoded aircraft type (as a string key) to the
        aircraft type name.
    limitations_path : str
        JSON file mapping aircraft type name to a dict with the keys
        ``operating_empty_weight`` and ``maximum_takeoff_weight``.

    Returns
    -------
    pd.DataFrame
        Two int64 columns, ``flight_id`` and ``tow``, one row per input row,
        with a fresh 0..n-1 index.
    """
    # Fall back to the notebook globals so existing one-argument calls keep
    # working; passing them explicitly removes the hidden-state dependency.
    if estimator is None:
        estimator = model
    if flight_ids is None:
        flight_ids = encoded_submission_set['flight_id']

    predicted_tow = np.asarray(
        estimator.predict(encoded_submission_features)
    ).astype("int64")

    # Column assignment aligns a Series on the feature index (lists/arrays are
    # positional), which is how the ids were attached to the predictions before.
    id_frame = pd.DataFrame(index=encoded_submission_features.index)
    id_frame['flight_id'] = flight_ids

    with open(encoding_path, 'r') as file:
        aircraft_types_encoding = json.load(file)

    with open(limitations_path, 'r') as file:
        weight_limitations = json.load(file)

    # JSON object keys are strings, so the encoded type is looked up as text.
    aircraft_codes = [
        str(int(code)) for code in encoded_submission_features['aircraft_type']
    ]

    # Resolve (OEW, MTOW) once per distinct code that actually occurs in the
    # data instead of once per row.
    limits_by_code = {}
    for code in set(aircraft_codes):
        if code not in aircraft_types_encoding:
            continue
        aircraft_type = aircraft_types_encoding.get(code)
        if aircraft_type not in weight_limitations:
            continue
        limitations = weight_limitations.get(aircraft_type)
        limits_by_code[code] = (
            int(limitations.get('operating_empty_weight')),
            int(limitations.get('maximum_takeoff_weight')),
        )

    # Rows without known limits get NaN bounds; every comparison against NaN
    # is False, so those rows fall through to the unmodified prediction.
    no_limits = (np.nan, np.nan)
    bounds = np.array(
        [limits_by_code.get(code, no_limits) for code in aircraft_codes],
        dtype='float64',
    ).reshape(-1, 2)
    operating_empty_weight = bounds[:, 0]
    maximum_takeoff_weight = bounds[:, 1]

    # Same branch order as an if/elif chain: the lower bound is checked first.
    limited_tow = np.where(
        predicted_tow < operating_empty_weight,
        operating_empty_weight,
        np.where(
            predicted_tow > maximum_takeoff_weight,
            maximum_takeoff_weight,
            predicted_tow,
        ),
    )

    submission_file_content = pd.DataFrame({
        'flight_id': id_frame['flight_id'].astype('int64').to_numpy(),
        'tow': limited_tow.astype('int64'),
    })

    return submission_file_content

In [9]:
# Predict again for the submission features, this time with the weight-limit
# clamping applied, and show the resulting flight_id / tow frame.
my_submission_XGBoost_with_weight_limit = predict_with_weight_limitation(
    encoded_submission_features=encoded_submission_features
)
display(my_submission_XGBoost_with_weight_limit)

Unnamed: 0,flight_id,tow
0,248753821,67769
1,248753822,213182
2,248754498,220604
3,248757623,65778
4,248763603,64018
...,...,...
105954,258066302,68535
105955,258068609,158800
105956,258068876,74476
105957,258064675,60944


In [10]:
# Version number embedded in the submission file name.
# NOTE(review): the output recorded under this cell reports
# "my_submission_v8.csv", while this value writes v9 - the cell was likely
# edited after its last run; confirm the intended version and re-run.
current_version = 9

# Write the weight-limited predictions as this version's submission file.
submission_path = f"../../data/submissions/my_submission_v{current_version}.csv"
save_dataframe(path=submission_path, df=my_submission_XGBoost_with_weight_limit)

../../data/submissions/my_submission_v8.csv is saved!
