# Load Packages

In [31]:

from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures

# Load Data

In [32]:
# The input is tab-delimited even though the file carries a .csv extension.
df = pd.read_csv(
    "../../data/processed/training_winsorized.csv",
    sep="\t",
)

# Create engineered features

In [33]:
# Swap zero denominators for NaN so the ratios below come out as NaN rather
# than dividing by zero.
passengers_nonzero = df["FlownPassengers"].replace(0, np.nan)
bags_nonzero = df["BagsCount"].replace(0, np.nan)

df["FuelPerPassenger"] = df["ActualTotalFuel"].div(passengers_nonzero)
df["AvgBagWeight"] = df["FlightBagsWeight"].div(bags_nonzero)

# NOTE(review): 80 looks like an assumed average weight per passenger, in the
# same unit as FlightBagsWeight — confirm and consider naming the constant.
df["TotalPayloadEstimate"] = df["FlightBagsWeight"] + 80 * df["FlownPassengers"]

# Fill missing values and add polynomial features

In [34]:
# Replace every NaN in the frame with 0 (this includes the NaN ratios created
# for rows with a zero denominator) — presumably so that PolynomialFeatures
# receives no missing values.
#
# The result is stored under a new name instead of rebinding `df`. The earlier
# version overwrote `df` with a frame that no longer contained
# `selected_features`, so running this cell (or the feature-engineering cell)
# a second time raised KeyError. `df` is now left untouched and the cell can be
# re-run safely.
df_filled = df.fillna(0)

selected_features = [
    "ActualFlightTime",
    "ActualTotalFuel",
    "FuelPerPassenger",
    "AvgBagWeight",
    "TotalPayloadEstimate",
]

# degree=2 without a bias column: the output holds the selected columns
# themselves, their squares and every pairwise product.
poly = PolynomialFeatures(degree=2, include_bias=False)
poly_features = poly.fit_transform(df_filled[selected_features])
poly_feature_names = poly.get_feature_names_out(selected_features)
df_poly = pd.DataFrame(poly_features, columns=poly_feature_names)

# The degree-1 terms in df_poly already carry the selected columns, so they are
# dropped from the base frame to avoid duplicates. The index is reset so the
# column-wise concat lines up row by row with df_poly's fresh default index.
df_base = df_filled.drop(columns=selected_features).reset_index(drop=True)
df_final = pd.concat([df_base, df_poly], axis=1)

# Save file

In [35]:

# Make sure the target directory exists before writing; to_csv does not create
# missing parent directories and would otherwise fail when
# "extended_features/" has not been created yet.
output_path = Path("../../data/processed/extended_features/training_winsorized.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)

# Written tab-separated and without the index column.
df_final.to_csv(output_path, index=False, sep="\t")