In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Feature-engineering script: loads the raw flight CSV, derives calendar,
# route and per-phase ratio features, label-encodes the categorical columns,
# and writes the result to "processed_data.csv".

# Load dataset
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
file_path = "C:/Users/yamin/Downloads/UTA-Student-Competition-2025-data.csv"
df = pd.read_csv(file_path)

# Drop unnecessary columns
df.drop(columns=["SERVICE_SUFFIX", "FLIGHT_NUMBER"], inplace=True)

# Convert date column to datetime format
df["SCHEDULED_DEPARTURE_DATE"] = pd.to_datetime(df["SCHEDULED_DEPARTURE_DATE"])

# Extract date-related features
df["DEPARTURE_DAY"] = df["SCHEDULED_DEPARTURE_DATE"].dt.dayofweek  # 0 = Monday, 6 = Sunday
df["DEPARTURE_MONTH"] = df["SCHEDULED_DEPARTURE_DATE"].dt.month

df.drop(columns=["SCHEDULED_DEPARTURE_DATE"], inplace=True)

# Create route feature
df["ROUTE"] = df["DEPARTURE_AIRPORT"] + "_" + df["ARRIVAL_AIRPORT"]
df.drop(columns=["DEPARTURE_AIRPORT", "ARRIVAL_AIRPORT"], inplace=True)

# Encode categorical features
# Each column gets its own LabelEncoder: fit_transform refits the instance, so
# sharing one encoder would discard the AIRCRAFT_TYPE classes once ROUTE is
# fitted and make those codes impossible to inverse-transform later.
# `encoder` is left bound to the ROUTE encoder after the loop, as before.
encoders = {}
for column in ("AIRCRAFT_TYPE", "ROUTE"):
    encoder = LabelEncoder()
    df[column] = encoder.fit_transform(df[column])
    encoders[column] = encoder

# Create fuel burn and CO2 efficiency features
# Maps each total column to {new ratio column: per-phase numerator column}.
# Insertion order determines the column order in the output file.
ratio_features = {
    "ESTIMATED_FUEL_BURN_TOTAL_TONNES": {
        "FUEL_BURN_TAXI_RATIO": "ESTIMATED_FUEL_BURN_TAXI_OUT_TONNES",
        "FUEL_BURN_TAKEOFF_RATIO": "ESTIMATED_FUEL_BURN_TAKEOFF_TONNES",
        "FUEL_BURN_CRUISE_RATIO": "ESTIMATED_FUEL_BURN_CRUISE_TONNES",
    },
    "ESTIMATED_CO2_TOTAL_TONNES": {
        "CO2_TAXI_RATIO": "ESTIMATED_CO2_TAXI_OUT_TONNES",
        "CO2_TAKEOFF_RATIO": "ESTIMATED_CO2_TAKEOFF_TONNES",
        "CO2_CRUISE_RATIO": "ESTIMATED_CO2_CRUISE_TONNES",
    },
}
for total_column, phase_columns in ratio_features.items():
    total = df[total_column]
    # A zero total would give +/-inf for a non-zero numerator, and fillna()
    # below does not replace infinities. Masking zeros to NaN makes every
    # undefined ratio NaN, so it is zero-filled like the 0/0 case.
    safe_total = total.mask(total == 0)
    for ratio_column, phase_column in phase_columns.items():
        df[ratio_column] = df[phase_column] / safe_total

# Handle potential NaN values (including ratios with a zero or missing total)
df.fillna(0, inplace=True)

# Save processed data
df.to_csv("processed_data.csv", index=False)

print("Feature engineering complete. Processed dataset saved as 'processed_data.csv'.")
