In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Load the dataset, then separate the regression target from the predictors.
filename = "airline_food_data_last.csv"
df = pd.read_csv(filename)
y = df['total_food_demand']
X = df.drop(columns=['flight_id', 'total_food_demand'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print("--- Hyperparameter Tuning (Random Forest) ---")

# Search space: 2 x 3 x 2 = 12 candidate configurations.
param_grid = dict(
    n_estimators=[50, 100],
    max_depth=[10, 20, None],
    min_samples_split=[2, 5],
)

rf = RandomForestRegressor(random_state=42)

# 3-fold cross-validated grid search scored by negated MSE; n_jobs=-1 runs fits in parallel.
grid_search = GridSearchCV(
    rf,
    param_grid,
    scoring='neg_mean_squared_error',
    cv=3,
    n_jobs=-1,
)

print("Searching for best parameters...")
grid_search.fit(X_train, y_train)

# Keep the winning estimator and report the configuration that produced it.
best_rf_model = grid_search.best_estimator_
print(f"Best Parameters Found: {grid_search.best_params_}")

# Score the tuned model on the held-out rows (RMSE = square root of the MSE).
y_pred_tuned = best_rf_model.predict(X_test)
rmse_tuned = np.sqrt(mean_squared_error(y_test, y_pred_tuned))
print(f"Tuned Random Forest RMSE: {rmse_tuned:.4f}")

--- Hyperparameter Tuning (Random Forest) ---
Searching for best parameters...
Best Parameters Found: {'max_depth': 20, 'min_samples_split': 2, 'n_estimators': 100}
Tuned Random Forest RMSE: 5.9477


In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

# Load the dataset, then separate the regression target from the predictors.
filename = "airline_food_data_last.csv"
df = pd.read_csv(filename)
y = df['total_food_demand']
X = df.drop(columns=['flight_id', 'total_food_demand'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Baseline 1: ordinary least-squares linear regression.
lr = LinearRegression()
lr.fit(X_train, y_train)

# Baseline 2: random forest with 100 trees and a fixed seed.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Predictions of both models on the held-out rows.
y_pred_lr = lr.predict(X_test)
y_pred_rf = rf.predict(X_test)

print("\n--- Business Cost Analysis ---")

def calculate_business_cost(y_true, y_pred, waste_cost=5, shortage_cost=20):
    """
    Calculates the financial impact of prediction errors.

    - Overprediction (Waste): `waste_cost` per unit (default $5)
    - Underprediction (Dissatisfaction): `shortage_cost` per unit (default $20)

    Parameters
    ----------
    y_true : array-like
        Actual demand values.
    y_pred : array-like
        Predicted demand values; must have the same shape as `y_true`.
    waste_cost : float, optional
        Cost charged for each unit predicted above the actual value.
    shortage_cost : float, optional
        Cost charged for each unit predicted below the actual value.

    Returns
    -------
    float
        Total cost summed over all elements (0.0 for empty input).

    Raises
    ------
    ValueError
        If `y_true` and `y_pred` have different shapes. Pairing them with
        `zip` would silently drop the unmatched tail and under-report the cost.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {y_true.shape} and {y_pred.shape}"
        )

    diff = y_pred - y_true

    # Split the error into its two one-sided parts. NaN differences compare as
    # neither positive nor negative, so they contribute nothing to either part.
    over = np.where(diff > 0, diff, 0.0)
    under = np.where(diff < 0, -diff, 0.0)

    return float(waste_cost * over.sum() + shortage_cost * under.sum())

# Price the held-out prediction errors of each model with the asymmetric cost function.
cost_lr, cost_rf = (
    calculate_business_cost(y_test, predictions)
    for predictions in (y_pred_lr, y_pred_rf)
)

# Positive savings mean the random forest's errors cost less than linear regression's.
savings = cost_lr - cost_rf

print(f"Linear Regression Cost: ${cost_lr:,.2f}")
print(f"Random Forest Cost:     ${cost_rf:,.2f}")
print(f"Potential Savings with RF: ${savings:,.2f}")


--- Business Cost Analysis ---
Linear Regression Cost: $60,549.05
Random Forest Cost:     $58,713.65
Potential Savings with RF: $1,835.40


In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset, then separate the regression target from the predictors.
filename = "airline_food_data_last.csv"
df = pd.read_csv(filename)
y = df['total_food_demand']
X = df.drop(columns=['flight_id', 'total_food_demand'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print("\n--- Third Model (Gradient Boosting) ---")

# 200 boosting stages of depth-3 trees with a 0.1 learning rate, seeded for repeatability.
gb_model = GradientBoostingRegressor(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
)
gb_model.fit(X_train, y_train)

y_pred_gb = gb_model.predict(X_test)

# Held-out metrics: R^2 and RMSE (square root of the MSE).
gb_r2 = r2_score(y_test, y_pred_gb)
gb_rmse = np.sqrt(mean_squared_error(y_test, y_pred_gb))

print(f"Gradient Boosting RMSE: {gb_rmse:.4f}")
print(f"Gradient Boosting R2 Score: {gb_r2:.4f}")

print("\n--- Final Model Comparison ---")


--- Third Model (Gradient Boosting) ---
Gradient Boosting RMSE: 5.6439
Gradient Boosting R2 Score: 0.9946

--- Final Model Comparison ---
