In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the pre-transformed dataset and preview its first rows.
file_path = 'transformed_data.csv'
data = pd.read_csv(file_path)

print(data.head(5))

# Separate features from the regression target *by name*.
# Selecting features positionally (`iloc[:, :-1]`) only excludes the target
# when it happens to be the last column; otherwise the target would leak into
# X and an unrelated feature would be dropped instead.
target_column = 'GrossWeight'
X = data.drop(columns=[target_column])
y = data[target_column]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train , X_test , Y_train , Y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42
)

In [None]:
import joblib
import numpy as np
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Hyperparameters for the XGBoost regressor, gathered in one place so they
# are easy to read and tweak.
xgb_params = {
    "n_estimators": 300,
    "learning_rate": 0.05,
    "max_depth": 8,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "random_state": 42,  # fixed seed for reproducible training
}

# Build the regressor and fit it on the training split.
xgb_model = XGBRegressor(**xgb_params)
xgb_model.fit(X_train, Y_train)

# Predict on the held-out split.
y_pred_xgb = xgb_model.predict(X_test)

# Regression metrics comparing the held-out targets with the predictions.
mse_xgb = mean_squared_error(Y_test, y_pred_xgb)
r2_xgb = r2_score(Y_test, y_pred_xgb)
mae_xgb = mean_absolute_error(Y_test, y_pred_xgb)
rmse_xgb = np.sqrt(mse_xgb)  # RMSE is derived from the MSE computed above

# Report the metrics, one per line, in a fixed order.
print("\nXGBoost Model Performance:")
for metric_label, metric_value in (
    ("Mean Squared Error (MSE):", mse_xgb),
    ("R-squared (R2) Score:", r2_xgb),
    ("Mean Absolute Error (MAE):", mae_xgb),
    ("Root Mean Squared Error (RMSE):", rmse_xgb),
):
    print(metric_label, metric_value)

# Output locations for the fitted model and the prediction table.
model_filename = "xgb_model.joblib"
results_filename = "xgb_results.csv"

# Persist the fitted model to disk.
joblib.dump(xgb_model, model_filename)
print(f"XGBoost model saved as {model_filename}")

# Pair each held-out target value with its prediction.
results_columns = {
    'Actual GrossWeight': Y_test,
    'Predicted GrossWeight': y_pred_xgb,
}
results_df = pd.DataFrame(results_columns)

# Write the comparison table without the row index column.
results_df.to_csv(results_filename, index=False)
print(f"Results saved to {results_filename}")