In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.dummy import DummyRegressor  # Import DummyRegressor
from xgboost import XGBRegressor

# Read Sheet1 of the workbook (header in the first row, at most 6109 data rows)
# and immediately discard the leading column by position.
df = pd.read_excel(
    "Updated_BMS_Data11.xlsm",
    sheet_name="Sheet1",
    engine="openpyxl",
    header=0,
    skiprows=0,
    nrows=6109,
).iloc[:, 1:]

# "tbsagrft" is the regression target; every other remaining column is a feature.
y = df["tbsagrft"]
X = df.drop(columns=["tbsagrft"])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the min-max scaler on the training rows only, so the held-out rows
# do not influence the learned feature ranges.
scaler = MinMaxScaler().fit(X_train)

# Apply the training-derived scaling to both partitions (train first, then test).
X_train_normalized, X_test_normalized = map(scaler.transform, (X_train, X_test))

# Hyperparameters for the gradient-boosted tree regressor.
xgb_params = {
    "objective": "reg:squarederror",  # squared-error regression objective
    "booster": "gbtree",              # tree-based booster
    "n_estimators": 100,              # number of trees
    "learning_rate": 0.1,             # shrinkage to limit overfitting
    "max_depth": 6,                   # controls tree complexity
    "random_state": 42,               # fixed seed
}

# Build the model and fit it on the scaled training features.
model_xgb = XGBRegressor(**xgb_params)
model_xgb.fit(X_train_normalized, y_train)

def report_metrics(title, y_true, y_pred):
    """Print a heading followed by the MSE and R² of one set of predictions.

    Parameters
    ----------
    title : str
        Heading line printed verbatim before the metrics.
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_true``.
    """
    print(title)
    print("MSE:", mean_squared_error(y_true, y_pred))
    print("R² Score:", r2_score(y_true, y_pred))


# Predict and evaluate for XGBoost
y_pred_xgb = model_xgb.predict(X_test_normalized)
report_metrics("XGBoost Model:", y_test, y_pred_xgb)

# Constant-prediction baselines. A constant predictor can never score above
# R² = 0 (R² compares squared error against predicting the mean of y_true),
# so these numbers are the floor the real model has to clear.

# Baseline 1: always predict the median of the training targets.
dummy_model_median = DummyRegressor(strategy="median")
dummy_model_median.fit(X_train_normalized, y_train)
y_pred_dummy_median = dummy_model_median.predict(X_test_normalized)
report_metrics("\nDummy Model (Median Strategy):", y_test, y_pred_dummy_median)

# Baseline 2: always predict the 75th percentile of the training targets.
dummy_model_quantile = DummyRegressor(strategy="quantile", quantile=0.75)
dummy_model_quantile.fit(X_train_normalized, y_train)
y_pred_dummy_quantile = dummy_model_quantile.predict(X_test_normalized)
report_metrics("\nDummy Model (Quantile Strategy):", y_test, y_pred_dummy_quantile)


XGBoost Model:
MSE: 68.9565914588794
R² Score: 0.8064371771308242

Dummy Model (Median Strategy):
MSE: 421.10106789112626
R² Score: -0.18204089978602123

Dummy Model (Quantile Strategy):
MSE: 391.61514316638323
R² Score: -0.09927319471416074
