In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

# Read Sheet1 of the workbook: row 0 supplies the header and at most 6109
# data rows are loaded. The leading column is discarded immediately via a
# positional slice.
# NOTE(review): the first column is presumably an id / non-feature field — confirm.
df = pd.read_excel(
    "Updated_BMS_Data11.xlsm",
    sheet_name="Sheet1",
    engine="openpyxl",
    header=0,
    skiprows=0,
    nrows=6109,
).iloc[:, 1:]

# "tbsagrft" is the regression target; every remaining column is a feature.
y = df["tbsagrft"]
X = df.drop(columns=["tbsagrft"])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the min-max scaler on the training features only, then apply that
# same fitted scaler to both the training and the testing partition.
scaler = MinMaxScaler().fit(X_train)
X_train_normalized, X_test_normalized = (
    scaler.transform(part) for part in (X_train, X_test)
)

# Ordinary linear regression fitted on the scaled training features.
model_lr = LinearRegression().fit(X_train_normalized, y_train)

# Predict on the held-out split and report each metric in turn.
y_pred_lr = model_lr.predict(X_test_normalized)
for caption, metric in (
    ("Linear Regression MSE:", mean_squared_error),
    ("Linear Regression R²:", r2_score),
):
    print(caption, metric(y_test, y_pred_lr))

# Single regression tree (seeded) fitted on the scaled training features.
model_tree = DecisionTreeRegressor(random_state=42).fit(X_train_normalized, y_train)

# Predict on the held-out split and report each metric in turn; the leading
# newline in the first caption separates this report from earlier output.
y_pred_tree = model_tree.predict(X_test_normalized)
for caption, metric in (
    ("\nDecision Tree MSE:", mean_squared_error),
    ("Decision Tree R²:", r2_score),
):
    print(caption, metric(y_test, y_pred_tree))

# XGBoost regressor, trained as a comparison model on the same scaled features.
xgb_params = {
    "objective": "reg:squarederror",
    "booster": "gbtree",       # tree-based booster
    "n_estimators": 100,       # number of trees
    "learning_rate": 0.1,      # shrinkage to prevent overfitting
    "max_depth": 6,            # controls tree complexity
    "random_state": 42,        # fixed seed
}
model_xgb = XGBRegressor(**xgb_params).fit(X_train_normalized, y_train)

# Predict on the held-out split and report each metric in turn; the leading
# newline in the first caption separates this report from earlier output.
y_pred_xgb = model_xgb.predict(X_test_normalized)
for caption, metric in (
    ("\nXGBoost Model MSE:", mean_squared_error),
    ("XGBoost Model R²:", r2_score),
):
    print(caption, metric(y_test, y_pred_xgb))


Linear Regression MSE: 96.62724955365415
Linear Regression R²: 0.7287649694686119

Decision Tree MSE: 145.26190079882429
Decision Tree R²: 0.5922463251286182

XGBoost Model MSE: 68.9565914588794
XGBoost Model R²: 0.8064371771308242
