# In [None]:
import pandas as pd
import numpy as np
import random
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# Read the raw workout records. "workout_type" is not used as a feature, so it
# is discarded when the CSV happens to contain it (no error if it is absent).
df = pd.read_csv("final_extended_500_dataset.csv").drop(
    columns=["workout_type"], errors="ignore"
)

# Swap the age-category labels for the integer codes produced by factorize.
df["age_category"] = pd.factorize(df["age_category"])[0]

# Engineered features derived from the target speed:
#   distance (km)  = speed (km/h) * time (min) / 60
#   workload_score = speed * incline * time
df["distance (km)"] = df["target_speed (km/h)"].mul(df["time (min)"]).div(60)
df["workload_score"] = (
    df["target_speed (km/h)"].mul(df["incline_angle (°)"]).mul(df["time (min)"])
)

# Feature matrix: the raw target speed itself is left out, only the two
# quantities derived from it above are kept.
X = df.loc[
    :,
    [
        "incline_angle (°)",
        "time (min)",
        "vo2_max (%)",
        "avg_heart_rate (bpm)",
        "calories (kcal)",
        "age_category",
        "workload_score",
        "distance (km)",
    ],
]
# Regression target.
y = df["anaerobic_threshold (bpm)"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Configure the gradient-boosted regressor, then fit it on the training split.
model = xgb.XGBRegressor(
    objective='reg:squarederror',
    random_state=42,
    # Ensemble size and shape
    n_estimators=500,
    max_depth=6,
    learning_rate=0.025,
    # Row / column sampling per tree
    subsample=0.9,
    colsample_bytree=0.9,
    # Penalties on leaf weights
    reg_alpha=0.1,          # L1 regularization
    reg_lambda=1.0,         # L2 regularization
)
model.fit(X_train, y_train)

# Score the held-out rows: R², mean absolute error and root mean squared error.
y_pred = model.predict(X_test)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))

# Draw one held-out row at random (by index label) to use as a worked example.
random_idx = np.random.choice(X_test.index)
sample_input = X_test.loc[random_idx]
sample_actual = y_test.loc[random_idx]

# predict() expects a 2-D input, so the Series is turned into a one-row frame.
sample_predicted = model.predict(sample_input.to_frame().T)[0]

# Echo the feature values that were fed to the model, one per line.
print(
    "\nInput Features Used for Prediction:",
    "------------------------------------",
    sep="\n",
)
for feature_name, feature_value in sample_input.to_dict().items():
    print(f"{feature_name}: {feature_value}")

# 🧠 Realistic and smart speed suggestion — stop at first safe speed
def find_safe_speed(sample_input, target_hr, model, speed_range=(3.0, 12.0), step=0.1):
    """Return the slowest candidate speed whose prediction is safely below ``target_hr``.

    Candidate speeds run from ``speed_range[0]`` up to, and never beyond,
    ``speed_range[1]`` in increments of ``step``.  For every candidate the two
    speed-dependent features, ``"workload_score"`` and ``"distance (km)"``,
    are recomputed from the sample's incline and duration; all other features
    keep the values found in ``sample_input``.  The candidates are scored with
    a single ``model.predict`` call.

    Args:
        sample_input: pandas Series holding one row of model features.  It
            must contain ``"incline_angle (°)"`` and ``"time (min)"`` and is
            not modified.
        target_hr: Reference value in bpm.  A candidate is accepted when its
            prediction is more than 2 bpm below this value.
        model: Fitted estimator whose ``predict`` accepts a DataFrame and
            returns one value per row.
        speed_range: ``(lowest, highest)`` candidate speeds.
        step: Positive spacing between consecutive candidate speeds.

    Returns:
        The first (slowest) accepted speed rounded to two decimals, or
        ``speed_range[0]`` when no candidate is accepted.

    Raises:
        ValueError: If ``step`` is not positive.
    """
    if step <= 0:
        raise ValueError(f"step must be positive, got {step!r}")

    lowest, highest = speed_range

    # Count the grid points explicitly rather than using
    # np.arange(lowest, highest + step, step): that form produces a candidate
    # above `highest` whenever `step` does not divide the range evenly.  The
    # tiny epsilon keeps an exact multiple from being lost to float rounding.
    n_candidates = int(np.floor((highest - lowest) / step + 1e-9)) + 1
    if n_candidates <= 0:
        return float(lowest)  # empty range: nothing to try

    speeds = lowest + step * np.arange(n_candidates)

    # Build one row per candidate speed (copies of the sample row) so the model
    # is queried once instead of once per speed.
    base_row = sample_input.to_frame().T
    candidates = base_row.iloc[np.zeros(n_candidates, dtype=int)].reset_index(drop=True)

    incline = sample_input["incline_angle (°)"]
    minutes = sample_input["time (min)"]
    candidates["workload_score"] = speeds * incline * minutes
    candidates["distance (km)"] = (speeds * minutes) / 60

    predictions = np.asarray(model.predict(candidates))

    lower_limit_hr = target_hr - 2  # 2 bpm safety margin
    for speed, predicted in zip(speeds, predictions):
        if predicted < lower_limit_hr:
            return round(float(speed), 2)  # first (slowest) safe speed wins

    # Nothing qualified: fall back to the bottom of the requested range rather
    # than a hard-coded constant that may lie outside it.
    return float(lowest)

# Stand-in for a live sensor reading: an integer heart rate drawn from
# 130-190 bpm, both ends inclusive.
current_hr = random.randint(130, 190)

# Start from the "all clear" outcome and override it only when the simulated
# heart rate is above the sampled row's actual anaerobic threshold.
# NOTE(review): find_safe_speed compares the model's output (a predicted
# threshold) with current_hr - 2; confirm this is the intended safety rule.
recommended_speed = "N/A "
safety_msg = "Heart rate is within safe aerobic limits. Workout can continue as normal."
if current_hr > sample_actual:
    recommended_speed = find_safe_speed(sample_input, current_hr, model)
    safety_msg = "Warning: Anaerobic threshold exceeded. It is advised to reduce treadmill speed or incline."

# Report for the sampled row.
print("\nModel Prediction Output", "------------------------", sep="\n")
print(
    f"Actual Anaerobic Threshold: {round(sample_actual, 2)} bpm",
    f"Predicted Anaerobic Threshold: {round(sample_predicted, 2)} bpm",
    f"Current Heart Rate: {current_hr} bpm",
    f"Recommended Treadmill Speed: {recommended_speed}",
    f"\n{safety_msg}",
    sep="\n",
)

# Aggregate quality of the model on the whole test split.
print(
    "\nModel Performance Metrics:",
    f"R² Score: {round(r2, 4)}",
    f"Mean Absolute Error (MAE): {round(mae, 2)} bpm",
    f"Root Mean Squared Error (RMSE): {round(rmse, 2)} bpm",
    sep="\n",
)

# Scatter of predicted against actual thresholds for the test split; points on
# the dashed red diagonal are perfect predictions.
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(y_test, y_pred, alpha=0.6, edgecolors='black')

# The diagonal spans the observed range of actual values on both axes.
diagonal_ends = [min(y_test), max(y_test)]
ax.plot(diagonal_ends, diagonal_ends, 'r--')

ax.set_title("Actual vs Predicted Anaerobic Threshold")
ax.set_xlabel("Actual Anaerobic Threshold (bpm)")
ax.set_ylabel("Predicted Anaerobic Threshold (bpm)")
ax.grid(True)
fig.tight_layout()
plt.show()