In [5]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pingouin as pg

# Reload the dataset (can be skipped when it is already in memory)
images, onsd_measurements = load_data('Meta_pic_3.xlsx', preprocessed_dir)

# Divide pixel values by 255 and append a trailing single-channel axis
images = (images / 255.0).reshape(*images.shape, 1)

# Hold out 20% of the samples for testing, using a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    images,
    onsd_measurements,
    test_size=0.2,
    random_state=42,
)

# Collapse every image into a single feature row for XGBoost
X_train_flat, X_test_flat = (
    split.reshape(len(split), -1) for split in (X_train, X_test)
)

# XGBoost hyperparameters, chosen from the Rust results
params = dict(
    max_depth=10,                  # higher complexity
    learning_rate=0.1,             # adjusted learning rate
    n_estimators=100,              # number of boosting rounds
    subsample=1.0,                 # use all samples
    colsample_bytree=1.0,          # use all features
    objective='reg:squarederror',
    random_state=42,
)

# Build the regressor and fit it on the flattened training images
# (fit() returns the estimator itself, so the two steps can be chained)
xgb_reg = xgb.XGBRegressor(**params).fit(X_train_flat, y_train)

# Score the fitted model on the held-out split
y_pred_xgb = xgb_reg.predict(X_test_flat)

# Mean absolute error, and root mean squared error derived from the MSE
mae_xgb = mean_absolute_error(y_test, y_pred_xgb)
mse_xgb = mean_squared_error(y_test, y_pred_xgb)
rmse_xgb = np.sqrt(mse_xgb)

print(f"XGBoost MAE: {mae_xgb}, RMSE: {rmse_xgb}")

# Calculate ICC between the measured and the predicted ONSD.
# pingouin expects long-format data: one row per (subject, rater) pair, with
# separate columns naming the subject, the rater and the rating itself. Here
# every test sample is a subject and the two "raters" are the reference
# measurement and the model prediction.
actual_onsd = np.asarray(y_test, dtype=float).ravel()
predicted_onsd = np.asarray(y_pred_xgb, dtype=float).ravel()
n_subjects = actual_onsd.size

icc_long_df = pd.DataFrame({
    # Subject ids repeat once per rater: 0..n-1 (actual), then 0..n-1 (predicted)
    'subject': np.tile(np.arange(n_subjects), 2),
    'rater': np.repeat(['actual', 'predicted'], n_subjects),
    'onsd': np.concatenate([actual_onsd, predicted_onsd]),
})

icc = pg.intraclass_corr(
    data=icc_long_df,
    targets='subject',
    raters='rater',
    ratings='onsd',
).round(3)
print(f"Intraclass Correlation Coefficient (ICC): \n{icc}")

# Bland-Altman analysis for the XGBoost model: per-sample difference
# (actual - predicted) against the per-sample mean of the two values
bland_altman_df_xgb = pd.DataFrame({
    'Actual ONSD': y_test,
    'Predicted ONSD': y_pred_xgb,
}).assign(
    Difference=lambda df: df['Actual ONSD'] - df['Predicted ONSD'],
    Average=lambda df: (df['Actual ONSD'] + df['Predicted ONSD']) / 2,
)

# Bias is the mean difference; the limits of agreement sit 1.96 standard
# deviations of the differences on either side of it
differences = bland_altman_df_xgb['Difference']
bias, std_dev = differences.mean(), differences.std()
loa_half_width = 1.96 * std_dev
upper_loa = bias + loa_half_width
lower_loa = bias - loa_half_width

# Draw the scatter plus the three dashed reference lines
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(bland_altman_df_xgb['Average'], differences, s=30)
for level, colour, tag in (
    (bias, 'black', 'Bias'),
    (upper_loa, 'gray', 'Upper LOA'),
    (lower_loa, 'gray', 'Lower LOA'),
):
    ax.axhline(level, color=colour, linestyle='--', label=f'{tag}: {level:.2f}')
ax.set_xlabel('Average of Actual and Predicted ONSD')
ax.set_ylabel('Difference between Actual and Predicted ONSD')
ax.set_title('Bland-Altman Plot for XGBoost Model')
ax.legend()
plt.show()


ModuleNotFoundError: No module named 'xgboost'