In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from autogluon.core.metrics import make_scorer
from sklearn.metrics import fbeta_score
from autogluon.tabular import TabularPredictor
import shap
import os
import torch
import numpy as np
import matplotlib.pyplot as plt
from functools import partial
# Report which compute devices PyTorch can see: one line per CUDA device,
# or the single word 'CPU' when CUDA is unavailable.
if not torch.cuda.is_available():
    print('CPU')
else:
    num_gpus = torch.cuda.device_count()
    print(f"Number of GPUs: {num_gpus}")
    for gpu_index in range(num_gpus):
        print(f"GPU {gpu_index}: {torch.cuda.get_device_name(gpu_index)}")

import pprint
shap.initjs()
import umap
import plotly.express as px

In [None]:
# Name of the trained model used for every prediction / explanation below.
specific_model_name = 'WeightedEnsemble_L2'

# Metric tag interpolated into the predictor directory and figure file names.
# Other values that were toggled here: 'balanced_accuracy', 'f1'.
metric = 'f2'

# 'Full' keeps every row for training; any other value is parsed with int()
# and used as the random_state of the train/hold-out split.
# Other value that was toggled here: 'Full'.
train_scale = '42'

# Where the test frame comes from: 'manual' (hand-typed rows) or 'csv' (file).
# Other value that was toggled here: 'manual'.
data_source = 'csv'

In [None]:
class AutogluonWrapper:
    """Adapter that exposes a predictor as a plain ``f(X) -> probabilities`` callable.

    The wrapped object only needs a ``predict_proba(X, model=..., as_multiclass=...)``
    method. Inputs that are not already DataFrames are converted to one whose
    columns are ``feature_names``, so array-based callers (for example the SHAP
    kernel explainer this notebook builds) can call the predictor directly.

    Attributes:
        ag_model: The wrapped predictor.
        feature_names: Column labels applied to array-like inputs.
        model_name: Name of the model forwarded as ``model=`` on every call.
    """

    def __init__(self, predictor, feature_names, model_name):
        self.ag_model = predictor
        self.feature_names = feature_names
        self.model_name = model_name

    def predict_binary_prob(self, X):
        """Return the wrapped predictor's probabilities for ``X``.

        Args:
            X: A ``pd.DataFrame`` (passed through untouched), a ``pd.Series`` or
                1-D ``np.ndarray`` (treated as a single row), or any 2-D
                array-like accepted by ``pd.DataFrame``.

        Returns:
            Whatever ``predict_proba(..., as_multiclass=False)`` returns for the
            configured model (presumably one positive-class probability per
            row -- confirm against the AutoGluon documentation).
        """
        if isinstance(X, pd.Series):
            X = X.to_numpy()
        # A single row may arrive as a flat vector; without this reshape
        # pd.DataFrame would read it as one column and reject the column labels.
        if isinstance(X, np.ndarray) and X.ndim == 1:
            X = X.reshape(1, -1)
        if not isinstance(X, pd.DataFrame):
            X = pd.DataFrame(X, columns=self.feature_names)
        return self.ag_model.predict_proba(X, model=self.model_name, as_multiclass=False)

In [None]:
# --- Paths -------------------------------------------------------------------
predictor_path = f'../AutoGluon Models/AutoGluon_{metric}_{train_scale}'
original_set_path = '../Datasets/tibial_slope_2.csv'
# NOTE(review): same file as original_set_path, so with data_source == 'csv' the
# test frame contains the rows the training frame was drawn from -- confirm intended.
test_set_path = '../Datasets/tibial_slope_2.csv'
expected_columns = ['CTS', 'MTS', 'LTS', 'MTD', 'Sex']


def _load_slope_csv(csv_path):
    """Read a CSV, drop its 'Subject #' column and encode Sex as F -> 0, M -> 1."""
    frame = pd.read_csv(csv_path).drop(columns=['Subject #'])
    frame['Sex'] = frame['Sex'].map({'F': 0, 'M': 1})
    return frame


# --- Training data -----------------------------------------------------------
original_df = _load_slope_csv(original_set_path)
if train_scale == 'Full':
    train_df = original_df
else:
    # train_scale doubles as the split seed; the 20% hold-out is bound to `_`.
    train_df, _ = train_test_split(
        original_df,
        test_size=0.2,
        random_state=int(train_scale),
        stratify=original_df['Injury'],
    )

# The last column is taken as the label; every column before it is a feature.
X_train = train_df.iloc[:, :-1]
y_train = train_df.iloc[:, -1]
med = X_train.median()

# --- Test data ---------------------------------------------------------------
if data_source == 'manual':
    test_data = [[2, 7, 12, 0, 0, 1], [5, 10, 10, 1.98, 0, 0]]
    test_df = pd.DataFrame(test_data, columns=['CTS', 'MTS', 'LTS', 'MTD', 'Sex', 'Injury'])
elif data_source == 'csv':
    test_df = _load_slope_csv(test_set_path)
else:
    # Fail here instead of hitting an undefined (or stale) test_df further down.
    raise ValueError(f"Unknown data_source {data_source!r}; expected 'manual' or 'csv'")
X_test = test_df.iloc[:, :-1]
y_test = test_df.iloc[:, -1]

# Load the persisted predictor. Fail fast when the directory is missing: otherwise
# `predictor` is never bound here and the code below either raises NameError or
# silently reuses a predictor left over from an earlier run of the kernel.
if not os.path.exists(predictor_path):
    raise FileNotFoundError(f'AutoGluon predictor directory not found: {predictor_path}')

print(f'loade path: {predictor_path}')
predictor = TabularPredictor.load(predictor_path)
# NOTE(review): dry_run=False makes this a real deletion of every model other than
# the one kept -- confirm that pruning the saved predictor on every run is intended.
predictor.delete_models(models_to_keep=specific_model_name, dry_run=False)
predictor.save_space()

# Collect the 'model_weights' entry of each child of the selected model, when present.
model_info = predictor._trainer.get_model_info(specific_model_name)
model_weights = {
    key: info['model_weights']
    for key, info in model_info['children_info'].items()
    if 'model_weights' in info
}
print(model_weights)

from sklearn.metrics import confusion_matrix


def _show_umap_projection(features, injury_labels):
    """Fit a 2-D UMAP on `features` and show a scatter plot coloured by `injury_labels`.

    Args:
        features: Table passed to ``umap.UMAP.fit_transform``.
        injury_labels: Series aligned row-by-row with `features`; its index is
            reset so it lines up with the positional embedding rows.

    Returns:
        DataFrame with columns 'UMAP1', 'UMAP2' and 'Injury'.
    """
    reducer = umap.UMAP(n_components=2, random_state=42)
    embedding = pd.DataFrame(reducer.fit_transform(features), columns=['UMAP1', 'UMAP2'])
    embedding['Injury'] = injury_labels.reset_index(drop=True)
    # NOTE(review): the input here is predicted probabilities, not learned
    # embeddings, and every plot shares this title -- consider distinct titles.
    fig = px.scatter(embedding, x='UMAP1', y='UMAP2', color='Injury',
                     title='UMAP Projection of AutoGluon Embeddings', width=1000, height=600)
    fig.show()
    return embedding


print("positive class:", predictor.positive_class)
predictions = predictor.predict(test_df, model=specific_model_name).reset_index(drop=True)
predicted_probs = predictor.predict_proba(test_df, model=specific_model_name).reset_index(drop=True)
umap_embeddings = _show_umap_projection(predicted_probs, test_df['Injury'])

# train
predicted_probs_train = predictor.predict_proba(train_df, model=specific_model_name).reset_index(drop=True)
umap_embeddings = _show_umap_projection(predicted_probs_train, train_df['Injury'])

# test (held-out rows)
# Derive the hold-out from index labels instead of reading `_`: `_` is only bound
# by the split when train_scale != 'Full', and is otherwise undefined or stale.
# Rows come back in file order rather than in the split's shuffled order.
holdout_df = original_df.drop(index=train_df.index)
if holdout_df.empty:
    print("No held-out rows (train_scale == 'Full'); skipping held-out projection and confusion matrix.")
else:
    predicted_probs_ = predictor.predict_proba(holdout_df, model=specific_model_name).reset_index(drop=True)
    umap_embeddings = _show_umap_projection(predicted_probs_, holdout_df['Injury'])

    # Threshold the second probability column at 0.5 to get hard 0/1 predictions.
    y_pred = (predicted_probs_.iloc[:, 1] >= 0.5).astype(int)
    cm_array = confusion_matrix(holdout_df['Injury'].reset_index(drop=True), y_pred)
    cm_df = pd.DataFrame(
        cm_array,
        index=['Actual Lower Risk (0)', 'Actual Higher Risk (1)'],
        columns=['Predicted Lower Risk (0)', 'Predicted Higher Risk (1)']
    )
    print(cm_df)

# raw
# Project the input columns only. The 'Injury' label is dropped before fitting:
# it is the variable used to colour the plot, so leaving it in the UMAP input
# would let the label itself shape the clusters being inspected.
raw = original_df.drop(columns=['Injury'])
reducer = umap.UMAP(n_components=2, random_state=42)
umap_embeddings = reducer.fit_transform(raw)
umap_embeddings = pd.DataFrame(umap_embeddings, columns=['UMAP1', 'UMAP2'])
umap_embeddings['Injury'] = original_df['Injury'].reset_index(drop=True)
# NOTE(review): the title mentions AutoGluon embeddings, but this plot is built
# from the raw table -- consider a distinct title.
fig = px.scatter(umap_embeddings, x='UMAP1', y='UMAP2', color='Injury', title='UMAP Projection of AutoGluon Embeddings', width=1000, height=600)
fig.show()

# Explain each test row with SHAP's kernel explainer, using the training
# features as the background data set, and save one waterfall plot per row.
ag_wrapper = AutogluonWrapper(predictor, feature_names=X_train.columns, model_name=specific_model_name)
explainer = shap.KernelExplainer(ag_wrapper.predict_binary_prob, X_train)

# Make sure the output directory exists so savefig does not fail on a fresh checkout.
figure_dir = '../AutoGluon Figures'
os.makedirs(figure_dir, exist_ok=True)

feature_labels = X_test.columns  # loop-invariant, hoisted out of the loop

print('==========================================================')
for i in range(len(test_df)):
    # Double brackets keep the row 2-D (one row) for the explainer.
    shap_values_single = explainer.shap_values(X_test.iloc[[i]])
    explanation = shap.Explanation(
        values=shap_values_single[0],
        base_values=explainer.expected_value,
        data=X_test.iloc[i].to_numpy(),
        feature_names=feature_labels
    )
    print(f'confidence score: {predicted_probs.iloc[i, 1]:.2f}')
    # Create the figure first so the handle is available for savefig/close.
    fig, ax = plt.subplots(figsize=(10, 6))
    shap.plots.waterfall(explanation)
    fig.savefig(f'{figure_dir}/waterfall_{metric}_{train_scale}_subject_{i}.png',
                dpi=600,
                bbox_inches='tight')
    # Release the figure so many rows do not accumulate open figures.
    plt.close(fig)

# Score the selected model on the test frame and print the resulting metrics.
eval_metrics = predictor.evaluate(data=test_df, model=specific_model_name)
print(eval_metrics)