In [1]:
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split, StratifiedKFold
from tabpfn import TabPFNClassifier
import pandas as pd
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from scikeras.wrappers import KerasClassifier
import numpy as np
from sklearn.metrics import balanced_accuracy_score, classification_report
import lightgbm as lgb
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import label_binarize
import gc
import torch
from tensorflow.keras import backend as K
import  statsmodels.api as sm
from sklearn.decomposition import PCA
import subprocess
import warnings
warnings.filterwarnings('ignore')

In [2]:
def create_mlp_model(input_shape, num_classes):
    """
    Build and compile a fully connected softmax classifier.

    The network stacks four ReLU hidden layers (1024, 512, 256 and 128 units),
    each followed by a Dropout(0.3) layer, and ends in a softmax output layer.

    Parameters:
    input_shape (int): Number of input features per sample.
    num_classes (int): Number of units in the softmax output layer.

    Returns:
    Sequential: Model compiled with the Adam optimizer, categorical
    cross-entropy loss and the accuracy metric.
    """
    hidden_units = (1024, 512, 256, 128)

    model = Sequential()
    for depth, units in enumerate(hidden_units):
        if depth == 0:
            # Only the first layer declares the input dimensionality.
            model.add(layers.Dense(units, activation="relu", input_shape=(input_shape,)))
        else:
            model.add(layers.Dense(units, activation="relu"))
        model.add(layers.Dropout(0.3))
    model.add(layers.Dense(num_classes, activation="softmax"))

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def create_cnn_model(input_shape, num_classes):
    """
    Build and compile a small 1D convolutional softmax classifier.

    Two Conv1D/MaxPooling1D stages (128 then 64 filters, kernel size 3,
    pool size 2) feed a flattened 128-unit dense layer with Dropout(0.3)
    and a softmax output layer.

    Parameters:
    input_shape (sequence): Only ``input_shape[0]`` is read; it is used as the
        sequence length of a single-channel input, i.e. ``(input_shape[0], 1)``.
    num_classes (int): Number of units in the softmax output layer.

    Returns:
    Sequential: Model compiled with the Adam optimizer, categorical
    cross-entropy loss and the accuracy metric.
    """
    sequence_length = input_shape[0]

    model = Sequential()
    # Convolutional feature extractor.
    model.add(layers.Conv1D(128, kernel_size=3, activation='relu', input_shape=(sequence_length, 1)))
    model.add(layers.MaxPooling1D(pool_size=2))
    model.add(layers.Conv1D(64, kernel_size=3, activation='relu'))
    model.add(layers.MaxPooling1D(pool_size=2))
    # Dense classification head.
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.3))
    model.add(layers.Dense(num_classes, activation='softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [3]:
def clean_up_cuda(model):
    """
    Release framework state and cached GPU memory after a model was used.

    Clears the Keras backend session, drops this function's reference to
    ``model``, runs the garbage collector and, when torch reports CUDA as
    available, empties the torch CUDA cache and collects IPC handles.

    Note: ``del model`` only removes the local name; the object is freed only
    if the caller (or any container) holds no further reference to it.

    Parameters:
    model: The model object the caller is done with.
    """
    K.clear_session()
    del model  # local reference only; see docstring

    gc.collect()

    cuda_ready = torch.cuda.is_available()
    if cuda_ready:
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()

    print("CUDA memory cleared and model deleted.")

In [4]:

def feature_extraction_best_corr_with_target(X, X_val, X_control, y, threshold=0.6, df_columns=None, number_of_features=40):
    """
    Keep the features whose absolute Pearson correlation with the target is highest.

    The ranking is computed on ``X``/``y`` only; the same columns are then
    selected from ``X_val`` and ``X_control``.

    Parameters:
    X (np.ndarray | pd.DataFrame): Training features.
    X_val (np.ndarray | pd.DataFrame): Validation features with the same columns as X.
    X_control (np.ndarray | pd.DataFrame): Control features with the same columns as X.
    y (array-like | pd.Series): Target, one value per row of X.
    threshold (float): Unused; kept so existing call sites keep working.
    df_columns (sequence | None): Column names assigned to inputs given as ndarrays.
    number_of_features (int): Maximum number of features to keep.

    Returns:
    tuple: (X, X_val, X_control) as fresh numpy arrays restricted to the
    selected columns, ordered by decreasing absolute correlation.
    """
    x_was_array = isinstance(X, np.ndarray)

    def _as_frame(data):
        # ndarrays get a fresh DataFrame (optionally with the given column names).
        if isinstance(data, np.ndarray):
            data = pd.DataFrame(data)
            if df_columns is not None:
                data.columns = df_columns
        return data

    X = _as_frame(X)
    X_val = _as_frame(X_val)
    X_control = _as_frame(X_control)

    # DataFrame.corrwith aligns on index labels. When X came in as an ndarray its
    # index is a fresh 0..n-1 range, while a pandas y (e.g. a CV slice) still carries
    # the original row labels, so label alignment would pair up the wrong rows or
    # none at all. In that case, and when y has no labels of its own, match rows by
    # position instead. Two labelled pandas inputs keep label alignment.
    if x_was_array or not isinstance(y, pd.Series):
        y = pd.Series(np.asarray(y), index=X.index)

    correlation_matrix = X.corrwith(y).abs()
    to_keep = correlation_matrix.sort_values(ascending=False).head(number_of_features).index
    X = X[to_keep]
    X_val = X_val[to_keep]
    X_control = X_control[to_keep]
    X_ret = X.to_numpy().copy()
    X_val_ret = X_val.to_numpy().copy()
    X_control_ret = X_control.to_numpy().copy()
    return X_ret, X_val_ret, X_control_ret


def feature_extraction_with_Pearson(X, X_val, X_control, y, threshold=0.6, df_columns=None):
    """
    Drop features that are strongly correlated with an earlier feature.

    The absolute Pearson correlation matrix of ``X`` is computed and only its
    strict upper triangle is inspected, so for every pair above ``threshold``
    the later column is the one removed. The same columns are removed from
    ``X_val`` and ``X_control``.

    Parameters:
    X (np.ndarray | pd.DataFrame): Training features used to compute correlations.
    X_val (np.ndarray | pd.DataFrame): Validation features with the same columns as X.
    X_control (np.ndarray | pd.DataFrame): Control features with the same columns as X.
    y: Not used by this function.
    threshold (float): Absolute correlation above which a column is dropped.
    df_columns (sequence | None): Column names assigned to inputs given as ndarrays.

    Returns:
    tuple: (X, X_val, X_control) as fresh numpy arrays without the dropped columns.
    """
    def _as_frame(data):
        # ndarrays get a fresh DataFrame (optionally with the given column names).
        if isinstance(data, np.ndarray):
            data = pd.DataFrame(data)
            if df_columns is not None:
                data.columns = df_columns
        return data

    train_frame = _as_frame(X)
    val_frame = _as_frame(X_val)
    control_frame = _as_frame(X_control)

    abs_corr = train_frame.corr().abs()
    # Boolean mask that is True strictly above the diagonal.
    above_diagonal = np.triu(np.ones(abs_corr.shape), k=1).astype(bool)
    upper = abs_corr.where(above_diagonal)

    redundant = []
    for name in upper.columns:
        if any(upper[name] > threshold):
            redundant.append(name)

    return (
        train_frame.drop(columns=redundant).to_numpy().copy(),
        val_frame.drop(columns=redundant).to_numpy().copy(),
        control_frame.drop(columns=redundant).to_numpy().copy(),
    )

def feature_extration_with_PCA(X, X_val, X_control, n_components):
    """
    Project all three feature sets onto principal components fitted on ``X``.

    Parameters:
    X: Training features; the PCA is fitted on these.
    X_val: Validation features, transformed with the fitted PCA.
    X_control: Control features, transformed with the fitted PCA.
    n_components: Passed through to ``PCA(n_components=...)``.

    Returns:
    tuple: (X, X_val, X_control) after the PCA projection.
    """
    reducer = PCA(n_components=n_components)
    train_projected = reducer.fit_transform(X)
    # The remaining sets reuse the components learned from the training data.
    return train_projected, reducer.transform(X_val), reducer.transform(X_control)

def feature_extration_with_BE(X, X_val, X_control, y, significance_level=0.05, df_columns=None):
    """
    Backward elimination of features based on OLS p-values.

    An OLS model of ``y`` on ``X`` plus an intercept is refitted repeatedly; in
    each round the feature with the largest p-value is removed from ``X``,
    ``X_val`` and ``X_control`` until no remaining feature has a p-value above
    ``significance_level``. The intercept is never a removal candidate.

    Parameters:
    X (np.ndarray | pd.DataFrame): Training features.
    X_val (np.ndarray | pd.DataFrame): Validation features with the same columns as X.
    X_control (np.ndarray | pd.DataFrame): Control features with the same columns as X.
    y (np.ndarray | pd.Series): Target, one value per row of X (matched by position).
    significance_level (float): p-value above which a feature is eliminated.
    df_columns (sequence | None): Column names assigned to inputs given as ndarrays.

    Returns:
    tuple: (X, X_val, X_control) as fresh numpy arrays restricted to the
    surviving features (without the intercept column).
    """
    def _as_frame(data):
        # ndarrays get a fresh DataFrame (optionally with the given column names).
        if isinstance(data, np.ndarray):
            data = pd.DataFrame(data)
            if df_columns is not None:
                data.columns = df_columns
        return data

    X = _as_frame(X)
    X_val = _as_frame(X_val)
    X_control = _as_frame(X_control)
    if isinstance(y, np.ndarray):
        # Previously an ndarray target crashed on reset_index below.
        y = pd.Series(y)

    # Rows are matched by position, so discard any original row labels.
    X = X.reset_index(drop=True)
    y = y.reset_index(drop=True)
    # Add constant for intercept
    X = sm.add_constant(X)

    while True:
        # Fit the OLS model
        model = sm.OLS(y, X).fit()

        # p-values of the candidate features. The intercept is excluded: it exists
        # only in X, so selecting it used to raise KeyError when it was dropped
        # from X_val/X_control (and again at the final 'const' drop).
        p_values = model.pvalues.drop(labels=['const'], errors='ignore')

        # Largest p-value among the remaining features (NaN once none are left,
        # which fails the comparison below and ends the loop).
        max_p_value = p_values.max()

        if max_p_value > significance_level:
            # Remove the feature with the highest p-value
            feature_to_remove = p_values.idxmax()
            print(f"Removing {feature_to_remove} with p-value {max_p_value:.4f}")
            X = X.drop(columns=[feature_to_remove])
            X_val = X_val.drop(columns=[feature_to_remove])
            X_control = X_control.drop(columns=[feature_to_remove])
        else:
            break
        print("Final Feature lengthe: ", len(X.columns))
    # Return the final selected feature set (excluding the intercept).
    # errors='ignore' covers the case where add_constant did not add a column.
    X_ret = X.drop(columns=['const'], errors='ignore').to_numpy().copy()
    X_val_ret = X_val.to_numpy().copy()
    X_control_ret = X_control.to_numpy().copy()
    return X_ret, X_val_ret, X_control_ret

In [5]:
# Example usage
def print_model_performance(results):
    """
    Print every entry of a metrics dictionary, one per line.

    Parameters:
    results (dict): Metric name -> value. The entry named
        'classification_report' is printed as a headed block; every other
        entry is printed as "<Title Cased Name>: <value>".
    """
    for metric in results:
        value = results[metric]
        if metric != 'classification_report':
            # e.g. 'balanced_accuracy' -> 'Balanced Accuracy'
            pretty_name = metric.replace('_', ' ').title()
            print(f"{pretty_name}: {value}")
            continue
        print("\nClassification Report:")
        print(value)
def aggregate_cv_metrics_and_print(all_results, model_name, tag="Validation"):
    """
    Summarize per-fold metrics as mean and standard deviation, print and return them.

    Parameters:
    all_results (list): One results dictionary per fold; each must contain
        'accuracy', 'balanced_accuracy', 'random_balanced_accuracy' and 'roc_auc'.
    model_name (str): Name shown in the printed header.
    tag (str): Label of the evaluated split shown in the printed header.

    Returns:
    dict: 'mean_<metric>' and 'std_<metric>' (np.mean / np.std) for each metric.
    """
    metric_names = ('accuracy', 'balanced_accuracy', 'random_balanced_accuracy', 'roc_auc')

    # Gather the per-fold values of every metric.
    per_metric = {name: [] for name in metric_names}
    for fold_result in all_results:
        for name in metric_names:
            per_metric[name].append(fold_result[name])

    # Mean first, then standard deviation, metric by metric.
    summary = {}
    for name in metric_names:
        summary[f'mean_{name}'] = np.mean(per_metric[name])
        summary[f'std_{name}'] = np.std(per_metric[name])

    print(f"\n {model_name} Classifier Performance {tag}:")
    print_model_performance(summary)
    return summary

In [6]:
# Local staging area for the downloaded tables. The same paths are used as download
# target and as read location, so the cell no longer depends on the notebook's
# working directory (the files used to be read back through "../00_data/...").
data_root = "/opt/notebooks/TABPFN/02_UKB/00_data"
age_label_dir = os.path.join(data_root, "age_label")
mri_dir = os.path.join(data_root, "deconfounded_but_age")
os.makedirs(age_label_dir, exist_ok=True)
os.makedirs(mri_dir, exist_ok=True)
mri_table = "aseg.volume_aparc.volume_aparc.thickness.csv"
age_label_csv = os.path.join(age_label_dir, "all_ages_all_ids_healthy.csv")
mri_csv = os.path.join(mri_dir, mri_table)

# Alternative inputs that were previously parked in a string literal
# (not downloaded by this cell):
#   age data: file-GyGfBQ8J34gPK8XXxbjYGbg4
#   MRI data: file-GyGf9vjJ34g2g9QbJQ7P1qZG

# Load the age data middle
subprocess.run(
    ["dx", "download", "file-GyJp51jJ34g246Y7bZ6j7yK4", "--output", age_label_csv, "--overwrite"],
    check=True,
)
# Load MRI data, cleaned and renamed ("deconfounded but age" table)
subprocess.run(
    ["dx", "download", "file-GyJp6B0J34g8xpf6Q6jz12xJ", "--output", mri_csv, "--overwrite"],
    check=True,
)
df = pd.read_csv(mri_csv)
label_df = pd.read_csv(age_label_csv)
n_splits = 5
label_col= "label_age_group"

# Attach the age-group label to every MRI row and keep complete rows only.
label_df = label_df[['ID', 'label_age_group']]
merged_df = pd.merge(df, label_df, on='ID', how='inner')
merged_df = merged_df.dropna()
# Stratified draw of 10,000 rows that preserves the label proportions.
df_sampled, _ = train_test_split(merged_df, train_size=10000, stratify=merged_df["label_age_group"], random_state=42)
print(label_df["label_age_group"].value_counts())
df_sampled["label_age_group"].value_counts()



label_age_group
8.0    6400
7.0    6350
6.0    5894
5.0    5436
9.0    4690
4.0    4681
3.0    4120
2.0    3299
1.0    1425
0.0     168
Name: count, dtype: int64


label_age_group
7.0    1501
8.0    1490
6.0    1377
5.0    1273
4.0    1097
9.0    1065
3.0     971
2.0     822
1.0     361
0.0      43
Name: count, dtype: int64

In [7]:
# Re-draw the 10,000-row training sample so that rare age groups are not thinned
# out: groups at or below `threshold` rows are kept completely, and the remaining
# budget is filled by a stratified draw from the larger groups. The result
# replaces `df_sampled`.
label_counts = merged_df["label_age_group"].value_counts()

# Include all rows for groups with fewer samples than the target threshold
threshold = 1000  # You can adjust this threshold as needed
small_groups = label_counts[label_counts <= threshold].index
small_groups_df = merged_df[merged_df["label_age_group"].isin(small_groups)]

# Calculate how many more samples are needed to reach 10,000
remaining_needed = 10000 - len(small_groups_df)

# Sample proportionally from the larger groups
large_groups = label_counts[label_counts > threshold].index
large_groups_df = merged_df[merged_df["label_age_group"].isin(large_groups)]

# Stratified sampling from the remaining data
# (the unused second return value is the part of the large groups left out)
proportional_sampled_df, _ = train_test_split(
    large_groups_df, 
    train_size=remaining_needed, 
    stratify=large_groups_df["label_age_group"], 
    random_state=42
)

# Combine the small groups and the proportional sample
# (rows are concatenated as-is: small groups first, not shuffled)
final_sampled_df = pd.concat([small_groups_df, proportional_sampled_df])

# Verify the result
print(final_sampled_df["label_age_group"].value_counts())
print(f"Total samples: {len(final_sampled_df)}")

# From here on the notebook trains on this sample instead of the plain stratified one.
df_sampled = final_sampled_df

label_age_group
7.0    1482
8.0    1471
6.0    1360
5.0    1257
4.0    1083
9.0    1052
3.0     959
2.0     812
1.0     356
0.0     168
Name: count, dtype: int64
Total samples: 10000


In [8]:
# The control-cohort files are downloaded to and read from the same absolute
# location, so the cell does not depend on the notebook's working directory.
control_dir = "/opt/notebooks/TABPFN/02_UKB/00_data/validation_data/00_National_Cohort/"
os.makedirs(control_dir, exist_ok=True)
control_label_csv = os.path.join(control_dir, "all_ages_all_ids_subset_middle_age.csv")
control_mri_csv = os.path.join(control_dir, "aparc.thickness_aseg.volume_aparc.volume_deconfounded_but_age.csv")

# Load middle age control data. check=True makes a failed download stop the cell
# instead of silently reading a missing or stale file below.
subprocess.run(
    ["dx", "download", "file-GyK09JQJ34g95zyvV9vFxQFv", "--output", control_label_csv, "--overwrite"],
    check=True,
)
# Load MRI data
subprocess.run(
    ["dx", "download", "file-GyK08xjJ34g95zyvV9vFxQFf", "--output", control_mri_csv, "--overwrite"],
    check=True,
)
label_df_control = pd.read_csv(control_label_csv)
df_control = pd.read_csv(control_mri_csv)

# Attach the age-group label to every control MRI row and keep complete rows only.
label_df_control = label_df_control[['ID', 'label_age_group']]
merged_df_control = pd.merge(df_control, label_df_control, on='ID', how='inner')
merged_df_control = merged_df_control.dropna()
# Balanced sample: up to 25 rows from each age group
target_samples_per_group = 25
grouped_df = merged_df_control.groupby('label_age_group')

# Sample 25 from each group if possible, otherwise sample all available.
# Iterating the groups explicitly keeps the label column in the result regardless
# of how groupby.apply treats grouping columns in the installed pandas version.
sampled_groups = [
    group.sample(n=min(target_samples_per_group, len(group)), random_state=42)
    for _, group in grouped_df
]
if sampled_groups:
    # Concatenate in group order and discard the original row labels.
    sampled_df = pd.concat(sampled_groups).reset_index(drop=True)
else:
    # No groups at all: keep an empty frame with the right columns.
    sampled_df = merged_df_control.iloc[0:0].reset_index(drop=True)

# Check if we reached the desired total number of 400 samples
# NOTE(review): with 25 rows per group, 400 needs 16 groups; the recorded output
# shows 10 groups (250 rows), so the first branch is always taken - confirm the target.
if len(sampled_df) < 400:
    print(f"Only {len(sampled_df)} samples available after balanced sampling.")
else:
    print(f"Sampled {len(sampled_df)} rows with balanced distribution across groups.")

X_control_source = sampled_df.drop(["ID", "label_age_group"], axis=1)
y_control_source = sampled_df["label_age_group"]
sampled_df["label_age_group"].value_counts()


Only 250 samples available after balanced sampling.


label_age_group
0    25
1    25
2    25
3    25
4    25
5    25
6    25
7    25
8    25
9    25
Name: count, dtype: int64

In [9]:
# Feature columns of the training sample (everything except ID and label) ...
column_control = df_sampled.drop(["ID", "label_age_group"], axis=1).columns
# ... used to select and order the control features the same way.
# Raises KeyError if the control table lacks any of these columns.
X_control = X_control_source[column_control]

In [10]:
# Sanity check: number of control features after aligning to the training columns.
len(X_control.columns)

192

In [11]:
def evaluate_model_performance_train(y_test, y_pred, y_pred_proba, y_val_bin=None):
    """
    Compute classification metrics plus a random-label reference score.

    Parameters:
    y_test (array-like): True labels.
    y_pred (array-like): Predicted labels.
    y_pred_proba (array-like): Predicted class scores, one column per class.
    y_val_bin (array-like | None): Optional binarized true labels; when given
        they replace ``y_test`` as the target of the ROC AUC computation only.

    Returns:
    tuple: (results, balanced_acc) where ``results`` holds 'accuracy',
    'balanced_accuracy', 'random_balanced_accuracy', 'classification_report'
    and 'roc_auc' (macro-averaged one-vs-rest), and ``balanced_acc`` repeats
    the balanced accuracy for convenience.
    """
    # Compute basic metrics
    acc = accuracy_score(y_test, y_pred)
    balanced_acc = balanced_accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    # Random comparison: score the predictions against labels drawn uniformly from
    # the classes that actually occur in y_test. Indexing the observed classes
    # (instead of using the drawn integers directly) keeps the baseline valid when
    # the labels are not exactly 0..n_classes-1, e.g. when a class is absent.
    # The draw uses numpy's global RNG and is therefore not seeded here.
    observed_classes = np.unique(y_test)
    random_positions = np.random.randint(0, len(observed_classes), size=np.shape(y_test))
    random_y_test = observed_classes[random_positions]
    random_balanced_acc = balanced_accuracy_score(random_y_test, y_pred)

    # ROC AUC, optionally against the binarized labels supplied by the caller
    auc_target = y_test if y_val_bin is None else y_val_bin
    auc = roc_auc_score(auc_target, y_pred_proba, multi_class='ovr', average='macro')

    # Prepare results (key order is kept: it determines the print order downstream)
    results = {
        'accuracy': acc,
        'balanced_accuracy': balanced_acc,
        'random_balanced_accuracy': random_balanced_acc,
        'classification_report': report,
        'roc_auc': auc,
    }

    return results, balanced_acc




In [12]:
def predict_and_evaluate(model, X_val, y_val, original_classes=None, multi_class=True):
    """
    Predict on a held-out set, evaluate, print and return the metrics.

    Only the probability columns of classes that occur in ``y_val`` are passed
    on for the ROC AUC computation, so the set may contain fewer classes than
    the model was trained on.

    Parameters:
    model: Fitted classifier exposing ``predict_proba`` (and ``predict`` when
        ``multi_class`` is False).
    X_val: Features to predict on.
    y_val: True labels for ``X_val``.
    original_classes (array-like | None): Class label of each ``predict_proba``
        column. Defaults to ``model.classes_``.
    multi_class (bool): If True the predicted label is the class with the
        highest probability; otherwise ``model.predict`` is used.

    Returns:
    tuple: (results, balanced_acc) as returned by evaluate_model_performance_train.

    Raises:
    ValueError: If the class labels are unavailable, or ``y_val`` contains a
        class that has no probability column.
    """
    # Passing None used to fail with an opaque IndexError further down.
    if original_classes is None:
        original_classes = getattr(model, "classes_", None)
    if original_classes is None:
        raise ValueError("original_classes was not given and the model has no classes_ attribute")
    original_classes = np.asarray(original_classes)

    y_pred_proba = model.predict_proba(X_val)
    if multi_class:
        # argmax yields a column position; translate it into the class label so the
        # prediction stays correct when labels are not exactly 0..n-1.
        y_pred = original_classes[np.argmax(y_pred_proba, axis=1)]
    else:
        y_pred = model.predict(X_val)

    # Get unique classes present in validation data
    present_classes = np.unique(y_val)

    # Get the indices of these classes in the original prediction probabilities
    class_indices = []
    for cls in present_classes:
        matches = np.where(original_classes == cls)[0]
        if len(matches) == 0:
            raise ValueError(f"Class {cls!r} occurs in y_val but is unknown to the model")
        class_indices.append(matches[0])

    # Select only the probability columns for present classes
    y_pred_proba_filtered = y_pred_proba[:, class_indices]

    # Binarize the true labels using only the present classes
    y_val_bin = label_binarize(y_val, classes=present_classes)

    results, balanced_acc = evaluate_model_performance_train(y_val, y_pred, y_pred_proba_filtered, y_val_bin)
    print_model_performance(results)
    return results, balanced_acc

In [13]:

# Experiment driver: for every training-set fraction and every feature-reduction
# strategy, run stratified cross-validation with TabPFN and a random baseline,
# evaluate each fold on the held-out fold and on the external control cohort,
# then (after each fraction) write all results so far to CSV and upload them.
percentage_of_the_data = [1, 0.9, 0.8, 0.5, 0.3]
#percentage_of_the_data = [0.01]
data = []
percentage_dict = {}
best_mse_mlp = float('inf')
best_mse_lgb = float('inf')
best_mse_tab = float('inf')
deconfounding_strategies = ["BE", "Correlation_in_Feature","Correlation_with_target", "PCA", "Nothing"]
for percentage in percentage_of_the_data:
    percentage_dict[percentage] = {}
    for deconfounding_strategy in deconfounding_strategies:
        print(f"\n=== Deconfounding Strategy: {deconfounding_strategy} ===")
        print(f"\n #### TRAINING WITH {percentage} OF THE DATA ####")
        if percentage == 1:
            df_sampled_subset = df_sampled
        else:
            df_sampled_subset, _ = train_test_split(
                df_sampled,
                train_size=percentage,  # Use train_size to get desired percentage
                stratify=df_sampled["label_age_group"],
                random_state=42
            )

        y = df_sampled_subset["label_age_group"]
        X = df_sampled_subset.drop(["ID", "label_age_group"], axis=1)

        print(f"Training data shape: {X.shape}, length of y: {len(y)}")
        print(f"Training data class distribution: {y.value_counts()}")

        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
        # The containers below that belong to other model families (lgb, mlp, cnn)
        # are not filled in this cell; they are kept as module-level names.
        cv_results = {
            'accuracy': [],
            'balanced_accuracy': [],
            'roc_auc': [],
            'classification_reports': []
        }

        tabpfn_results = []
        tabpfn_results_eval = []
        lgb_results = []
        lgb_results_eval = []
        random_results = []
        mlp_results = []
        mlp_results_eval = []
        cnn_results = []
        cnn_results_eval = []
        model_dict = {}
        model_results = {}

        best_balanced_accuracy_mlp = 0
        best_balanced_accuracy_tabpfn = 0
        best_balanced_accuracy_lgb = 0
        for fold, (train_index, val_index) in enumerate(skf.split(X, y), 1):
            unique_classes = np.unique(y)
            # Classes without any sample in this validation fold. The membership test
            # must run on the label VALUES: `cls in series` checks the index labels,
            # which reported classes as missing even though they were present.
            val_classes = set(y.iloc[val_index])
            missing_classes = [cls for cls in unique_classes if cls not in val_classes]
            for cls in missing_classes:
                cls_indices = np.where(y == cls)[0]  # Get all indices of the missing class
                # Check if removing a sample would leave train set empty for the class
                train_cls_indices = np.intersect1d(cls_indices, train_index)

                if len(train_cls_indices) <= 1:
                    # If moving the last one, instead take a duplicate from the whole y array
                    cls_idx_to_move = np.random.choice(cls_indices, 1)[0]
                else:
                    cls_idx_to_move = np.random.choice(train_cls_indices, 1)[0]
                # Add to validation set
                val_index = np.append(val_index, cls_idx_to_move)
                # Remove only if it's not the last one in train
                if len(train_cls_indices) > 1:
                    train_index = np.setdiff1d(train_index, cls_idx_to_move)
            print(f"\nFold {fold}")
            X_train, X_test = X.iloc[train_index], X.iloc[val_index]
            y_train, y_test = y.iloc[train_index], y.iloc[val_index]
            X_control = X_control_source.copy()
            y_control = y_control_source.copy()

            # Check that the control data offers the training columns and put
            # them in the same order. Best effort, as before: a mismatch is
            # reported and the fold continues with the unaligned control data.
            try:
                column_control = X_train.columns
                X_control = X_control[column_control]
            except Exception as e:
                print("Columns are not the same")
                print(e)
            #scaler = MinMaxScaler()
            df_columns = X.columns
            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)
            # NOTE(review): the control cohort is standardized with its OWN mean/std
            # (the original re-fitted `scaler` on it) rather than with the training
            # statistics. Behaviour is kept; a separate scaler object makes it explicit.
            # Confirm this per-cohort scaling is intended and not meant to be
            # scaler.transform(X_control).
            control_scaler = StandardScaler()
            X_control_scaled = control_scaler.fit_transform(X_control)
            if deconfounding_strategy == "BE":
                X_train, X_test, X_control = feature_extration_with_BE(X_train_scaled, X_test_scaled, X_control_scaled, y_train, df_columns=df_columns)
            elif deconfounding_strategy == "PCA":
                X_train, X_test, X_control= feature_extration_with_PCA(X_train_scaled, X_test_scaled, X_control_scaled,  n_components=50)
            elif deconfounding_strategy == "Correlation_in_Feature":
                X_train, X_test, X_control = feature_extraction_with_Pearson(X_train_scaled, X_test_scaled, X_control_scaled, y_train, threshold=0.6, df_columns=df_columns)
            elif deconfounding_strategy == "Correlation_with_target":
                X_train, X_test, X_control = feature_extraction_best_corr_with_target(X_train_scaled, X_test_scaled, X_control_scaled, y_train, threshold=0.6, df_columns=df_columns)
            elif deconfounding_strategy == "Nothing":
                X_train, X_test, X_control = X_train_scaled, X_test_scaled, X_control_scaled

            # Random baseline: uniformly drawn labels and normalized random scores.
            n_classes = len(np.unique(y_test))
            random_y_test = np.random.randint(0, n_classes, size=y_test.shape)
            random_y_pred_proba = np.random.rand(len(y_test), n_classes)
            random_y_pred_proba /= random_y_pred_proba.sum(axis=1)[:, np.newaxis]
            results, balanced_accuracy  = evaluate_model_performance_train(y_test, random_y_test, random_y_pred_proba)
            print("RANDOM PERFORMANCE")
            print_model_performance(results)
            random_results.append(results)

            # TabPFN on the held-out fold ...
            tabclf = TabPFNClassifier()
            tabclf.fit(X_train, y_train)
            y_pred_proba = tabclf.predict_proba(X_test)
            y_pred = tabclf.predict(X_test)
            results, balanced_accuracy = evaluate_model_performance_train(y_test, y_pred, y_pred_proba)
            print("tabpfn PERFORMANCE")
            print_model_performance(results)
            tabpfn_results.append(results)
            #model_dict["tabpfn"] = tabclf
            # ... and on the external control cohort.
            original_classes = tabclf.classes_
            print("tabpfn PERFORMANCE FOR CONTROL")
            results, balanced_accuracy = predict_and_evaluate(tabclf, X_control, y_control, original_classes=original_classes)
            tabpfn_results_eval.append(results)
            # Keep the classifier with the best balanced accuracy on the control cohort.
            if balanced_accuracy > best_balanced_accuracy_tabpfn:
                best_balanced_accuracy_tabpfn = balanced_accuracy
                model_dict["tabpfn"] = tabclf
            clean_up_cuda(tabclf)

        random_summary = aggregate_cv_metrics_and_print(random_results, "Random")
        tabpfn_summary = aggregate_cv_metrics_and_print(tabpfn_results, "TabPFN")
        tabpfn_eval_summary = aggregate_cv_metrics_and_print(tabpfn_results_eval, "TabPFN", "Control")

        percentage_dict[percentage][deconfounding_strategy] = {
            "TabPFN": {
                "results": tabpfn_summary,
                "results_eval": tabpfn_eval_summary,
                "cv_results": tabpfn_results,
                "cv_results_eval": tabpfn_results_eval
            },
            # The random baseline has no separate control evaluation; its
            # fold results are reused for both slots.
            "Random": {
                "results": random_summary,
                "results_eval": random_summary,
                "cv_results": random_results,
                "cv_results_eval": random_results,
            },
        }

    # ---- Checkpoint: export everything collected so far (runs once per fraction) ----
    # NOTE(review): Feature_extraction_applied is logged as False for every row, even
    # for strategies other than "Nothing"; the strategy is only visible in search_term.
    Feature_extraction_applied = False
    Pretraining_applied = False
    all_rows = []
    log_file = "/opt/notebooks/results_classification.csv"

    # Iterate over percentages and their associated models. The loop variable is
    # named differently from the outer `percentage` so it cannot be confused with it.
    for logged_percentage, models in percentage_dict.items():
        for feature_extraction, feature_summary_dict in models.items():
            for model_name, train_summary in feature_summary_dict.items():
                for i, (cv_result, cv_result_eval) in enumerate(zip(train_summary["cv_results"], train_summary["cv_results_eval"])):
                    # One "train" row (held-out fold) and one "eval" row (control cohort) per fold
                    for split_tag, fold_metrics in (("train", cv_result), ("eval", cv_result_eval)):
                        all_rows.append({
                            "label_col": label_col,
                            "mri_table": mri_table,
                            "test_set_size": f"{(1 - logged_percentage):.2%} (approx. of data left for test)",
                            "Feature_extraction_applied": Feature_extraction_applied,
                            "Pretraining_applied": Pretraining_applied,
                            "model_type": model_name,
                            "Accuracy": fold_metrics.get("accuracy", None),
                            "AUC": fold_metrics.get("roc_auc", None),  # Adjust the key if your aggregator uses a different one
                            "Balanced_ACC": fold_metrics.get("balanced_accuracy", None),
                            "Permutation_Balanced_ACC": fold_metrics.get("random_balanced_accuracy", None),
                            "number_of_cross_validations": n_splits,
                            "cross_validation_count": i,
                            "search_term": f"{logged_percentage}_{feature_extraction}_{model_name}_{split_tag}",
                            "percentage_of_data": logged_percentage,  # Storing the used fraction
                            "eval_or_train": split_tag
                        })

    # Convert the list of dictionaries to a DataFrame
    df_results = pd.DataFrame(all_rows)

    # Save the DataFrame to a CSV file
    df_results.to_csv(log_file, index=False)
    logs_path = "project-GqzxkVQJ34g6ygFJ4ZbvqBYF:/Esra/00_CLIP/01_training_logs/"
    # Outside a DNAnexus job DX_JOB_ID is unset; os.path.join(logs_path, None) would
    # raise TypeError after all the training above, so fall back to a fixed label.
    label = os.environ.get("DX_JOB_ID") or "no_job_id"
    logs_path_label = os.path.join(logs_path, label)
    dx_mkdir_command = f"dx mkdir '{logs_path_label}'"
    # Best effort (no check): the folder already exists from the second checkpoint on.
    subprocess.run(dx_mkdir_command, shell=True)
    time_tag = pd.Timestamp.now().strftime("%Y-%m-%d_%H-%M-%S")
    command_csv = f"dx upload '{log_file}' --path '{logs_path_label}/{time_tag}_result_tabpfn.csv'"
    subprocess.run(command_csv, shell=True)


=== Deconfounding Strategy: BE ===

 #### TRAINING WITH 1 OF THE DATA ####
Training data shape: (10000, 192), length of y: 10000
Training data class distribution: label_age_group
7.0    1482
8.0    1471
6.0    1360
5.0    1257
4.0    1083
9.0    1052
3.0     959
2.0     812
1.0     356
0.0     168
Name: count, dtype: int64

Fold 1
Removing Right-non-WM-hypointensities with p-value 0.9858
Final Feature lengthe:  192
Removing lh_fusiform_volume with p-value 0.9797
Final Feature lengthe:  191
Removing lh_inferiortemporal_thickness with p-value 0.8916
Final Feature lengthe:  190
Removing rh_frontalpole_volume with p-value 0.8875
Final Feature lengthe:  189
Removing Left-WM-hypointensities with p-value 0.9869
Final Feature lengthe:  188
Removing Left-non-WM-hypointensities with p-value 0.9103
Final Feature lengthe:  187
Removing rh_rostralmiddlefrontal_thickness with p-value 0.8676
Final Feature lengthe:  186
Removing lh_caudalanteriorcingulate_thickness with p-value 0.8624
Final Feature l

In [14]:
def terminate_instance():
    """
    Terminate the current DNAnexus job, if the notebook runs inside one.

    Reads the job id from the DX_JOB_ID environment variable. When it is unset
    or empty nothing happens; otherwise ``dx terminate <job id>`` is executed
    and a non-zero exit status raises subprocess.CalledProcessError.
    """
    job_id = os.environ.get("DX_JOB_ID")
    if not job_id:
        return

    print(f"Terminating job: {job_id}")
    terminate_cmd = ["dx", "terminate", job_id]
    subprocess.run(terminate_cmd, check=True)

In [15]:
#terminate_instance()