In [None]:
import csv
import os
import random
import pickle
import gc
import pandas as pd
import numpy as np
from scipy import stats
import sklearn
import warnings
from scipy.stats import pearsonr
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor

# Pool results together

## Body

In [None]:
# Pool all results together: body
#
# For each cross-validation fold this cell reads the stacked-model result CSV,
# annotates it with the number of features / training rows / test rows taken
# from the matching level-1 feature tables, and finally averages the
# performance metrics across the folds that could be loaded.
five_folds = []
folds = range(0,5)

modalities = ['body']
algorithm = '0'
merge_type = 'outer'
base_path = '/UK_BB/brainbody'

# Display labels. They never change between folds, so they are defined once
# here instead of being rebuilt on every loop iteration.
rename_dict = {'body': 'Body'}
domain_dict = {
    'smri': 'sMRI',
    'dwi': 'dwMRI',
    'rs': 'rsMRI',
    'body': 'Body'
}

for modality in modalities:
    stacking_path = os.path.join(base_path, 'stacking', modality)
    # Not used in this cell; kept because later cells may rely on the name.
    cognition_path = os.path.join(base_path, 'cognition')

    # Labels depend only on the modality, so resolve them once per modality.
    modality_rename = rename_dict.get(modality, modality)
    domain = domain_dict.get(modality, 'Unknown')

    for fold in folds:
        # Build the path component by component so it is OS independent.
        result_path = os.path.join(
            stacking_path,
            'folds',
            f'fold_{fold}',
            'models',
            f'{modality}_{algorithm}_{merge_type}_stacked_result_fold_{fold}.csv'
        )

        # A missing result file is reported (with a listing of what is
        # actually in the models directory) and the fold is skipped.
        if not os.path.exists(result_path):
            print(f"ERROR: File not found: {result_path}")
            print(f"Full path: {os.path.abspath(result_path)}")
            models_dir = os.path.join(stacking_path, 'folds', f'fold_{fold}', 'models')
            if os.path.exists(models_dir):
                print(f"Files in {models_dir}:")
                for f in os.listdir(models_dir):
                    print(f"  - {f}")
            else:
                print(f"Directory doesn't exist: {models_dir}")
            continue

        # Level-1 feature tables used only to count features and rows.
        test_data_path = os.path.join(
            stacking_path,
            'features_test_level1_stacked_inner',
            f'features_test_level1_inner_g_matched_fold_{fold}.csv'
        )

        train_data_path = os.path.join(
            stacking_path,
            'features_train_level1_stacked_inner',
            f'features_train_level1_inner_g_matched_fold_{fold}.csv'
        )

        if not os.path.exists(test_data_path):
            print(f"ERROR: Test data not found: {test_data_path}")
            continue

        if not os.path.exists(train_data_path):
            print(f"ERROR: Train data not found: {train_data_path}")
            continue

        # Read the result file
        result = pd.read_csv(result_path)

        # Every column except 'eid' and 'g' is counted as a feature
        # (presumably subject id and prediction target - confirm).
        test_data = pd.read_csv(test_data_path).drop(columns=['eid', 'g'])
        n_features = test_data.shape[1]
        n_test = test_data.shape[0]

        n_train = pd.read_csv(train_data_path).drop(columns=['eid', 'g']).shape[0]

        result['N Features'] = n_features
        result['N Train'] = n_train
        result['N Test'] = n_test
        result['Modality'] = modality_rename
        result['Domain'] = domain

        five_folds.append(result)

# Fail with an explicit message instead of pandas' generic
# "No objects to concatenate" when every fold was skipped above.
if not five_folds:
    raise ValueError(
        f"No fold results could be loaded for modalities {modalities} under {base_path}"
    )

# Skipped folds silently change what the averages below mean, so say so.
n_expected = len(modalities) * len(folds)
if len(five_folds) != n_expected:
    print(f"WARNING: only {len(five_folds)} of {n_expected} fold results were loaded; "
          f"averages below are computed over the loaded folds only")

five_folds_all_modalities = pd.concat(five_folds, ignore_index=False)

# Remove underscores from column names
five_folds_all_modalities.columns = [col.replace('_', ' ') for col in five_folds_all_modalities.columns]

# Average across folds
five_folds_all_modalities_mean = (
    five_folds_all_modalities
    .groupby(['Modality', 'Domain'])
    .agg({
        'Test R2': 'mean',
        'Test Pearson r': 'mean',
        'Test MSE': 'mean',
        'Test MAE': 'mean',
        'Train R2': 'mean',
        'Train Pearson r': 'mean',
        'Train MSE': 'mean',
        'Train MAE': 'mean',
        'N Train': 'mean',
        'N Test': 'mean',
        'N Features': 'first'
    })
    .round({
        'Test R2': 3,
        'Test Pearson r': 2,
        'Test MSE': 3,
        'Test MAE': 3,
        'Train R2': 3,
        'Train Pearson r': 2,
        'Train MSE': 2,
        'Train MAE': 2,
        'N Train': 0,
        'N Test': 0
    })
    # Cast the rounded mean counts to integers. Assigning through
    # `.loc[:, cols] = ...astype(int)` writes the values back into the existing
    # float columns, so the counts kept displaying as e.g. 1146.0.
    .astype({'N Train': int, 'N Test': int})
    .sort_values(by='Test R2', ascending=False)
    .reset_index()
)

# Display the results
with pd.option_context('display.max_rows', None):
    display(five_folds_all_modalities_mean)

Unnamed: 0,Modality,Domain,Test R2,Test Pearson r,Test MSE,Test MAE,Train R2,Train Pearson r,Train MSE,Train MAE,N Train,N Test,N Features
0,Body,Body,0.163,0.4,0.835,0.722,0.357,0.62,0.64,0.63,1146.0,286.0,19


## Brain

In [None]:
# Brain modalities: internal code -> label shown in the result tables.
modality_names = dict(
    smri='sMRI',
    dwi='dwMRI',
    rs='rsMRI',
)

# Modality codes, in the same order as the mapping above.
all_modalities = list(modality_names)

### sMRI

In [None]:
# Pool all results together: sMRI
#
# Reads the per-fold stacked-model result CSVs for the sMRI modality, tags each
# one with feature / row counts and display labels, then averages the metrics
# across folds. Requires `modality_names` from the "Define brain modalities" cell.
five_folds = []
folds = range(0,5)

modalities = ['smri']
algorithm = 'rf'
base_path = '/UK_BB/brainbody'

# Modality code -> domain label; anything else is reported as 'Unknown'.
domain_labels = {'smri': 'sMRI', 'dwi': 'dwMRI', 'rs': 'rsMRI'}

for modality in modalities:
    stacking_path = os.path.join(base_path, f'stacking/brain/{modality}')

    # Labels depend only on the modality, so resolve them once per modality
    # rather than on every fold.
    modality_rename = modality_names.get(modality, modality)
    domain = domain_labels.get(modality, 'Unknown')

    for fold in folds:
        # Read the result file
        result = pd.read_csv(os.path.join(stacking_path, f'folds/fold_{fold}/models/{modality}_{algorithm}_stacked_result_fold_{fold}.csv'))

        # Level-1 feature tables used only to count features and rows.
        test_data_path = os.path.join(stacking_path, f'features_test_level1_stacked_inner/features_test_level1_inner_g_matched_fold_{fold}.csv')
        train_data_path = os.path.join(stacking_path, f'features_train_level1_stacked_inner/features_train_level1_inner_g_matched_fold_{fold}.csv')

        # Every column except 'eid' and 'g' is counted as a feature
        # (presumably subject id and prediction target - confirm).
        test_data = pd.read_csv(test_data_path).drop(columns=['eid', 'g'])
        n_features = test_data.shape[1]
        n_test = test_data.shape[0]

        n_train = pd.read_csv(train_data_path).drop(columns=['eid', 'g']).shape[0]

        result['N Features'] = n_features
        result['N Train'] = n_train
        result['N Test'] = n_test
        result['Modality'] = modality_rename
        result['Domain'] = domain

        five_folds.append(result)

five_folds_all_modalities = pd.concat(five_folds, ignore_index=False)

# Remove underscores from column names
five_folds_all_modalities.columns = [col.replace('_', ' ') for col in five_folds_all_modalities.columns]

# LaTeX-style column headers. Currently unused: the matching `.rename(...)`
# at the end of the chain below is commented out.
column_formatting = {
    'Test R2': '$R$^2 Test',
    'Test Pearson r': 'Pearson $r$ Test',
    'Test MSE': '$MSE$ Test',
    'Test MAE': '$MAE$ Test',
    'Train R2': '$R$^2 Train',
    'Train Pearson r': 'Pearson $r$ Train',
    'Train MSE': '$MSE$ Train',
    'Train MAE': '$MAE$ Train',
    'N Train': '$N$ Train',
    'N Test': '$N$ Test',
    'N Features': '$N$ Features'
}

# Average across folds
five_folds_all_modalities_mean = (
    five_folds_all_modalities
    .groupby(['Modality', 'Domain'])
    .agg({
        'Test R2': 'mean',
        'Test Pearson r': 'mean',
        'Test MSE': 'mean',
        'Test MAE': 'mean',
        'Train R2': 'mean',
        'Train Pearson r': 'mean',
        'Train MSE': 'mean',
        'Train MAE': 'mean',
        'N Train': 'mean',
        'N Test': 'mean',
        'N Features': 'first'
    })
    .round({
        'Test R2': 3,
        'Test Pearson r': 2,
        'Test MSE': 3,
        'Test MAE': 3,
        'Train R2': 3,
        'Train Pearson r': 2,
        'Train MSE': 2,
        'Train MAE': 2,
        'N Train': 0,
        'N Test': 0
    })
    # Cast the rounded mean counts to integers. Assigning through
    # `.loc[:, cols] = ...astype(int)` writes the values back into the existing
    # float columns, so the counts kept displaying as e.g. 21700.0.
    .astype({'N Train': int, 'N Test': int})
    .sort_values(by='Test R2', ascending=False)
    .reset_index()#.rename(columns=column_formatting)
)

# Display the results
with pd.option_context('display.max_rows', None):
    display(five_folds_all_modalities_mean)

# NOTE(review): the directory name says XGB while `algorithm` is 'rf', and
# nothing is written to this path in this cell - confirm the intended target.
output_result_path = '/UK_BB/brainbody/result/1level/XGB'

# Order the per-fold results by test R2 (no file is saved here).
five_folds_all_modalities = five_folds_all_modalities.sort_values(by='Test R2', ascending=False)

Unnamed: 0,Modality,Domain,Test R2,Test Pearson r,Test MSE,Test MAE,Train R2,Train Pearson r,Train MSE,Train MAE,N Train,N Test,N Features
0,sMRI,sMRI,0.098,0.34,0.901,0.749,0.411,0.65,0.59,0.61,21700.0,5425.0,21


### dwMRI

In [None]:
# Pool all results together: dwMRI
#
# Reads the per-fold stacked-model result CSVs for the dwMRI modality, tags each
# one with feature / row counts and display labels, then averages the metrics
# across folds. Requires `modality_names` from the "Define brain modalities" cell.
five_folds = []
folds = range(0,5)

modalities = ['dwi']
algorithm = 'rf'
base_path = '/UK_BB/brainbody'

# Modality code -> domain label; anything else is reported as 'Unknown'.
domain_labels = {'smri': 'sMRI', 'dwi': 'dwMRI', 'rs': 'rsMRI'}

for modality in modalities:
    stacking_path = os.path.join(base_path, f'stacking/brain/{modality}')

    # Labels depend only on the modality, so resolve them once per modality
    # rather than on every fold.
    modality_rename = modality_names.get(modality, modality)
    domain = domain_labels.get(modality, 'Unknown')

    for fold in folds:
        # Read the result file.
        # NOTE(review): results come from the 'rf-old' sub-folder here, whereas
        # the sMRI cell reads from 'folds/' directly - confirm this is intended.
        result = pd.read_csv(os.path.join(stacking_path, f'rf-old/folds/fold_{fold}/models/{modality}_{algorithm}_stacked_result_fold_{fold}.csv'))

        # Level-1 feature tables used only to count features and rows.
        test_data_path = os.path.join(stacking_path, f'features_test_level1_stacked_inner/features_test_level1_inner_g_matched_fold_{fold}.csv')
        train_data_path = os.path.join(stacking_path, f'features_train_level1_stacked_inner/features_train_level1_inner_g_matched_fold_{fold}.csv')

        # Every column except 'eid' and 'g' is counted as a feature
        # (presumably subject id and prediction target - confirm).
        test_data = pd.read_csv(test_data_path).drop(columns=['eid', 'g'])
        n_features = test_data.shape[1]
        n_test = test_data.shape[0]

        n_train = pd.read_csv(train_data_path).drop(columns=['eid', 'g']).shape[0]

        result['N Features'] = n_features
        result['N Train'] = n_train
        result['N Test'] = n_test
        result['Modality'] = modality_rename
        result['Domain'] = domain

        five_folds.append(result)

five_folds_all_modalities = pd.concat(five_folds, ignore_index=False)

# Remove underscores from column names
five_folds_all_modalities.columns = [col.replace('_', ' ') for col in five_folds_all_modalities.columns]

# LaTeX-style column headers. Currently unused: the matching `.rename(...)`
# at the end of the chain below is commented out.
column_formatting = {
    'Test R2': '$R$^2 Test',
    'Test Pearson r': 'Pearson $r$ Test',
    'Test MSE': '$MSE$ Test',
    'Test MAE': '$MAE$ Test',
    'Train R2': '$R$^2 Train',
    'Train Pearson r': 'Pearson $r$ Train',
    'Train MSE': '$MSE$ Train',
    'Train MAE': '$MAE$ Train',
    'N Train': '$N$ Train',
    'N Test': '$N$ Test',
    'N Features': '$N$ Features'
}

# Average across folds
five_folds_all_modalities_mean = (
    five_folds_all_modalities
    .groupby(['Modality', 'Domain'])
    .agg({
        'Test R2': 'mean',
        'Test Pearson r': 'mean',
        'Test MSE': 'mean',
        'Test MAE': 'mean',
        'Train R2': 'mean',
        'Train Pearson r': 'mean',
        'Train MSE': 'mean',
        'Train MAE': 'mean',
        'N Train': 'mean',
        'N Test': 'mean',
        'N Features': 'first'
    })
    .round({
        'Test R2': 3,
        'Test Pearson r': 2,
        'Test MSE': 3,
        'Test MAE': 3,
        'Train R2': 3,
        'Train Pearson r': 2,
        'Train MSE': 2,
        'Train MAE': 2,
        'N Train': 0,
        'N Test': 0
    })
    # Cast the rounded mean counts to integers. Assigning through
    # `.loc[:, cols] = ...astype(int)` writes the values back into the existing
    # float columns, so the counts kept displaying as e.g. 18789.0.
    .astype({'N Train': int, 'N Test': int})
    .sort_values(by='Test R2', ascending=False)
    .reset_index()#.rename(columns=column_formatting)
)

# Display the results
with pd.option_context('display.max_rows', None):
    display(five_folds_all_modalities_mean)

# NOTE(review): the directory name says XGB while `algorithm` is 'rf', and
# nothing is written to this path in this cell - confirm the intended target.
output_result_path = '/UK_BB/brainbody/result/1level/XGB'

# Order the per-fold results by test R2 (no file is saved here).
five_folds_all_modalities = five_folds_all_modalities.sort_values(by='Test R2', ascending=False)

Unnamed: 0,Modality,Domain,Test R2,Test Pearson r,Test MSE,Test MAE,Train R2,Train Pearson r,Train MSE,Train MAE,N Train,N Test,N Features
0,dwMRI,dwMRI,0.171,0.44,0.828,0.72,0.688,0.84,0.31,0.44,18789.0,5162.0,42


### rsMRI

In [None]:
# Pool all results together: rsMRI
#
# Reads the per-fold stacked-model result CSVs for the rsMRI modality, tags each
# one with feature / row counts and display labels, then averages the metrics
# across folds. Requires `modality_names` from the "Define brain modalities" cell.
five_folds = []
folds = range(0,5)

modalities = ['rs']
algorithm = 'rf'
base_path = '/UK_BB/brainbody'

# Modality code -> domain label; anything else is reported as 'Unknown'.
domain_labels = {'smri': 'sMRI', 'dwi': 'dwMRI', 'rs': 'rsMRI'}

for modality in modalities:
    stacking_path = os.path.join(base_path, f'stacking/brain/{modality}')

    # Labels depend only on the modality, so resolve them once per modality
    # rather than on every fold.
    modality_rename = modality_names.get(modality, modality)
    domain = domain_labels.get(modality, 'Unknown')

    for fold in folds:
        # Read the result file.
        # NOTE(review): results come from the 'rf-old' sub-folder here, whereas
        # the sMRI cell reads from 'folds/' directly - confirm this is intended.
        result = pd.read_csv(os.path.join(stacking_path, f'rf-old/folds/fold_{fold}/models/{modality}_{algorithm}_stacked_result_fold_{fold}.csv'))

        # Level-1 feature tables used only to count features and rows.
        test_data_path = os.path.join(stacking_path, f'features_test_level1_stacked_inner/features_test_level1_inner_g_matched_fold_{fold}.csv')
        train_data_path = os.path.join(stacking_path, f'features_train_level1_stacked_inner/features_train_level1_inner_g_matched_fold_{fold}.csv')

        # Every column except 'eid' and 'g' is counted as a feature
        # (presumably subject id and prediction target - confirm).
        test_data = pd.read_csv(test_data_path).drop(columns=['eid', 'g'])
        n_features = test_data.shape[1]
        n_test = test_data.shape[0]

        n_train = pd.read_csv(train_data_path).drop(columns=['eid', 'g']).shape[0]

        result['N Features'] = n_features
        result['N Train'] = n_train
        result['N Test'] = n_test
        result['Modality'] = modality_rename
        result['Domain'] = domain

        five_folds.append(result)

five_folds_all_modalities = pd.concat(five_folds, ignore_index=False)

# Remove underscores from column names
five_folds_all_modalities.columns = [col.replace('_', ' ') for col in five_folds_all_modalities.columns]

# LaTeX-style column headers. Currently unused: the matching `.rename(...)`
# at the end of the chain below is commented out.
column_formatting = {
    'Test R2': '$R$^2 Test',
    'Test Pearson r': 'Pearson $r$ Test',
    'Test MSE': '$MSE$ Test',
    'Test MAE': '$MAE$ Test',
    'Train R2': '$R$^2 Train',
    'Train Pearson r': 'Pearson $r$ Train',
    'Train MSE': '$MSE$ Train',
    'Train MAE': '$MAE$ Train',
    'N Train': '$N$ Train',
    'N Test': '$N$ Test',
    'N Features': '$N$ Features'
}

# Average across folds
five_folds_all_modalities_mean = (
    five_folds_all_modalities
    .groupby(['Modality', 'Domain'])
    .agg({
        'Test R2': 'mean',
        'Test Pearson r': 'mean',
        'Test MSE': 'mean',
        'Test MAE': 'mean',
        'Train R2': 'mean',
        'Train Pearson r': 'mean',
        'Train MSE': 'mean',
        'Train MAE': 'mean',
        'N Train': 'mean',
        'N Test': 'mean',
        'N Features': 'first'
    })
    .round({
        'Test R2': 3,
        'Test Pearson r': 2,
        'Test MSE': 3,
        'Test MAE': 3,
        'Train R2': 3,
        'Train Pearson r': 2,
        'Train MSE': 2,
        'Train MAE': 2,
        'N Train': 0,
        'N Test': 0
    })
    # Cast the rounded mean counts to integers. Assigning through
    # `.loc[:, cols] = ...astype(int)` writes the values back into the existing
    # float columns, so the counts kept displaying as e.g. 20327.0.
    .astype({'N Train': int, 'N Test': int})
    .sort_values(by='Test R2', ascending=False)
    .reset_index()#.rename(columns=column_formatting)
)

# Display the results
with pd.option_context('display.max_rows', None):
    display(five_folds_all_modalities_mean)

# NOTE(review): the directory name says XGB while `algorithm` is 'rf', and
# nothing is written to this path in this cell - confirm the intended target.
output_result_path = '/UK_BB/brainbody/result/1level/XGB'

# Order the per-fold results by test R2 (no file is saved here).
five_folds_all_modalities = five_folds_all_modalities.sort_values(by='Test R2', ascending=False)

Unnamed: 0,Modality,Domain,Test R2,Test Pearson r,Test MSE,Test MAE,Train R2,Train Pearson r,Train MSE,Train MAE,N Train,N Test,N Features
0,rsMRI,rsMRI,0.128,0.37,0.871,0.737,0.834,0.92,0.17,0.32,20327.0,5082.0,18


### All MRI

In [None]:
# Pool all results together: all MRI
#
# Reads the per-fold stacked-model result CSVs for the combined-MRI modality,
# tags each one with feature / row counts and display labels, then averages the
# metrics across folds. Requires `modality_names` from the
# "Define brain modalities" cell.
five_folds = []
folds = range(0,5)

modalities = ['allmri']
algorithm = 'rf'
base_path = '/UK_BB/brainbody'

# Modality code -> domain label; anything else is reported as 'Unknown'.
domain_labels = {
    'smri': 'sMRI',
    'dwi': 'dwMRI',
    'rs': 'rsMRI',
    'allmri': 'MRI all'
}

for modality in modalities:
    stacking_path = os.path.join(base_path, f'stacking/brain/{modality}')

    # Labels depend only on the modality, so resolve them once per modality.
    # NOTE(review): `modality_names` as defined in the "Define brain modalities"
    # cell has no 'allmri' entry, so the Modality column falls back to the raw
    # code 'allmri' - add a display name there if one is wanted.
    modality_rename = modality_names.get(modality, modality)
    domain = domain_labels.get(modality, 'Unknown')

    for fold in folds:
        # Read the result file
        result = pd.read_csv(os.path.join(stacking_path, f'folds/fold_{fold}/models/{modality}_{algorithm}_stacked_result_fold_{fold}.csv'))

        # Level-1 feature tables used only to count features and rows.
        test_data_path = os.path.join(stacking_path, f'features_test_level1_stacked_inner/features_test_level1_inner_g_matched_fold_{fold}.csv')
        train_data_path = os.path.join(stacking_path, f'features_train_level1_stacked_inner/features_train_level1_inner_g_matched_fold_{fold}.csv')

        # Every column except 'eid' and 'g' is counted as a feature
        # (presumably subject id and prediction target - confirm).
        test_data = pd.read_csv(test_data_path).drop(columns=['eid', 'g'])
        n_features = test_data.shape[1]
        n_test = test_data.shape[0]

        n_train = pd.read_csv(train_data_path).drop(columns=['eid', 'g']).shape[0]

        result['N Features'] = n_features
        result['N Train'] = n_train
        result['N Test'] = n_test
        result['Modality'] = modality_rename
        result['Domain'] = domain

        five_folds.append(result)

five_folds_all_modalities = pd.concat(five_folds, ignore_index=False)

# Remove underscores from column names
five_folds_all_modalities.columns = [col.replace('_', ' ') for col in five_folds_all_modalities.columns]

# LaTeX-style column headers. Currently unused: the matching `.rename(...)`
# at the end of the chain below is commented out.
column_formatting = {
    'Test R2': '$R$^2 Test',
    'Test Pearson r': 'Pearson $r$ Test',
    'Test MSE': '$MSE$ Test',
    'Test MAE': '$MAE$ Test',
    'Train R2': '$R$^2 Train',
    'Train Pearson r': 'Pearson $r$ Train',
    'Train MSE': '$MSE$ Train',
    'Train MAE': '$MAE$ Train',
    'N Train': '$N$ Train',
    'N Test': '$N$ Test',
    'N Features': '$N$ Features'
}

# Average across folds
five_folds_all_modalities_mean = (
    five_folds_all_modalities
    .groupby(['Modality', 'Domain'])
    .agg({
        'Test R2': 'mean',
        'Test Pearson r': 'mean',
        'Test MSE': 'mean',
        'Test MAE': 'mean',
        'Train R2': 'mean',
        'Train Pearson r': 'mean',
        'Train MSE': 'mean',
        'Train MAE': 'mean',
        'N Train': 'mean',
        'N Test': 'mean',
        'N Features': 'first'
    })
    .round({
        'Test R2': 3,
        'Test Pearson r': 2,
        'Test MSE': 3,
        'Test MAE': 3,
        'Train R2': 3,
        'Train Pearson r': 2,
        'Train MSE': 2,
        'Train MAE': 2,
        'N Train': 0,
        'N Test': 0
    })
    # Cast the rounded mean counts to integers. Assigning through
    # `.loc[:, cols] = ...astype(int)` writes the values back into the existing
    # float columns, so the counts kept displaying as e.g. 20277.0.
    .astype({'N Train': int, 'N Test': int})
    .sort_values(by='Test R2', ascending=False)
    .reset_index()#.rename(columns=column_formatting)
)

# Display the results
with pd.option_context('display.max_rows', None):
    display(five_folds_all_modalities_mean)

# NOTE(review): the directory name says XGB while `algorithm` is 'rf', and
# nothing is written to this path in this cell - confirm the intended target.
output_result_path = '/UK_BB/brainbody/result/1level/XGB'

# Order the per-fold results by test R2 (no file is saved here).
five_folds_all_modalities = five_folds_all_modalities.sort_values(by='Test R2', ascending=False)

Unnamed: 0,Modality,Domain,Test R2,Test Pearson r,Test MSE,Test MAE,Train R2,Train Pearson r,Train MSE,Train MAE,N Train,N Test,N Features
0,allmri,MRI all,0.227,0.48,0.772,0.694,0.835,0.92,0.16,0.32,20277.0,5069.0,81


## Body and brain

In [None]:
# Pool all results together: body and brain
#
# For each cross-validation fold this cell reads the stacked-model result CSV
# of the combined brain-body model, annotates it with the number of features /
# training rows / test rows taken from the matching level-1 feature tables,
# and finally averages the performance metrics across the loaded folds.
five_folds = []
folds = range(0,5)

modalities = ['brain-body']
algorithm = '0'
merge_type = 'outer'
base_path = '/UK_BB/brainbody'

# Display labels. They never change between folds, so they are defined once
# here instead of being rebuilt on every loop iteration.
# NOTE(review): the Modality label reads 'Body and Brain' while the Domain
# label reads 'Brain and Body' - confirm which wording is intended.
rename_dict = {'brain-body': 'Body and Brain'}
domain_dict = {
    'smri': 'sMRI',
    'dwi': 'dwMRI',
    'rs': 'rsMRI',
    'body': 'Body',
    'brain-body': 'Brain and Body'
}

for modality in modalities:
    stacking_path = os.path.join(base_path, 'stacking', modality)
    # Not used in this cell; kept because later cells may rely on the name.
    cognition_path = os.path.join(base_path, 'cognition')

    # Labels depend only on the modality, so resolve them once per modality.
    modality_rename = rename_dict.get(modality, modality)
    domain = domain_dict.get(modality, 'Unknown')

    for fold in folds:
        # Build the path component by component so it is OS independent.
        result_path = os.path.join(
            stacking_path,
            'folds',
            f'fold_{fold}',
            'models',
            f'{modality}_{algorithm}_{merge_type}_stacked_result_fold_{fold}.csv'
        )

        # Level-1 feature tables used only to count features and rows.
        test_data_path = os.path.join(
            stacking_path,
            'features_test_level1_stacked_inner',
            f'features_test_level1_inner_g_matched_fold_{fold}.csv'
        )

        train_data_path = os.path.join(
            stacking_path,
            'features_train_level1_stacked_inner',
            f'features_train_level1_inner_g_matched_fold_{fold}.csv'
        )

        # Treat a missing result file the same way as a missing feature table:
        # report it and skip the fold instead of aborting with a traceback.
        if not os.path.exists(result_path):
            print(f"ERROR: File not found: {result_path}")
            continue

        if not os.path.exists(test_data_path):
            print(f"ERROR: Test data not found: {test_data_path}")
            continue

        if not os.path.exists(train_data_path):
            print(f"ERROR: Train data not found: {train_data_path}")
            continue

        # Read the result file
        result = pd.read_csv(result_path)

        # Every column except 'eid' and 'g' is counted as a feature
        # (presumably subject id and prediction target - confirm).
        test_data = pd.read_csv(test_data_path).drop(columns=['eid', 'g'])
        n_features = test_data.shape[1]
        n_test = test_data.shape[0]

        n_train = pd.read_csv(train_data_path).drop(columns=['eid', 'g']).shape[0]

        result['N Features'] = n_features
        result['N Train'] = n_train
        result['N Test'] = n_test
        result['Modality'] = modality_rename
        result['Domain'] = domain

        five_folds.append(result)

# Fail with an explicit message instead of pandas' generic
# "No objects to concatenate" when every fold was skipped above.
if not five_folds:
    raise ValueError(
        f"No fold results could be loaded for modalities {modalities} under {base_path}"
    )

# Skipped folds silently change what the averages below mean, so say so.
n_expected = len(modalities) * len(folds)
if len(five_folds) != n_expected:
    print(f"WARNING: only {len(five_folds)} of {n_expected} fold results were loaded; "
          f"averages below are computed over the loaded folds only")

five_folds_all_modalities = pd.concat(five_folds, ignore_index=False)

# Remove underscores from column names
five_folds_all_modalities.columns = [col.replace('_', ' ') for col in five_folds_all_modalities.columns]

# Average across folds
five_folds_all_modalities_mean = (
    five_folds_all_modalities
    .groupby(['Modality', 'Domain'])
    .agg({
        'Test R2': 'mean',
        'Test Pearson r': 'mean',
        'Test MSE': 'mean',
        'Test MAE': 'mean',
        'Train R2': 'mean',
        'Train Pearson r': 'mean',
        'Train MSE': 'mean',
        'Train MAE': 'mean',
        'N Train': 'mean',
        'N Test': 'mean',
        'N Features': 'first'
    })
    .round({
        'Test R2': 3,
        'Test Pearson r': 2,
        'Test MSE': 3,
        'Test MAE': 3,
        'Train R2': 3,
        'Train Pearson r': 2,
        'Train MSE': 2,
        'Train MAE': 2,
        'N Train': 0,
        'N Test': 0
    })
    # Cast the rounded mean counts to integers. Assigning through
    # `.loc[:, cols] = ...astype(int)` writes the values back into the existing
    # float columns, so the counts kept displaying as e.g. 990.0.
    .astype({'N Train': int, 'N Test': int})
    .sort_values(by='Test R2', ascending=False)
    .reset_index()
)

# Display the results
with pd.option_context('display.max_rows', None):
    display(five_folds_all_modalities_mean)

Unnamed: 0,Modality,Domain,Test R2,Test Pearson r,Test MSE,Test MAE,Train R2,Train Pearson r,Train MSE,Train MAE,N Train,N Test,N Features
0,Body and Brain,Brain and Body,0.218,0.47,0.781,0.698,0.706,0.85,0.29,0.4,990.0,248.0,100
