# Utils

### Libraries

In [2]:
import pandas as pd
import os
import glob
import nibabel as nib
import numpy as np
import contextlib
from sklearn.mixture import GaussianMixture
import warnings
import sys

In [3]:
# Silence every FutureWarning for the rest of the session.
warnings.simplefilter("ignore", FutureWarning)

# Make the project's data_processing folder importable. It is appended at the
# end of sys.path, so entries that are already there keep precedence.
sys.path += ["/Users/emmatosato/Documents/PhD/ANM_Verona/src/data_processing"]

In [4]:
import importlib
import data_processing.umap_run as umap_run
import data_processing.umap_clustering as umap_clustering
import data_processing.clustering_evaluation as clustering_evaluation
import data_processing.umap_regression as umap_regression
import data_processing.processing_flat as processing_flat

# Re-import the project modules so that edits made on disk are picked up
# without restarting the kernel. umap_clustering is reloaded last because it
# depends on umap_run.
for _module in (
    umap_run,
    clustering_evaluation,
    umap_regression,
    processing_flat,
    umap_clustering,
):
    importlib.reload(_module)

from data_processing.umap_regression import main_regression
from data_processing.umap_clustering import main_clustering, run_clustering
from data_processing.processing_flat import main_processing_flat
from data_processing.clustering_evaluation import evaluate_kmeans, evaluate_gmm, evaluate_hdbscan, evaluate_consensus
from data_processing.umap_run import x_features_return, run_umap


In [5]:
# Load IPython's autoreload extension and switch it to mode 2, which reloads
# imported modules automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

### Paths and folders

In [6]:
# Project root: every location in this cell is derived from it, so the cell can
# be pointed at another checkout by editing a single line.
dir_project = '/Users/emmatosato/Documents/PhD/ANM_Verona'

# Functional-connectivity maps and derived tables
dir_FCmaps = f'{dir_project}/data/FCmaps'
# Fixed: was '/data/FCmaps_augmented_processed' (filesystem root), unlike every
# other path in this cell.
dir_FCmaps_processed = f'{dir_project}/data/FCmaps_augmented_processed'
dir_data_utils = f'{dir_project}/data_utils/'
# Fixed: was '/data/dataframes' (filesystem root).
dir_dataframe = f'{dir_project}/data/dataframes'

# Masks
gm_mask_path = f'{dir_project}/utils/masks/GM_mask.nii'
# NOTE(review): the Harvard-Oxford mask points at a file called 'mask_GM.nii' --
# confirm this is the intended file and not a second grey-matter mask.
harvard_oxford_mask_path = f'{dir_project}/utils/masks/mask_GM.nii'

# Metadata spreadsheets
atrophy_dataset_matches = f'{dir_project}/utils/metadata/atrophy_matched.xlsx'
cognitive_dataset = f'{dir_project}/utils/metadata/cognitive_dataset.xlsx'

# Data

In [8]:
# Load the pre-computed tables stored as CSV files in dir_dataframe.
df_thr02_gm_masked = pd.read_csv(os.path.join(dir_dataframe, "df_thr02_gm_masked.csv"))
# Fixed: this line used to re-read "df_thr02_gm_masked.csv", so the table that
# later cells report as "GM Mask without Threshold" was a copy of the
# thresholded one. The file name now follows the <variable>.csv convention
# used by the other loads in this cell.
df_gm_masked = pd.read_csv(os.path.join(dir_dataframe, "df_gm_masked.csv"))
df_meta = pd.read_csv(os.path.join(dir_dataframe, "df_meta.csv"))
df_summary = pd.read_csv(os.path.join(dir_dataframe, "df_summary.csv"))

# NOTE(review): later cells also use df_thr01_gm_masked, df_har_masked,
# df_thr01_har_masked, df_thr02_har_masked, df_networks_thr01,
# df_networks_thr02 and df_networks_no_thr, none of which is loaded in the
# cells visible here -- confirm where they are created.

# Network Analysis

In [23]:
import pandas as pd
import umap
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Join the per-network features with the subject metadata (inner join on ID)
df_merged = df_networks_thr02.merge(df_meta, on='ID', how='inner')

# Columns holding the network features
network_features = [
    'VIS_mean', 'VAN_mean', 'DAN_mean', 'DMN_mean',
    'LMB_mean', 'FPN_mean', 'SMN_mean', 'SUBCORTICAL_mean',
]
X_network = df_merged[network_features].values

# Two-column UMAP embedding, stored on the merged table for plotting
reducer = umap.UMAP(n_neighbors=15, min_dist=0.1, random_state=42)
X_umap = reducer.fit_transform(X_network)
df_merged[['UMAP1', 'UMAP2']] = X_umap

# KMeans with 3 clusters, fitted on the network features themselves (not on
# the embedding); the silhouette score is computed in the same feature space
kmeans = KMeans(n_clusters=3, random_state=42)
df_merged['Cluster'] = kmeans.fit_predict(X_network)
sil_score = silhouette_score(X_network, df_merged['Cluster'])

# Two panels on shared axes: points coloured by cluster (left) and by
# diagnostic group (right)
fig, axs = plt.subplots(1, 2, figsize=(14, 6), sharex=True, sharey=True)
panels = (
    ('Cluster', 'tab10', f"KMeans Clusters (Silhouette: {sil_score:.2f})"),
    ('Group', 'Set2', "Diagnostic Groups"),
)
for ax, (hue_column, palette_name, panel_title) in zip(axs, panels):
    sns.scatterplot(data=df_merged, x='UMAP1', y='UMAP2', hue=hue_column,
                    palette=palette_name, ax=ax)
    ax.set_title(panel_title)
    ax.set_xlabel("UMAP 1")
    ax.set_ylabel("UMAP 2")

plt.tight_layout()
plt.show()

GMM_Label
0    80
1    76
2    15
Name: count, dtype: Int64


In [None]:
import statsmodels.api as sm

# Join the per-network features with the subject metadata (inner join on ID)
df_merged = pd.merge(df_networks_thr02, df_meta, on='ID', how='inner')

# Keep only the subjects that have a CDR_SB value
df_mean_regression = df_merged[df_merged['CDR_SB'].notna()].copy()

# Predictors of interest and confounders
network_features = ['VIS_mean', 'VAN_mean', 'DAN_mean', 'DMN_mean', 'LMB_mean', 'FPN_mean', 'SMN_mean', 'SUBCORTICAL_mean']
confounders = ['Age', 'Sex', 'Education']

# Outcome variable
y = df_mean_regression['CDR_SB']

# Design matrix
X = df_mean_regression[network_features + confounders].copy()

# Encode Sex numerically (M -> 0, F -> 1). Any other code becomes NaN.
X['Sex'] = X['Sex'].map({'M': 0, 'F': 1})  # Adjust mapping as needed

# Add the intercept column
X = sm.add_constant(X)

# Fit the model. Only CDR_SB was filtered for missing values above, so the
# predictors may still contain NaN (an unmapped Sex code, a missing Age or
# Education, ...). missing='drop' removes those observations; the default
# missing='none' performs no NaN handling at all.
model = sm.OLS(y, X, missing='drop')
results = model.fit()

# Print the model summary
print(results.summary())


# Unsupervised Clustering


# Linear Regression


In [None]:
# Settings shared by the CDR_SB regression cells below: the column to predict,
# whether main_regression is asked to plot, and the folder passed as save_path.
target_col = 'CDR_SB'
plot_flag_regression = True
path_umap_regression = "/Users/emmatosato/Documents/PhD/ANM_Verona/output/umap_regression_CDR_SB"

### CDR sum of boxes

#### Voxel

##### GM Mask

In [None]:
# main_regression for target_col on the GM-mask table without threshold,
# no covariates.
model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
    df_masked=df_gm_masked,
    df_meta=df_meta,
    target_variable=target_col,
    covariates=None,
    y_log_transform=False,
    plot_flag=plot_flag_regression,
    save_path=path_umap_regression,
    title_prefix="gm_mask",
)

In [None]:
# main_regression for target_col on the GM-mask table thresholded at 0.1,
# no covariates.
model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
    df_masked=df_thr01_gm_masked,
    df_meta=df_meta,
    target_variable=target_col,
    covariates=None,
    y_log_transform=False,
    plot_flag=plot_flag_regression,
    save_path=path_umap_regression,
    title_prefix="gm_mask_01",
)

In [None]:
# main_regression for target_col on the GM-mask table thresholded at 0.2,
# no covariates.
model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
    df_masked=df_thr02_gm_masked,
    df_meta=df_meta,
    target_variable=target_col,
    covariates=None,
    y_log_transform=False,
    plot_flag=plot_flag_regression,
    save_path=path_umap_regression,
    title_prefix="gm_mask_02",
)

##### Harvard Mask

In [None]:
# main_regression for target_col on the Harvard-mask table without threshold,
# no covariates.
model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
    df_masked=df_har_masked,
    df_meta=df_meta,
    target_variable=target_col,
    covariates=None,
    y_log_transform=False,
    plot_flag=plot_flag_regression,
    save_path=path_umap_regression,
    title_prefix="har_mask",
)

In [None]:
# main_regression for target_col on the Harvard-mask table thresholded at 0.1,
# no covariates.
model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
    df_masked=df_thr01_har_masked,
    df_meta=df_meta,
    target_variable=target_col,
    covariates=None,
    y_log_transform=False,
    plot_flag=plot_flag_regression,
    save_path=path_umap_regression,
    title_prefix="har_mask_01",
)

In [None]:
# main_regression for target_col on the Harvard-mask table thresholded at 0.2,
# no covariates.
model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
    df_masked=df_thr02_har_masked,
    df_meta=df_meta,
    target_variable=target_col,
    covariates=None,
    y_log_transform=False,
    plot_flag=plot_flag_regression,
    save_path=path_umap_regression,
    title_prefix="har_mask_02",
)

##### Txt files

In [None]:
# Text report of the regressions on the GM-mask tables.
# Fixed: the report used to be opened at "/output/umap_regression_CDR_SB/..."
# (filesystem root); it now sits in the project's CDR_SB output folder, like
# the other output paths of this notebook.
output_file = "/Users/emmatosato/Documents/PhD/ANM_Verona/output/umap_regression_CDR_SB/OLS_regression_GM_mask.txt"

# (banner title, feature table) pairs, in the order they appear in the report.
# Built before the file is opened, so a missing table raises before an
# existing report is truncated.
report_sections = [
    ("   OLS Regression Results - GM Mask without Threshold ", df_gm_masked),
    ("   OLS Regression Results - GM Mask with Threshold 0.2 ", df_thr02_gm_masked),
    ("   OLS Regression Results - GM Mask with Threshold 0.1 ", df_thr01_gm_masked),
]
banner_rule = "=" * 70

# Everything printed inside this block (the banners and whatever
# main_regression prints) is written to output_file instead of the notebook.
with open(output_file, "w") as f, contextlib.redirect_stdout(f):
    for section_title, section_df in report_sections:
        # Each table is fitted twice: first without covariates, then with
        # Sex, Education and Age passed as covariates.
        for with_covariates in (False, True):
            print(("\n\n\n" if with_covariates else "\n\n") + banner_rule)
            print(section_title)
            if with_covariates:
                print("   Covariates: Sex, Education, Age ")
            print(banner_rule + "\n")

            model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
                df_masked=section_df,
                df_meta=df_meta,
                target_variable=target_col,
                # A fresh list per call, as in the original copy-pasted calls
                covariates=["Sex", "Education", "Age"] if with_covariates else None,
                y_log_transform=False,
                plot_flag=False,
                save_path=None,
            )


In [None]:
# Text report of the regressions on the Harvard-mask tables.
# Fixed: the report used to be opened at "/output/umap_regression_CDR_SB/..."
# (filesystem root); it now sits in the project's CDR_SB output folder, like
# the other output paths of this notebook.
output_file = "/Users/emmatosato/Documents/PhD/ANM_Verona/output/umap_regression_CDR_SB/OLS_regression_HAR_mask.txt"

# (banner title, feature table) pairs, in the order they appear in the report.
# Built before the file is opened, so a missing table raises before an
# existing report is truncated.
report_sections = [
    ("   OLS Regression Results - Harvard Mask without Threshold ", df_har_masked),
    ("   OLS Regression Results - Harvard Mask with Threshold 0.2 ", df_thr02_har_masked),
    ("   OLS Regression Results - Harvard Mask with Threshold 0.1 ", df_thr01_har_masked),
]
banner_rule = "=" * 70

# Everything printed inside this block (the banners and whatever
# main_regression prints) is written to output_file instead of the notebook.
with open(output_file, "w") as f, contextlib.redirect_stdout(f):
    for section_title, section_df in report_sections:
        # Each table is fitted twice: first without covariates, then with
        # Sex, Education and Age passed as covariates.
        for with_covariates in (False, True):
            print(("\n\n\n" if with_covariates else "\n\n") + banner_rule)
            print(section_title)
            if with_covariates:
                print("   Covariates: Sex, Education, Age ")
            print(banner_rule + "\n")

            model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
                df_masked=section_df,
                df_meta=df_meta,
                target_variable=target_col,
                # A fresh list per call, as in the original copy-pasted calls
                covariates=["Sex", "Education", "Age"] if with_covariates else None,
                y_log_transform=False,
                plot_flag=False,
                save_path=None,
            )


##### GMM Distributions

In [None]:
# Mean CDR_SB of the subjects carrying each GMM label (0, 1 and 2)
for gmm_label in range(3):
    has_label = df_meta['GMM_Label'] == gmm_label
    mean_cdr = df_meta.loc[has_label, 'CDR_SB'].mean()
    print(f"Media CDR_SB per gruppo {gmm_label}: {mean_cdr:.3f}")


# IDs of the subjects assigned to GMM label 0
label0_rows = df_meta[df_meta['GMM_Label'] == 0]
gmm_ids = label0_rows['ID']

# Work on a copy of the Harvard-mask table thresholded at 0.2
df_features = df_thr02_har_masked.copy()

# Keep only those subjects in both the feature table and the metadata,
# renumbering the rows from 0
keep_features = df_features['ID'].isin(gmm_ids)
keep_meta = df_meta['ID'].isin(gmm_ids)
df_masked_gmm = df_features[keep_features].reset_index(drop=True)
df_meta_gmm = df_meta[keep_meta].reset_index(drop=True)

In [None]:
# main_regression for CDR_SB restricted to the GMM-label subset built in the
# previous cell; plots are requested and no save path is given.
# NOTE(review): title_prefix says "gm_mask", but the subset is taken from
# df_thr02_har_masked -- confirm which mask is intended.
model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
    df_masked=df_masked_gmm,
    df_meta=df_meta_gmm,
    target_variable="CDR_SB",
    covariates=None,
    y_log_transform=False,
    plot_flag=True,
    save_path=None,
    title_prefix="gm_mask_GMM",
)

#### Yeo Networks

In [None]:
# Settings for the network-level CDR_SB regressions below: the column to
# predict and the folder passed as save_path.
target_col = 'CDR_SB'
path_umap_regression = "/Users/emmatosato/Documents/PhD/ANM_Verona/output/umap_network_regression_CDR_SB"

##### Networks 01

In [None]:
# main_regression for target_col on the network table df_networks_thr01,
# no covariates.
model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
    df_masked=df_networks_thr01,
    df_meta=df_meta,
    target_variable=target_col,
    covariates=None,
    y_log_transform=False,
    plot_flag=plot_flag_regression,
    save_path=path_umap_regression,
    title_prefix="network_har_mask_01",
)

##### Networks 02

In [None]:
# main_regression for target_col on the network table df_networks_thr02,
# no covariates.
model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
    df_masked=df_networks_thr02,
    df_meta=df_meta,
    target_variable=target_col,
    covariates=None,
    y_log_transform=False,
    plot_flag=plot_flag_regression,
    save_path=path_umap_regression,
    title_prefix="network_har_mask_02",
)

##### Networks no threshold

In [None]:
# main_regression for target_col on the network table df_networks_no_thr,
# no covariates.
model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
    df_masked=df_networks_no_thr,
    df_meta=df_meta,
    target_variable=target_col,
    covariates=None,
    y_log_transform=False,
    plot_flag=plot_flag_regression,
    save_path=path_umap_regression,
    title_prefix="network_har_mask",
)

### Mini-Mental State

In [None]:
# Settings for the MMSE regressions below: the column to predict and the
# folder passed as save_path.
target_col = 'MMSE'
path_umap_regression = "/Users/emmatosato/Documents/PhD/ANM_Verona/output/umap_regression_MMSE"

#### Voxel

##### GM Mask

In [None]:
# main_regression for target_col on the GM-mask table without threshold,
# no covariates.
model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
    df_masked=df_gm_masked,
    df_meta=df_meta,
    target_variable=target_col,
    covariates=None,
    y_log_transform=False,
    plot_flag=plot_flag_regression,
    save_path=path_umap_regression,
    title_prefix="gm_mask",
)

In [None]:
# main_regression for target_col on the GM-mask table thresholded at 0.1,
# no covariates.
model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
    df_masked=df_thr01_gm_masked,
    df_meta=df_meta,
    target_variable=target_col,
    covariates=None,
    y_log_transform=False,
    plot_flag=plot_flag_regression,
    save_path=path_umap_regression,
    title_prefix="gm_mask_01",
)

In [None]:
# main_regression for target_col on the GM-mask table thresholded at 0.2,
# no covariates.
model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
    df_masked=df_thr02_gm_masked,
    df_meta=df_meta,
    target_variable=target_col,
    covariates=None,
    y_log_transform=False,
    plot_flag=plot_flag_regression,
    save_path=path_umap_regression,
    title_prefix="gm_mask_02",
)

##### Harvard Mask

In [None]:
# main_regression for target_col on the Harvard-mask table without threshold,
# no covariates.
model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
    df_masked=df_har_masked,
    df_meta=df_meta,
    target_variable=target_col,
    covariates=None,
    y_log_transform=False,
    plot_flag=plot_flag_regression,
    save_path=path_umap_regression,
    title_prefix="har_mask",
)


In [None]:
# main_regression for target_col on the Harvard-mask table thresholded at 0.1,
# no covariates.
model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
    df_masked=df_thr01_har_masked,
    df_meta=df_meta,
    target_variable=target_col,
    covariates=None,
    y_log_transform=False,
    plot_flag=plot_flag_regression,
    save_path=path_umap_regression,
    title_prefix="har_mask_01",
)

In [None]:
# main_regression for target_col on the Harvard-mask table thresholded at 0.2,
# no covariates.
model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
    df_masked=df_thr02_har_masked,
    df_meta=df_meta,
    target_variable=target_col,
    covariates=None,
    y_log_transform=False,
    plot_flag=plot_flag_regression,
    save_path=path_umap_regression,
    title_prefix="har_mask_02",
)

##### Txt files

In [None]:
# Text report of the regressions on the GM-mask tables (MMSE output folder).
output_file = "/Users/emmatosato/Documents/PhD/ANM_Verona/output/umap_regression_MMSE/OLS_regression_GM_mask.txt"

# (banner title, feature table) pairs, in the order they appear in the report.
# The six copy-pasted calls are replaced by a loop over this list; it is built
# before the file is opened, so a missing table raises before an existing
# report is truncated.
report_sections = [
    ("   OLS Regression Results - GM Mask without Threshold ", df_gm_masked),
    ("   OLS Regression Results - GM Mask with Threshold 0.2 ", df_thr02_gm_masked),
    ("   OLS Regression Results - GM Mask with Threshold 0.1 ", df_thr01_gm_masked),
]
banner_rule = "=" * 70

# Everything printed inside this block (the banners and whatever
# main_regression prints) is written to output_file instead of the notebook.
with open(output_file, "w") as f, contextlib.redirect_stdout(f):
    for section_title, section_df in report_sections:
        # Each table is fitted twice: first without covariates, then with
        # Sex, Education and Age passed as covariates.
        for with_covariates in (False, True):
            print(("\n\n\n" if with_covariates else "\n\n") + banner_rule)
            print(section_title)
            if with_covariates:
                print("   Covariates: Sex, Education, Age ")
            print(banner_rule + "\n")

            model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
                df_masked=section_df,
                df_meta=df_meta,
                target_variable=target_col,
                # A fresh list per call, as in the original copy-pasted calls
                covariates=["Sex", "Education", "Age"] if with_covariates else None,
                y_log_transform=False,
                plot_flag=False,
                save_path=None,
            )


In [None]:
# Text report of the regressions on the Harvard-mask tables (MMSE output folder).
output_file = "/Users/emmatosato/Documents/PhD/ANM_Verona/output/umap_regression_MMSE/OLS_regression_HAR_mask.txt"

# (banner title, feature table) pairs, in the order they appear in the report.
# The six copy-pasted calls are replaced by a loop over this list; it is built
# before the file is opened, so a missing table raises before an existing
# report is truncated.
report_sections = [
    ("   OLS Regression Results - Harvard Mask without Threshold ", df_har_masked),
    ("   OLS Regression Results - Harvard Mask with Threshold 0.2 ", df_thr02_har_masked),
    ("   OLS Regression Results - Harvard Mask with Threshold 0.1 ", df_thr01_har_masked),
]
banner_rule = "=" * 70

# Everything printed inside this block (the banners and whatever
# main_regression prints) is written to output_file instead of the notebook.
with open(output_file, "w") as f, contextlib.redirect_stdout(f):
    for section_title, section_df in report_sections:
        # Each table is fitted twice: first without covariates, then with
        # Sex, Education and Age passed as covariates.
        for with_covariates in (False, True):
            print(("\n\n\n" if with_covariates else "\n\n") + banner_rule)
            print(section_title)
            if with_covariates:
                print("   Covariates: Sex, Education, Age ")
            print(banner_rule + "\n")

            model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
                df_masked=section_df,
                df_meta=df_meta,
                target_variable=target_col,
                # A fresh list per call, as in the original copy-pasted calls
                covariates=["Sex", "Education", "Age"] if with_covariates else None,
                y_log_transform=False,
                plot_flag=False,
                save_path=None,
            )


#### Yeo Networks

In [None]:
# Column predicted by the cells below. The bare name on the last line makes
# the notebook display the current value of the plotting flag.
target_col = 'MMSE'
plot_flag_regression

In [None]:
# main_regression for target_col on the network table df_networks_no_thr,
# no covariates and no save path.
# NOTE(review): the CDR_SB cell for this same table uses the prefix
# "network_har_mask" -- confirm which mask name is correct.
model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
    df_masked=df_networks_no_thr,
    df_meta=df_meta,
    target_variable=target_col,
    covariates=None,
    y_log_transform=False,
    plot_flag=plot_flag_regression,
    save_path=None,
    title_prefix="network_gm_mask",
)

In [None]:
# Text report of the regressions on the GM-mask tables (MMSE output folder).
# NOTE(review): this cell repeats an earlier cell of this notebook and
# overwrites the same report file -- confirm whether it is still needed.
output_file = "/Users/emmatosato/Documents/PhD/ANM_Verona/output/umap_regression_MMSE/OLS_regression_GM_mask.txt"

# (banner title, feature table) pairs, in the order they appear in the report.
# The six copy-pasted calls are replaced by a loop over this list; it is built
# before the file is opened, so a missing table raises before an existing
# report is truncated.
report_sections = [
    ("   OLS Regression Results - GM Mask without Threshold ", df_gm_masked),
    ("   OLS Regression Results - GM Mask with Threshold 0.2 ", df_thr02_gm_masked),
    ("   OLS Regression Results - GM Mask with Threshold 0.1 ", df_thr01_gm_masked),
]
banner_rule = "=" * 70

# Everything printed inside this block (the banners and whatever
# main_regression prints) is written to output_file instead of the notebook.
with open(output_file, "w") as f, contextlib.redirect_stdout(f):
    for section_title, section_df in report_sections:
        # Each table is fitted twice: first without covariates, then with
        # Sex, Education and Age passed as covariates.
        for with_covariates in (False, True):
            print(("\n\n\n" if with_covariates else "\n\n") + banner_rule)
            print(section_title)
            if with_covariates:
                print("   Covariates: Sex, Education, Age ")
            print(banner_rule + "\n")

            model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
                df_masked=section_df,
                df_meta=df_meta,
                target_variable=target_col,
                # A fresh list per call, as in the original copy-pasted calls
                covariates=["Sex", "Education", "Age"] if with_covariates else None,
                y_log_transform=False,
                plot_flag=False,
                save_path=None,
            )


In [None]:
# Text report of the regressions on the Harvard-mask tables (MMSE output folder).
# NOTE(review): this cell repeats an earlier cell of this notebook and
# overwrites the same report file -- confirm whether it is still needed.
output_file = "/Users/emmatosato/Documents/PhD/ANM_Verona/output/umap_regression_MMSE/OLS_regression_HAR_mask.txt"

# (banner title, feature table) pairs, in the order they appear in the report.
# The six copy-pasted calls are replaced by a loop over this list; it is built
# before the file is opened, so a missing table raises before an existing
# report is truncated.
report_sections = [
    ("   OLS Regression Results - Harvard Mask without Threshold ", df_har_masked),
    ("   OLS Regression Results - Harvard Mask with Threshold 0.2 ", df_thr02_har_masked),
    ("   OLS Regression Results - Harvard Mask with Threshold 0.1 ", df_thr01_har_masked),
]
banner_rule = "=" * 70

# Everything printed inside this block (the banners and whatever
# main_regression prints) is written to output_file instead of the notebook.
with open(output_file, "w") as f, contextlib.redirect_stdout(f):
    for section_title, section_df in report_sections:
        # Each table is fitted twice: first without covariates, then with
        # Sex, Education and Age passed as covariates.
        for with_covariates in (False, True):
            print(("\n\n\n" if with_covariates else "\n\n") + banner_rule)
            print(section_title)
            if with_covariates:
                print("   Covariates: Sex, Education, Age ")
            print(banner_rule + "\n")

            model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
                df_masked=section_df,
                df_meta=df_meta,
                target_variable=target_col,
                # A fresh list per call, as in the original copy-pasted calls
                covariates=["Sex", "Education", "Age"] if with_covariates else None,
                y_log_transform=False,
                plot_flag=False,
                save_path=None,
            )


#### Yeo Networks

In [None]:
# Column predicted by the cell below. The bare name on the last line makes
# the notebook display the current value of the plotting flag.
target_col = 'MMSE'
plot_flag_regression

In [None]:
# main_regression for target_col on the network table df_networks_no_thr,
# no covariates and no save path.
# NOTE(review): this call repeats an earlier cell of this notebook with the
# same arguments -- confirm whether it is still needed.
model, y_pred, residuals, subject_errors, group_rmse_stats = main_regression(
    df_masked=df_networks_no_thr,
    df_meta=df_meta,
    target_variable=target_col,
    covariates=None,
    y_log_transform=False,
    plot_flag=plot_flag_regression,
    save_path=None,
    title_prefix="network_gm_mask",
)