In [3]:
import pandas as pd
import numpy as np
import scipy.io
from sklearn.linear_model import LassoCV
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score, mean_squared_error
from scipy.stats import pearsonr
import matplotlib.pyplot as plt

def lasso_cv(
    fc_type,
    fc_path='/home/chanae/thesis/residual_matrices/residual_matrices.mat',
    scores_path='/home/chanae/Data__chanae/HCP_cognitive_scores.xlsx',
    first_test_row=820,
    n_splits=10,
):
    """Cross-validate a Lasso regression from an FC matrix to the cognitive score.

    The scores spreadsheet is read from ``scores_path`` and only the rows from
    position ``first_test_row`` onwards are kept; the feature matrix is the
    variable named ``fc_type`` inside the MATLAB file at ``fc_path``. The rows
    of the feature matrix must correspond one-to-one, in order, to the retained
    score rows (only the row *count* can be verified here).

    For each outer fold a ``LassoCV`` model (inner 5-fold search for the
    regularisation strength) is fitted on the training split, and the Pearson
    correlation and mean squared error between true and predicted scores are
    computed on the held-out split.

    Parameters
    ----------
    fc_type : str
        Name of the variable to read from the ``.mat`` file (e.g. ``'out'`` or
        ``'predicted'``).
    fc_path : str, optional
        Path of the ``.mat`` file holding the feature matrices.
    scores_path : str, optional
        Path of the Excel file with a ``'cognitive_score'`` column.
    first_test_row : int, optional
        Positional index of the first score row to keep.
    n_splits : int, optional
        Number of outer cross-validation folds (must be at least 2).

    Returns
    -------
    lasso : LassoCV
        The model fitted during the LAST outer fold only (not a model refitted
        on all the data).
    avg_corr : float
        Mean of the per-fold Pearson correlations.
    avg_std : float
        Standard deviation of the per-fold Pearson correlations.
    avg_error : float
        Mean of the per-fold mean squared errors.

    Raises
    ------
    ValueError
        If ``n_splits`` is smaller than 2, if the feature matrix and the score
        vector have a different number of rows, or if the scores contain NaN.
    """
    # Validate cheap arguments before touching any (potentially large) file.
    if n_splits < 2:
        raise ValueError(f"n_splits must be at least 2, got {n_splits}.")

    # Load cognitive scores and keep only the rows from `first_test_row` on
    scores_df = pd.read_excel(scores_path).iloc[first_test_row:]
    y = scores_df['cognitive_score'].values

    # Load functional connectivity data
    X = scipy.io.loadmat(fc_path)[fc_type]

    # Fold indices are derived from X alone, so a longer y would be indexed
    # without error and silently pair targets with the wrong samples.
    if X.shape[0] != y.shape[0]:
        raise ValueError(
            f"'{fc_type}' has {X.shape[0]} rows but {y.shape[0]} cognitive "
            f"scores were loaded; features and targets must be aligned row by row."
        )

    # Every sample lands in a training or a test split of the first fold, so
    # NaN targets cannot be used; fail once, up front, with a clear message.
    if np.isnan(y).any():
        raise ValueError("The 'cognitive_score' column contains NaN values.")

    # Outer K-fold cross validation (fixed seed for reproducible splits)
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    correlations = []
    errors = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Internal cross-validation to optimize the Lasso parameter
        lasso = LassoCV(cv=5, random_state=0, max_iter=60000).fit(X_train, y_train)

        # Make predictions using the test set
        y_pred = lasso.predict(X_test)

        # A constant vector has no defined Pearson correlation; the warning
        # flags folds whose correlation may come back as NaN.
        if np.std(y_test) == 0 or np.std(y_pred) == 0:
            print("Warning: y_test or y_pred has zero standard deviation.")

        # Compute Pearson correlation
        corr, _ = pearsonr(y_test, y_pred)
        correlations.append(corr)

        # Compute mean squared error
        errors.append(mean_squared_error(y_test, y_pred))

    # Mean / spread of the correlations and mean error over the outer folds
    avg_corr = np.mean(correlations)
    avg_std = np.std(correlations)
    avg_error = np.mean(errors)

    return lasso, avg_corr, avg_std, avg_error

In [4]:
# Cross-validate the Lasso on the 'out' variable of the FC .mat file; the
# results are reported under the "eFC" label.
# lasso_cv returns the model fitted in the last fold, the mean and standard
# deviation of the per-fold Pearson correlations, and the mean per-fold MSE.
efc_lasso, efc_corr, efc_std, efc_error = lasso_cv('out')
print(f"eFC: \nAverage Pearson correlation: {efc_corr} (std: {efc_std}) \nAverage mean squared error: {efc_error}")

eFC: 
Average Pearson correlation: 0.30022905191843346 (std: 0.19841434211710898) 
Average mean squared error: 140.06942930864454


In [5]:
# Cross-validate the Lasso on the 'predicted' variable of the FC .mat file;
# the results are reported under the "pFC" label.
# lasso_cv returns the model fitted in the last fold, the mean and standard
# deviation of the per-fold Pearson correlations, and the mean per-fold MSE.
pfc_lasso, pfc_corr, pfc_std, pfc_error = lasso_cv('predicted')
print(f"pFC: \nAverage Pearson correlation: {pfc_corr} (std: {pfc_std}) \nAverage mean squared error: {pfc_error}")

pFC: 
Average Pearson correlation: 0.2307423373422946 (std: 0.1699288361585388) 
Average mean squared error: 147.8366061846072
