In [None]:
import csv
import os
import glob
import datetime
import pandas as pd
import numpy as np
import scipy.stats as ss
import time
from datetime import date, datetime

from scipy import stats
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter, MultipleLocator, AutoMinorLocator, MaxNLocator, FixedLocator
import sklearn
import seaborn as sns
import textwrap 
import warnings
import skbold
from scipy.stats import pearsonr
from skbold.preproc import ConfoundRegressor
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


import ukbiobank.utils.utils
from ukbiobank.utils import fieldNamesToIds
from ukbiobank.utils import loadCsv
from ukbiobank.utils import addFields
from ukbiobank.utils.utils import getFieldnames
from ukbiobank.utils.utils import fieldIdsToNames

## Resting State

1. Amplitudes
2. Nodes timeseries => 
- full correlation matrix
- partial correlation matrix
- tangent transformation

The 25-component set contains 21 'good' components, and the 100-component set contains 55.

# 1. Get RS amplitudes

## 21 IC

In [None]:
# Folder holding the amplitude text files (path suffix 21-25754).
path = "/Resting_State/rsfMRI_matrices/rfMRI-amplitudes-21-25754/"
# Switch the working directory to that folder and collect every .txt file in it.
os.chdir(path)
my_files = glob.glob(path + "*.txt")

# Keep only the base name of each match (directory part stripped).
file_names = [os.path.basename(full_path) for full_path in my_files]

# Show the collected file names.
print(file_names)

In [None]:
# One-column table of the file names, plus a copy in ascending (string) order
# with a fresh 0..N-1 index.
file_names_df = pd.DataFrame(file_names)
file_names_df.columns = ['filenames']
file_names_df_sorted = (
    file_names_df
    .sort_values('filenames')
    .reset_index(drop=True)
)
file_names_df_sorted

In [None]:
# The text before the first underscore of each file name is taken as the ID.
eid_list = [name.partition("_")[0] for name in file_names]

# IDs as a one-column table; the IDs are strings here, so the sort is lexicographic.
eid = pd.DataFrame(eid_list)
eid.columns = ['ID']
eid_sorted = eid.sort_values('ID').reset_index(drop=True)
# Nested list: one single-element list per row of the sorted table.
eid_sorted_list = eid_sorted.values.tolist()

Save file names

In [None]:
# Write the four bookkeeping tables (file names and IDs, each unsorted and
# sorted) to CSV without the row index.
for table, destination in (
    # full file names, original order
    (file_names_df, "/Resting_State/rsfMRI_matrices/amplitudes_tables/21/rfMRI-amplitudes-21-25754_file_names_full_not_sorted.csv"),
    # full file names, ascending order
    (file_names_df_sorted, "/Resting_State/rsfMRI_matrices/amplitudes_tables/21/rfMRI-amplitudes-21-25754_file_names_full_sorted.csv"),
    # IDs, original order
    (eid, "/Resting_State/rsfMRI_matrices/amplitudes_tables/21/rfMRI-amplitudes-21-25754_file_names_ID_dataframe_not_sorted.csv"),
    # IDs, ascending order
    (eid_sorted, "/Resting_State/rsfMRI_matrices/amplitudes_tables/21/rfMRI-amplitudes-21-25754_file_names_ID_dataframe_sorted.csv"),
):
    table.to_csv(destination, index=False)

Read one participant to see what's inside

In [None]:
# Load a single participant file to inspect its layout.
sample_path = "/Resting_State/rsfMRI_matrices/rfMRI-amplitudes-21-25754/5440077_25754_2_0.txt"
p1 = pd.read_csv(sample_path, sep=" ", header=None)
file_name = os.path.basename(sample_path)
prefix = "Component"
# Label the columns "Component 1 amplitude", "Component 2 amplitude", ...
p1.columns = [f"{prefix} {position} amplitude" for position in range(1, p1.shape[1] + 1)]
# Use the file name up to its first dot as the row label.
p1.index = [file_name.partition(".")[0]]
p1

### Assemble amplitude files into one data frame

In [None]:
folder_path = "/Resting_State/rsfMRI_matrices/rfMRI-amplitudes-21-25754/"

# Split the sorted file list into consecutive chunks of 5000 names
# (the last chunk takes whatever remains).
sorted_names = file_names_df_sorted["filenames"]
file_names_5000 = sorted_names.iloc[:5000]
file_names_5001_10000 = sorted_names.iloc[5000:10000]
file_names_10001_15000 = sorted_names.iloc[10000:15000]
file_names_15001_20000 = sorted_names.iloc[15000:20000]
file_names_20001_25000 = sorted_names.iloc[20000:25000]
file_names_25001_30000 = sorted_names.iloc[25000:30000]
file_names_30001_35000 = sorted_names.iloc[30000:35000]
file_names_35001_40000 = sorted_names.iloc[35000:40000]
file_names_40001_45000 = sorted_names.iloc[40000:45000]
file_names_45001_49239 = sorted_names.iloc[45000:]

# Read every file of the first chunk into its own one-row table.
participants = []
for txt_name in file_names_5000:
    row = pd.read_csv(os.path.join(folder_path, txt_name), sep="\\s+", header=None)
    # Columns become "Component 1 Amplitude", "Component 2 Amplitude", ...
    row.columns = [f"Component {position} Amplitude" for position in range(1, row.shape[1] + 1)]
    # The row label is the file name up to its first dot.
    row.index = [txt_name.partition(".")[0]]
    participants.append(row)

# Stack the one-row tables on top of each other (axis=0 concatenates rows).
amplitudes_5000 = pd.concat(participants, axis=0)

# Save the chunk; the row labels are written as the first, unnamed CSV column.
amplitudes_5000.to_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_5000.csv")

When saving a CSV, pass `index=False` to `to_csv` (or read the file back with `index_col=0`) so that the index does not reappear as an extra unnamed column.

In [None]:
# Positional index 39999 is the 40,000th entry of the sorted file list.
subject_id = file_names_df_sorted["filenames"].iloc[39999]

# Report whether that entry is the expected file.
is_expected_file = subject_id == "5080093_25754_2_0.txt"
print("The subject is in the row" if is_expected_file else "The subject is not in the row")


In [None]:
# Read each saved chunk table and give its first column (read by pandas as
# "Unnamed: 0") the name "ID".
id_column = {"Unnamed: 0": "ID"}

amplitudes_5000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_5000.csv").rename(columns=id_column)
amplitudes_10000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_10000.csv").rename(columns=id_column)
amplitudes_15000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_15000.csv").rename(columns=id_column)
amplitudes_20000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_20000.csv").rename(columns=id_column)
amplitudes_25000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_25000.csv").rename(columns=id_column)
amplitudes_30000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_30000.csv").rename(columns=id_column)
amplitudes_35000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_35000.csv").rename(columns=id_column)
amplitudes_40000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_40000.csv").rename(columns=id_column)
amplitudes_45000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_45000.csv").rename(columns=id_column)
amplitudes_49239 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_49239.csv").rename(columns=id_column)

In [None]:
# Print the (rows, columns) shape of every chunk table on one line.
chunk_tables = (
    amplitudes_5000, amplitudes_10000, amplitudes_15000, amplitudes_20000, amplitudes_25000,
    amplitudes_30000, amplitudes_35000, amplitudes_40000, amplitudes_45000, amplitudes_49239,
)
print(*[table.shape for table in chunk_tables])

Read in and concatenate files

In [None]:
# Re-read the ten saved chunk tables.
#
# The first CSV column (written from the row labels when the chunk was saved)
# is read as the index and then converted into a regular "ID" column. Keeping
# the identifiers as a column matters: the concatenation that follows uses
# ignore_index=True, which would throw away identifiers held in the index, and
# the instance filter afterwards looks them up in the "ID" column.
#
# The last chunk is bound to `amplitudes_49239`, the name the concatenation
# cell actually uses (it was previously bound to the misspelled
# `amplitudes_49238`, so the concatenation silently picked up a stale table).
amplitudes_5000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_5000.csv", index_col=0).rename_axis("ID").reset_index()
amplitudes_10000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_10000.csv", index_col=0).rename_axis("ID").reset_index()
amplitudes_15000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_15000.csv", index_col=0).rename_axis("ID").reset_index()
amplitudes_20000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_20000.csv", index_col=0).rename_axis("ID").reset_index()
amplitudes_25000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_25000.csv", index_col=0).rename_axis("ID").reset_index()
amplitudes_30000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_30000.csv", index_col=0).rename_axis("ID").reset_index()
amplitudes_35000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_35000.csv", index_col=0).rename_axis("ID").reset_index()
amplitudes_40000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_40000.csv", index_col=0).rename_axis("ID").reset_index()
amplitudes_45000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_45000.csv", index_col=0).rename_axis("ID").reset_index()
amplitudes_49239 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_49239.csv", index_col=0).rename_axis("ID").reset_index()
# Alias kept only so that anything still referring to the old, misspelled name keeps working.
amplitudes_49238 = amplitudes_49239

In [None]:
# Stack the ten chunk tables row-wise into one table with a fresh 0..N-1 index.
amplitude_chunks = [
    amplitudes_5000, amplitudes_10000, amplitudes_15000, amplitudes_20000, amplitudes_25000,
    amplitudes_30000, amplitudes_35000, amplitudes_40000, amplitudes_45000, amplitudes_49239,
]
amplitudes_full = pd.concat(amplitude_chunks, axis=0, ignore_index=True)
# Saved with the default index=True, so the row index is written as the first column.
amplitudes_full.to_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_full.csv")

Exclude Instance 3

In [None]:
# Drop every row whose ID contains the instance-3 tag.
has_instance_3_tag = amplitudes_full["ID"].str.contains('_25754_3_0')
amplitudes_full_instance_2 = amplitudes_full[~has_instance_3_tag].reset_index(drop=True)

# Reduce each ID to the integer that precedes its first underscore.
amplitudes_fin = amplitudes_full_instance_2.copy()
id_tokens = amplitudes_fin['ID'].str.split("_")
amplitudes_fin['ID'] = id_tokens.str[0].astype(int)

# Save (default index=True: the row index is written as the first column).
amplitudes_fin.to_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_21_fin_CLEAN.csv")
amplitudes_21 = amplitudes_fin.copy()

## 55 IC

In [None]:
# Folder holding the amplitude text files (path suffix 55-25755).
path = "/Resting_State/rsfMRI_matrices/rfMRI-amplitudes-55-25755/"
# Switch the working directory to that folder and collect every .txt file in it.
os.chdir(path)
my_files = glob.glob(path + "*.txt")

# Base names only (directory part stripped).
file_names_55 = [os.path.basename(full_path) for full_path in my_files]

# Show the collected file names.
print(file_names_55)

# File names as a one-column table, plus a copy in ascending (string) order.
file_names_55_df = pd.DataFrame(file_names_55)
file_names_55_df.columns = ['filenames']
file_names_55_df_sorted = file_names_55_df.sort_values('filenames').reset_index(drop=True)

# The text before the first underscore of each file name is taken as the ID.
eid_list = [name.partition("_")[0] for name in file_names_55]

# IDs as a one-column table; they are strings here, so the sort is lexicographic.
eid = pd.DataFrame(eid_list)
eid.columns = ['ID']
eid_sorted = eid.sort_values('ID').reset_index(drop=True)
# Nested list: one single-element list per row of the sorted table.
eid_sorted_list = eid_sorted.values.tolist()
eid_sorted_list

In [None]:
# Write the four bookkeeping tables (file names and IDs, each unsorted and
# sorted) to CSV. The default index=True is used, so the row index is written
# as the first column of every file.
for table, destination in (
    # full file names, original order
    (file_names_55_df, "/Resting_State/rsfMRI_matrices/amplitudes_tables/55/rfMRI-amplitudes-55-25755_file_names_full_not_sorted.csv"),
    # full file names, ascending order
    (file_names_55_df_sorted, "/Resting_State/rsfMRI_matrices/amplitudes_tables/55/rfMRI-amplitudes-55-25755_file_names_full_sorted.csv"),
    # IDs, original order
    (eid, "/Resting_State/rsfMRI_matrices/amplitudes_tables/55/rfMRI-amplitudes-55-25755_file_names_ID_dataframe_not_sorted.csv"),
    # IDs, ascending order
    (eid_sorted, "/Resting_State/rsfMRI_matrices/amplitudes_tables/55/rfMRI-amplitudes-55-25755_file_names_ID_dataframe_sorted.csv"),
):
    table.to_csv(destination)

### Assemble amplitude files into one data frame

In [None]:
folder_path = "/Resting_State/rsfMRI_matrices/rfMRI-amplitudes-55-25755/"

# Split the sorted file list into consecutive chunks of 5000 names
# (the last chunk takes whatever remains).
sorted_names_55 = file_names_55_df_sorted["filenames"]
file_names_5000 = sorted_names_55.iloc[:5000]
file_names_10000 = sorted_names_55.iloc[5000:10000]
file_names_15000 = sorted_names_55.iloc[10000:15000]
file_names_20000 = sorted_names_55.iloc[15000:20000]
file_names_25000 = sorted_names_55.iloc[20000:25000]
file_names_30000 = sorted_names_55.iloc[25000:30000]
file_names_35000 = sorted_names_55.iloc[30000:35000]
file_names_40000 = sorted_names_55.iloc[35000:40000]
file_names_45000 = sorted_names_55.iloc[40000:45000]
file_names_49239 = sorted_names_55.iloc[45000:]

# Read every file of the first chunk into its own one-row table.
participants = []
for txt_name in file_names_5000:
    row = pd.read_csv(os.path.join(folder_path, txt_name), sep="\\s+", header=None)
    # Columns become "Component 1 Amplitude (55 IC)", "Component 2 Amplitude (55 IC)", ...
    row.columns = [f"Component {position} Amplitude (55 IC)" for position in range(1, row.shape[1] + 1)]
    # The row label is the file name up to its first dot.
    row.index = [txt_name.partition(".")[0]]
    participants.append(row)

# Stack the one-row tables on top of each other (axis=0 concatenates rows).
amplitudes_5000 = pd.concat(participants, axis=0)

# Save the chunk; the row labels are written as the first, unnamed CSV column.
amplitudes_5000.to_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/55/amplitudes_5000.csv")


In [None]:
# Read each saved 55-IC chunk table and give its first column (read by pandas
# as "Unnamed: 0") the name "ID".
id_column = {"Unnamed: 0": "ID"}

amplitudes_5000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/55/amplitudes_5000.csv").rename(columns=id_column)
amplitudes_10000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/55/amplitudes_10000.csv").rename(columns=id_column)
amplitudes_15000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/55/amplitudes_15000.csv").rename(columns=id_column)
amplitudes_20000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/55/amplitudes_20000.csv").rename(columns=id_column)
amplitudes_25000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/55/amplitudes_25000.csv").rename(columns=id_column)
amplitudes_30000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/55/amplitudes_30000.csv").rename(columns=id_column)
amplitudes_35000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/55/amplitudes_35000.csv").rename(columns=id_column)
amplitudes_40000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/55/amplitudes_40000.csv").rename(columns=id_column)
amplitudes_45000 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/55/amplitudes_45000.csv").rename(columns=id_column)
amplitudes_49239 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/55/amplitudes_49239.csv").rename(columns=id_column)

In [None]:
# Print the (rows, columns) shape of every chunk table on one line.
chunk_tables = (
    amplitudes_5000, amplitudes_10000, amplitudes_15000, amplitudes_20000, amplitudes_25000,
    amplitudes_30000, amplitudes_35000, amplitudes_40000, amplitudes_45000, amplitudes_49239,
)
print(*[table.shape for table in chunk_tables])

In [None]:
# Stack the ten chunk tables row-wise into one table with a fresh 0..N-1 index.
amplitude_chunks_55 = [
    amplitudes_5000, amplitudes_10000, amplitudes_15000, amplitudes_20000, amplitudes_25000,
    amplitudes_30000, amplitudes_35000, amplitudes_40000, amplitudes_45000, amplitudes_49239,
]
amplitudes_full_55 = pd.concat(amplitude_chunks_55, axis=0, ignore_index=True)
# Saved with the default index=True, so the row index is written as the first column.
amplitudes_full_55.to_csv("/Resting_State/rsfMRI_matrices/amplitudes_full_55.csv")

In [None]:
# Drop every row whose ID contains the instance-3 tag.
has_instance_3_tag = amplitudes_full_55["ID"].str.contains('_25755_3_0')
amplitudes_full_instance_2_55 = amplitudes_full_55[~has_instance_3_tag].reset_index(drop=True)

# Reduce each ID to the integer that precedes its first underscore.
amplitudes_fin_55 = amplitudes_full_instance_2_55.copy()
id_tokens = amplitudes_fin_55['ID'].str.split("_")
amplitudes_fin_55['ID'] = id_tokens.str[0].astype(int)

# Save without the row index and keep a working copy.
amplitudes_fin_55.to_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/55/amplitudes_55_fin_CLEAN.csv", index=False)
amplitudes_55 = amplitudes_fin_55.copy()

# Get original timeseries files

## 21 IC (out of 25)

In [None]:
# Parent folders that each contain one sub-folder per subject.
folder_paths = [
    "/Resting_State/rsfMRI_bulk_main/1-4000/unzipped/",
    "/Resting_State/rsfMRI_bulk_main/5000-14000/unzipped/",
    "/Resting_State/rsfMRI_bulk_main/14000_24000_split/unzipped/",
    "/Resting_State/rsfMRI_bulk_main/14000-24000/unzipped/",
    "/Resting_State/rsfMRI_bulk_main/24000-34000/unzipped/",
    "/Resting_State/rsfMRI_bulk_main/24000-34000_split/unzipped/",
    "/Resting_State/rsfMRI_bulk_main/34000-44000/unzipped/",
    "/Resting_State/rsfMRI_bulk_main/34000-44000_split/unzipped/",
    "/Resting_State/rsfMRI_bulk_main/44000-54413/unzipped/",
]

missing_file_count = 0   # subject folders without the timeseries file
timeseries_list = []     # loaded arrays, one per subject folder
index_list = []          # subject-folder names, aligned with timeseries_list
skipped_folders = []     # folders in which the file was not found

for folder_path in folder_paths:
    for subject_folder in sorted(os.listdir(folder_path)):
        subject_dir = os.path.join(folder_path, subject_folder)
        # Some subject folders nest their contents one level down, in "fMRI".
        nested_dir = os.path.join(subject_dir, 'fMRI')
        data_dir = nested_dir if os.path.isdir(nested_dir) else subject_dir

        timeseries_file = os.path.join(data_dir, "rfMRI_25.dr", "dr_stage1.txt")

        if os.path.exists(timeseries_file):
            timeseries_list.append(np.loadtxt(timeseries_file))
            index_list.append(subject_folder)
        else:
            # Record the miss and move on to the next subject.
            missing_file_count += 1
            skipped_folders.append(data_dir)

print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

In [None]:
# Table of the subject-folder names collected in `index_list`.
index_list_df = pd.DataFrame(index_list)
index_list_df.columns = ['ID']
# sort_values returns a new frame, so the result has to be kept to have any
# effect. It is stored under its own name so that `index_list_df` stays in the
# same order as `timeseries_list`.
index_list_df_sorted = index_list_df.sort_values(by='ID').reset_index(drop=True)

# Keep only instance-2 scans.
# The second-to-last underscore-separated token of the folder name is compared
# with '2' (presumably the instance number in "<id>_<field>_<instance>_<array>"
# -- TODO confirm the folder naming scheme).
timeseries_instance_2 = []
index_instance_2 = []
for folder_name, timeseries in zip(index_list, timeseries_list):
    name_parts = folder_name.split("_")
    # Names with fewer than two tokens cannot carry the tag; skip them
    # instead of failing with an IndexError.
    if len(name_parts) >= 2 and name_parts[-2] == '2':
        timeseries_instance_2.append(timeseries)
        index_instance_2.append(folder_name)

## 55 IC (out of 100)

In [None]:
# Parent folders that each contain one sub-folder per subject.
folder_paths = [
    "/Resting_State/rsfMRI_bulk_main/1-4000/unzipped/",
    "/Resting_State/rsfMRI_bulk_main/5000-14000/unzipped/",
    "/Resting_State/rsfMRI_bulk_main/14000_24000_split/unzipped/",
    "/Resting_State/rsfMRI_bulk_main/14000-24000/unzipped/",
    "/Resting_State/rsfMRI_bulk_main/24000-34000/unzipped/",
    "/Resting_State/rsfMRI_bulk_main/24000-34000_split/unzipped/",
    "/Resting_State/rsfMRI_bulk_main/34000-44000/unzipped/",
    "/Resting_State/rsfMRI_bulk_main/34000-44000_split/unzipped/",
    "/Resting_State/rsfMRI_bulk_main/44000-54413/unzipped/",
]

missing_file_count = 0    # subject folders without the timeseries file
timeseries_100_list = []  # loaded arrays, one per subject folder
index_100_list = []       # subject-folder names, aligned with timeseries_100_list
skipped_folders = []      # folders in which the file was not found

for folder_path in folder_paths:
    for subject_folder in sorted(os.listdir(folder_path)):
        subject_dir = os.path.join(folder_path, subject_folder)
        # Some subject folders nest their contents one level down, in "fMRI".
        nested_dir = os.path.join(subject_dir, 'fMRI')
        data_dir = nested_dir if os.path.isdir(nested_dir) else subject_dir

        timeseries_file = os.path.join(data_dir, "rfMRI_100.dr", "dr_stage1.txt")

        if os.path.exists(timeseries_file):
            timeseries_100_list.append(np.loadtxt(timeseries_file))
            index_100_list.append(subject_folder)
        else:
            # Record the miss and move on to the next subject.
            missing_file_count += 1
            skipped_folders.append(data_dir)

print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

In [None]:
# Table of the subject-folder names collected in `index_100_list`.
index_list_100_df = pd.DataFrame(index_100_list)
index_list_100_df.columns = ['ID']
# sort_values returns a new frame, so the result has to be kept to have any
# effect. It is stored under its own name so that `index_list_100_df` stays in
# the same order as `timeseries_100_list`.
index_list_100_df_sorted = index_list_100_df.sort_values(by='ID').reset_index(drop=True)

# Keep only instance-2 scans.
# The second-to-last underscore-separated token of the folder name is compared
# with '2' (presumably the instance number in "<id>_<field>_<instance>_<array>"
# -- TODO confirm the folder naming scheme).
timeseries_100_instance_2 = []
index_100_instance_2 = []
for folder_name, timeseries in zip(index_100_list, timeseries_100_list):
    name_parts = folder_name.split("_")
    # Names with fewer than two tokens cannot carry the tag; skip them
    # instead of failing with an IndexError.
    if len(name_parts) >= 2 and name_parts[-2] == '2':
        timeseries_100_instance_2.append(timeseries)
        index_100_instance_2.append(folder_name)

# **Load the correlation matrices provided by UK Biobank**

## 1. 21 IC, FULL

First, get filenames in the folder

In [None]:
# Folder holding the full-correlation text files (path suffix 25750).
path = "/Resting_State/rsfMRI_matrices/rfMRI-full_CM-25750/"
# Switch the working directory to that folder and collect every .txt file in it.
os.chdir(path)
my_files = glob.glob(path + "*.txt")

# Base names only (directory part stripped).
file_names_21 = [os.path.basename(full_path) for full_path in my_files]

# Show the collected file names.
print(file_names_21)

# File names as a one-column table, plus a copy in ascending (string) order.
file_names_21_df = pd.DataFrame(file_names_21)
file_names_21_df.columns = ['filenames']
file_names_21_df_sorted = file_names_21_df.sort_values('filenames').reset_index(drop=True)

# The text before the first underscore of each file name is taken as the ID.
eid_list = [name.partition("_")[0] for name in file_names_21]

# IDs as a one-column table; they are strings here, so the sort is lexicographic.
eid = pd.DataFrame(eid_list)
eid.columns = ['ID']
eid_sorted = eid.sort_values('ID').reset_index(drop=True)
# Nested list: one single-element list per row of the sorted table.
eid_sorted_list = eid_sorted.values.tolist()
eid_sorted_list

Read each portion of the data

In [None]:
folder_path = "/Resting_State/rsfMRI_matrices/rfMRI-full_CM-25750/"

# Split the sorted file list into consecutive chunks of 5000 names
# (the last chunk takes whatever remains).
file_names_5000 = file_names_21_df_sorted["filenames"][0:5000]
file_names_10000 = file_names_21_df_sorted["filenames"][5000:10000]
file_names_15000 = file_names_21_df_sorted["filenames"][10000:15000]
file_names_20000 = file_names_21_df_sorted["filenames"][15000:20000]
file_names_25000 = file_names_21_df_sorted["filenames"][20000:25000]
file_names_30000 = file_names_21_df_sorted["filenames"][25000:30000]
file_names_35000 = file_names_21_df_sorted["filenames"][30000:35000]
file_names_40000 = file_names_21_df_sorted["filenames"][35000:40000]
file_names_45000 = file_names_21_df_sorted["filenames"][40000:45000]
file_names_49239 = file_names_21_df_sorted["filenames"][45000:]

# Read every file of the last chunk into its own one-row table.
participants = []
for file_name in file_names_49239:
    # Full path of the text file.
    file_path = os.path.join(folder_path, file_name)
    # Whitespace-separated values, no header row.
    p = pd.read_csv(file_path, sep="\\s+", header=None)
    # Columns become "Component 1 Full corr. (21 IC)", "Component 2 Full corr. (21 IC)", ...
    prefix = "Component"
    p.columns = [f'{prefix} {i+1} Full corr. (21 IC)' for i in range(p.shape[1])]
    # The row label is the file name up to its first dot; it is the only
    # place where the scan identifier is kept.
    p.index = [file_name.split(".")[0]]
    participants.append(p)

# Stack the one-row tables on top of each other (axis=0 concatenates rows).
fcorr_49239 = pd.concat(participants, axis=0)

# Save the chunk WITH its row labels, written as an "ID" column. Writing with
# index=False would drop the identifiers, and the later instance filter on the
# merged table needs that "ID" column.
fcorr_49239.to_csv("/Resting_State/rsfMRI_matrices/full_corr_tables/21/fcorr_49239.csv", index=True, index_label="ID")

Save & check

In [None]:
# Save every chunk table, writing its row labels as a column named "ID".
for chunk, destination in (
    (fcorr_5000, "/Resting_State/rsfMRI_matrices/full_corr_tables/21/fcorr_5000.csv"),
    (fcorr_10000, "/Resting_State/rsfMRI_matrices/full_corr_tables/21/fcorr_10000.csv"),
    (fcorr_15000, "/Resting_State/rsfMRI_matrices/full_corr_tables/21/fcorr_15000.csv"),
    (fcorr_20000, "/Resting_State/rsfMRI_matrices/full_corr_tables/21/fcorr_20000.csv"),
    (fcorr_25000, "/Resting_State/rsfMRI_matrices/full_corr_tables/21/fcorr_25000.csv"),
    (fcorr_30000, "/Resting_State/rsfMRI_matrices/full_corr_tables/21/fcorr_30000.csv"),
    (fcorr_35000, "/Resting_State/rsfMRI_matrices/full_corr_tables/21/fcorr_35000.csv"),
    (fcorr_40000, "/Resting_State/rsfMRI_matrices/full_corr_tables/21/fcorr_40000.csv"),
    (fcorr_45000, "/Resting_State/rsfMRI_matrices/full_corr_tables/21/fcorr_45000.csv"),
    (fcorr_49239, "/Resting_State/rsfMRI_matrices/full_corr_tables/21/fcorr_49239.csv"),
):
    chunk.to_csv(destination, index=True, index_label="ID")

In [None]:
# Read the ten saved chunk tables back from disk, one variable per chunk.
fcorr_21_dir = "/Resting_State/rsfMRI_matrices/full_corr_tables/21/"
(
    fullcorr_5000, fullcorr_10000, fullcorr_15000, fullcorr_20000, fullcorr_25000,
    fullcorr_30000, fullcorr_35000, fullcorr_40000, fullcorr_45000, fullcorr_49239,
) = (
    pd.read_csv(f"{fcorr_21_dir}fcorr_{label}.csv")
    for label in ("5000", "10000", "15000", "20000", "25000",
                  "30000", "35000", "40000", "45000", "49239")
)

Merge sub-tables

In [None]:
# Stack the ten chunk tables row-wise; each chunk keeps its own row index here.
fullcorr_chunks_21 = [
    fullcorr_5000, fullcorr_10000, fullcorr_15000, fullcorr_20000, fullcorr_25000,
    fullcorr_30000, fullcorr_35000, fullcorr_40000, fullcorr_45000, fullcorr_49239,
]
fcorr_21 = pd.concat(fullcorr_chunks_21, axis=0)
# Save the merged table without the row index.
fcorr_21.to_csv("/Resting_State/rsfMRI_matrices/full_corr_tables/21/fcorr_21_FULL.csv", index=False)

Exclude Instance 3 and leave only ID

In [None]:
# Drop every row whose ID contains the instance-3 tag.
has_instance_3_tag = fcorr_21["ID"].str.contains('_25750_3_0')
fcorr_21_instance_2 = fcorr_21[~has_instance_3_tag].reset_index(drop=True)

# Reduce each ID to the integer that precedes its first underscore.
fcorr_21_fin = fcorr_21_instance_2.copy()
id_tokens = fcorr_21_fin['ID'].str.split("_")
fcorr_21_fin['ID'] = id_tokens.str[0].astype(int)

# Save without the row index and keep a working copy.
fcorr_21_fin.to_csv("/Resting_State/rsfMRI_matrices/full_corr_tables/21/fcorr_21_instance_2_FINAL.csv", index=False)
full_correlation_21 = fcorr_21_fin.copy()

## 2. 55 IC, FULL

Get filenames

In [None]:
# Folder holding the full-correlation text files (path suffix 25751).
path = "/Resting_State/rsfMRI_matrices/rfMRI-full_CM-25751/"
# Switch the working directory to that folder and collect every .txt file in it.
os.chdir(path)
my_files = glob.glob(path + "*.txt")

# Base names only (directory part stripped).
file_names_55 = [os.path.basename(full_path) for full_path in my_files]

# Show the collected file names.
print(file_names_55)

# File names as a one-column table, plus a copy in ascending (string) order.
file_names_55_df = pd.DataFrame(file_names_55)
file_names_55_df.columns = ['filenames']
file_names_55_df_sorted = file_names_55_df.sort_values('filenames').reset_index(drop=True)

# The text before the first underscore of each file name is taken as the ID.
eid_list = [name.partition("_")[0] for name in file_names_55]

# IDs as a one-column table; they are strings here, so the sort is lexicographic.
eid = pd.DataFrame(eid_list)
eid.columns = ['ID']
eid_sorted = eid.sort_values('ID').reset_index(drop=True)
# Nested list: one single-element list per row of the sorted table.
eid_sorted_list = eid_sorted.values.tolist()
eid_sorted_list

Upload each portion of the data

In [None]:
folder_path = "/Resting_State/rsfMRI_matrices/rfMRI-full_CM-25751/"

# Split the sorted file list into consecutive chunks of 5000 names
# (the last chunk takes whatever remains).
sorted_names_55 = file_names_55_df_sorted["filenames"]
file_names_5000 = sorted_names_55.iloc[:5000]
file_names_10000 = sorted_names_55.iloc[5000:10000]
file_names_15000 = sorted_names_55.iloc[10000:15000]
file_names_20000 = sorted_names_55.iloc[15000:20000]
file_names_25000 = sorted_names_55.iloc[20000:25000]
file_names_30000 = sorted_names_55.iloc[25000:30000]
file_names_35000 = sorted_names_55.iloc[30000:35000]
file_names_40000 = sorted_names_55.iloc[35000:40000]
file_names_45000 = sorted_names_55.iloc[40000:45000]
file_names_49239 = sorted_names_55.iloc[45000:]

# Read every file of the last chunk into its own one-row table.
participants = []
for txt_name in file_names_49239:
    row = pd.read_csv(os.path.join(folder_path, txt_name), sep="\\s+", header=None)
    # Columns become "Component 1 Full corr. (55 IC)", "Component 2 Full corr. (55 IC)", ...
    row.columns = [f"Component {position} Full corr. (55 IC)" for position in range(1, row.shape[1] + 1)]
    # The row label is the file name up to its first dot.
    row.index = [txt_name.partition(".")[0]]
    participants.append(row)

# Stack the one-row tables on top of each other (axis=0 concatenates rows).
fcorr_55_49239 = pd.concat(participants, axis=0)

# Save the chunk, writing the row labels as a column named "ID".
fcorr_55_49239.to_csv("/Resting_State/rsfMRI_matrices/full_corr_tables/55/fcorr_55_49239.csv", index=True, index_label="ID")

In [None]:
# Read the ten saved 55-IC chunk tables back from disk, one variable per chunk.
fcorr_55_dir = "/Resting_State/rsfMRI_matrices/full_corr_tables/55/"
(
    fullcorr_55_5000, fullcorr_55_10000, fullcorr_55_15000, fullcorr_55_20000, fullcorr_55_25000,
    fullcorr_55_30000, fullcorr_55_35000, fullcorr_55_40000, fullcorr_55_45000, fullcorr_55_49239,
) = (
    pd.read_csv(f"{fcorr_55_dir}fcorr_55_{label}.csv")
    for label in ("5000", "10000", "15000", "20000", "25000",
                  "30000", "35000", "40000", "45000", "49239")
)

Merge sub-tables

In [None]:
# Stack the ten chunk tables row-wise; each chunk keeps its own row index here.
fullcorr_chunks_55 = [
    fullcorr_55_5000, fullcorr_55_10000, fullcorr_55_15000, fullcorr_55_20000, fullcorr_55_25000,
    fullcorr_55_30000, fullcorr_55_35000, fullcorr_55_40000, fullcorr_55_45000, fullcorr_55_49239,
]
fcorr_55 = pd.concat(fullcorr_chunks_55, axis=0)
# Save the merged table without the row index.
fcorr_55.to_csv("/Resting_State/rsfMRI_matrices/full_corr_tables/55/fcorr_55_FULL.csv", index=False)

Exclude Instance 3 and leave only ID

In [None]:
# Drop every row whose ID contains the instance-3 tag.
has_instance_3_tag = fcorr_55["ID"].str.contains('_25751_3_0')
fcorr_55_instance_2 = fcorr_55[~has_instance_3_tag].reset_index(drop=True)

# Reduce each ID to the integer that precedes its first underscore.
fcorr_55_fin = fcorr_55_instance_2.copy()
id_tokens = fcorr_55_fin['ID'].str.split("_")
fcorr_55_fin['ID'] = id_tokens.str[0].astype(int)

# Save without the row index and keep a working copy.
fcorr_55_fin.to_csv("/Resting_State/rsfMRI_matrices/full_corr_tables/55/fcorr_55_instance_2_FINAL.csv", index=False)
full_correlation_55 = fcorr_55_fin.copy()

In [None]:
# Reload the saved instance-2 table from disk.
# NOTE: this rebinds `fcorr_55`, which until now held the merged table that
# still contained every instance; re-running the instance filter after this
# cell would operate on the already-cleaned table (integer IDs).
fcorr_55 = pd.read_csv('/Resting_State/rsfMRI_matrices/full_corr_tables/55/fcorr_55_instance_2_FINAL.csv')

## 3. 21 IC, PARTIAL

In [None]:
path = "/Resting_State/rsfMRI_matrices/rfMRI-partial-25752/"
# Make the data folder the working directory and collect every .txt file inside it
os.chdir(path)
my_files = glob.glob(path + "*.txt")

# Bare file names (directory part stripped)
file_names_21 = [os.path.basename(file) for file in my_files]

# Print the list of file names
print(file_names_21)

# One-column data frame of file names, sorted ascending with a fresh 0..n-1 index
file_names_21_df = pd.DataFrame(file_names_21)
file_names_21_df.columns = ['filenames']
file_names_21_df_sorted = file_names_21_df.sort_values(by='filenames', ascending=True).reset_index(drop=True)

# Get IDs: the part of each file name before the first underscore (kept as text)
eid_list = [file.split("_")[0] for file in file_names_21]

eid = pd.DataFrame(eid_list)
eid.columns = ['ID']
eid_sorted = eid.sort_values(by='ID', ascending=True).reset_index(drop=True)
eid_sorted_list = eid_sorted.values.tolist()

In [None]:
folder_path = "/Resting_State/rsfMRI_matrices/rfMRI-partial-25752/"
# Split the sorted file list into ten consecutive chunks; the last chunk takes whatever remains after 45000.
file_names_5000 = file_names_21_df_sorted["filenames"][0:5000]
file_names_10000 = file_names_21_df_sorted["filenames"][5000:10000]
file_names_15000 = file_names_21_df_sorted["filenames"][10000:15000]
file_names_20000 = file_names_21_df_sorted["filenames"][15000:20000]
file_names_25000 = file_names_21_df_sorted["filenames"][20000:25000]
file_names_30000 = file_names_21_df_sorted["filenames"][25000:30000]
file_names_35000 = file_names_21_df_sorted["filenames"][30000:35000]
file_names_40000 = file_names_21_df_sorted["filenames"][35000:40000]
file_names_45000 = file_names_21_df_sorted["filenames"][40000:45000]
file_names_49239 = file_names_21_df_sorted["filenames"][45000:]

# Chunk label -> file names of that chunk. The label is the suffix of the chunk CSV written below
# (pcorr_21_<label>.csv), which is the file name the loading cell expects.
chunks_21 = {
    "5000": file_names_5000,
    "10000": file_names_10000,
    "15000": file_names_15000,
    "20000": file_names_20000,
    "25000": file_names_25000,
    "30000": file_names_30000,
    "35000": file_names_35000,
    "40000": file_names_40000,
    "45000": file_names_45000,
    "49239": file_names_49239,
}

prefix = "Component"

# Every chunk is processed in this one cell, so all ten chunk CSVs exist after a single run
# (no hand-editing of the loop and the output name per chunk).
for chunk_label, chunk_file_names in chunks_21.items():
    participants = []

    # Loop over the file names of this chunk
    for file_name in chunk_file_names:
        # Create the full file path by joining the folder path and the file name
        file_path = os.path.join(folder_path, file_name)
        # Read the whitespace-separated values (no header row) into a data frame
        p = pd.read_csv(file_path, sep="\\s+", header=None)
        # Number the value columns from 1
        p.columns = [ f'{prefix} {i+1} Partial corr. (21 IC)' for i in range(p.shape[1]) ]
        # Single-label index: the file name up to its first "." (so each file must hold exactly one row)
        p.index = [file_name.split(".")[0]]
        participants.append(p)

    # Stack the per-file frames row-wise (axis=0): one row per file
    chunk_table = pd.concat(participants, axis=0)

    # Save the chunk table; the index (file-name stem) is written as the "ID" column
    chunk_table.to_csv(
        f"/Resting_State/rsfMRI_matrices/partial_corr_tables/21/pcorr_21_{chunk_label}.csv",
        index=True,
        index_label="ID",
    )

# The last chunk stays available under its established name
pcorr_21_49239 = chunk_table

In [None]:
# Load the ten per-chunk CSV tables of 21-IC partial correlations, one DataFrame per chunk file.
# Each file name ends in the chunk label (5000, 10000, ..., 49239) that is also used as the variable suffix.
partcorr_21_5000 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/21/pcorr_21_5000.csv")
partcorr_21_10000 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/21/pcorr_21_10000.csv")
partcorr_21_15000 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/21/pcorr_21_15000.csv")
partcorr_21_20000 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/21/pcorr_21_20000.csv")
partcorr_21_25000 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/21/pcorr_21_25000.csv")
partcorr_21_30000 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/21/pcorr_21_30000.csv")
partcorr_21_35000 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/21/pcorr_21_35000.csv")
partcorr_21_40000 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/21/pcorr_21_40000.csv")
partcorr_21_45000 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/21/pcorr_21_45000.csv")
partcorr_21_49239 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/21/pcorr_21_49239.csv")

In [None]:
# Stack the ten chunk tables row-wise (axis=0), in chunk order, into one table.
partcorr_21_parts = [
    partcorr_21_5000,
    partcorr_21_10000,
    partcorr_21_15000,
    partcorr_21_20000,
    partcorr_21_25000,
    partcorr_21_30000,
    partcorr_21_35000,
    partcorr_21_40000,
    partcorr_21_45000,
    partcorr_21_49239,
]
pcorr_21 = pd.concat(partcorr_21_parts, axis=0)
# index=False: the row index is not written to the file.
pcorr_21.to_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/21/pcorr_21_FULL.csv", index=False)

In [None]:
# Exclude Instance 3: drop every row whose ID contains the '_25752_3_0' tag
is_instance_3 = pcorr_21["ID"].str.contains('_25752_3_0')
pcorr_21_instance_2 = pcorr_21[~is_instance_3].reset_index(drop=True)
# Extract ID number: keep the text before the first underscore and cast it to int
pcorr_21_fin = pcorr_21_instance_2.assign(
    ID=pcorr_21_instance_2['ID'].str.split("_").str[0].astype(int)
)
# Save, then keep an independent copy under the name used for the final table
pcorr_21_fin.to_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/21/pcorr_21_instance_2_FINAL.csv", index=False)
partial_correlation_21 = pcorr_21_fin.copy()

'Good' components (55 IC; original component numbers)
2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 45 46 48 49 50 52 53 57 58 60 63 64 93

In [None]:
path = "/Resting_State/rsfMRI_matrices/rfMRI-partial-25753/"
# Make the data folder the working directory and collect every .txt file inside it
os.chdir(path)
my_files = glob.glob(path + "*.txt")

# Bare file names (directory part stripped)
file_names_55 = [os.path.basename(file) for file in my_files]

# Print the list of file names
print(file_names_55)

# One-column data frame of file names, sorted ascending with a fresh 0..n-1 index
file_names_55_df = pd.DataFrame(file_names_55)
file_names_55_df.columns = ['filenames']
file_names_55_df_sorted = file_names_55_df.sort_values(by='filenames', ascending=True).reset_index(drop=True)

# Get IDs: the part of each file name before the first underscore (kept as text)
eid_list = [file.split("_")[0] for file in file_names_55]

eid = pd.DataFrame(eid_list)
eid.columns = ['ID']
eid_sorted = eid.sort_values(by='ID', ascending=True).reset_index(drop=True)
eid_sorted_list = eid_sorted.values.tolist()
eid_sorted_list

In [None]:
# Same folder that the file-listing cell globs; the per-file paths below are built from it.
folder_path = "/Resting_State/rsfMRI_matrices/rfMRI-partial-25753/"

# Split the sorted file list into ten consecutive chunks; the last chunk takes whatever remains after 45000.
file_names_5000 = file_names_55_df_sorted["filenames"][0:5000]
file_names_10000 = file_names_55_df_sorted["filenames"][5000:10000]
file_names_15000 = file_names_55_df_sorted["filenames"][10000:15000]
file_names_20000 = file_names_55_df_sorted["filenames"][15000:20000]
file_names_25000 = file_names_55_df_sorted["filenames"][20000:25000]
file_names_30000 = file_names_55_df_sorted["filenames"][25000:30000]
file_names_35000 = file_names_55_df_sorted["filenames"][30000:35000]
file_names_40000 = file_names_55_df_sorted["filenames"][35000:40000]
file_names_45000 = file_names_55_df_sorted["filenames"][40000:45000]
file_names_49239 = file_names_55_df_sorted["filenames"][45000:]

# Chunk label -> file names of that chunk. The label is the suffix of the chunk CSV written below
# (pcorr_55_<label>.csv), which is the file name the loading cell expects.
chunks_55 = {
    "5000": file_names_5000,
    "10000": file_names_10000,
    "15000": file_names_15000,
    "20000": file_names_20000,
    "25000": file_names_25000,
    "30000": file_names_30000,
    "35000": file_names_35000,
    "40000": file_names_40000,
    "45000": file_names_45000,
    "49239": file_names_49239,
}

prefix = "Component"

# Every chunk is processed in this one cell, so all ten chunk CSVs exist after a single run
# (no hand-editing of the loop and the output name per chunk).
for chunk_label, chunk_file_names in chunks_55.items():
    participants = []

    # Loop over the file names of this chunk
    for file_name in chunk_file_names:
        # Create the full file path by joining the folder path and the file name
        file_path = os.path.join(folder_path, file_name)
        # Read the whitespace-separated values (no header row) into a data frame
        p = pd.read_csv(file_path, sep="\\s+", header=None)
        # Number the value columns from 1
        p.columns = [ f'{prefix} {i+1} Partial corr. (55 IC)' for i in range(p.shape[1]) ]
        # Single-label index: the file name up to its first "." (so each file must hold exactly one row)
        p.index = [file_name.split(".")[0]]
        participants.append(p)

    # Stack the per-file frames row-wise (axis=0): one row per file
    chunk_table = pd.concat(participants, axis=0)

    # Save the chunk table; the index (file-name stem) is written as the "ID" column
    chunk_table.to_csv(
        f"/Resting_State/rsfMRI_matrices/partial_corr_tables/55/pcorr_55_{chunk_label}.csv",
        index=True,
        index_label="ID",
    )

# The last chunk stays available under its established name
pcorr_55_49239 = chunk_table


In [None]:
# Load the ten per-chunk CSV tables of 55-IC partial correlations, one DataFrame per chunk file.
# Each file name ends in the chunk label (5000, 10000, ..., 49239) that is also used as the variable suffix.
partcorr_55_5000 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/55/pcorr_55_5000.csv")
partcorr_55_10000 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/55/pcorr_55_10000.csv")
partcorr_55_15000 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/55/pcorr_55_15000.csv")
partcorr_55_20000 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/55/pcorr_55_20000.csv")
partcorr_55_25000 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/55/pcorr_55_25000.csv")
partcorr_55_30000 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/55/pcorr_55_30000.csv")
partcorr_55_35000 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/55/pcorr_55_35000.csv")
partcorr_55_40000 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/55/pcorr_55_40000.csv")
partcorr_55_45000 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/55/pcorr_55_45000.csv")
partcorr_55_49239 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/55/pcorr_55_49239.csv")

In [None]:
# Stack the ten chunk tables row-wise (axis=0), in chunk order, into one table.
partcorr_55_parts = [
    partcorr_55_5000,
    partcorr_55_10000,
    partcorr_55_15000,
    partcorr_55_20000,
    partcorr_55_25000,
    partcorr_55_30000,
    partcorr_55_35000,
    partcorr_55_40000,
    partcorr_55_45000,
    partcorr_55_49239,
]
pcorr_55 = pd.concat(partcorr_55_parts, axis=0)
# index=False: the row index is not written to the file.
pcorr_55.to_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/55/pcorr_55_FULL.csv", index=False)

In [None]:
# Exclude Instance 3: drop every row whose ID contains the '_25753_3_0' tag
is_instance_3 = pcorr_55["ID"].str.contains('_25753_3_0')
pcorr_55_instance_2 = pcorr_55[~is_instance_3].reset_index(drop=True)
# Extract ID number: keep the text before the first underscore and cast it to int
pcorr_55_fin = pcorr_55_instance_2.assign(
    ID=pcorr_55_instance_2['ID'].str.split("_").str[0].astype(int)
)
# Save, then keep an independent copy under the name used for the final table
pcorr_55_fin.to_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/55/pcorr_55_instance_2_FINAL.csv", index=False)
partial_correlation_55 = pcorr_55_fin.copy()

Merge all dataframes into one if needed

In [None]:
# Join the four connectivity tables on the shared 'ID' column, one merge after another
# (pandas' default merge type, so only IDs present in every table remain).
rs_mri = (
    full_correlation_21
    .merge(full_correlation_55, on='ID')
    .merge(partial_correlation_21, on='ID')
    .merge(partial_correlation_55, on='ID')
)
rs_mri.to_csv("/Resting_State/rsfMRI_matrices/rs_mri.csv", index=False)

## Rename columns according to ICs

For 21 ICs

In [None]:
# Row/column positions of the strictly-lower triangle of a 21 x 21 matrix (k=-1 leaves out the diagonal)
lower_indices_21 = np.tril_indices(21, k=-1)
rows_21, cols_21 = lower_indices_21
pairs_tril_21 = [(row, col) for row, col in zip(rows_21, cols_21)]
# Shift the 0-based positions to 1-based positions
new_pairs_21 = [(row + 1, col + 1) for row, col in pairs_tril_21]

# Position (1..21) -> IC number: positions 1-3 keep their number, positions 4..21 map to ICs 5..22
mapping_21 = dict(zip(range(1, 22), [1, 2, 3] + list(range(5, 23))))

# Translate both members of every pair to IC numbers and format them as "<IC> & <IC>"
new_pairs_21_ic = [(mapping_21[row], mapping_21[col]) for row, col in new_pairs_21]
new_pairs_21_ic_str = [f"{x} & {y}" for x, y in new_pairs_21_ic]
new_pairs_21_ic_str

For 55 ICs

In [None]:
# Row/column positions of the strictly-lower triangle of a 55 x 55 matrix (k=-1 leaves out the diagonal)
lower_indices_55 = np.tril_indices(55, k=-1)
rows_55, cols_55 = lower_indices_55
pairs_tril_55 = [(row, col) for row, col in zip(rows_55, cols_55)]
# Shift the 0-based positions to 1-based positions
new_pairs_55 = [(row + 1, col + 1) for row, col in pairs_tril_55]

# Position (1..55) -> IC number: positions 1..42 map to ICs 2..43, the remaining 13 are listed explicitly
mapping_55 = dict(zip(
    range(1, 56),
    [*range(2, 44), 45, 46, 48, 49, 50, 52, 53, 57, 58, 60, 63, 64, 93],
))

# Translate both members of every pair to IC numbers and format them as "<IC> & <IC>"
new_pairs_55_ic = [(mapping_55[row], mapping_55[col]) for row, col in new_pairs_55]
new_pairs_55_ic_str = [f"{x} & {y}" for x, y in new_pairs_55_ic]
new_pairs_55_ic_str

Exclude ID column first

In [None]:
# One label per connectivity column: the IC pair followed by the full-correlation tag
ic_21_names = [pair_label + ' Full corr. (21IC)' for pair_label in new_pairs_21_ic_str]
# Every column except the first one, relabelled with the pair names
full_correlation_21_no_id = full_correlation_21.iloc[:, 1:]
full_correlation_21_no_id.columns = ic_21_names
full_correlation_21_no_id

Rename full correlation 21

In [None]:
# Positional labels "Component <n> Full corr. (21 IC)" for n = 1..210, paired in order with the IC-pair labels
old_names = ["Component " + str(i) + " Full corr. (21 IC)" for i in range(1, 211)]
new_names = [pair_label + ' Full corr. (21IC)' for pair_label in new_pairs_21_ic_str]
name_map = {old: new for old, new in zip(old_names, new_names)}
# Rename on a copy, so full_correlation_21 keeps its labels
full_correlation_21_ic = full_correlation_21.copy().rename(columns=name_map)
full_correlation_21_ic

Rename full correlation 55

In [None]:
# Positional labels "Component <n> Full corr. (55 IC)" for n = 1..1485, paired in order with the IC-pair labels
old_names = ["Component " + str(i) + " Full corr. (55 IC)" for i in range(1, 1486)]
new_names = [pair_label + ' Full corr. (55IC)' for pair_label in new_pairs_55_ic_str]
name_map = {old: new for old, new in zip(old_names, new_names)}
# Rename on a copy, so full_correlation_55 keeps its labels
full_correlation_55_ic = full_correlation_55.copy().rename(columns=name_map)
full_correlation_55_ic

Rename partial correlation 21

In [None]:
# Positional labels "Component <n> Partial corr. (21 IC)" for n = 1..210, paired in order with the IC-pair labels
old_names = ["Component " + str(i) + " Partial corr. (21 IC)" for i in range(1, 211)]
new_names = [pair_label + ' Partial corr. (21IC)' for pair_label in new_pairs_21_ic_str]
name_map = {old: new for old, new in zip(old_names, new_names)}
# Rename on a copy, so partial_correlation_21 keeps its labels
partial_correlation_21_ic = partial_correlation_21.copy().rename(columns=name_map)
partial_correlation_21_ic

Rename partial correlation 55

In [None]:
# Positional labels "Component <n> Partial corr. (55 IC)" for n = 1..1485, paired in order with the IC-pair labels
old_names = ["Component " + str(i) + " Partial corr. (55 IC)" for i in range(1, 1486)]
new_names = [pair_label + ' Partial corr. (55IC)' for pair_label in new_pairs_55_ic_str]
name_map = {old: new for old, new in zip(old_names, new_names)}
# Rename on a copy, so partial_correlation_55 keeps its labels
partial_correlation_55_ic = partial_correlation_55.copy().rename(columns=name_map)
partial_correlation_55_ic

## An easier way to rename columns

In [None]:
# Reload the cleaned amplitude tables and the instance-2 FINAL correlation tables from disk.
# This rebinds full_correlation_21/55 and partial_correlation_21/55 to the file contents.
amplitudes_21 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/21/amplitudes_21_fin_CLEAN.csv")
amplitudes_55 = pd.read_csv("/Resting_State/rsfMRI_matrices/amplitudes_tables/55/amplitudes_55_fin_CLEAN.csv")
full_correlation_21 = pd.read_csv('/Resting_State/rsfMRI_matrices/full_corr_tables/21/fcorr_21_instance_2_FINAL.csv')
full_correlation_55 = pd.read_csv('/Resting_State/rsfMRI_matrices/full_corr_tables/55/fcorr_55_instance_2_FINAL.csv')
partial_correlation_21 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/21/pcorr_21_instance_2_FINAL.csv")
partial_correlation_55 = pd.read_csv("/Resting_State/rsfMRI_matrices/partial_corr_tables/55/pcorr_55_instance_2_FINAL.csv")

In [None]:
def _generate_column_names(components, n_ic):
    """Build one label per component pair, shared by the 21-IC and 55-IC wrappers.

    Pairs are emitted in lower-triangle order: for every position i >= 1 the
    component at i is paired with each earlier component j < i, giving
    len(components) * (len(components) - 1) / 2 labels.

    Args:
        components: Sequence of component numbers, in column order.
        n_ic: Number written into the trailing "(<n_ic> IC)" tag.

    Returns:
        List of strings "Component <a> & Component <b> (<n_ic> IC)"; empty
        when fewer than two components are given.
    """
    return [
        f'Component {components[i]} & Component {components[j]} ({n_ic} IC)'
        for i in range(1, len(components))
        for j in range(i)
    ]


def generate_column_names_21(components):
    """Return the pair labels for `components`, tagged "(21 IC)".

    Args:
        components: Sequence of component numbers, in column order.

    Returns:
        List of "Component <a> & Component <b> (21 IC)" labels in
        lower-triangle order.
    """
    return _generate_column_names(components, 21)


def generate_column_names_55(components):
    """Return the pair labels for `components`, tagged "(55 IC)".

    Args:
        components: Sequence of component numbers, in column order.

    Returns:
        List of "Component <a> & Component <b> (55 IC)" labels in
        lower-triangle order.
    """
    return _generate_column_names(components, 55)

# Original components: the component numbers kept for each decomposition
# 21 IC: 1-3 and 5-22
good_21_orig = [1, 2, 3, *range(5, 23)]
# 55 IC: 2-43 followed by 13 individually listed components
good_55_orig = [*range(2, 44), 45, 46, 48, 49, 50, 52, 53, 57, 58, 60, 63, 64, 93]


# Generate column names
column_names_21 = generate_column_names_21(good_21_orig)
column_names_55 = generate_column_names_55(good_55_orig)

In [None]:
dfs = ['full_correlation_21', 'partial_correlation_21']
# Each table name paired with the DataFrame it refers to
frames_21 = [full_correlation_21, partial_correlation_21]
for df, frame in zip(dfs, frames_21):
    # Relabel in place: 'ID' first, then one pair label per connectivity column
    frame.columns = ['ID'] + column_names_21
    # Save the relabelled table; the output file name carries no extension
    frame.to_csv(f'/Resting_State/rsfMRI_matrices/{df}_fin_renamed', index=False)
    print(f"Columns renamed for fold {df}")

In [None]:
dfs_55 = ['full_correlation_55', 'partial_correlation_55']
# Each table name paired with the DataFrame it refers to
frames_55 = [full_correlation_55, partial_correlation_55]
for df, frame in zip(dfs_55, frames_55):
    # Relabel in place: 'ID' first, then one pair label per connectivity column
    frame.columns = ['ID'] + column_names_55
    # Save the relabelled table; the output file name carries no extension
    frame.to_csv(f'/Resting_State/rsfMRI_matrices/{df}_fin_renamed', index=False)
    print(f"Columns renamed for fold {df}")

# rsMRI confounds!

[Forum](https://www.jiscmail.ac.uk/cgi-bin/webadmin?A0=UKB-NEUROIMAGING)

# List of confounds from DTI

- Head size: 25000
- Site: 54

*Other*
- Discrepancy between dMRI brain image and T1 brain image: 25737
- dMRIScaling: 25928 (Intensity scaling for dMRI)
- Acquisition date: 53

*Table position*
- X-position of centre-of-gravity of brain mask in scanner co-ordinates: 25756
- Z-position of centre-of-gravity of brain mask in scanner co-ordinates: 25758
- Y-position of back of brain mask in scanner co-ordinates: 25757
- Scanner table position: 25759 (Z-co-ordinate of the coil (and the scanner table that the coil sits on) within the scanner. The Z axis points down the centre of the magnet) / -999999 represents "Measure not cleanly recoverable from data"

*Head Motion*
- 90th percentile of relative head motion from dMRI: 24455
- 90th percentile of absolute head motion from dMRI: 24452
- Mean absolute head motion from dMRI: 24450
- Mean relative head motion from dMRI: 24453
- Median absolute head motion from dMRI: 24451
- Median relative head motion from dMRI: 24454
- STRUCT MOTION: 24419

*Eddy*
- Number of slices that Eddy estimated to be outliers in dMRI data: 24456
- Number of dMRI outlier slices detected and corrected: 25746
- New Eddy: 25921 (Whether increased search space in eddy current estimation was used for dMRI)
- YTranslation: 25922 (Standard deviation of apparent translation in the Y axis as measured by eddy)

- Discrepancy between T1 brain image and standard-space brain template (linearly-aligned): 25731
- Discrepancy between T1 brain image and standard-space brain template (nonlinearly-aligned): 25732
- Mean rfMRI head motion, averaged across space and time points: 25741
- Mean tfMRI head motion, averaged across space and time points: 25742

[Forum](https://www.jiscmail.ac.uk/cgi-bin/webadmin?A0=UKB-NEUROIMAGING)

# List of confounds for rsMRI

**Main**

- Head size: 25000
- Site: 54

*Head motion*
- Mean rfMRI head motion, averaged across space and time points: 25741
- Mean absolute head motion from rfMRI 24438
- Mean of mean (across space) in-plane displacement from rfMRI 24457
- Mean of mean (across space) of Z displacement from rfMRI 24460
- Mean relative head motion from rfMRI 24441
- 90th percentile of absolute head motion from rfMRI 24440	
- 90th percentile of mean (across space) in-plane displacement from rfMRI 24459	
- 90th percentile of mean (across space) of Z displacement from rfMRI 24462	
- 90th percentile of mean (across time) in-plane displacement from rfMRI 24464	
- 90th percentile of mean (across time) of Z displacement from rfMRI 24466	
- 90th percentile of relative head motion from rfMRI 24443
- Median absolute head motion from rfMRI 24439
- Median of mean (across space) in-plane displacement from rfMRI 24458
- Median of mean (across space) of Z displacement from rfMRI 24461
- Median of mean (across time) in-plane displacement from rfMRI 24463
- Median of mean (across time) of Z displacement from rfMRI 24465
- Median relative head motion from rfMRI 24442

*Non-rs Head motion*
- STRUCT MOTION: 24419

*rs-specific confounds*
- Inverted temporal signal-to-noise ratio in artefact-cleaned pre-processed rfMRI 25744
- Discrepancy between rfMRI brain image and T1 brain image 25739
- Intensity scaling for rfMRI 25929	

*Other*
- Acquisition date: 53


*Table position - from structural brain MRI*
- X-position of centre-of-gravity of brain mask in scanner co-ordinates: 25756
- Z-position of centre-of-gravity of brain mask in scanner co-ordinates: 25758
- Y-position of back of brain mask in scanner co-ordinates: 25757
- Scanner table position: 25759 (Z-co-ordinate of the coil (and the scanner table that the coil sits on) within the scanner. The Z axis points down the centre of the magnet) / -999999 represents "Measure not cleanly recoverable from data"
- Discrepancy between T1 brain image and standard-space brain template (linearly-aligned): 25731
- Discrepancy between T1 brain image and standard-space brain template (nonlinearly-aligned): 25732


EXCLUDE
25923	Echo Time for rfMRI

Absolute head motion = referenced to middle time-point, i.e., from a reference position
Relative head motion = compared with previous time-point, i.e., between consecutive volumes

ALL RS CONFOUNDS

- 24440	90th percentile of absolute head motion from rfMRI
- 24459	90th percentile of mean (across space) in-plane displacement from rfMRI
- 24462	90th percentile of mean (across space) of Z displacement from rfMRI
- 24464	90th percentile of mean (across time) in-plane displacement from rfMRI
- 24466	90th percentile of mean (across time) of Z displacement from rfMRI
- 24443	90th percentile of relative head motion from rfMRI
- 24434	DVARS 90th percentile D from cleaned rfMRI
- 24425	DVARS 90th percentile D from uncleaned rfMRI
- 24431	DVARS 90th percentile S from cleaned rfMRI
- 24422	DVARS 90th percentile S from uncleaned rfMRI
- 24437	DVARS 90th percentile SD from cleaned rfMRI
- 24428	DVARS 90th percentile SD from uncleaned rfMRI
- 24432	DVARS Mean D from cleaned rfMRI
- 24423	DVARS Mean D from uncleaned rfMRI
- 24429	DVARS Mean S from cleaned rfMRI
- 24420	DVARS Mean S from uncleaned rfMRI
- 24435	DVARS Mean SD from cleaned rfMRI
- 24426	DVARS Mean SD from uncleaned rfMRI
- 24433	DVARS Median D from cleaned rfMRI
- 24424	DVARS Median D from uncleaned rfMRI
- 24430	DVARS Median S from cleaned rfMRI
- 24421	DVARS Median S from uncleaned rfMRI
- 24436	DVARS Median SD from cleaned rfMRI
- 24427	DVARS Median SD from uncleaned rfMRI
- 25923	Echo Time for rfMRI
- 25929	Intensity scaling for rfMRI
- 24438	Mean absolute head motion from rfMRI
- 24457	Mean of mean (across space) in-plane displacement from rfMRI
- 24460	Mean of mean (across space) of Z displacement from rfMRI
- 24441	Mean relative head motion from rfMRI
- 24439	Median absolute head motion from rfMRI
- 24458	Median of mean (across space) in-plane displacement from rfMRI
- 24461	Median of mean (across space) of Z displacement from rfMRI
- 24463	Median of mean (across time) in-plane displacement from rfMRI
- 24465	Median of mean (across time) of Z displacement from rfMRI
- 24442	Median relative head motion from rfMRI
- 25739	Discrepancy between rfMRI brain image and T1 brain image
- 25741	Mean rfMRI head motion, averaged across space and time points
- 25744	Inverted temporal signal-to-noise ratio in artefact-cleaned pre-processed rfMRI
- 25743	Inverted temporal signal-to-noise ratio in pre-processed rfMRI

RS CONFOUNDS TO CHECK

- 24440	90th percentile of absolute head motion from rfMRI
- 24459	90th percentile of mean (across space) in-plane displacement from rfMRI
- 24462	90th percentile of mean (across space) of Z displacement from rfMRI
- 24464	90th percentile of mean (across time) in-plane displacement from rfMRI
- 24466	90th percentile of mean (across time) of Z displacement from rfMRI
- 24443	90th percentile of relative head motion from rfMRI
- 24434	DVARS 90th percentile D from cleaned rfMRI
- 24431	DVARS 90th percentile S from cleaned rfMRI
- 24437	DVARS 90th percentile SD from cleaned rfMRI
- 24432	DVARS Mean D from cleaned rfMRI
- 24429	DVARS Mean S from cleaned rfMRI
- 24435	DVARS Mean SD from cleaned rfMRI
- 24433	DVARS Median D from cleaned rfMRI
- 24430	DVARS Median S from cleaned rfMRI
- 24436	DVARS Median SD from cleaned rfMRI
- 25923	Echo Time for rfMRI
- 25929	Intensity scaling for rfMRI
- 24438	Mean absolute head motion from rfMRI
- 24457	Mean of mean (across space) in-plane displacement from rfMRI
- 24460	Mean of mean (across space) of Z displacement from rfMRI
- 24441	Mean relative head motion from rfMRI
- 24439	Median absolute head motion from rfMRI
- 24458	Median of mean (across space) in-plane displacement from rfMRI
- 24461	Median of mean (across space) of Z displacement from rfMRI
- 24463	Median of mean (across time) in-plane displacement from rfMRI
- 24465	Median of mean (across time) of Z displacement from rfMRI
- 24442	Median relative head motion from rfMRI
- 25739	Discrepancy between rfMRI brain image and T1 brain image
- 25741	Mean rfMRI head motion, averaged across space and time points
- 25744	Inverted temporal signal-to-noise ratio in artefact-cleaned pre-processed rfMRI
- 25743	Inverted temporal signal-to-noise ratio in pre-processed rfMRI

[Cole: Multimodality neuroimaging brain-age in UK biobank: relationship to biomedical, lifestyle, and cognitive factors](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7280786/)

[Predicting sex, age, general cognition and mental health with machine learning on brain structural connectomes](https://onlinelibrary.wiley.com/doi/full/10.1002/hbm.26182)

age2, sex, height, volumetric scaling from T1-weighted MRI to standard (data field #25000), and mean task fMRI head motion (averaged across space and time points; data field #25742) as covariates

In [None]:
# Path of the main UK Biobank CSV file
csv_path = '/FULL/ukb.csv'
# ukbiobank object built from that CSV; it is passed as `ukbio=` to the ukbiobank helper calls that follow
ukb = ukbiobank.ukbio(ukb_csv=csv_path)

In [None]:
# Request 'eid' plus the confound fields below from the main CSV via loadCsv, passing instance=2.
# Field descriptions are taken from the confound lists in the notes above.
# NOTE(review): 25923 (Echo Time for rfMRI) is listed under EXCLUDE in the notes but is still requested here - confirm.
df_rs_conf = ukbiobank.utils.utils.loadCsv(ukbio=ukb, fields=['eid',
25000,  # Head size
54,  # Site
53,  # Acquisition date
25758,  # Z-position of centre-of-gravity of brain mask in scanner co-ordinates
25756,  # X-position of centre-of-gravity of brain mask in scanner co-ordinates
25757,  # Y-position of back of brain mask in scanner co-ordinates
25759,  # Scanner table position
25731,  # Discrepancy between T1 brain image and standard-space brain template (linearly-aligned)
25732,  # Discrepancy between T1 brain image and standard-space brain template (nonlinearly-aligned)
25923,  # Echo Time for rfMRI
25929,  # Intensity scaling for rfMRI
25739,  # Discrepancy between rfMRI brain image and T1 brain image
25741,  # Mean rfMRI head motion, averaged across space and time points
25744,  # Inverted temporal signal-to-noise ratio in artefact-cleaned pre-processed rfMRI
25743  # Inverted temporal signal-to-noise ratio in pre-processed rfMRI
], instance=2)
#24419 STRUCT MOTION
#The following variables were not found:
#[24440, 24459, 24462, 24464, 24466, 24443, 24434, 24431, 24437, 24432, 24429, 24435, 24433, 24430, 24436, 24438, 24457, 24460, 24441, 24439, 24458, 24461, 24463, 24465, 24442]

In [None]:
# Call addFields on df_rs_conf with the same field list that was passed to loadCsv.
# NOTE(review): the keyword here is `instances=2` while the loadCsv call uses `instance=2` - confirm against
# the ukbiobank.utils signatures.
# NOTE(review): the result is bound to `rs_conf`, but the following fieldIdsToNames cell is given `df_rs_conf` -
# confirm which frame is meant to be used downstream.
rs_conf = addFields(ukbio=ukb, df=df_rs_conf, fields=['eid',
25000,
54,
53,
25758,
25756,
25757,
25759,
25731,
25732,
25923,
25929,
25739,
25741,
25744,
25743], instances=2)

In [None]:
# Pass df_rs_conf through fieldIdsToNames; presumably this replaces field-ID column labels with
# descriptive names (later cells reference 'Date of attending assessment centre') - TODO confirm.
rs_conf_names = ukbiobank.utils.utils.fieldIdsToNames(ukbio=ukb, df=df_rs_conf)

**Get confounds that could not be uploaded from the main csv file**

- 24419 STRUCT MOTION
- 24456 X-position of centre-of-gravity of brain mask in scanner co-ordinates
- 24440, 24459, 24462, 24464, 24466, 24443, 24434, 24431, 24437, 24432, 24429, 24435, 24433, 24430, 24436, 24438, 24457, 24460, 24441, 24439, 24458, 24461, 24463, 24465, 24442

Now the desired fields need to be retrieved from UK Biobank

In [None]:
rs_conf_add = pd.read_csv('/Oct_2023_resting_conf/ukb.csv')
# 'eid' plus the "-2.0" columns of the additional confound fields, in the order they are kept
rs_conf_add_columns = [
    'eid',
    '24419-2.0',
    '24440-2.0',
    '24459-2.0',
    '24462-2.0',
    '24464-2.0',
    '24466-2.0',
    '24443-2.0',
    '24434-2.0',
    '24431-2.0',
    '24437-2.0',
    '24432-2.0',
    '24429-2.0',
    '24435-2.0',
    '24433-2.0',
    '24430-2.0',
    '24436-2.0',
    '24438-2.0',
    '24457-2.0',
    '24460-2.0',
    '24441-2.0',
    '24439-2.0',
    '24458-2.0',
    '24461-2.0',
    '24463-2.0',
    '24465-2.0',
    '24442-2.0',
]
rs_conf_add_selected = rs_conf_add[rs_conf_add_columns]

Rename

In [None]:
# Field column ("<field id>-2.0") -> descriptive label. Renaming by key keeps every label attached to
# its own field even if the order of the selected columns changes; 'eid' is left untouched.
rs_conf_add_labels = {
    '24419-2.0': 'Struct. motion',
    '24440-2.0': '90th percentile of absolute head motion from rfMRI',
    '24459-2.0': '90th percentile of mean (across space) in-plane displacement from rfMRI',
    '24462-2.0': '90th percentile of mean (across space) of Z displacement from rfMRI',
    '24464-2.0': '90th percentile of mean (across time) in-plane displacement from rfMRI',
    '24466-2.0': '90th percentile of mean (across time) of Z displacement from rfMRI',
    '24443-2.0': '90th percentile of relative head motion from rfMRI',
    '24434-2.0': 'DVARS 90th percentile D from cleaned rfMRI',
    '24431-2.0': 'DVARS 90th percentile S from cleaned rfMRI',
    '24437-2.0': 'DVARS 90th percentile SD from cleaned rfMRI',
    '24432-2.0': 'DVARS Mean D from cleaned rfMRI',
    '24429-2.0': 'DVARS Mean S from cleaned rfMRI',
    '24435-2.0': 'DVARS Mean SD from cleaned rfMRI',
    '24433-2.0': 'DVARS Median D from cleaned rfMRI',
    '24430-2.0': 'DVARS Median S from cleaned rfMRI',
    '24436-2.0': 'DVARS Median SD from cleaned rfMRI',
    '24438-2.0': 'Mean absolute head motion from rfMRI',
    '24457-2.0': 'Mean of mean (across space) in-plane displacement from rfMRI',
    '24460-2.0': 'Mean of mean (across space) of Z displacement from rfMRI',
    '24441-2.0': 'Mean relative head motion from rfMRI',
    '24439-2.0': 'Median absolute head motion from rfMRI',
    '24458-2.0': 'Median of mean (across space) in-plane displacement from rfMRI',
    '24461-2.0': 'Median of mean (across space) of Z displacement from rfMRI',
    '24463-2.0': 'Median of mean (across time) in-plane displacement from rfMRI',
    '24465-2.0': 'Median of mean (across time) of Z displacement from rfMRI',
    '24442-2.0': 'Median relative head motion from rfMRI',
}
# rename() ignores keys that are absent, so re-running this cell on an already renamed frame is harmless
rs_conf_add_selected = rs_conf_add_selected.rename(columns=rs_conf_add_labels)

# Fail loudly if any expected label is missing after the rename
missing_labels = set(rs_conf_add_labels.values()) - set(rs_conf_add_selected.columns)
assert not missing_labels, f"confound columns missing after rename: {sorted(missing_labels)}"

#24440	90th percentile of absolute head motion from rfMRI
#24459	90th percentile of mean (across space) in-plane displacement from rfMRI
#24462	90th percentile of mean (across space) of Z displacement from rfMRI
#24464	90th percentile of mean (across time) in-plane displacement from rfMRI
#24466	90th percentile of mean (across time) of Z displacement from rfMRI
#24443	90th percentile of relative head motion from rfMRI
#24434	DVARS 90th percentile D from cleaned rfMRI
#24431	DVARS 90th percentile S from cleaned rfMRI
#24437	DVARS 90th percentile SD from cleaned rfMRI
#24432	DVARS Mean D from cleaned rfMRI
#24429	DVARS Mean S from cleaned rfMRI
#24435	DVARS Mean SD from cleaned rfMRI
#24433	DVARS Median D from cleaned rfMRI
#24430	DVARS Median S from cleaned rfMRI
#24436	DVARS Median SD from cleaned rfMRI
#25923	Echo Time for rfMRI
#25929	Intensity scaling for rfMRI
#24438	Mean absolute head motion from rfMRI
#24457	Mean of mean (across space) in-plane displacement from rfMRI
#24460	Mean of mean (across space) of Z displacement from rfMRI
#24441	Mean relative head motion from rfMRI
#24439	Median absolute head motion from rfMRI
#24458	Median of mean (across space) in-plane displacement from rfMRI
#24461	Median of mean (across space) of Z displacement from rfMRI
#24463	Median of mean (across time) in-plane displacement from rfMRI
#24465	Median of mean (across time) of Z displacement from rfMRI
#24442	Median relative head motion from rfMRI
#25739	Discrepancy between rfMRI brain image and T1 brain image
#25741	Mean rfMRI head motion, averaged across space and time points
#25744	Inverted temporal signal-to-noise ratio in artefact-cleaned pre-processed rfMRI
#25743	Inverted temporal signal-to-noise ratio in pre-processed rfMRI

In [None]:
# Inner-join the named main-CSV confounds with the additional confounds on 'eid'
rs_confounds_full = pd.merge(rs_conf_names, rs_conf_add_selected, how='inner', on='eid')
# The file is written before the rename below, so on disk the identifier column is still called 'eid'
rs_confounds_full.to_csv(r'/ML_DATASETS/Brain/rsMRI/rs_confounds_full.csv', index=False)
rs_confounds_full = rs_confounds_full.rename(columns={'eid': 'ID'})

Explore confounds

In [None]:
from scipy.stats import pearsonr, spearmanr
# Rows with any missing value are dropped before correlating
rs_confounds_full_nona = rs_confounds_full.copy().dropna(axis=0)
confound_labels = rs_confounds_full_nona.columns
# Spearman correlation across the columns of the table: rho holds the coefficients, p the p-values
rho, p = spearmanr(rs_confounds_full_nona)
# Wrap both matrices as labelled data frames (rows and columns named after the table columns), transposed
p = pd.DataFrame(p, columns=confound_labels, index=confound_labels).T
rho = pd.DataFrame(rho, columns=confound_labels, index=confound_labels).T
rho

Keep only strong positive correlations (rho > 0.7)

In [None]:
# Cells that are not above 0.7 become NaN; the table is then shown with no column limit
above_threshold = rho > 0.7
rho_sign = rho.where(above_threshold)
with pd.option_context('display.max_columns', None):
    display(rho_sign)

Extract correlations that are above 0.7 (and below 1.0)

In [None]:
# For every confound, list the other confounds whose Spearman rho with it
# exceeds the threshold.
HIGH_CORR_THRESHOLD = 0.7

for col in rho.columns:
    # Remove the self-correlation by label rather than by value. Filtering on
    # `rho < 1.0` would also hide a *different* confound that is perfectly
    # correlated with this one, and would fail to hide the diagonal if it came
    # out marginally below 1.0 because of floating-point rounding.
    other_rho = rho[col].drop(labels=col)

    # Boolean indexing keeps only the matching rows, so an empty result means
    # "nothing above the threshold" (NaN coefficients compare as False).
    strong_rho = other_rho[other_rho > HIGH_CORR_THRESHOLD]

    # A separate name is used so the `rho_sign` frame built earlier in the
    # notebook is not overwritten by a per-column Series.
    if not strong_rho.empty:
        print(col, 'is correlated with', strong_rho.index, 'with values', strong_rho.values)

# Full cycle to get confounds

In [None]:
# Build the final resting-state confound table from the merged confounds CSV:
# clean column names -> complete cases -> numeric date -> one-hot site -> drop
# redundant motion metrics -> save.

# NOTE(review): this import rebinds `datetime` to the module, shadowing the
# `datetime` class imported in the first cell. The date conversion below no
# longer needs it; it is kept only in case later cells rely on the module binding.
import datetime

rs_confounds = pd.read_csv('/ML_DATASETS/Brain/rsMRI/rs_confounds_full.csv')

# Strip the literal "-2.0" text from the column names (presumably the UK Biobank
# instance/array suffix - confirm). regex=False makes the match literal on every
# pandas version; before pandas 2.0 the default was a regex, where "." matches
# any character.
rs_confounds.columns = rs_confounds.columns.str.replace("-2.0", "", regex=False)

# Complete cases only.
rs_confounds_nona = rs_confounds.dropna(axis=0).reset_index(drop=True)

# Encode the assessment date as whole seconds since the Unix epoch.
# The subtraction is done on naive timestamps, so the result does not depend on
# the timezone or DST rules of the machine running the notebook (which
# `datetime.datetime.timestamp` on naive values does).
DATE_COLUMN = 'Date of attending assessment centre'
assessment_dates = pd.to_datetime(rs_confounds_nona[DATE_COLUMN], format="%Y-%m-%d")  #"%m/%d/%Y")
rs_confounds_nona[DATE_COLUMN] = (
    (assessment_dates - pd.Timestamp("1970-01-01")) // pd.Timedelta(seconds=1)
)

# One-hot encode the assessment centre as 0/1 integer columns.
rs_confounds_nona_dummy = pd.get_dummies(rs_confounds_nona, columns=['UK Biobank assessment centre'], dtype=int)

# Motion / DVARS metrics removed from the final confound set.
# NOTE(review): presumably these are the variables found to be highly correlated
# (rho > 0.7) with retained confounds in the exploration cells - confirm.
redundant_confound_columns = [
    '90th percentile of mean (across space) in-plane displacement from rfMRI',
    '90th percentile of mean (across space) of Z displacement from rfMRI',
    '90th percentile of mean (across time) in-plane displacement from rfMRI',
    '90th percentile of mean (across time) of Z displacement from rfMRI',
    '90th percentile of relative head motion from rfMRI',
    'DVARS Median SD from cleaned rfMRI',
    'Mean absolute head motion from rfMRI',
    'Mean of mean (across space) in-plane displacement from rfMRI',
    'Mean of mean (across space) of Z displacement from rfMRI',
    'Mean relative head motion from rfMRI',
    'Median of mean (across space) in-plane displacement from rfMRI',
    'Median of mean (across space) of Z displacement from rfMRI',
    'Median of mean (across time) in-plane displacement from rfMRI',
    'Median of mean (across time) of Z displacement from rfMRI',
    '90th percentile of absolute head motion from rfMRI',
]
# drop() raises KeyError if any listed column is missing, which surfaces a
# changed input schema instead of silently keeping a redundant confound.
rs_confounds_fin = rs_confounds_nona_dummy.drop(columns=redundant_confound_columns)
rs_confounds_fin.to_csv('/ML_DATASETS/Brain/rsMRI/rs_confounds_fin_full.csv', index=False)

Drop these two:

- 24463	Median of mean (across time) in-plane displacement from rfMRI	Resting functional brain MRI  
- 24465	Median of mean (across time) of Z displacement from rfMRI	Resting functional brain MRI  

because they are highly correlated with 'Mean rfMRI head motion, averaged across space and time points'