In [1]:
import os
import re
import pandas as pd

# Define the directory path
directory_path = '/home/fgomezacebo/lood_storage/divi/Projects/fMRS/analysis/BIDS/data/derivatives/v_20.1.1/postprocessing_rsfmri_nocosine_smoothing_delvols/Firstlevel_pseudoblock/Higher_level/results/'

In [2]:
import re

def parse_filename(filename):
    """
    Extracts session, modality, contrast, and type from a filename.
    
    Filename format examples:
    - Autoaq_ses-high_m2_c1_thresh.nii.gz_cortical.txt
    - Autoaq_plac_high_m1_c2_thresh.nii.gz_cortical.txt
    """
    # Adjusted regex pattern to better capture varied session indicators
    pattern = re.compile(
        r'Autoaq_(?:ses-)?(high|low|plac)_(?:ses-)?(high|low)?_?(m\d)_(c\d)_thresh\.nii\.gz_(cortical|subcortical)\.txt$'
    )
    match = pattern.search(filename)
    if match:
        session1 = match.group(1)
        session2 = match.group(2)
        modality = match.group(3)
        contrast = match.group(4)
        type_ = match.group(5)
        
        # Determine the correct session to use based on captured groups
        session = session2 if session2 else session1
        
        return session, modality, contrast, type_
    else:
        return None, None, None, None


In [3]:
files_to_process = []
for filename in os.listdir(directory_path):
    if filename.endswith('_cortical.txt') or filename.endswith('_subcortical.txt'):
        print(parse_filename(filename))
        print(filename)

('high', 'm2', 'c1', 'cortical')
Autoaq_ses-high_m2_c1_thresh.nii.gz_cortical.txt
('high', 'm1', 'c2', 'cortical')
Autoaq_plac_high_m1_c2_thresh.nii.gz_cortical.txt
('plac', 'm3', 'c2', 'subcortical')
Autoaq_ses-plac_m3_c2_thresh.nii.gz_subcortical.txt
('plac', 'm8', 'c2', 'cortical')
Autoaq_ses-plac_m8_c2_thresh.nii.gz_cortical.txt
('low', 'm2', 'c2', 'subcortical')
Autoaq_plac_low_m2_c2_thresh.nii.gz_subcortical.txt
('high', 'm7', 'c2', 'cortical')
Autoaq_plac_high_m7_c2_thresh.nii.gz_cortical.txt
('high', 'm5', 'c2', 'cortical')
Autoaq_plac_high_m5_c2_thresh.nii.gz_cortical.txt
('high', 'm5', 'c2', 'subcortical')
Autoaq_plac_high_m5_c2_thresh.nii.gz_subcortical.txt
('high', 'm2', 'c2', 'subcortical')
Autoaq_plac_high_m2_c2_thresh.nii.gz_subcortical.txt
('plac', 'm3', 'c2', 'cortical')
Autoaq_ses-plac_m3_c2_thresh.nii.gz_cortical.txt
('plac', 'm6', 'c2', 'cortical')
Autoaq_ses-plac_m6_c2_thresh.nii.gz_cortical.txt
('low', 'm2', 'c1', 'subcortical')
Autoaq_ses-low_m2_c1_thresh.nii.gz_

In [4]:
def process_file(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Find indices of lines with "------------------------------------------"
    divider_indices = [i for i, line in enumerate(lines) if line.strip() == '-'*42]
    
    # Assuming the relevant data is outside of these dividers
    relevant_lines = lines[:divider_indices[0]] + lines[divider_indices[1]+1:]
    
    # Extracting the initial table data
    cluster_table_lines = [line.strip() for line in relevant_lines if line[0].isdigit() or line.startswith('ClusterIndex')]
    cluster_table_df = pd.DataFrame([line.split('\t') for line in cluster_table_lines[1:]],
                                    columns=cluster_table_lines[0].split('\t'))
    
    # Extracting structures per cluster, organized in a neat table
    structures_data = {}
    current_cluster = ''
    for line in relevant_lines:
        if line.startswith('Cluster #'):
            current_cluster = line.strip().replace('Cluster #', '').replace(':', '').strip()
            structures_data[current_cluster] = []
        elif current_cluster and line.strip():  # Only process if we're within a cluster section
            parts = line.strip().split(':')
            if len(parts) == 2:  # Ensure line is formatted as expected
                structures_data[current_cluster].append(parts)
    
    structures_df_dict = {cluster: pd.DataFrame(data, columns=['Structure', 'Volume Percentage']) for cluster, data in structures_data.items() if data}
    
    return cluster_table_df, structures_df_dict

In [5]:
for filename in os.listdir(directory_path):
    if filename.endswith('cortical.txt'):  # Filter for specific files
        condition, contrast_modality = parse_filename(filename)
        file_path = os.path.join(directory_path, filename)
        table_data, structures_data = process_file(file_path)
        
        # Here you can do something with the extracted data
        print(f"Condition: {condition}, Contrast/Modality: {contrast_modality}")
        print(f"Table Data for {filename}: {table_data}")
        print(f"Structures Data for {filename}: {structures_data}")

        

ValueError: too many values to unpack (expected 2)

In [6]:
cluster_table, structures_dfs = process_file(file_path)

print("Cluster Table:")
print(cluster_table)
print("\nStructures Data Frames:")
for cluster, df in structures_dfs.items():
    print(f"\n{cluster}:")
    print(df)

NameError: name 'file_path' is not defined

In [7]:
files_to_process = []
for filename in os.listdir(directory_path):
    if filename.endswith('_cortical.txt') or filename.endswith('_subcortical.txt'):
        file_path = os.path.join(directory_path, filename)
        files_to_process.append(file_path)

In [8]:
all_clusters = pd.DataFrame()
all_structures = pd.DataFrame()

for file_path in files_to_process:
    # Extract the session, modality, contrast, and cortex type from the filename
    session, modality, contrast, cortex_type = parse_filename(os.path.basename(file_path))
    # Process the file to obtain cluster and structure data
    cluster_table, structures_dfs = process_file(file_path)

    # Enrich cluster data with session, modality, contrast, and type
    cluster_table['Session'] = session
    cluster_table['Modality'] = modality
    cluster_table['Contrast'] = contrast
    cluster_table['Type'] = cortex_type
    # Append the enriched data to the aggregate DataFrame
    all_clusters = pd.concat([all_clusters, cluster_table], ignore_index=True)

    # Process and accumulate structure data for each cluster
    for cluster, df in structures_dfs.items():
        df['Cluster'] = cluster
        df['Session'] = session
        df['Modality'] = modality
        df['Contrast'] = contrast
        df['Type'] = cortex_type
        all_structures = pd.concat([all_structures, df], ignore_index=True)

# Sort all_clusters and all_structures DataFrames
# Ensure that 'Cluster' is an integer for correct sorting
all_clusters['ClusterIndex'] = pd.to_numeric(all_clusters['ClusterIndex'], errors='coerce')
all_structures['Cluster'] = pd.to_numeric(all_structures['Cluster'], errors='coerce')

# Sort by session, modality, contrast, type, and then by cluster index/number in descending order
all_clusters_sorted = all_clusters.sort_values(by=['Session', 'Modality', 'Contrast', 'Type', 'ClusterIndex'], ascending=[True, True, True, True, False])
all_structures_sorted = all_structures.sort_values(by=['Session', 'Modality', 'Contrast', 'Type', 'Cluster'], ascending=[True, True, True, True, False])

In [9]:
print(all_clusters_sorted)

     ClusterIndex Voxels     MAX MAX X (mm) MAX Y (mm) MAX Z (mm) COG X (mm)  \
557             8  31423  0.0498        -48        -58         18       1.25   
558             7   1120  0.0496         34        -82        -34      -2.31   
559             6     31  0.0484         10        -14          6       4.44   
560             5     26  0.0499        -36        -62        -24      -42.5   
561             4     24  0.0499        -44        -52         56      -41.5   
..            ...    ...     ...        ...        ...        ...        ...   
332             5     15  0.0495        -14        -32         74      -14.3   
333             4     12  0.0494        -64        -18         -2        -63   
334             3     11  0.0496          8         24         56       5.66   
335             2      4  0.0496         -8         40         36         -7   
336             1      4  0.0497        -30         20         42        -31   

    COG Y (mm) COG Z (mm) Session Modal

In [10]:
# For the all_clusters DataFrame
cluster_columns = ['Session', 'Modality', 'Contrast', 'Type', 'ClusterIndex']
# Ensure all other columns are included without manual listing
other_columns = [col for col in all_clusters_sorted.columns if col not in cluster_columns]
# Reorder the columns
all_clusters_sorted = all_clusters_sorted[cluster_columns + other_columns]

# For the all_structures DataFrame
structure_columns = ['Session', 'Modality', 'Contrast', 'Type', 'Cluster']
# Ensure all other columns are included without manual listing
other_columns_structures = [col for col in all_structures_sorted.columns if col not in structure_columns]
# Reorder the columns
all_structures_sorted = all_structures_sorted[structure_columns + other_columns_structures]

In [11]:
all_clusters_sorted = all_clusters_sorted.drop('ClusterIndex', axis=1)

In [12]:
print(all_clusters_sorted)

    Session Modality Contrast         Type Voxels     MAX MAX X (mm)  \
557    high       m1       c1     cortical  31423  0.0498        -48   
558    high       m1       c1     cortical   1120  0.0496         34   
559    high       m1       c1     cortical     31  0.0484         10   
560    high       m1       c1     cortical     26  0.0499        -36   
561    high       m1       c1     cortical     24  0.0499        -44   
..      ...      ...      ...          ...    ...     ...        ...   
332    plac       m8       c2  subcortical     15  0.0495        -14   
333    plac       m8       c2  subcortical     12  0.0494        -64   
334    plac       m8       c2  subcortical     11  0.0496          8   
335    plac       m8       c2  subcortical      4  0.0496         -8   
336    plac       m8       c2  subcortical      4  0.0497        -30   

    MAX Y (mm) MAX Z (mm) COG X (mm) COG Y (mm) COG Z (mm)  
557        -58         18       1.25      -2.95       27.3  
558        -8

In [14]:
# Save all_clusters_sorted DataFrame as a CSV file
all_clusters_sorted.to_csv('all_phMRS_clusters_sorted.csv', index=False)

# Save all_structures_sorted DataFrame as a CSV file
all_structures_sorted.to_csv('all_phMRSstructures_sorted.csv', index=False)


In [None]:
`1qqwq