## Step 2: Split Files Based on AD
Here, separate the processed files into different directories (`AD` and `NOAD`) based on the AD label of the individual associated with each file.


In [1]:
import os
import shutil
import pandas as pd

# Lookup: numeric individual index ('individualID_Num') -> 'individualID'.
map_dict_path = 'ROSMAP-graph-data/survival_label_map_dict.csv'
map_dict_df = pd.read_csv(map_dict_path)
num_to_id_dict = pd.Series(map_dict_df['individualID'].values, index=map_dict_df['individualID_Num']).to_dict()

# Lookup: 'individualID' -> label taken from the 'ceradsc' column.
label_path = 'ROSMAP-graph-data/survival-label.csv'
label_df = pd.read_csv(label_path)
id_to_AD_dict = pd.Series(label_df['ceradsc'].values, index=label_df['individualID']).to_dict()

# Source folder holding the per-individual CSV files to be split.
survival_dir = './ROSMAP-analysis/avg/'
files = os.listdir(survival_dir)

# Destination folders: label 1 goes to AD, label 0 goes to NOAD.
AD_dir = './ROSMAP-analysis/fold_0/AD'
NOAD_dir = './ROSMAP-analysis/fold_0/NOAD'

os.makedirs(AD_dir, exist_ok=True)
os.makedirs(NOAD_dir, exist_ok=True)

for file in files:
    if not file.endswith('.csv'):
        continue

    # The numeric index is the text between 'survival' and '.csv' in the file
    # name. A stray CSV with a different name would otherwise abort the whole
    # split with IndexError/ValueError, so report it and move on.
    try:
        num = int(file.split('survival')[1].split('.csv')[0])
    except (IndexError, ValueError):
        print(f"Skipping {file}: file name does not match 'survival<N>.csv'.")
        continue

    # Files whose index has no known individual are left where they are.
    if num not in num_to_id_dict:
        continue
    individual_id = num_to_id_dict[num]

    # Individuals without a label are left where they are.
    if individual_id not in id_to_AD_dict:
        continue
    ad = id_to_AD_dict[individual_id]

    # Only labels 0 and 1 are copied; any other value (including NaN) is ignored.
    if ad == 0:
        shutil.copy(os.path.join(survival_dir, file), os.path.join(NOAD_dir, file))
    elif ad == 1:
        shutil.copy(os.path.join(survival_dir, file), os.path.join(AD_dir, file))

## Step 3: Merge Files and Calculate Average Attention
Finally, merge the files in each group and compute the average attention across them, grouped by the key columns `From`, `To`, and `EdgeType`.


In [2]:
import os
import pandas as pd

def merge_and_average_attention(folder_path):
    """Average the 'Attention' column over every CSV file in a folder.

    Each ``.csv`` file directly inside ``folder_path`` is loaded, its
    'individualID' column (if any) is removed, and the rows of all usable
    files are stacked together. Rows sharing the same ('From', 'To',
    'EdgeType') triple are then collapsed into one row holding the mean of
    their 'Attention' values.

    Files lacking any of 'From', 'To', 'EdgeType' or 'Attention' are reported
    on stdout and left out of the average.

    Args:
        folder_path: Directory whose ``.csv`` files should be merged.

    Returns:
        A DataFrame with the columns 'From', 'To', 'EdgeType' and 'Attention'
        (one row per distinct key triple), or ``None`` when the folder holds
        no usable CSV file.
    """
    group_cols = ['From', 'To', 'EdgeType']
    required_cols = group_cols + ['Attention']
    frames = []

    for entry in os.listdir(folder_path):
        if not entry.endswith('.csv'):
            continue

        table = pd.read_csv(os.path.join(folder_path, entry))

        # The per-individual identifier plays no part in the average.
        table = table.drop(columns=['individualID'], errors='ignore')

        absent = [col for col in required_cols if col not in table.columns]
        if absent:
            print(f"File {entry} is missing one of the required columns.")
            continue

        frames.append(table)

    # Nothing usable was found: tell the caller via None.
    if len(frames) == 0:
        print("No valid files to process.")
        return None

    stacked = pd.concat(frames, ignore_index=True)

    # One row per key triple, carrying the mean attention of that triple.
    mean_attention = stacked.groupby(group_cols)['Attention'].mean()
    return mean_attention.reset_index()

# Average attention for the NOAD group (files under fold_0/NOAD).
# NOTE(review): the `female_` prefix does not match the NOAD folder read here;
# presumably the names were carried over from a gender-based split - confirm.
female_folder_path = './ROSMAP-analysis/fold_0/NOAD/'
female_result_df = merge_and_average_attention(female_folder_path)

if female_result_df is None:
    print("No valid data found to process.")
else:
    female_result_df.to_csv('./ROSMAP-analysis/fold_0/average_attention_NOAD.csv', index=False)
    print("File saved to 'average_attention_NOAD.csv'")

# Average attention for the AD group (files under fold_0/AD).
# NOTE(review): the `male_` prefix does not match the AD folder read here;
# presumably the names were carried over from a gender-based split - confirm.
male_folder_path = './ROSMAP-analysis/fold_0/AD/'
male_result_df = merge_and_average_attention(male_folder_path)

if male_result_df is None:
    print("No valid data found to process.")
else:
    male_result_df.to_csv('./ROSMAP-analysis/fold_0/average_attention_AD.csv', index=False)
    print("File saved to 'average_attention_AD.csv'")


File saved to 'average_attention_NOAD.csv'
File saved to 'average_attention_AD.csv'
