In [1]:
import pandas as pd
import os

def add_subject_column(csv_path):
    """
    Add a 'subject' column derived from the file paths in a CSV's first column.

    Each subject value is the base name of the path with everything from the
    first '.nii' onward removed (e.g. '/data/sub01.nii.gz' -> 'sub01').

    Args:
        csv_path (str): Path to the input CSV; its first column holds file paths.

    Returns:
        str: Path of the CSV that was written: the input name with
            '_and_subjects' inserted before the extension ('.csv' is appended
            when the input name has no extension).
    """
    df = pd.read_csv(csv_path)
    subjects = df.iloc[:, 0].apply(lambda p: os.path.basename(p).split('.nii')[0])

    # Build the output name from the split extension rather than with
    # str.replace('.csv', ...): replace() leaves a name without '.csv' unchanged,
    # which would make the output path equal the input and overwrite it.
    root, ext = os.path.splitext(csv_path)
    output_path = f"{root}_and_subjects{ext or '.csv'}"

    df.assign(subject=subjects).to_csv(output_path, index=False)
    return output_path

# Example usage:
# add_subject_column('your_file_path_here.csv')


In [2]:
# Table whose first column holds NIfTI paths; passed to add_subject_column in the next cell.
# NOTE(review): this path has no '.csv' suffix — confirm it names the intended file.
csv_path = '/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/analyses/paths_to_niftis_for_elmira'

In [3]:
# Write a copy of the table at csv_path with a 'subject' column added.
add_subject_column(csv_path)

Merge two CSVs

In [13]:
# Inputs for the merge cell below.
# csv_1 / csv_2: the two CSV files to join.
csv_1 = '/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/ADNI/metadata/cleaned_information/all_adni_patient_diagnoses.csv'
csv_2 = '/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/ADNI/metadata/nifti_paths/paths_to_t1_files_and_subjects.csv'
# Column used as the join key; it must be present in both files.
merge_column = 'PTID'
# Destination of the merged table.
save_path = '/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/ADNI/metadata/cleaned_information/master_spreadsheet.csv'

Unconditional Merge (Inner Join on the Merge Column)

In [12]:
# Join the two CSVs on merge_column (pd.merge defaults to an inner join) and save the result.
# DataFrame.to_csv returns None when given a path, so the merged frame is bound first;
# chaining .to_csv onto the assignment would leave merged_df set to None.
merged_df = pd.merge(pd.read_csv(csv_1), pd.read_csv(csv_2), on=merge_column)
merged_df.to_csv(save_path, index=False)


Conditional Merge

In [48]:
# Inputs for the conditional-merge cell below.
# csv_1: table that contains the condition column; csv_2: table joined with it.
csv_1 = '/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/ADNI/metadata/cleaned_information/master_spreadsheet_6mo_12mo_change.csv'
csv_2 = '/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/ADNI/metadata/nifti_paths/paths_to_corrected_gm_vbm_files_and_subjects.csv'
# Join key shared by both files.
merge_column = 'PTID'
# Destination of the merged table.
save_path = '/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/ADNI/metadata/cleaned_information/master_spreadsheet_6mo_12mo_change_uncgm.csv'
# Rows keep their merged values only where this column equals condition_to_keep_merges_on.
condition_column_in_first_csv = 'VISCODE'
condition_to_keep_merges_on = 'bl'
# Columns that are blanked on every row that fails the condition.
# NOTE(review): the csv_2 filename says "corrected" while this column says "uncorrected" —
# confirm the column name matches a column in csv_2.
columns_to_conditionally_merge = ['local_path_spm_gm_vol_uncorrected']

In [49]:
# Read both CSV files into DataFrames.
# Note the names: df_paths holds csv_1 (the table with the condition column) and
# df_patient holds csv_2. The names are kept so later cells that use them still work.
df_paths = pd.read_csv(csv_1)
df_patient = pd.read_csv(csv_2)

# Left join with csv_2 on the left: every csv_2 row is kept, and csv_1 rows whose
# merge key does not appear in csv_2 are dropped.
# NOTE(review): confirm csv_2 (rather than csv_1) is the table whose rows should all be kept.
merged_df = pd.merge(df_patient, df_paths, on=merge_column, how='left')

# Fail loudly if an expected column is absent from the merge result (misspelt name,
# or a name present in both files and therefore suffixed '_x'/'_y'). Without this
# check, the .loc assignment below would silently create a new, all-null column.
missing_columns = [
    col
    for col in [condition_column_in_first_csv, *columns_to_conditionally_merge]
    if col not in merged_df.columns
]
if missing_columns:
    raise KeyError(f"Columns not found in merged DataFrame: {missing_columns}")

# Blank the listed columns on every row that does not satisfy the condition
condition = (merged_df[condition_column_in_first_csv] == condition_to_keep_merges_on)
for col in columns_to_conditionally_merge:
    merged_df.loc[~condition, col] = None

# Save the merged DataFrame to save_path
merged_df.to_csv(save_path, index=False)


Remove Every Other Row of a DataFrame (Keep Rows at Odd Positions)

In [24]:
# CSV to thin out in the next cell.
csv_1 = '/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/ADNI/metadata/cleaned_information/ADNI1_1_yearpercent_change_adas_scores.csv'

In [25]:
# Load the table named by csv_1.
df1 = pd.read_csv(csv_1)

# Keep positions 1, 3, 5, ... which discards the rows at positions 0, 2, 4, ...
odd_positions = slice(1, None, 2)
filtered_df = df1.iloc[odd_positions]

# Derive the output name by replacing '.csv' in the input path with '_filtered.csv',
# then write the thinned table without the index column.
filtered_csv_path = csv_1.replace('.csv', '_filtered.csv')
filtered_df.to_csv(filtered_csv_path, index=False)

Copy Random Subsample of Niftis to A Directory

In [50]:
import pandas as pd
import os
import shutil

def sample_and_copy_files(csv_path, column_name, N, target_dir, random_state=None):
    """
    Copy a random sample of the files listed in one CSV column into a directory.

    Args:
        csv_path (str): Path to the CSV file listing the candidate files.
        column_name (str): Column holding the file paths; empty cells are ignored.
        N (int): Number of paths to sample, without replacement. pandas raises
            ValueError if N exceeds the number of non-empty cells.
        target_dir (str): Directory the files are copied into; created if needed.
        random_state (int, optional): Seed passed to pandas' sampler so the same
            files are picked on every run. Defaults to None (unseeded).

    Returns:
        list: Destination paths of the files that were copied. Sampled paths that
            are not existing files are reported with a message and skipped.
    """
    df = pd.read_csv(csv_path)

    # Only non-empty cells are candidates for sampling.
    candidate_paths = df[column_name].dropna()
    sampled_paths = candidate_paths.sample(n=N, random_state=random_state)

    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(target_dir, exist_ok=True)

    copied = []
    for path in sampled_paths:
        # isfile (not exists): a directory path would make shutil.copy raise
        # and abort the remaining copies.
        if os.path.isfile(path):
            copied.append(shutil.copy(path, target_dir))
        else:
            print(f"File not found: {path}")
    return copied


In [51]:
# Arguments for sample_and_copy_files (called in the next cell)
# CSV listing the candidate files
csv_path = '/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/ADNI/metadata/cleaned_information/master_spreadsheet_6mo_12mo_change.csv'
# Column of csv_path that holds the file paths to sample from
column_name = 'local_paths_to_t1_file'
N = 50  # Number of samples
# Directory the sampled files are copied into
target_dir = '/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/for_elmira'


In [52]:
# Copy N randomly chosen files listed in column_name of csv_path into target_dir
sample_and_copy_files(csv_path, column_name, N, target_dir)


Create A BIDS Architecture Given a Spreadsheet

In [2]:
import os
import pandas as pd

def create_subject_folders(excel_path, base_directories):
    """
    Creates subject folders based on the ID column in the first sheet of the Excel file.

    One folder named ``sub-<ID>`` is created per distinct ID under every base
    directory. Existing folders are left untouched.

    Args:
        excel_path (str): Path to the Excel file containing subject IDs.
        base_directories (list): List of base directories where subject folders will be created.
            A single path string is also accepted and treated as a one-item list.

    Raises:
        ValueError: If the first sheet has no 'ID' column.

    """
    # Read Excel file and get the first sheet
    df = pd.read_excel(excel_path, sheet_name=0)

    # Check if 'ID' column exists
    if 'ID' not in df.columns:
        raise ValueError("No 'ID' column found in the Excel sheet.")

    # A bare string would otherwise be iterated character by character,
    # creating one directory per character.
    if isinstance(base_directories, (str, os.PathLike)):
        base_directories = [base_directories]

    subject_labels = _clean_subject_labels(df['ID'])

    # Loop through base directories
    for base_dir in base_directories:
        # exist_ok makes both calls safe when the folder is already there
        os.makedirs(base_dir, exist_ok=True)

        for label in subject_labels:
            os.makedirs(os.path.join(base_dir, f"sub-{label}"), exist_ok=True)


def _clean_subject_labels(ids):
    """Return the distinct, non-empty subject labels from an ID column, in first-seen order."""
    labels = {}
    # Blank cells are read as NaN and would otherwise yield a 'sub-nan' folder.
    for raw_id in ids.dropna():
        # A numeric column containing blanks is read as float, turning ID 1 into 1.0;
        # convert whole-number floats back so the folder is 'sub-1', not 'sub-1.0'.
        if isinstance(raw_id, float) and raw_id.is_integer():
            raw_id = int(raw_id)
        label = str(raw_id).strip()
        if label:
            labels[label] = None
    return list(labels)

In [3]:
# Example usage: create sub-<ID> folders under both base directories,
# using the 'ID' column from the first sheet of the workbook at excel_path.
excel_path = "/Volumes/One Touch/Manitoba_PET/master_list_howard.xlsx"
base_directories = ["/Volumes/One Touch/Manitoba_PET/DICOMS", "/Volumes/One Touch/Manitoba_PET/PET"]
create_subject_folders(excel_path, base_directories)

