In [1]:
import s3fs
import os
import os.path as op
import pandas as pd
import numpy as np

import afqinsight as afqi



In [2]:
# Subject IDs to process. Kept as strings because they are interpolated
# directly into the S3 keys and the local `hcp_data/sub-<id>` paths.
subjects = [
    "100206",
    "162228",
    "175540",
]

In [5]:
# download profiles
# Copy each subject's tract-profile CSV from the `profile-hcp/afq/afq/...`
# S3 path into `hcp_data/sub-<subject>/nodes.csv`.
fs = s3fs.S3FileSystem()

# exist_ok=True makes directory creation idempotent, so re-running the cell
# (or a directory appearing between a check and the mkdir) cannot raise.
os.makedirs('hcp_data', exist_ok=True)

for subject in subjects:
    os.makedirs(f'hcp_data/sub-{subject}', exist_ok=True)
    try:
        fs.get(f'profile-hcp/afq/afq/sub-{subject}/sess-01/sub-{subject}_dwi_profiles.csv', f'hcp_data/sub-{subject}/nodes.csv')
    except FileNotFoundError:
        # Best effort: a subject without profiles is reported, not fatal.
        print(f'subject {subject} profiles not found')

In [None]:
# organize profiles
# Combine every subject's per-scalar profiles (one row per scalar/bundle/node)
# into a single table with one row per (subject, bundle, node) and the FA and
# MD values side by side, then write it to `hcp_data/nodes.csv`.
profile_columns = ['subjectID', 'tractID', 'nodeID', 'fa', 'md']
profile_frames = []
for subject in subjects:
    nodes_path = f'hcp_data/sub-{subject}/nodes.csv'
    if not op.exists(nodes_path):
        # The download cell only prints a message when a subject's profiles
        # are missing, so skip that subject here instead of crashing.
        print(f'subject {subject} profiles not found')
        continue
    profile_curr = pd.read_csv(nodes_path)
    key_and_value = ['bundle', 'node', 'profiles']
    fa = (
        profile_curr.loc[profile_curr['scalar'] == 'dti_fa', key_and_value]
        .rename(columns={'profiles': 'fa'})
    )
    md = (
        profile_curr.loc[profile_curr['scalar'] == 'dti_md', key_and_value]
        # Keep only the first MD value per (bundle, node): this matches the
        # previous "first match" lookup and stops the merge duplicating rows.
        .drop_duplicates(subset=['bundle', 'node'], keep='first')
        .rename(columns={'profiles': 'md'})
    )
    # A left merge keeps one output row per FA row, in the FA row order.
    # An FA node with no matching MD value gets NaN rather than an IndexError.
    merged = (
        fa.merge(md, on=['bundle', 'node'], how='left')
        .rename(columns={'bundle': 'tractID', 'node': 'nodeID'})
    )
    merged.insert(0, 'subjectID', subject)
    profile_frames.append(merged[profile_columns])

# Concatenate once instead of growing the frame row by row
# (`DataFrame.append` was removed in pandas 2.0 and was quadratic anyway).
if profile_frames:
    profile_all = pd.concat(profile_frames, ignore_index=True)
else:
    profile_all = pd.DataFrame(columns=profile_columns)
profile_all.to_csv('hcp_data/nodes.csv', index=False)
     

In [10]:
# organize subjects
# Make sure `hcp_data/subjects.csv` has a `subjectID` column: if none exists,
# the first column is renamed to `subjectID` and the file is rewritten.
subjects_pd = pd.read_csv('hcp_data/subjects.csv')
if 'subjectID' not in subjects_pd.columns:
    # Rename through the DataFrame API rather than writing into
    # `columns.values`, which mutates the Index's underlying buffer in place.
    subjects_pd = subjects_pd.rename(columns={subjects_pd.columns[0]: 'subjectID'})
    # index=True writes the row index as an unnamed leading column; the
    # loading cell drops it again as "Unnamed: 0".
    subjects_pd.to_csv('hcp_data/subjects.csv', index=True)
# Kept as a notebook-level name in case later cells use it.
columns = subjects_pd.columns.to_numpy()

In [12]:
# Reload the subjects table indexed by `subjectID`. "Unnamed: 0" is only
# present when the organize-subjects cell rewrote the file with its row index,
# so errors="ignore" keeps this cell working when the CSV already had a
# `subjectID` column and was never rewritten.
subjects_pd = pd.read_csv('hcp_data/subjects.csv', index_col="subjectID").drop(
    columns=["Unnamed: 0"], errors="ignore"
)
# Every remaining subjects.csv column is passed as the second argument.
mat = afqi.load_afq_data('hcp_data', subjects_pd.columns)