In [None]:
import cloudknot as ck

In [None]:
# Select the AWS region that cloudknot should use for the rest of the notebook.
ck.set_region("us-west-2")

In [None]:
def R1_tract_profiles(params):
    """Compute myelin-map tract profiles for one subject and upload them to S3.

    For every session folder listed under the subject's ``afq_rel2/output``
    prefix, the session's myelin map is read from the ``dhcp_anat_pipeline``
    prefix, each requested bundle's ``.trk`` file is downloaded, and a profile
    is computed with ``dipy.stats.analysis.afq_profile`` weighted by
    ``gaussian_weights``. The profiles are reshaped to long format (columns
    ``tractID``, ``t1wt2w``, ``nodeID``), written to ``local.csv`` and
    uploaded into the session's output folder.

    All imports live inside the function body; this function is passed as
    ``func`` to ``ck.DockerImage`` elsewhere in the notebook, presumably so
    that it is self-contained when cloudknot packages it.

    Parameters
    ----------
    params : tuple
        ``(sub, bundles)`` where ``sub`` is the subject identifier without
        the ``sub-`` prefix and ``bundles`` is an iterable of bundle names as
        they appear in the ``desc-{bundle}`` part of the tractography file
        names.

    Returns
    -------
    None
        Results are written to S3 as a side effect.
    """
    sub, bundles = params
    import s3fs
    import dipy.stats.analysis as dsa
    import dipy.io.streamline as dis
    # NOTE(review): AFQ, afd and np are not referenced below. They are kept
    # because cloudknot may infer the image's dependencies from the imports
    # in this function — confirm before removing.
    import AFQ
    import AFQ.data as afd
    import numpy as np
    import pandas as pd
    from AFQ.data import s3bids as s3b

    # One filesystem handle is enough for every listing, download and upload.
    fs = s3fs.S3FileSystem()

    sub_folder = f"dhcp-afq/afq_rel2/output/sub-{sub}/"
    for ses_folder in fs.ls(sub_folder):
        ses = ses_folder.rstrip('/').split('/')[-1]
        # Only session folders can be processed; anything else stored under
        # the subject prefix would fail at the myelin-map read below.
        if not ses.startswith("ses-"):
            print(f"Skipping non-session entry: {ses_folder}")
            continue

        key_myelin = f"dhcp-afq/dhcp_anat_pipeline/sub-{sub}/{ses}/anat/sub-{sub}_{ses}_space-T2w_myelinmap.nii.gz"
        myelin_img = s3b.s3fs_nifti_read(key_myelin)
        myelin = myelin_img.get_fdata()
        tract_profiles = {}

        for bundle in bundles:
            # Download to a fixed local name; the file is overwritten for
            # each bundle and loaded again immediately.
            fs.get(f'dhcp-afq/afq_rel2/output/sub-{sub}/{ses}/bundles/sub-{sub}_{ses}_coordsys-RASMM_trkmethod-probCSD_recogmethod-AFQ_desc-{bundle}_tractography.trk',
                   'bundle.trk')
            sft = dis.load_trk("bundle.trk", "same")

            tract_profiles[bundle] = dsa.afq_profile(
                myelin,
                sft.streamlines,
                myelin_img.affine,
                weights=dsa.gaussian_weights(sft.streamlines),
            )

        # Wide (one column per bundle) -> long, keeping the row index as the
        # node position along each profile.
        df = pd.DataFrame(tract_profiles)
        df_m = df.melt(ignore_index=False)
        df_m["nodeID"] = df_m.index
        df_m = df_m.rename(columns={"variable": "tractID", "value": "t1wt2w"})
        df_m.to_csv("local.csv", index=False)
        fs.put("local.csv", f"dhcp-afq/afq_rel2/output/sub-{sub}/{ses}/sub-{sub}_{ses}_t1wt2w_tract_profiles.csv")

In [None]:
import pandas as pd

In [None]:
# Read the tab-separated participants table directly from the S3 bucket.
participants = pd.read_csv(
    "s3://dhcp-afq/dhcp_dmri_pipeline/participants.tsv",
    sep="\t",
)

In [None]:
# One entry per row of the participants table; used to build the job inputs.
subjects = participants["participant_id"]

In [None]:
# Bundle names as they appear in the `desc-<bundle>` part of the .trk file names.
bundles = [
    "ARCR", "ARCL",
    "ATRR", "ATRL",
    "CGCR", "CGCL",
    "CSTR", "CSTL",
    "FA", "FP",
    "IFOR", "IFOL",
    "ILFR", "ILFL",
    "MdLFR", "MdLFL",
    "ORL", "ORR",
    "pARCL", "pARCR",
    "SLFR", "SLFL",
    "UNCR", "UNCL",
    "VOFL", "VOFR",
]

In [None]:
# One (subject, bundle list) tuple per subject — the shape R1_tract_profiles unpacks.
inputs = [(subject_id, bundles) for subject_id in subjects]

In [None]:
# NOTE(review): looks like a leftover debugging step. local.csv is only written
# by R1_tract_profiles, so it presumably exists here only after running that
# function locally in this working directory; consider removing this cell.
!open local.csv

In [None]:
# Docker image that cloudknot builds around R1_tract_profiles.
di = ck.DockerImage(
    name="dhcp-r1-profiles-new",
    func=R1_tract_profiles,
    base_image="pennbbl/qsiprep:0.19.0",
    # The trailing comma makes this a real one-element tuple; without it the
    # parentheses are just grouping and the value is a bare string.
    github_installs=("https://github.com/yeatmanlab/pyAFQ.git@master",),
    overwrite=True,
)

In [None]:
# Knot that runs the image held in `di`, with the S3 full-access policy
# attached and its AWS resources tagged with the project name.
knot = ck.Knot(
    name="dhcp-t1wt2w-profiles-rel2",
    docker_image=di,
    bid_percentage=100,
    pars_policies=("AmazonS3FullAccess",),
    aws_resource_tags={"Project": "DHCP"},
)

In [None]:
# Sanity check: number of (subject, bundles) inputs that will be submitted.
len(inputs)

In [None]:
# Submit R1_tract_profiles over every input tuple through the knot.
# NOTE(review): presumably one job element per entry in `inputs` — confirm against cloudknot docs.
results = knot.map(inputs)

In [None]:
# Keep a handle on the first job recorded on the knot for monitoring.
j0 = knot.jobs[0]

In [None]:
# Display the current status of that job (re-run this cell to refresh).
j0.status