## JSONファイル出力

In [20]:
import pathlib
import re
import json
import pandas as pd
from collections import defaultdict


# == Base ==
# Base directory from which every path below is derived.
DATA_DIR = pathlib.Path("/data", "radiology_datas")

# == Dataset ==
# One directory per image dataset.
ADNI1 = DATA_DIR.joinpath("ADNI1")
ADNI2 = DATA_DIR.joinpath("JHU-radiology", "20170509")
ADNI2_2 = DATA_DIR.joinpath("JHU-radiology", "MNI_skull_stripped", "output")
PPMI = DATA_DIR.joinpath("JHU-radiology", "PPMI")
FourRTNI = DATA_DIR.joinpath("JHU-radiology", "4RTNI")

# Directory searched for the blacklist files.
BLACKLIST_DIR = DATA_DIR.joinpath("util", "lists")

# CSV label -> path of the CSV file (insertion order is relied on by loops).
DATA_CSV = {
    "ADNI": DATA_DIR.joinpath("JHU-radiology", "ADNIMERGE.csv"),
    "PPMI": DATA_DIR.joinpath("JHU-radiology", "PPMI.csv"),
    "4RTNI": FourRTNI.joinpath("csv", "4RTNI_DATA.csv"),
}

# Dataset label -> directory containing that dataset's images.
DATA_DIRS_DICT = {
    "ADNI1": ADNI1,
    "ADNI2": ADNI2,
    "ADNI2-2": ADNI2_2,
    "PPMI": PPMI,
    "4RTNI": FourRTNI.joinpath("SkullStripped"),
}


In [21]:
def get_uid(path):
    """
    Extract the image uid from the file name of ``path``.

    The dataset is identified by checking which directory of
    DATA_DIRS_DICT occurs in ``str(path)``; the first match (in dict
    order) decides how the underscore-separated file name is parsed.

    Args
    ----------
    path : pathlib
        Path to an image file (e.g. a pkl file).
    Return
    ----------
    uid : int, str or None
        int for ADNI2 / ADNI2-2 / PPMI / 4RTNI files, the unchanged file
        name (str) for any other matching dataset such as ADNI1, and
        None when no dataset directory occurs in the path.
    """
    filename = path.name
    path_text = str(path)

    for key, value in DATA_DIRS_DICT.items():
        if str(value) not in path_text:
            continue

        if key == "ADNI2":
            # Second-to-last token, with its one-character prefix removed.
            return int(filename.split("_")[-2][1:])

        if key == "ADNI2-2":
            # Fourth-to-last token, with its one-character prefix removed.
            return int(filename.split("_")[-4][1:])

        if key in ("PPMI", "4RTNI"):
            # Fourth-to-last token is already purely numeric.
            return int(filename.split("_")[-4])

        # Matching dataset without a dedicated rule: keep the file name.
        return filename

    return None


def get_blacklist(dir):
    """
    Collect the blacklisted uids from every ``uids.txt`` found under ``dir``.

    Args
    ----------
    dir : pathlib
        Directory that is searched recursively for files named
        ``uids.txt``; each file holds one integer uid per line.
    Return
    ----------
    uid : list of int
        Every uid read, in the order the files are visited. Duplicates
        are kept.
    Raises
    ----------
    ValueError
        If a non-blank line cannot be parsed as an integer.
    """
    excluded_uids = []
    for path in dir.glob("**/uids.txt"):
        with open(path, "r") as rf:
            # Skip blank lines (such as a trailing empty line) instead of
            # crashing on int(""); int() itself ignores the surrounding
            # whitespace and line ending.
            excluded_uids.extend(int(line) for line in rf if line.strip())
    return excluded_uids

# Uids read from every uids.txt found under BLACKLIST_DIR.
black_list = get_blacklist(BLACKLIST_DIR)


In [54]:
# Build one JSON record per (subject row, image uid) and write them all to
# ./all_subject.json.
contents = []

# Per-CSV column names holding the subject ID and the diagnosis class.
PTID = {"ADNI": "PTID", "PPMI": "Subject", "4RTNI": "SUBID"}
PTCLASS = {"ADNI": "DX_bl", "PPMI": "Group", "4RTNI": "DX"}

# File extensions treated as image files.
image_suffixes = (".pkl", ".npy")

# Build the set once so every membership test below is O(1).
blacklisted_uids = set(black_list)

for csv_label, csv_path in DATA_CSV.items():

    df = pd.read_csv(csv_path)
    if csv_label == "4RTNI":
        # Rename CBS -> CBD; labels missing from the mapping become NaN.
        df["DX"] = df["DX"].map(
            {"CBS": "CBD", "PSP": "PSP", "Oth": "Oth"}, na_action=None)

    # NOTE(review): only the first three rows of each CSV are processed even
    # though the output is named all_subject.json -- confirm whether this
    # limit is a debugging leftover. Rows that share a subject ID each emit
    # their own records.
    for data in df[:3].to_dict(orient="records"):
        ptclass = data[PTCLASS[csv_label]]
        ptid = str(data[PTID[csv_label]])
        if pd.isna(ptclass):
            # A missing class cannot be turned into a directory name
            # (joining a path with NaN raises TypeError), so skip the row.
            continue

        # The first dataset directory that holds image files for this
        # subject wins; both names stay empty when none does.
        files = []
        dataset = None
        for label, data_dir in DATA_DIRS_DICT.items():
            subject_dir = data_dir / ptclass / ptid
            files = [p for p in subject_dir.glob("**/*")
                     if p.suffix in image_suffixes]
            if files:
                dataset = label
                break

        # uid -> {"full": path, "half": path}
        add_list = defaultdict(dict)
        for f in files:
            uid = get_uid(f)
            if f.name.startswith("fullsize"):
                add_list[uid]["full"] = str(f)
            if f.name.startswith("half"):
                add_list[uid]["half"] = str(f)

        for uid, paths in add_list.items():
            contents.append({
                "id": ptid,
                "class": ptclass,
                "images": [
                    {
                        "uid": uid,
                        "blacklisted": uid in blacklisted_uids,
                        # A size that was not found is written as null
                        # instead of aborting the export with a KeyError.
                        "fullsize_img_path": paths.get("full"),
                        "halfsize_img_path": paths.get("half"),
                    }
                ],
                "dataset": dataset,
            })

# ensure_ascii=False may emit non-ASCII characters, so fix the encoding.
with open('./all_subject.json', 'w', encoding="utf-8") as f:
    json.dump(contents, f, ensure_ascii=False, indent=2)

print(len(contents))

22


## CSVファイル確認

In [24]:
# Allow up to 100 columns to be displayed before pandas truncates the view.
pd.set_option('display.max_columns', 100)
# Load the ADNI CSV and preview its first rows.
df = pd.read_csv(DATA_CSV["ADNI"])
df.head()


Unnamed: 0,RID,PTID,VISCODE,SITE,COLPROT,ORIGPROT,EXAMDATE,DX_bl,AGE,PTGENDER,PTEDUCAT,PTETHCAT,PTRACCAT,PTMARRY,APOE4,FDG,PIB,AV45,CDRSB,ADAS11,ADAS13,MMSE,RAVLT_immediate,RAVLT_learning,RAVLT_forgetting,RAVLT_perc_forgetting,FAQ,MOCA,EcogPtMem,EcogPtLang,EcogPtVisspat,EcogPtPlan,EcogPtOrgan,EcogPtDivatt,EcogPtTotal,EcogSPMem,EcogSPLang,EcogSPVisspat,EcogSPPlan,EcogSPOrgan,EcogSPDivatt,EcogSPTotal,FLDSTRENG,FSVERSION,Ventricles,Hippocampus,WholeBrain,Entorhinal,Fusiform,MidTemp,ICV,DX,EXAMDATE_bl,CDRSB_bl,ADAS11_bl,ADAS13_bl,MMSE_bl,RAVLT_immediate_bl,RAVLT_learning_bl,RAVLT_forgetting_bl,RAVLT_perc_forgetting_bl,FAQ_bl,FLDSTRENG_bl,FSVERSION_bl,Ventricles_bl,Hippocampus_bl,WholeBrain_bl,Entorhinal_bl,Fusiform_bl,MidTemp_bl,ICV_bl,MOCA_bl,EcogPtMem_bl,EcogPtLang_bl,EcogPtVisspat_bl,EcogPtPlan_bl,EcogPtOrgan_bl,EcogPtDivatt_bl,EcogPtTotal_bl,EcogSPMem_bl,EcogSPLang_bl,EcogSPVisspat_bl,EcogSPPlan_bl,EcogSPOrgan_bl,EcogSPDivatt_bl,EcogSPTotal_bl,FDG_bl,PIB_bl,AV45_bl,Years_bl,Month_bl,Month,M,update_stamp
0,2,011_S_0002,bl,11,ADNI1,ADNI1,2005-09-08,CN,74.3,Male,16,Not Hisp/Latino,White,Married,0.0,1.36926,,,0.0,10.67,18.67,28.0,44.0,4.0,6.0,54.5455,0.0,,,,,,,,,,,,,,,,1.5 Tesla MRI,Cross-Sectional FreeSurfer (FreeSurfer Version...,118233.0,8336.0,1229740.0,4177.0,16559.0,27936.0,1984660.0,NL,2005-09-08,0.0,10.67,18.67,28,44.0,4.0,6.0,54.5455,0.0,1.5 Tesla MRI,Cross-Sectional FreeSurfer (FreeSurfer Version...,118233.0,8336.0,1229740.0,4177.0,16559.0,27936.0,1984660.0,,,,,,,,,,,,,,,,1.36926,,,0.0,0.0,0,0,2016-09-02 00:45:03.0
1,3,011_S_0003,bl,11,ADNI1,ADNI1,2005-09-12,AD,81.3,Male,18,Not Hisp/Latino,White,Married,1.0,1.09079,,,4.5,22.0,31.0,20.0,22.0,1.0,4.0,100.0,10.0,,,,,,,,,,,,,,,,1.5 Tesla MRI,Cross-Sectional FreeSurfer (FreeSurfer Version...,84599.0,5319.0,1129830.0,1791.0,15506.0,18422.0,1920690.0,Dementia,2005-09-12,4.5,22.0,31.0,20,22.0,1.0,4.0,100.0,10.0,1.5 Tesla MRI,Cross-Sectional FreeSurfer (FreeSurfer Version...,84599.0,5319.0,1129830.0,1791.0,15506.0,18422.0,1920690.0,,,,,,,,,,,,,,,,1.09079,,,0.0,0.0,0,0,2016-09-02 00:45:03.0
2,3,011_S_0003,m06,11,ADNI1,ADNI1,2006-03-13,AD,81.3,Male,18,Not Hisp/Latino,White,Married,1.0,1.0636,,,6.0,19.0,30.0,24.0,19.0,2.0,6.0,100.0,12.0,,,,,,,,,,,,,,,,1.5 Tesla MRI,Cross-Sectional FreeSurfer (FreeSurfer Version...,88580.0,5446.0,1100060.0,2427.0,14400.0,16972.0,1906430.0,Dementia,2005-09-12,4.5,22.0,31.0,20,22.0,1.0,4.0,100.0,10.0,1.5 Tesla MRI,Cross-Sectional FreeSurfer (FreeSurfer Version...,84599.0,5319.0,1129830.0,1791.0,15506.0,18422.0,1920690.0,,,,,,,,,,,,,,,,1.09079,,,0.498289,5.96721,6,6,2016-09-02 00:45:03.0
3,3,011_S_0003,m12,11,ADNI1,ADNI1,2006-09-12,AD,81.3,Male,18,Not Hisp/Latino,White,Married,1.0,1.10384,,,3.5,24.0,35.0,17.0,31.0,2.0,7.0,100.0,17.0,,,,,,,,,,,,,,,,1.5 Tesla MRI,Cross-Sectional FreeSurfer (FreeSurfer Version...,90099.0,5157.0,1095640.0,1596.0,14617.0,17330.0,1903820.0,Dementia,2005-09-12,4.5,22.0,31.0,20,22.0,1.0,4.0,100.0,10.0,1.5 Tesla MRI,Cross-Sectional FreeSurfer (FreeSurfer Version...,84599.0,5319.0,1129830.0,1791.0,15506.0,18422.0,1920690.0,,,,,,,,,,,,,,,,1.09079,,,0.999316,11.9672,12,12,2016-09-02 00:45:03.0
4,3,011_S_0003,m24,11,ADNI1,ADNI1,2007-09-12,AD,81.3,Male,18,Not Hisp/Latino,White,Married,1.0,1.03871,,,8.0,25.67,37.67,19.0,23.0,1.0,5.0,100.0,14.0,,,,,,,,,,,,,,,,1.5 Tesla MRI,Cross-Sectional FreeSurfer (FreeSurfer Version...,97420.0,5139.0,1088560.0,1175.0,14033.0,16398.0,1903420.0,Dementia,2005-09-12,4.5,22.0,31.0,20,22.0,1.0,4.0,100.0,10.0,1.5 Tesla MRI,Cross-Sectional FreeSurfer (FreeSurfer Version...,84599.0,5319.0,1129830.0,1791.0,15506.0,18422.0,1920690.0,,,,,,,,,,,,,,,,1.09079,,,1.99863,23.9344,24,24,2016-09-02 00:45:03.0


In [23]:
# Allow up to 100 columns to be displayed before pandas truncates the view.
pd.set_option('display.max_columns', 100)
# Load the PPMI CSV and preview its first rows.
df = pd.read_csv(DATA_CSV["PPMI"])
df.head()


Unnamed: 0,Image Data ID,Subject,Group,Sex,Age,Visit,Modality,Description,Type,Acq Date,Format,Downloaded
0,282136,3758,PD,M,69,1,MRI,MPRAGE GRAPPA,Original,10/12/2011,DCM,
1,374889,3758,PD,M,71,5,MRI,MPRAGE GRAPPA,Original,4/3/2013,DCM,
2,402066,3758,PD,M,71,7,MRI,MPRAGE GRAPPA,Original,10/9/2013,DCM,
3,440202,3757,PD,M,73,7,MRI,MPRAGE GRAPPA,Original,4/9/2014,DCM,
4,362580,3757,PD,M,72,5,MRI,MPRAGE GRAPPA,Original,2/13/2013,DCM,


In [52]:
# Allow up to 100 columns to be displayed before pandas truncates the view.
pd.set_option('display.max_columns', 100)
# Load the 4RTNI CSV and preview its first rows.
df = pd.read_csv(DATA_CSV["4RTNI"])
# Rename CBS -> CBD; labels missing from the mapping become NaN.
df["DX"] = df["DX"].map({"CBS": "CBD", "PSP": "PSP", "Oth": "Oth"}, na_action=None)
df.head()

Unnamed: 0,SUBID,DX,DDURATION,AUTOPSYDATE,AUTOPSYPX,SEX,AGE_AT_TP0,EDUCATION,RACE,LATINO,SCANID_0,BIOSPECIMENS_0_SERUM,BIOSPECIMENS_0_PLASMA,BIOSPECIMENS_0_URINE,BIOSPECIMENS_0_CSF,PSPRS_0_DATE,PSPRS_0_IMPUTED,PSPRS_0_TOTAL,PSPRS_0_SUBSCORE_HISTORY,PSPRS_0_SUBSCORE_MENTATION,PSPRS_0_SUBSCORE_BULBAR,PSPRS_0_SUBSCORE_OCULARMOTOR,PSPRS_0_SUBSCORE_LIMBMOTOR,PSPRS_0_SUBSCORE_GAITMIDLINE,PSPRS_0_MEDS,SEADL_0_DATE,SEADL_0,CGI_S_0_DATE,CGI_S_0,UPDRS_0_DATE,UPDRS_0_IMPUTED,UPDRS_0_TOTAL,UPDRS_0_PDNORMAL,MOCA_0_DATE,MOCA_0_MOCATOTWITHEDUC,MOCA_0_BEFAFTNP,MOCA_0_LNGTH,MMSE_0_DATE,MMSE_0_MMSETOT,CVLT_0_DATE,CVLT_0_TRCOTOT,CVLT_0_CORR30,CVLT_0_CORR10,CVLT_0_CUEDCOR,CVLT_0_RECOG,CVLT_0_CORRLONG,BENSON_0_DATE,BENSON_0_MODREY,BENSON_0_REY10M,BENSON_0_REYRECG,...,NPI_Q_12MO_HALSEV,NPI_Q_12MO_HALDIS,NPI_Q_12MO_AGITATE,NPI_Q_12MO_AGSEV,NPI_Q_12MO_AGDIS,NPI_Q_12MO_DPRSSN,NPI_Q_12MO_DEPSEV,NPI_Q_12MO_DEPDIS,NPI_Q_12MO_ANXIETY,NPI_Q_12MO_ANXSEV,NPI_Q_12MO_ANXDIS,NPI_Q_12MO_EUPHORIA,NPI_Q_12MO_EUPSEV,NPI_Q_12MO_EUPDIS,NPI_Q_12MO_APATHY,NPI_Q_12MO_APTHSEV,NPI_Q_12MO_APTHDIS,NPI_Q_12MO_DISINHIBITION,NPI_Q_12MO_DISSEV,NPI_Q_12MO_DISDIS,NPI_Q_12MO_IRRITBLE,NPI_Q_12MO_IRRSEV,NPI_Q_12MO_IRRDIS,NPI_Q_12MO_MOTOR,NPI_Q_12MO_MOTSEV,NPI_Q_12MO_MOTDIS,NPI_Q_12MO_SLEEP,NPI_Q_12MO_SLESEV,NPI_Q_12MO_SLEDIS,NPI_Q_12MO_EAT,NPI_Q_12MO_EATSEV,NPI_Q_12MO_EATDIS,CDR_12MO_DATE,CDR_12MO_CDRTOT,CDR_12MO_BOXSCORE,CDR_12MO_MEMORY,CDR_12MO_BEHAV,CDR_12MO_MOTOR,FAQ_12MO_DATE,FAQ_12MO_FAQTOT,IRI_12MO_DATE,IRI_12MO_IRIEC,IRI_12MO_IRIPT,RSMS_12MO_DATE,RSMS_12MO_RSMSTOTI,RSMS_12MO_RSMSSUB1I,RSMS_12MO_RSMSSUB2I,BIS_12MO_DATE,BIS_12MO_BIS_TOT,update_stamp
0,1_S_0054,PSP,,2013-05-01,PSP,F,67,21.0,1.0,0.0,8/9/2011,1,1,1,1,2011-07-27,0.0,23.0,7.0,1.0,2.0,4.0,4.0,5.0,0.0,2011-07-27,90.0,2011-07-26,3.0,2011-07-27,0.0,19.0,0.0,,,,,2011-07-27,29.0,2011-07-27,26.0,7.0,7.0,6.0,8.0,8.0,2011-07-27,16.0,13.0,1.0,...,,,1.0,2.0,3.0,2.0,,,1.0,2.0,3.0,2.0,,,2.0,,,2.0,,,1.0,2.0,3.0,2.0,,,2.0,,,2.0,,,2012-03-05,0.5,1.5,0.0,1.0,1.0,2012-03-05,14.0,2012-03-05,20.0,13.0,2012-03-05,26.0,17.0,9.0,2012-03-05,20.0,2015-05-26 11:40:48.0
1,1_S_5000,CBD,2.0,,,M,69,16.0,1.0,0.0,1/28/2011,0,1,0,0,2011-01-19,0.0,9.0,1.0,2.0,0.0,0.0,5.0,1.0,0.0,2011-01-19,50.0,2011-01-31,3.0,2011-01-19,0.0,13.0,0.0,,,,,2011-01-11,21.0,2011-01-11,18.0,2.0,2.0,2.0,8.0,,2011-01-11,0.0,2.0,0.0,...,,,1.0,1.0,0.0,1.0,2.0,3.0,1.0,1.0,2.0,2.0,,,1.0,2.0,2.0,2.0,,,1.0,1.0,1.0,2.0,,,1.0,1.0,2.0,2.0,,,2012-07-09,2.0,9.0,1.0,1.0,0.5,2012-07-09,27.0,,,,,,,,,,2015-05-26 11:40:48.0
2,1_S_5001,CBD,3.0,2012-10-21,CBD,F,65,16.0,4.0,0.0,2/8/2011,1,1,1,0,2011-02-07,0.0,63.0,16.0,11.0,3.0,11.0,6.0,16.0,,2011-02-08,10.0,2011-02-07,5.0,2011-02-07,0.0,56.0,0.0,,,,,2011-02-07,10.0,,,,,,,,,,,,...,,,2.0,,,2.0,,,2.0,,,2.0,,,1.0,3.0,3.0,1.0,3.0,3.0,2.0,,,2.0,,,1.0,2.0,3.0,1.0,3.0,3.0,2011-09-01,3.0,18.0,3.0,3.0,3.0,2011-09-01,30.0,,,,,,,,,,2015-05-26 11:40:48.0
3,1_S_5002,Oth,3.0,,,M,67,18.0,3.0,0.0,2/14/2011,1,1,1,0,2011-02-14,0.0,23.0,4.0,1.0,2.0,5.0,5.0,6.0,,2011-02-14,30.0,2173-10-13,,2011-02-14,0.0,28.0,0.0,,,,,2011-02-15,21.0,2011-02-15,17.0,2.0,2.0,2.0,7.0,,2011-02-15,16.0,10.0,0.0,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2015-05-26 11:40:48.0
4,1_S_5003,PSP,10.0,2013-09-01,PSP,F,86,18.0,1.0,0.0,3/3/2011,1,1,1,0,,,,,,,,,,,,,2173-10-13,,2011-03-09,1.0,75.0,0.0,,,,,2011-03-09,24.0,2011-03-09,23.0,8.0,8.0,8.0,9.0,,,,,1.0,...,,,1.0,1.0,0.0,1.0,1.0,0.0,2.0,,,1.0,1.0,0.0,2.0,,,1.0,1.0,0.0,1.0,1.0,0.0,2.0,,,1.0,1.0,0.0,1.0,1.0,0.0,2012-04-16,1.0,10.5,0.5,0.5,3.0,2012-04-16,24.0,2012-04-16,21.0,15.0,2012-04-16,43.0,24.0,19.0,,,2015-05-26 11:40:48.0


## 画像の呼び出し

In [3]:
# load_images comes from a project-local module that is not shown here.
from load_images import load_images
# NOTE(review): presumably returns the image records matching the given
# datasets and classes -- confirm the meaning of size/unique/dryrun against
# the load_images implementation.
images = load_images(datasets={"ADNI2", "ADNI2-2", "PPMI"}, 
      classes={"CN", "AD", "Control"}, size="half", unique=True, dryrun=True)
# Show how many records came back and inspect the first one.
print(len(images))
images[0]


5


{'uid': 35475,
 'blacklisted': False,
 'fullsize_img_path': '/data/radiology_datas/JHU-radiology/MNI_skull_stripped/output/CN/011_S_0002/fullsize011_S_0002_2005-08-26_S9107_I35475_flipped_MNI_SS.pkl',
 'halfsize_img_path': '/data/radiology_datas/JHU-radiology/MNI_skull_stripped/output/CN/011_S_0002/half_011_S_0002_2005-08-26_S9107_I35475_flipped_MNI_SS.pkl',
 'subject_id': '011_S_0002',
 'class': 'CN',
 'dataset': 'ADNI2-2'}