# Goal

This notebook saves the full path to each MR series for each data folder on the hard drive.
In addition, it saves the file extensions encountered in each hard drive folder.

Done:
- ABIDE.txt (1160 files)
- ABVIB.txt (778 files)
- ACRIN-FMISO-Brain.txt (1403 files)
- ADNI.txt (2640 files)
- PPMI.txt (1524 files)

In [142]:
import os
import time
import pickle

from pathlib import Path

In [21]:
# wsl: /home/rgologorsky/DeepPit
# Location of the MR data folder, written relative to the notebook's working directory.
hd_path = "../../../mnt/d/PitMRdata"

# every entry (folders and stray files alike) at the top level of the data folder
folders = os.listdir(hd_path)

# show the entries in alphabetical order, one per line
print("\n".join(sorted(folders)))


._CPTAC-GBM
._REMBRANDT
._TCGA-GBM
ABIDE
ABVIB
ABVIB (1).zip
ACRIN-FMISO-Brain
ADNI
AIBL
CPTAC-GBM
ICMB
LGG-1p19qDeletion
Oasis_long
PPMI
REMBRANDT
TCGA-GBM
TCGA-LGG
central.xnat.org


In [109]:
# modified os.walk to stop at last subdir
# Accumulator filled by walk_to_series(); reset it (mr_paths = []) before each new walk.
mr_paths = []
def walk_to_series(top):
    """Recursively collect every terminal folder below ``top``.

    A terminal folder is one that contains no sub-directories. Each terminal
    folder found (possibly ``top`` itself) is appended to the module-level
    ``mr_paths`` list; nothing is returned.

    Parameters
    ----------
    top : str
        Directory at which to start the walk.

    Raises
    ------
    OSError
        If ``top`` (or a directory below it) cannot be listed.
    """
    # os.scandir can usually report the entry type straight from the directory
    # listing, which avoids the extra stat call per entry that
    # os.listdir + os.path.isdir needs.
    subdirs = []
    with os.scandir(top) as entries:
        for entry in entries:
            try:
                is_subdir = entry.is_dir()
            except OSError:
                # same outcome as os.path.isdir: an unreadable entry is not a directory
                is_subdir = False
            if is_subdir:
                subdirs.append(entry.name)

    # terminal folder
    if not subdirs:
        mr_paths.append(top)

    # the directory handle is already closed here, so deep trees do not pile up
    # open handles while recursing
    for subdir in subdirs:
        walk_to_series(os.path.join(top, subdir))

In [138]:
# Peek at the top level of the chosen dataset folder before walking it.
path = "PPMI"
os.listdir(f"{hd_path}/{path}")  # bare last expression, so the notebook displays the listing

['PPMI']

In [139]:
# Walk one dataset folder, time the walk, and pickle the resulting list of series folders.

# set path (dataset folder name; also used as the stem of the output file)
path = "PPMI"
#os.listdir(f"{hd_path}/{path}")

# start timer
start = time.time() 

# reset the global accumulator that walk_to_series appends to
mr_paths = []

# get series paths (results land in mr_paths; the call itself returns nothing)
walk_to_series(f"{hd_path}/{path}")

# end timer
elapsed = time.time() - start
# note: the count reported as "files" is the number of terminal folders collected
print(f"Elapsed: {elapsed} s for {len(mr_paths)} files.")

# save results (the file is a binary pickle despite the .txt suffix)
with open(f"{path}.txt", "wb") as fp:   #Pickling
    pickle.dump(mr_paths, fp)

Elapsed: 1297.3338840007782 s for 1524 files.


In [136]:
# Reload the pickled list for whichever dataset name `path` currently holds.
# NOTE: pickle.load executes arbitrary code from the file; only open files written by this notebook.
with open(f"{path}.txt", "rb") as fp:   # Unpickling
    b = pickle.load(fp)

In [137]:
# number of entries in the list that was just unpickled into `b`
len(b)

1160

In [130]:
# preview the first 10 entries of `b` in sorted order, one per line
print(*sorted(b)[:10], sep="\n")

../../../mnt/d/PitMRdata/ADNI/ADNI1_Annual_2_Yr_3T/002_S_0413/MPR____N3__Scaled/2006-05-19_16_17_47.0/S14782
../../../mnt/d/PitMRdata/ADNI/ADNI1_Annual_2_Yr_3T/002_S_0413/MPR____N3__Scaled/2007-06-01_07_57_43.0/S32945
../../../mnt/d/PitMRdata/ADNI/ADNI1_Annual_2_Yr_3T/002_S_0413/MPR____N3__Scaled/2008-07-31_09_17_17.0/S54597
../../../mnt/d/PitMRdata/ADNI/ADNI1_Annual_2_Yr_3T/002_S_0413/MPR____N3__Scaled_2/2006-05-19_16_17_47.0/S14782
../../../mnt/d/PitMRdata/ADNI/ADNI1_Annual_2_Yr_3T/002_S_0559/MPR-R____N3__Scaled/2007-10-18_17_06_13.0/S41825
../../../mnt/d/PitMRdata/ADNI/ADNI1_Annual_2_Yr_3T/002_S_0559/MPR____N3__Scaled/2006-06-27_18_28_33.0/S15922
../../../mnt/d/PitMRdata/ADNI/ADNI1_Annual_2_Yr_3T/002_S_0559/MPR____N3__Scaled/2008-08-15_17_03_28.0/S55646
../../../mnt/d/PitMRdata/ADNI/ADNI1_Annual_2_Yr_3T/002_S_0559/MPR____N3__Scaled_2/2006-06-27_18_28_33.0/S15922
../../../mnt/d/PitMRdata/ADNI/ADNI1_Annual_2_Yr_3T/002_S_0729/MPR____N3__Scaled/2006-08-02_07_02_00.0/S17535
../../../mnt/

# Extensions

In [141]:
# get extension
# assume all files in dir have same extension

def get_ext(dir_path):
    """Return the lower-cased extension of the first file found in ``dir_path``.

    Only one file is inspected, on the assumption that all files in a series
    folder share the same extension. Sub-directories are skipped and not
    descended into.

    Parameters
    ----------
    dir_path : str
        Folder to inspect.

    Returns
    -------
    str
        The suffix including the leading dot (e.g. ``".dcm"``), or ``""`` when
        the first file has no suffix or the folder contains no files at all.

    Raises
    ------
    OSError
        If ``dir_path`` does not exist or cannot be listed.
    """
    # Stop at the first non-directory entry instead of listing and classifying
    # the whole folder just to read one name.
    with os.scandir(dir_path) as entries:
        for entry in entries:
            try:
                is_subdir = entry.is_dir()
            except OSError:
                # entries whose type cannot be determined are treated as files
                is_subdir = False
            if not is_subdir:
                return Path(entry.name).suffix.lower()

    # no files in the folder: report "no extension" rather than raising IndexError
    return ""

In [144]:
def write_ext(path):
    """Collect the file extensions used by one dataset and pickle them.

    Reads the pickled list of series folders from ``{path}.txt``, asks
    ``get_ext`` for the extension of each folder, prints a one-line timing
    summary, and writes the resulting set to ``{path}-exts.txt``.

    Parameters
    ----------
    path : str
        Dataset name, used as the stem of both the input and output files.
    """
    # Read back the series folders saved earlier.
    # NOTE: pickle.load runs arbitrary code from the file; only use it on files this notebook wrote.
    with open(f"{path}.txt", "rb") as src:
        series_dirs = pickle.load(src)

    # Only the extension lookup is timed, not the (un)pickling.
    t_begin = time.time()
    exts = set(get_ext(series_dir) for series_dir in series_dirs)
    elapsed = time.time() - t_begin

    print(f"{path}: Elapsed {elapsed:.2f} s for {len(exts)} file type(s) = {exts}.")

    # Persist the set of extensions next to the path list.
    with open(f"{path}-exts.txt", "wb") as dst:
        pickle.dump(exts, dst)

In [None]:
# ABIDE.txt (1160 files)
# ABVIB.txt (778 files)
# ACRIN-FMISO-Brain.txt (1403 files)
# ADNI.txt (2640 files)
# PPMI.txt (1524 files)

In [145]:
# Datasets whose series-path lists have already been pickled as "<name>.txt".
data_folders = ["ABIDE", "ABVIB", "ACRIN-FMISO-Brain", "ADNI", "PPMI"]

# Write "<name>-exts.txt" for each dataset; write_ext prints one summary line per dataset.
for path in data_folders:
    write_ext(path)

ABIDE: Elapsed 22.79 s for 2 file type(s) = {'.img', '.nii'}.
ABVIB: Elapsed 21.42 s for 1 file type(s) = {'.dcm'}.
ACRIN-FMISO-Brain: Elapsed 25.91 s for 1 file type(s) = {'.dcm'}.
ADNI: Elapsed 15.59 s for 1 file type(s) = {'.nii'}.
PPMI: Elapsed 16.15 s for 1 file type(s) = {'.dcm'}.


# 1. ABIDE

In [140]:
# Reload the pickled ABIDE series list and report how many entries it holds.
with open("ABIDE.txt", "rb") as fp:
    b = pickle.load(fp)
print(len(b))

1160


# 2. ABVIB

Notes: 
- multiple series per patient
    - D:\PitMRdata\ABVIB\ABVIB\129\t2w_3d_1mm

In [132]:
# Reload the pickled ABVIB series list and report how many entries it holds.
with open("ABVIB.txt", "rb") as fp:
    b = pickle.load(fp)
print(len(b))

778


# 3. ACRIN-FMISO-Brain

In [133]:
# Reload the pickled ACRIN-FMISO-Brain series list and report how many entries it holds.
with open("ACRIN-FMISO-Brain.txt", "rb") as fp:
    b = pickle.load(fp)
print(len(b))

1403
