# Goal

This notebook outputs a list of MR paths that are filtered: between 100-300 slices.

Available MR data from:
- ABIDE.txt (1160 files)
- ABVIB.txt (778 files)
- ACRIN-FMISO-Brain.txt (1403 files)
- ADNI.txt (2640 files)
- PPMI.txt (1524 files)

In [1]:
import os
import time
import pickle
from pathlib import Path

import SimpleITK as sitk

import numpy as np
from pandas import DataFrame as DF

from helpers_general import sitk2np, print_sitk_info, round_tuple, lrange, lmap, get_roi_range, numbers2groups

In [2]:
# wsl: /home/rgologorsky/DeepPit
hd_path = "../" * 5 + "/media/labcomputer/e33f6fe0-5ede-4be4-b1f2-5168b7903c7a" + "/home/rachel/PitMRdata/samir_labels"

In [8]:
os.listdir(hd_path)

['50155-50212', '50373-50453', '50002-50153', '50213-50312', '50313-50372']

In [7]:
# modified os.walk to stop at last subdir
mr_paths = []
def walk_to_series(top):
    names = os.listdir(top)
    subdirs = [name for name in names if os.path.isdir(os.path.join(top, name))]
    
    # terminal folder
    if len(subdirs) == 0:
        #print("terminal", top)
        mr_paths.append(top)
        #yield top
        
    for subdir in subdirs:
        newpath = os.path.join(top, subdir)
        walk_to_series(newpath)

In [9]:
# set path
path = "50155-50212"
#os.listdir(f"{hd_path}/{path}")

# start timer
start = time.time() 

# reset
mr_paths = []

# get series paths
walk_to_series(f"{hd_path}/{path}")

# end timer
elapsed = time.time() - start
print(f"Elapsed: {elapsed} s for {len(mr_paths)} files.")

# save results
with open(f"{path}.txt", "wb") as fp:   #Pickling
    pickle.dump(mr_paths, fp)

Elapsed: 0.009708642959594727 s for 48 files.


In [15]:
print(f"{path}.txt")
print(mr_paths[0])

50155-50212.txt
../../../../..//media/labcomputer/e33f6fe0-5ede-4be4-b1f2-5168b7903c7a/home/rachel/PitMRdata/samir_labels/50155-50212/50198/MP-RAGE/2000-01-01_00_00_00.0/S164832


In [5]:
# ABIDE only labelled folder
folder = "ABIDE" 
with open(f"{folder}.txt", "rb") as fp:   # Unpickling
    file_paths = pickle.load(fp)

# Metadata

folder	imputedSeq	fn	sz	px	sp	dir

In [16]:
def get_folder_name(s):
    s = s[len("../../../../..//media/labcomputer/e33f6fe0-5ede-4be4-b1f2-5168b7903c7a/home/rachel/PitMRdata/samir_labels/50155-50212/"):]
    return s[0:s.index("/")]

In [18]:
get_folder_name(mr_paths[10])

'50187'

In [21]:
def get_imputed_seq(fn):
    for seq in ("MPR", "RAGE", "T1", "T2", "FLAIR", "WOW"):
        if seq.lower() in fn or seq.upper() in fn:
            if seq == "RAGE": return "MPR"
            else: return seq
    return "UNKNOWN"

In [26]:
# get extension
# assume all files in dir have same extension
def get_ext(dir_path):
    # assume all files in dir have same extension
    file = next(os.walk(dir_path))[2][0]
    
    # in case file is bytes not str
    try:
        file = file.decode()
    except:
        pass
    
    return Path(file).suffix.lower()

In [29]:
# data frame w/ meta data info
d = []

for path in mr_paths:
    
    # get folder name = data src
    folder = get_folder_name(path)
    
    # get file ext (nii, dcm, etc)
    ext    = get_ext(path)
    seq    = get_imputed_seq(path)
    
    # get 1st child file in terminal folder
    file = os.listdir(str(path))[0]
    file = f"{path}/{file}"
        
    # ASSUMES only 1 nii in folder
    
    if ext == ".nii" or ext == ".img":
        
        # read meta data
        reader = sitk.ImageFileReader()
        reader.SetImageIO("NiftiImageIO")
        reader.SetFileName(file)
        reader.ReadImageInformation()
        
        # get num slices
        sz = reader.GetSize()
        n  = min(sz)
            
    elif ext == ".dcm":
        n = len(os.listdir(str(path)))
        
        # read meta data
        reader = sitk.ImageFileReader()
        reader.SetFileName(file)
        reader.ReadImageInformation()
        
        # add n_slices to size
        sz = reader.GetSize()
        sz = (sz[0], sz[1], n)
        
    else:
        print(f"Weird ext - {ext}.")
    
    # save
    if n > 100 and n < 300:
        d.append({
            "folder": folder,
            "fn": file,
            "imputedSeq": seq,
            "sz": sz,
            "px": sitk.GetPixelIDValueAsString(reader.GetPixelID()),
            "sp": tuple(round(x,2) for x in reader.GetSpacing()),
            "dir": tuple(int(round(x,1)) for x in reader.GetDirection())
        })
    
d = DF(d)
d

Unnamed: 0,folder,fn,imputedSeq,sz,px,sp,dir
0,50198,../../../../..//media/labcomputer/e33f6fe0-5ed...,MPR,"(172, 256, 256)",16-bit signed integer,"(1.0, 1.0, 1.0)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
1,50171,../../../../..//media/labcomputer/e33f6fe0-5ed...,MPR,"(160, 239, 200)",16-bit signed integer,"(1.1, 1.0, 1.0)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
2,50190,../../../../..//media/labcomputer/e33f6fe0-5ed...,MPR,"(172, 256, 256)",16-bit signed integer,"(1.0, 1.0, 1.0)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
3,50188,../../../../..//media/labcomputer/e33f6fe0-5ed...,MPR,"(172, 256, 256)",16-bit signed integer,"(1.0, 1.0, 1.0)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
4,50212,../../../../..//media/labcomputer/e33f6fe0-5ed...,MPR,"(172, 256, 256)",16-bit signed integer,"(1.0, 1.0, 1.0)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
5,50200,../../../../..//media/labcomputer/e33f6fe0-5ed...,MPR,"(172, 256, 256)",16-bit signed integer,"(1.0, 1.0, 1.0)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
6,50157,../../../../..//media/labcomputer/e33f6fe0-5ed...,MPR,"(160, 239, 200)",16-bit signed integer,"(1.1, 1.0, 1.0)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
7,50196,../../../../..//media/labcomputer/e33f6fe0-5ed...,MPR,"(172, 256, 256)",16-bit signed integer,"(1.0, 1.0, 1.0)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
8,50167,../../../../..//media/labcomputer/e33f6fe0-5ed...,MPR,"(160, 239, 200)",16-bit signed integer,"(1.1, 1.0, 1.0)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
9,50193,../../../../..//media/labcomputer/e33f6fe0-5ed...,MPR,"(172, 256, 256)",16-bit signed integer,"(1.0, 1.0, 1.0)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"


In [31]:
path

'../../../../..//media/labcomputer/e33f6fe0-5ede-4be4-b1f2-5168b7903c7a/home/rachel/PitMRdata/samir_labels/50155-50212/50169/MP-RAGE/2000-01-01_00_00_00.0/S165727'

In [32]:
d.to_pickle("./50155-50212.pkl")

In [35]:
import pandas as pd

In [36]:
unpickled_df = pd.read_pickle("./50155-50212.pkl")

In [37]:
unpickled_df

Unnamed: 0,folder,fn,imputedSeq,sz,px,sp,dir
0,50198,../../../../..//media/labcomputer/e33f6fe0-5ed...,MPR,"(172, 256, 256)",16-bit signed integer,"(1.0, 1.0, 1.0)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
1,50171,../../../../..//media/labcomputer/e33f6fe0-5ed...,MPR,"(160, 239, 200)",16-bit signed integer,"(1.1, 1.0, 1.0)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
2,50190,../../../../..//media/labcomputer/e33f6fe0-5ed...,MPR,"(172, 256, 256)",16-bit signed integer,"(1.0, 1.0, 1.0)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
3,50188,../../../../..//media/labcomputer/e33f6fe0-5ed...,MPR,"(172, 256, 256)",16-bit signed integer,"(1.0, 1.0, 1.0)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
4,50212,../../../../..//media/labcomputer/e33f6fe0-5ed...,MPR,"(172, 256, 256)",16-bit signed integer,"(1.0, 1.0, 1.0)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
5,50200,../../../../..//media/labcomputer/e33f6fe0-5ed...,MPR,"(172, 256, 256)",16-bit signed integer,"(1.0, 1.0, 1.0)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
6,50157,../../../../..//media/labcomputer/e33f6fe0-5ed...,MPR,"(160, 239, 200)",16-bit signed integer,"(1.1, 1.0, 1.0)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
7,50196,../../../../..//media/labcomputer/e33f6fe0-5ed...,MPR,"(172, 256, 256)",16-bit signed integer,"(1.0, 1.0, 1.0)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
8,50167,../../../../..//media/labcomputer/e33f6fe0-5ed...,MPR,"(160, 239, 200)",16-bit signed integer,"(1.1, 1.0, 1.0)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
9,50193,../../../../..//media/labcomputer/e33f6fe0-5ed...,MPR,"(172, 256, 256)",16-bit signed integer,"(1.0, 1.0, 1.0)","(1, 0, 0, 0, -1, 0, 0, 0, 1)"
