## Import Modules

In [1]:
import os
import sys

SCRIPT_DIR = os.path.dirname(os.path.abspath(__vsc_ipynb_file__))
sys.path.append(os.path.dirname(SCRIPT_DIR))

from src.processing import processing_functions as pf

import glob
import os
import sys
import tifffile
import numpy as np
import h5py
import torch
from torchvision import transforms, utils
import yaml
import itertools
from torch.utils.data import DataLoader
from patchify import patchify


## Create Raw and Image tiffs

In [2]:
dir = "C:/Users/Fungj/My Drive (haaslabdataimages@gmail.com)/Completed"
file_type = "/*.h5"
h5_list = glob.glob(dir+file_type)
filename = [os.path.basename(f) for f in h5_list]

## Read in Completed h5 files

In [3]:
raw_folder = 'E:/Image_Folder/Raw/'
mask_folder = 'E:/Image_Folder/Mask/'
if not os.path.isdir(raw_folder) and not os.path.isdir(mask_folder):
    os.mkdir(raw_folder)
    os.mkdir(mask_folder)


for file in filename:
    root_ext = os.path.splitext(file) # split basename and extension
    new_ext = '.tif'
    if not os.path.exists(raw_folder + root_ext[0] + new_ext):
        print('tif doesnt exist, writing new tif file')
        hf = h5py.File(os.path.join(dir,file),'r')
        raw = np.array(hf['project_data'].get('raw_image'))
        mask = np.array(hf['project_data'].get('label'))
        tifffile.imwrite(raw_folder + root_ext[0] + new_ext,raw)
        tifffile.imwrite(mask_folder + root_ext[0] + new_ext,mask)
    else:
        print('tif file exists')


tif file exists
tif file exists
tif file exists
tif file exists
tif file exists
tif file exists
tif file exists
tif file exists
tif file exists
tif file exists
tif file exists
tif file exists
tif file exists
tif file exists
tif file exists
tif file exists
tif file exists
tif file exists
tif file exists
tif file exists
tif file exists


## Read In Configurations

In [4]:
project_path = os.path.dirname(os.getcwd())
exp_path = "/config/convnets/ResUNet/kfold_training/"
# exp_file = args['config']
exp_file = "soma_dendrite.yml"
print(exp_file)
# exp_file = "post_hptune_exp.yml"

with open(project_path + exp_path + exp_file, "r") as stream:
    try:
        config = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        print(exc)  

soma_dendrite.yml


In [5]:
# get list of h5 files
dir = "C:/Users/Fungj/My Drive (haaslabdataimages@gmail.com)/Completed"
file_type = "/*.h5"
h5_list = glob.glob(dir+file_type)
filename = [f for f in h5_list]

In [6]:
# read in segmentation_exp
segmentation_exp = config['DATASET']['exp'] # +s+d+f
lateral_steps = config['DATASET']['lateral_steps'] # integer
axial_steps = config['DATASET']['axial_steps'] # integer
patch_z, patch_y, patch_x = config['DATASET']['patch_z'], config['DATASET']['patch_y'], config['DATASET']['patch_x']
patch_size = (patch_z, patch_y, patch_x)
#split_size = config['DATASET']['split_size'] # integer 
ex_autofluor = config['DATASET']['ex_autofluorescence'] # True/False
ex_melanocytes = config['DATASET']['ex_melanocytes'] # True/False
remove_artifacts = config['DATASET']['remove_artifacts']
artifacts = config['DATASET']['artifacts']

In [7]:
if segmentation_exp == "+s_+d_+f":
    num_classes = 4 # classes: soma, dendrite, filopodia
if segmentation_exp == '+s_+d_-f':
    num_classes = 3 # classes: soma, dendrite
if segmentation_exp == '-s_+d_-f':
    num_classes = 2 # classes: neuron and background

if ex_autofluor and ex_melanocytes:
    num_classes += 2
elif ex_autofluor == False and ex_melanocytes == True:
    num_classes += 1
elif ex_autofluor == True and ex_melanocytes == False:
    num_classes += 1
else:
    pass

print("Number of Classes: ", num_classes)

Number of Classes:  4


## Configure kFold from config file

In [8]:
kfold_path = "/config/convnets/ResUNet/kfold_training/kfold_indices/"
# kfold_file = args['kfold']
kfold_file = "fold_1.yml"

with open(project_path + kfold_path + kfold_file, "r") as stream:
    try:
        fold_config = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        print(exc) 

fold_idx = fold_config['fold_idx']
train_idx = fold_config['train_idx']
val_idx = fold_config['val_idx']

In [38]:
parent_folder = "E:/Image_Folder/kFold_Training/"
fold_folder = os.path.join(parent_folder,f"fold_{fold_idx}/")
target_raw_folder = os.path.join(fold_folder,"Raw/")
target_mask_folder = os.path.join(fold_folder,"Mask/")

In [41]:
raw_filename_list = np.array(os.listdir(raw_folder))
mask_filename_list = np.array(os.listdir(mask_folder))
device = "cpu"

if not os.path.isdir(target_raw_folder) and not os.path.isdir(target_mask_folder):
    os.makedirs(target_raw_folder)
    os.makedirs(target_mask_folder)

patch_transform = transforms.Compose([
#                                       new_shape(new_xy = (600,960)),
                                    pf.MinMaxScalerVectorized(),
                                    pf.patch_imgs(xy_step = lateral_steps, z_step = axial_steps, patch_size = patch_size, is_mask = False)])

# define transforms for labeled masks
label_transforms = transforms.Compose([
#                                        new_shape(new_xy = (600,960)),
                                    pf.process_masks(exp = segmentation_exp,
                                                    ex_autofluor=ex_autofluor,
                                                    ex_melanocytes=ex_melanocytes,
                                                    ),
                                    pf.patch_imgs(xy_step = lateral_steps, z_step = axial_steps, patch_size = patch_size, is_mask = True)])

raw_training_list, mask_training_list = raw_filename_list[train_idx], mask_filename_list[train_idx]
# raw_testing_list, mask_testing_list = raw_filename_list[val_idx], mask_filename_list[val_idx]
for (raw_name, mask_name) in zip(raw_training_list, mask_training_list):
    print(raw_name, mask_name)
    raw_img = tifffile.imread(os.path.join(raw_folder,raw_name)).astype(np.float16)
    print(raw_img.shape)
    mask_img = tifffile.imread(os.path.join(mask_folder,mask_name)).astype(np.int16)

    upper_raw,_,lower_raw,_ = patch_transform(raw_img)
    # stack_upper_and_lower_raw = np.concatenate([upper_raw,lower_raw],0)
    _, upper_mask, lower_mask = label_transforms(mask_img)
    # stack_upper_and_lower_mask = np.concatenate([upper_mask,lower_mask],0)

    basefile_name = os.path.splitext(raw_name)
    # save the folds into its respective
    for i in range(len(upper_raw)):
        # get basename of file name
        tifffile.imwrite(os.path.join(target_raw_folder,basefile_name[0]+f"_raw_upper{i}.tif"),upper_raw[i,...])
        tifffile.imwrite(os.path.join(target_mask_folder,basefile_name[0]+f"_mask_upper{i}.tif"),upper_mask[i,...])
    for i in range(len(lower_raw)):
        # get basename of file name
        tifffile.imwrite(os.path.join(target_raw_folder,basefile_name[0]+f"_raw_lower{i}.tif"),lower_raw[i,...])
        tifffile.imwrite(os.path.join(target_mask_folder,basefile_name[0]+f"_mask_lower{i}.tif"),lower_mask[i,...])


000_D_180907_A_N1B3_52616a61.tif 000_D_180907_A_N1B3_52616a61.tif
(69, 512, 512)
0 0 0
0 0 1
0 0 2
0 0 3
0 0 4
0 0 5
0 0 6
0 1 0
0 1 1
0 1 2
0 1 3
0 1 4
0 1 5
0 1 6
0 2 0
0 2 1
0 2 2
0 2 3
0 2 4
0 2 5
0 2 6
0 3 0
0 3 1
0 3 2
0 3 3
0 3 4
0 3 5
0 3 6
0 4 0
0 4 1
0 4 2
0 4 3
0 4 4
0 4 5
0 4 6
0 5 0
0 5 1
0 5 2
0 5 3
0 5 4
0 5 5
0 5 6
0 6 0
0 6 1
0 6 2
0 6 3
0 6 4
0 6 5
0 6 6
1 0 0
1 0 1
1 0 2
1 0 3
1 0 4
1 0 5
1 0 6
1 1 0
1 1 1
1 1 2
1 1 3
1 1 4
1 1 5
1 1 6
1 2 0
1 2 1
1 2 2
1 2 3
1 2 4
1 2 5
1 2 6
1 3 0
1 3 1
1 3 2
1 3 3
1 3 4
1 3 5
1 3 6
1 4 0
1 4 1
1 4 2
1 4 3
1 4 4
1 4 5
1 4 6
1 5 0
1 5 1
1 5 2
1 5 3
1 5 4
1 5 5
1 5 6
1 6 0
1 6 1
1 6 2
1 6 3
1 6 4
1 6 5
1 6 6
2 0 0
2 0 1
2 0 2
2 0 3
2 0 4
2 0 5
2 0 6
2 1 0
2 1 1
2 1 2
2 1 3
2 1 4
2 1 5
2 1 6
2 2 0
2 2 1
2 2 2
2 2 3
2 2 4
2 2 5
2 2 6
2 3 0
2 3 1
2 3 2
2 3 3
2 3 4
2 3 5
2 3 6
2 4 0
2 4 1
2 4 2
2 4 3
2 4 4
2 4 5
2 4 6
2 5 0
2 5 1
2 5 2
2 5 3
2 5 4
2 5 5
2 5 6
2 6 0
2 6 1
2 6 2
2 6 3
2 6 4
2 6 5
2 6 6
3 0 0
3 0 1
3 0 2
3 0 3
3 0 4
3 0 5
3

In [20]:
test_stack_raw = np.concatenate([upper_raw,lower_mask],0)

In [19]:
test_stack.shape

(576, 16, 64, 64)

In [21]:
len(upper_raw)

512

In [22]:
os.path.join(parent_folder,f"fold_1/")

'E:/Image_Folder/kFold_Training/fold_1'

In [31]:
os.path.splitext(raw_name)

('000_D_180907_A_N1B3_52616a61', '.tif')

In [36]:
np.unique(upper_mask)

array([0., 1., 2., 3.])

## Try Using Dataloader to load in images

In [2]:
raw_dir = "E:/Image_Folder/kFold_Training/fold_0/Raw"
mask_dir = "E:/Image_Folder/kFold_Training/fold_0/Mask"
file_type = "/*.tif"
raw_list = glob.glob(raw_dir+file_type)
mask_list = glob.glob(mask_dir+file_type)
# raw_filename_list = [f for f in raw_list]
# mask_filename_list = [f for f in mask_list]


In [4]:
raw_list.__len__()
mask_list.__len__()

11968

In [4]:
dim_order = (0,4,1,2,3)

In [4]:
dataset_loader = torch.utils.data.DataLoader(fold_0_dataset,
                                             batch_size = 64,
                                             shuffle = True,
                                             num_workers = 8)

NameError: name 'fold_0_dataset' is not defined

In [10]:
from torch.utils.data import Dataset

In [16]:
class SubVolumeDataset(Dataset):
    # this Dataset module assumes that you have already preprocessed your images and stored them 
    # into a specified folder.

    def __init__(self, raw_directory, mask_directory, num_classes, img_order, mask_order, device):
        self.img_list = raw_directory
        self.mask_list = mask_directory
        self.num_classes = num_classes
        self.img_order = img_order
        self.mask_order = mask_order
        self.device = device

    def __len__(self):
        return len(self.img_list)

    def __getitem__(self, idx):

        # read and process the raw image
        image = tifffile.imread(self.img_list[idx]).astype(np.float16) # shape = (Batch Size, Z_size, Y_size, X_size)
        image = torch.FloatTensor(image).to(self.device) # type = np.array


        # read and process the image mask
        mask = tifffile.imread(self.mask_list[idx]).astype(np.float16)
        mask = torch.FloatTensor(pf.to_categorical(mask, self.num_classes)).to(self.device)

        # print(self.mask_list[idx])
        return image, mask

In [17]:
training_dataset = SubVolumeDataset(raw_directory = raw_list,
                                       mask_directory = mask_list,
                                       num_classes = 4,
                                       img_order = dim_order,
                                       mask_order = dim_order,
                                       device = "cpu")

In [18]:
training_dataloader = DataLoader(training_dataset, batch_size = 64, shuffle = True)

In [23]:
from torch.utils.data import sampler

In [24]:
random_sampler = sampler.RandomSampler()

TypeError: __init__() missing 1 required positional argument: 'data_source'

In [19]:
raw, mask = next(iter(training_dataloader))

torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])
torch.Size([16, 64, 64])


In [25]:
raw = torch.unsqueeze(raw, -1)

In [26]:
raw.shape

torch.Size([64, 16, 64, 64, 1])