In [12]:
import os
import sys
import random
import warnings

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from tqdm import tqdm
from itertools import chain
from skimage.io import imread, imshow, imread_collection, concatenate_images
from skimage.transform import resize
from skimage.morphology import label

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision.transforms as transforms
import torch.nn.functional as F

'''
from google.colab import drive
drive.mount('/content/drive')
'''

# ---------------------------------------------------------------------------
# Notebook-wide configuration
# ---------------------------------------------------------------------------

# Every image and mask is resized to IMG_HEIGHT x IMG_WIDTH when the dataset
# arrays are built; IMG_CHANNELS is the channel count of the image array.
IMG_HEIGHT = 512
IMG_WIDTH = 512
IMG_CHANNELS = 3

# Mini-batch size. Not referenced in the data-preparation cells; presumably
# consumed by a data loader elsewhere in the notebook -- TODO confirm.
BATCH_SIZE = 5

# Training and test samples are read from the same directory; each sample is
# expected to live in its own sub-directory there.
TRAIN_PATH = os.path.abspath("../data/sample_data/40x_magnification")
TEST_PATH = os.path.abspath("../data/sample_data/40x_magnification")

# Hide UserWarnings raised from skimage modules.
warnings.filterwarnings('ignore', category=UserWarning, module='skimage')

# NOTE(review): `seed` is only defined here; the data-preparation cell calls
# np.random.seed(10) instead of using this value.
seed = 42



In [13]:

# Each sample lives in its own sub-directory of TRAIN_PATH; the directory
# name is used as the sample id.
if not os.path.isdir(TRAIN_PATH):
    # next(os.walk(...)) on a missing directory would raise a bare
    # StopIteration, which hides the real cause.
    raise FileNotFoundError(f"Training directory not found: {TRAIN_PATH}")

# os.walk returns directory entries in arbitrary, filesystem-dependent order.
# Sort them so that sample indices (and therefore the held-out ids) are the
# same on every machine and every run.
train_ids = sorted(next(os.walk(TRAIN_PATH))[1])

# Hold out the last 10% of the ids (rounded down) as test ids.
num_test_items = int(0.1 * len(train_ids))

# Slice from an explicit start index: with fewer than 10 samples
# num_test_items is 0, and `train_ids[-0:]` would return the WHOLE list
# instead of an empty one.
# NOTE: the held-out ids are not removed from train_ids.
test_ids = train_ids[len(train_ids) - num_test_items:]
np.random.seed(10)

In [14]:

import re

# Pre-allocate the dataset arrays; every sample is resized to
# IMG_HEIGHT x IMG_WIDTH.
#   X_train   : input images
#   Y_train_1 : label-1 layer, filled from every mask file NOT ending in 'Prot.png'
#   Y_train_2 : label-2 layer, filled from mask files ending in 'Prot.png'
#   Y_train   : combined label map with values in {0, 1, 2}; label 2 wins
#               wherever the two layers overlap
X_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.float64)
Y_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.uint8)
Y_train_1 = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.uint8)
Y_train_2 = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.uint8)

print('Getting and resizing train images and masks ... ')
sys.stdout.flush()
for n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):
    path = os.path.join(TRAIN_PATH, id_)

    # Skipped samples keep their all-zero rows in every array.
    # NOTE(review): TRAIN_PATH is built with os.path.abspath, so `path` is
    # absolute and can never equal this relative string -- the guard currently
    # skips nothing. If the sample must be excluded, compare against `id_`.
    if path == "./Cell/Cell/f0_t0_i0_ch0_c15_r44_z0":
        continue

    img = imread(os.path.join(path, 'images', id_ + '_mSLIM' + '.png'))
    img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
    if img.ndim == 2:
        # Single-channel image: add a channel axis so the assignment below
        # broadcasts it across all IMG_CHANNELS channels.
        img = img[:, :, np.newaxis]
    # Multi-channel images are stored directly; channels beyond IMG_CHANNELS
    # (e.g. an alpha channel) are dropped.
    X_train[n] = img[:, :, :IMG_CHANNELS].astype(np.float64)

    mask_dir = os.path.join(path, 'masks')
    for mask_file in next(os.walk(mask_dir))[2]:
        mask = imread(os.path.join(mask_dir, mask_file))
        # NOTE(review): resize runs with its default interpolation; if that is
        # not nearest-neighbour, thresholding at > 0 can grow the mask borders
        # slightly. Consider order=0 for label masks -- TODO confirm.
        mask = resize(mask, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
        # assumes each mask file is a single-channel (2-D) image -- TODO confirm
        foreground = mask[:, :, np.newaxis] > 0

        # Union the mask into its layer instead of overwriting it, so a sample
        # with several mask files of the same kind keeps all of them rather
        # than only the last file listed.
        if mask_file.endswith('Prot.png'):
            Y_train_2[n][foreground] = 2
        else:
            Y_train_1[n][foreground] = 1

    Y_train[n] = np.maximum(Y_train_1[n], Y_train_2[n])
print("All train images and masks images are processed!")
        


Getting and resizing train images and masks ... 


100%|██████████| 6/6 [00:02<00:00,  2.33it/s]

All train images and masks images are processed!





In [10]:
# Sanity check: array shape plus the per-label pixel counts of sample 5.
print(Y_train.shape)

# Count with vectorized numpy operations instead of a per-pixel Python loop.
sample_mask = Y_train[5]
zero = int(np.count_nonzero(sample_mask == 0))
one = int(np.count_nonzero(sample_mask == 1))
# Every pixel that is neither 0 nor 1 is counted as label 2.
two = int(sample_mask.size) - zero - one

print(zero)
print(one)
print(two)

(6, 512, 512, 1)
260972
1050
122


In [None]:
# Persist the prepared arrays as .npy files, creating the output directory
# first if it does not exist yet.
# NOTE(review): `dir` shadows the Python builtin of the same name; the name is
# kept because other cells may read it.
dir = "../data/processed"
os.makedirs(dir, exist_ok=True)

for _file_name, _array in (("X_train.npy", X_train), ("Y_train.npy", Y_train)):
    np.save(os.path.join(dir, _file_name), _array)