In [1]:
"""Doing data augmentation"""

import os #using this library to join this path 
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
import numpy as np
import cv2
from glob import glob 
from tqdm import  tqdm
from sklearn.model_selection import  train_test_split
from albumentations import HorizontalFlip, VerticalFlip, Rotate


#creating a dir function for checking the path
def create_dir(path):
    """Create the directory ``path`` (and any missing parents).

    ``exist_ok=True`` makes the call idempotent and removes the race between
    checking ``os.path.exists`` and calling ``os.makedirs``. If ``path``
    already exists as a regular file, ``FileExistsError`` is raised instead of
    the problem being silently ignored.
    """
    os.makedirs(path, exist_ok=True)
        
#function for loading data
def load_data(path,split=0.2):
    """Collect image/mask paths under ``path`` and split them into train/validation sets.

    Parameters
    ----------
    path : str
        Dataset root. Images are matched by ``{path}/*/image/*.png`` and
        masks by ``{path}/*/mask/*.png``; both lists are sorted so that the
        i-th image is paired with the i-th mask.
    split : float
        Fraction of the samples held out for validation.

    Returns
    -------
    tuple
        ``(train_x, train_y), (valid_x, valid_y)`` lists of file paths.

    Raises
    ------
    ValueError
        If the number of images and masks differs, because positional pairing
        would then silently associate images with the wrong masks.
    """
    images = sorted(glob(f"{path}/*/image/*.png"))
    masks = sorted(glob(f"{path}/*/mask/*.png"))
    if len(images) != len(masks):
        raise ValueError(
            f"Found {len(images)} images but {len(masks)} masks under {path!r}; "
            "every image needs exactly one mask."
        )

    # Absolute number of samples that go to the validation set.
    split_size = int(len(images) * split)

    # Splitting both lists in one call guarantees they share one permutation,
    # so image/mask pairs stay aligned.
    train_x, valid_x, train_y, valid_y = train_test_split(
        images, masks, test_size=split_size, random_state=42
    )
    return (train_x, train_y), (valid_x, valid_y)


#Data augmentation function
def augment_data(images, masks, save_path, augment=True):
    """Resize (and optionally augment) image/mask pairs and write them to disk.

    Parameters
    ----------
    images, masks : sequence of str
        Paired file paths; each image path is expected to look like
        ``.../<folder>/image/<file>.png``.
    save_path : str
        Output root; files are written to ``<save_path>/image/`` and
        ``<save_path>/mask/`` (both must already exist).
    augment : bool
        When true, a horizontal flip, a vertical flip and a random rotation
        (limit 45 degrees) are saved in addition to the original.

    Raises
    ------
    ValueError
        If an image or mask cannot be read by OpenCV.
    IOError
        If OpenCV fails to write an output file.
    """
    H = 512
    W = 512

    # Every transform uses p=1 so it is always applied.
    transforms = []
    if augment:
        transforms = [HorizontalFlip(p=1.0), VerticalFlip(p=1), Rotate(limit=45, p=1.0)]

    for image_file, mask_file in tqdm(zip(images, masks), total=len(images)):
        # Output name is "<folder>_<file stem>"; os.path keeps this working
        # regardless of the platform's path separator.
        normalized = os.path.normpath(image_file)
        dir_name = os.path.basename(os.path.dirname(os.path.dirname(normalized)))
        name = dir_name + "_" + os.path.basename(normalized).split(".")[0]

        x = cv2.imread(image_file, cv2.IMREAD_COLOR)
        y = cv2.imread(mask_file, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None rather than raising.
        if x is None:
            raise ValueError(f"Could not read image: {image_file}")
        if y is None:
            raise ValueError(f"Could not read mask: {mask_file}")

        X = [x]
        Y = [y]
        for transform in transforms:
            augmented = transform(image=x, mask=y)
            X.append(augmented["image"])
            Y.append(augmented["mask"])

        for variant, (i, m) in enumerate(zip(X, Y)):  # i = image, m = mask
            i = cv2.resize(i, (W, H))
            m = cv2.resize(m, (W, H))
            # Re-binarise after interpolation; keep uint8 so the written
            # mask holds exactly 0 or 255.
            m = ((m / 255.0) > 0.5).astype(np.uint8) * 255

            if len(X) == 1:
                tmp_image_name = f"{name}.jpg"
                tmp_mask_name = f"{name}.jpg"
            else:
                tmp_image_name = f"{name}_{variant}.jpg"
                tmp_mask_name = f"{name}_{variant}.jpg"

            image_path = os.path.join(save_path, "image/", tmp_image_name)
            mask_path = os.path.join(save_path, "mask/", tmp_mask_name)
            # cv2.imwrite reports failure through its return value.
            if not cv2.imwrite(image_path, i):
                raise IOError(f"Could not write image: {image_path}")
            if not cv2.imwrite(mask_path, m):
                raise IOError(f"Could not write mask: {mask_path}")



if __name__ == "__main__":
    # Load the dataset and split it into training / validation path lists.
    dataset_path = os.path.join("data", "train")
    (train_x, train_y), (valid_x, valid_y) = load_data(dataset_path, split=0.2)

    print("Train: ", len(train_x))
    print("Valid: ", len(valid_x))

    # Output folders for the processed images and masks.
    for output_dir in (
        "new_data/train/image/",
        "new_data/train/mask/",
        "new_data/valid/image/",
        "new_data/valid/mask/",
    ):
        create_dir(output_dir)

    # Augmentation (HorizontalFlip, VerticalFlip, Rotate) is applied to the
    # training split only; validation data is just resized.
    augment_data(train_x, train_y, "new_data/train/", augment=True)
    augment_data(valid_x, valid_y, "new_data/valid/", augment=False)
    
    

Train:  2026
Valid:  506


100%|██████████| 2026/2026 [02:23<00:00, 14.14it/s]
100%|██████████| 506/506 [00:10<00:00, 46.78it/s]


In [2]:
"""# defining model"""


from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D,Conv2DTranspose, Concatenate,Input
from tensorflow.keras.models import  Model

#Function for creating a convolution block
def conv_block(input, num_filters):
    """Apply two Conv2D(3x3, 'same') -> BatchNormalization -> ReLU stages.

    Both convolutions use ``num_filters`` filters; the output of the second
    stage is returned.
    """
    features = input
    for _ in range(2):
        features = Conv2D(num_filters, 3, padding='same')(features)
        features = BatchNormalization()(features)
        features = Activation('relu')(features)
    return features

#def fun for encoder_block
def encoder_block(input,num_filters):
    """Run a conv block, then 2x2 max pooling.

    Returns ``(skip, pooled)``: the conv-block output used as the skip
    connection, and its pooled version that feeds the next block.
    """
    skip = conv_block(input, num_filters)
    pooled = MaxPool2D((2, 2))(skip)
    return skip, pooled

def decoder_block(input,skip_features,num_filters):
    """Upsample ``input``, merge it with ``skip_features`` and run a conv block.

    Upsampling is a 2x2 transposed convolution with stride 2; the result is
    concatenated with the skip connection before the conv block.
    """
    upsampled = Conv2DTranspose(num_filters, (2, 2), strides=2, padding="same")(input)
    merged = Concatenate()([upsampled, skip_features])
    return conv_block(merged, num_filters)
        
    

#def fun for building model
def unet_model(input_shape):
    """Build the U-Net: four encoder blocks, a bridge and four decoder blocks.

    The final layer is a 1x1 convolution with a sigmoid activation producing
    a single-channel output. Returns a Keras ``Model`` named ``"U-net"``.
    """
    inputs = Input(input_shape)

    # Encoder path: remember each block's pre-pooling output as a skip connection.
    skips = []
    features = inputs
    for num_filters in (64, 128, 256, 512):
        skip, features = encoder_block(features, num_filters)
        skips.append(skip)

    # Bridge / bottleneck.
    features = conv_block(features, 1024)

    # Decoder path: consume the skip connections deepest-first.
    for num_filters in (512, 256, 128, 64):
        features = decoder_block(features, skips.pop(), num_filters)

    # 1x1 convolution with sigmoid for the output map.
    outputs = Conv2D(1, 1, padding="same", activation="sigmoid")(features)

    return Model(inputs, outputs, name="U-net")
    
        
    
    
    
if __name__=="__main__":
    # Build the U-Net for 512x512 inputs with 3 channels and print its layer summary.
    input_shape=(512,512,3)
    model=unet_model(input_shape)
    model.summary()
    

Model: "U-net"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 512, 512, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 512, 512, 64) 1792        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 512, 512, 64) 256         conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 512, 512, 64) 0           batch_normalization[0][0]        
______________________________________________________________________________________________

In [3]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Sun_Jul_28_19:07:16_PDT_2019
Cuda compilation tools, release 10.1, V10.1.243


In [4]:
# metric function
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as k

def iou(y_true,y_pred):
    """Intersection-over-union between ``y_true`` and ``y_pred``.

    Computed over all elements of both tensors as
    ``(sum(t*p) + eps) / (sum(t) + sum(p) - sum(t*p) + eps)`` with
    ``eps = 1e-15``, returned as a scalar float32 tensor.

    Implemented with native TensorFlow ops instead of ``tf.numpy_function``,
    so the metric has a known (scalar) shape and does not depend on a Python
    callback at run time.
    """
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    intersection = tf.reduce_sum(y_true * y_pred)
    union = tf.reduce_sum(y_true) + tf.reduce_sum(y_pred) - intersection
    return (intersection + 1e-15) / (union + 1e-15)

# Small constant added to numerator and denominator of the Dice ratio.
smooth=1e-15
def dice_coef(y_true,y_pred):
    """Dice coefficient ``(2*|A.B| + smooth) / (|A| + |B| + smooth)``.

    Both inputs are flattened per sample, then all sums are taken over the
    whole batch, giving a single scalar.
    """
    true_flat = tf.keras.layers.Flatten()(y_true)
    pred_flat = tf.keras.layers.Flatten()(y_pred)
    overlap = tf.reduce_sum(true_flat * pred_flat)
    numerator = 2. * overlap + smooth
    denominator = tf.reduce_sum(true_flat) + tf.reduce_sum(pred_flat) + smooth
    return numerator / denominator

def dice_loss(y_true,y_pred):
    """Dice loss: one minus the Dice coefficient of the two tensors."""
    coefficient = dice_coef(y_true, y_pred)
    return 1.0 - coefficient


In [5]:
import tensorflow as tf 
gpus= tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Memory growth has to be the same on every GPU, so enable it on all
        # of them. This must run before anything initialises the GPUs (e.g.
        # before a model is built); otherwise TensorFlow raises the
        # RuntimeError handled below.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu,True)
        logical_gpus=tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus),"Physical GPUs",len(logical_gpus),"logical_gpus")
    except RuntimeError as e:
        # Reported instead of raised so the notebook keeps running; the
        # memory-growth setting is NOT applied in this case.
        print(e)

Physical devices cannot be modified after being initialized


In [6]:
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# TF1-compat session configuration: sets the per-process GPU memory fraction
# to 0.2 and enables allow_growth, then opens an InteractiveSession with it.
# NOTE(review): this overlaps with the set_memory_growth cell above — confirm
# that both mechanisms are really needed.
config = ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.2
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

In [17]:
"""Training our model on dataset"""

#importing important library
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']="2"
# os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

import numpy as np
import cv2
from glob import glob
from sklearn.utils import shuffle
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, ReduceLROnPlateau,EarlyStopping, TensorBoard
from tensorflow.keras.optimizers import  Adam
from tensorflow.keras.metrics import Recall, Precision
from model import unet_model
from metrics import dice_loss, dice_coef, iou

H=512
W=512
#creating a dir function for checking the path
def create_dir(path):
    """Create the directory ``path`` (and any missing parents).

    ``exist_ok=True`` makes the call idempotent and removes the race between
    checking ``os.path.exists`` and calling ``os.makedirs``. If ``path``
    already exists as a regular file, ``FileExistsError`` is raised instead of
    the problem being silently ignored.
    """
    os.makedirs(path, exist_ok=True)
        
def shuffling(x,y):
    """Shuffle ``x`` and ``y`` in unison with a fixed seed (``random_state=42``)."""
    shuffled_x, shuffled_y = shuffle(x, y, random_state=42)
    return shuffled_x, shuffled_y

def load_data(path):
    """Return sorted ``*.jpg`` paths from ``<path>/image`` and ``<path>/mask``.

    The result is a pair ``(image_paths, mask_paths)``.
    """
    image_paths, mask_paths = (
        sorted(glob(os.path.join(path, folder, "*.jpg")))
        for folder in ("image", "mask")
    )
    return image_paths, mask_paths

#def funtion reading the images
def read_image(path):
    """Read a colour image and scale it to float32 values in [0, 1].

    Parameters
    ----------
    path : bytes or str
        File path. ``tf.numpy_function`` passes paths as ``bytes``; plain
        ``str`` paths are accepted as well.

    Returns
    -------
    numpy.ndarray
        The image as read by ``cv2.imread(..., cv2.IMREAD_COLOR)``, divided
        by 255 and cast to ``float32``.

    Raises
    ------
    ValueError
        If OpenCV cannot read the file.
    """
    if isinstance(path, bytes):
        path = path.decode()
    x = cv2.imread(path, cv2.IMREAD_COLOR)
    # cv2.imread returns None on failure instead of raising.
    if x is None:
        raise ValueError(f"Could not read image: {path}")
    x = x / 255.0
    return x.astype(np.float32)

#def function reading the mask
def read_mask(path):
    """Read a mask as a binary float32 array with a trailing channel axis.

    Parameters
    ----------
    path : bytes or str
        File path. ``tf.numpy_function`` passes paths as ``bytes``; plain
        ``str`` paths are accepted as well.

    Returns
    -------
    numpy.ndarray
        Grayscale mask scaled by 1/255, thresholded at 0.5 into {0.0, 1.0},
        cast to ``float32`` and expanded with a last axis of size 1.

    Raises
    ------
    ValueError
        If OpenCV cannot read the file.
    """
    if isinstance(path, bytes):
        path = path.decode()
    x = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread returns None on failure instead of raising.
    if x is None:
        raise ValueError(f"Could not read mask: {path}")
    x = x / 255.0
    x = x > 0.5   # binarise: pixels become False/True
    x = x.astype(np.float32)
    return np.expand_dims(x, axis=-1)

#def tf_parse for building data pipeline
def tf_parse(x,y):
    """Load one (image, mask) pair from path tensors for the tf.data pipeline.

    The OpenCV-based readers are wrapped in ``tf.numpy_function``; because
    that wrapper does not carry static shapes, they are set explicitly to
    ``[H, W, 3]`` for the image and ``[H, W, 1]`` for the mask.
    """
    def _load_pair(image_path, mask_path):
        return read_image(image_path), read_mask(mask_path)

    image, mask = tf.numpy_function(_load_pair, [x, y], [tf.float32, tf.float32])
    image.set_shape([H, W, 3])
    mask.set_shape([H, W, 1])
    return image, mask


#Main function for building the data pipeline
def tf_dataset(x,y,batch=8):
    """Build a batched ``tf.data.Dataset`` from image paths ``x`` and mask paths ``y``.

    Pipeline: slice the path lists into individual pairs, load each pair with
    ``tf_parse``, group into batches of ``batch`` and prefetch 10 batches.
    """
    return (
        tf.data.Dataset.from_tensor_slices((x, y))
        .map(tf_parse)
        .batch(batch)
        .prefetch(10)
    )




if __name__=="__main__":
    # Seeding
    np.random.seed(42)
    tf.random.set_seed(42)

    # Directory for the checkpoint and the CSV training log.
    create_dir("files")

    # Hyperparameters
    batch_size=2
    lr=1e-4
    num_epochs=16
    model_path=os.path.join("files","model.h5")
    csv_path=os.path.join("files","data.csv")

    # Dataset paths
    dataset_path=os.path.join("new_data")
    train_path=os.path.join(dataset_path,"train")
    valid_path=os.path.join(dataset_path,"valid")

    train_x,train_y=load_data(train_path)
    # The path lists are shuffled once here; model.fit below is called with
    # shuffle=False.
    train_x,train_y=shuffling(train_x,train_y)
    valid_x,valid_y=load_data(valid_path)

    print("train:",len(train_x),"-",len(train_y))
    print("valid:",len(valid_x),"-",len(valid_y))

    train_dataset= tf_dataset(train_x,train_y,batch=batch_size)
    valid_dataset= tf_dataset(valid_x,valid_y,batch=batch_size)

    # Build and compile the model.
    model=unet_model((H,W,3))
    metrics=[dice_coef,iou,Recall(),Precision()]
    model.compile(loss=dice_loss,optimizer=Adam(lr), metrics=metrics )

    # NOTE(review): with num_epochs=16, ReduceLROnPlateau (patience=10) can
    # fire at most late in the run and EarlyStopping (patience=50) can never
    # trigger; confirm these patience values are intended.
    callbacks=[
        ModelCheckpoint(model_path,verbose=1,save_best_only=True),  # keep only the best checkpoint
        ReduceLROnPlateau(monitor='val_loss',factor=0.1,patience=10,min_lr=1e-7,verbose=1),
        CSVLogger(csv_path),
        TensorBoard(),
        EarlyStopping(monitor='val_loss',patience=50,restore_best_weights=False)
    ]

    model.fit(
         train_dataset,
         epochs=num_epochs,
         validation_data=valid_dataset,
         callbacks=callbacks,
         shuffle=False
     )

train: 8104 - 8104
train: 506 - 506
Epoch 1/16

Epoch 00001: val_loss improved from inf to 0.66666, saving model to files/model.h5
Epoch 2/16

Epoch 00002: val_loss improved from 0.66666 to 0.65158, saving model to files/model.h5
Epoch 3/16

Epoch 00003: val_loss did not improve from 0.65158
Epoch 4/16

Epoch 00004: val_loss improved from 0.65158 to 0.65015, saving model to files/model.h5
Epoch 5/16

Epoch 00005: val_loss improved from 0.65015 to 0.62189, saving model to files/model.h5
Epoch 6/16

Epoch 00006: val_loss did not improve from 0.62189
Epoch 7/16

Epoch 00007: val_loss improved from 0.62189 to 0.61841, saving model to files/model.h5
Epoch 8/16

Epoch 00008: val_loss did not improve from 0.61841
Epoch 9/16

Epoch 00009: val_loss did not improve from 0.61841
Epoch 10/16

Epoch 00010: val_loss did not improve from 0.61841
Epoch 11/16

Epoch 00011: val_loss improved from 0.61841 to 0.61797, saving model to files/model.h5
Epoch 12/16

Epoch 00012: val_loss did not improve from 0

In [18]:
#evaluation
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"]= "2"
import numpy as np
import cv2
import pandas as pd
from glob import glob
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.utils import CustomObjectScope
from sklearn.metrics import  accuracy_score, f1_score, jaccard_score, precision_score,recall_score
from metrics import dice_loss, dice_coef, iou
from train import  load_data

H=512
W=512

#creating directory
def create_dir(path):
    """Create the directory ``path`` (and any missing parents).

    ``exist_ok=True`` makes the call idempotent and removes the race between
    checking ``os.path.exists`` and calling ``os.makedirs``. If ``path``
    already exists as a regular file, ``FileExistsError`` is raised instead of
    the problem being silently ignored.
    """
    os.makedirs(path, exist_ok=True)
        
def save_results(image,mask,y_pred,save_image_path):
    """Write image, ground-truth mask and predicted mask side by side.

    Parameters
    ----------
    image : numpy.ndarray
        3-channel image of shape (height, width, 3).
    mask : numpy.ndarray
        2-D ground-truth mask; written with the values it is passed in with.
    y_pred : numpy.ndarray
        2-D predicted mask; it is multiplied by 255 before saving, so it is
        expected to hold 0/1 values.
    save_image_path : str
        Destination file passed to ``cv2.imwrite``.
    """
    # Grey vertical separator, 10 px wide. Its height follows the image
    # rather than the module-level H, so any image height can be saved.
    line = np.ones((image.shape[0], 10, 3)) * 128

    # Replicate the single-channel masks to 3 channels so that all panels can
    # be concatenated with the colour image.
    mask = np.expand_dims(mask, axis=-1)
    mask = np.concatenate([mask, mask, mask], axis=-1)

    y_pred = np.expand_dims(y_pred, axis=-1)
    y_pred = np.concatenate([y_pred, y_pred, y_pred], axis=-1)
    y_pred = y_pred * 255

    # Panels are joined along the width axis.
    concatenate_images = np.concatenate([image, line, mask, line, y_pred], axis=1)
    cv2.imwrite(save_image_path, concatenate_images)
    

        
if __name__=="__main__":
    # Seeding
    np.random.seed(42)
    tf.random.set_seed(42)

    # Directory for the side-by-side result images.
    create_dir("results")

    # The model was compiled with custom loss/metrics, so they must be
    # registered while loading.
    with CustomObjectScope({'iou': iou, 'dice_coef':dice_coef, "dice_loss": dice_loss}):
        model=tf.keras.models.load_model("files/model.h5")

    # Evaluation runs on the files under new_data/valid.
    test_x=sorted(glob(os.path.join("new_data","valid","image","*")))
    test_y=sorted(glob(os.path.join("new_data","valid","mask","*")))
    print(f"Test: {len(test_x)}-{len(test_y)}")

    # zip() would silently truncate mismatched lists, and an empty list makes
    # the mean below fail with an obscure indexing error, so fail early.
    if len(test_x) != len(test_y):
        raise ValueError(f"Found {len(test_x)} images but {len(test_y)} masks.")
    if not test_x:
        raise ValueError("No evaluation images found under new_data/valid/image.")

    Scores=[]
    for image_path,mask_path in tqdm(zip(test_x,test_y),total=len(test_x)):
        # File stem, extracted independently of the platform's separator.
        name=os.path.basename(image_path).split(".")[0]

        # Read the image and add a batch axis for model.predict.
        image=cv2.imread(image_path,cv2.IMREAD_COLOR)
        x=image/255.0
        x=np.expand_dims(x,axis=0)

        # Read the mask and binarise it.
        mask=cv2.imread(mask_path,cv2.IMREAD_GRAYSCALE)
        y= mask/255.0
        y=y>0.5
        y=y.astype(np.int32)

        # Predict, drop the channel axis and threshold at 0.5.
        y_pred=model.predict(x)[0]
        y_pred=np.squeeze(y_pred,axis=-1)
        y_pred= y_pred>0.5
        y_pred=y_pred.astype(np.int32)

        # Save original image, original mask and predicted mask side by side.
        save_image_path=f"results/{name}.png"
        save_results(image,mask,y_pred,save_image_path)

        # Per-image metrics on the flattened masks.
        y=y.flatten()
        y_pred=y_pred.flatten()

        acc_value=accuracy_score(y,y_pred)
        f1_value=f1_score(y,y_pred,labels=[0,1],average='binary',zero_division=1)
        jac_value=jaccard_score(y,y_pred,labels=[0,1],average='binary',zero_division=1)
        recall_value= recall_score(y,y_pred,labels=[0,1],average='binary',zero_division=1)
        precision_value=precision_score(y,y_pred,labels=[0,1],average='binary',zero_division=1)
        Scores.append([name,acc_value,f1_value,jac_value,recall_value,precision_value])

    # Mean of every metric over all images (the name column is dropped).
    score=[s[1:]for s in Scores]
    score= np.mean(score,axis=0)
    print(f"Accuracy:{score[0]:0.5f}")
    print(f"F1:{score[1]:0.5f}")
    print(f"Jaccard:{score[2]:0.5f}")
    print(f"Recall:{score[3]:0.5f}")
    print(f"Precision:{score[4]:0.5f}")

    df=pd.DataFrame(Scores,columns=["Image","Accuracy","F1", "Jaccard","Recall","Precision"])
    df.to_csv("files/score.csv")
        
    
    
        

Test: 506-506


100%|██████████| 506/506 [03:34<00:00,  2.36it/s]

Accuracy:0.99440
F1:0.80522
Jaccard:0.78429
Recall:0.97783
Precision:0.80418





In [22]:
#Prediction
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"]= "2"
import numpy as np
import cv2
import pydicom as dicom
import pandas as pd
from glob import glob
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.utils import CustomObjectScope
from sklearn.metrics import  accuracy_score, f1_score, jaccard_score, precision_score,recall_score
from metrics import dice_loss, dice_coef, iou

#creating directory
def create_dir(path):
    """Create the directory ``path`` (and any missing parents).

    ``exist_ok=True`` makes the call idempotent and removes the race between
    checking ``os.path.exists`` and calling ``os.makedirs``. If ``path``
    already exists as a regular file, ``FileExistsError`` is raised instead of
    the problem being silently ignored.
    """
    os.makedirs(path, exist_ok=True)
        
if __name__=="__main__":
    # Seeding
    np.random.seed(42)
    tf.random.set_seed(42)

    # Directory for the prediction images.
    create_dir("tests_result")

    # The model was compiled with custom loss/metrics, so they must be
    # registered while loading.
    with CustomObjectScope({'iou': iou, 'dice_coef':dice_coef, "dice_loss": dice_loss}):
        model=tf.keras.models.load_model("files/model.h5")

    # Load the test dataset (DICOM files).
    test_x=glob("data/test/*/*/*.dcm")
    print("testdata:",len(test_x))

    for x in tqdm(test_x):
        # Output name is "<directory two levels above the file>_<file stem>";
        # os.path keeps this independent of the platform's path separator.
        normalized=os.path.normpath(x)
        dir_names=os.path.basename(os.path.dirname(os.path.dirname(normalized)))
        name=dir_names+"_"+os.path.basename(normalized).split(".")[0]

        # Read the .dcm pixel data.
        images=dicom.dcmread(x).pixel_array

        # Rescale the pixels to 0-255 relative to the slice maximum. An
        # all-zero slice would divide by zero and feed NaNs to the model, so
        # it is kept as zeros instead.
        image=np.expand_dims(images,axis=-1)
        peak=np.max(image)
        if peak == 0:
            image=np.zeros(image.shape,dtype=np.float64)
        else:
            image=image/peak*255.0

        # The model takes 3-channel inputs in [0, 1] with a leading batch axis.
        x=image/255.0
        x=np.concatenate([x,x,x],axis=-1)
        x=np.expand_dims(x,axis=0)

        # Predict and turn the thresholded output into a 0/255 mask.
        mask=model.predict(x)[0]
        mask=mask>0.5
        mask=mask.astype(np.int32)
        mask=mask*255

        # Save the input slice and the predicted mask side by side.
        concatenate_images=np.concatenate([image,mask],axis=1)
        cv2.imwrite(f"tests_result/{name}.png",concatenate_images)
        
        
        

testdata: 832


  0%|          | 0/832 [00:00<?, ?it/s]



100%|██████████| 832/832 [02:21<00:00,  5.87it/s]
