In [None]:
# Imports

import os
import csv
import cv2
import time
import xlrd 
import h5py
import keras
import shutil
import random
import numpy as np
import scipy.ndimage
import pydicom as dicom
import matplotlib.pyplot as plot
from ipywidgets import IntProgress
from keras.models import Sequential
from tqdm.notebook import tqdm as tq
from keras.utils import to_categorical
from skimage import measure,morphology,segmentation
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator,array_to_img, img_to_array, load_img

In [None]:
def find_horizontal_indexes(img, offset):
    """Find the outermost bright columns in a band of rows around the image centre.

    A pixel counts as bright when each of its first three channels is strictly
    greater than ``offset``. Only the rows within 20 rows of the middle row are
    inspected; the band is clamped to the image so that short images no longer
    raise ``IndexError`` (or silently wrap around to rows at the bottom).

    Args:
        img: Array indexed as ``img[row, column, channel]``.
        offset: Brightness threshold every channel has to exceed.

    Returns:
        Tuple ``(x_initial, x_final)`` of plain ints: the smallest and the
        largest column index that holds a bright pixel in the inspected band.
        When no bright pixel is found the initial sentinels ``(10000, 0)`` are
        returned unchanged.
    """
    pixels = np.asarray(img)
    v_pixel_count = pixels.shape[0]
    centre_row = v_pixel_count // 2

    # Clamp the +/-20 row band to rows that actually exist.
    first_row = max(centre_row - 20, 0)
    last_row = min(centre_row + 20, v_pixel_count - 1)

    # One boolean per pixel of the band: True where channels 0, 1 and 2 all
    # exceed the threshold.
    band = pixels[first_row:last_row + 1, :, :3]
    bright = (band > offset).all(axis=2)

    # Columns containing at least one bright pixel. The first/last of them
    # equal the minimum/maximum of the per-row first/last bright positions.
    bright_columns = np.flatnonzero(bright.any(axis=0))

    x_initial = 10000
    x_final = 0
    if bright_columns.size:
        x_initial = min(x_initial, int(bright_columns[0]))
        x_final = max(x_final, int(bright_columns[-1]))
    return (x_initial, x_final)



def find_vertical_indexes(img, offset):
    """Find the outermost bright rows in a band of columns around the image centre.

    A pixel counts as bright when each of its first three channels is strictly
    greater than ``offset``. Only the columns within 20 columns of the middle
    column are inspected; the band is clamped to the image so that narrow
    images no longer raise ``IndexError`` (or silently wrap around to columns
    at the right edge).

    Args:
        img: Array indexed as ``img[row, column, channel]``.
        offset: Brightness threshold every channel has to exceed.

    Returns:
        Tuple ``(y_initial, y_final)`` of plain ints: the smallest and the
        largest row index that holds a bright pixel in the inspected band.
        When no bright pixel is found the initial sentinels ``(1000000, 0)``
        are returned unchanged.
    """
    pixels = np.asarray(img)
    h_pixel_count = pixels.shape[1]
    centre_column = h_pixel_count // 2

    # Clamp the +/-20 column band to columns that actually exist.
    first_column = max(centre_column - 20, 0)
    last_column = min(centre_column + 20, h_pixel_count - 1)

    # One boolean per pixel of the band: True where channels 0, 1 and 2 all
    # exceed the threshold.
    band = pixels[:, first_column:last_column + 1, :3]
    bright = (band > offset).all(axis=2)

    # Rows containing at least one bright pixel. The first/last of them equal
    # the minimum/maximum of the per-column first/last bright positions.
    bright_rows = np.flatnonzero(bright.any(axis=1))

    y_initial = 1000000
    y_final = 0
    if bright_rows.size:
        y_initial = min(y_initial, int(bright_rows[0]))
        y_final = max(y_final, int(bright_rows[-1]))
    return (y_initial, y_final)



def crop(img, offset):
    """Crop ``img`` to the bounding box of its bright region.

    The box is taken from ``find_horizontal_indexes`` and
    ``find_vertical_indexes``, which report the first and last bright
    column/row found near the image centre.

    Args:
        img: Array indexed as ``img[row, column, channel]``.
        offset: Brightness threshold forwarded to the index finders.

    Returns:
        The cropped view ``img[y1:y2 + 1, x1:x2 + 1, :]``. When the finders
        report no bright pixel (start index greater than end index) the image
        is returned uncropped instead of an empty array.
    """
    x1, x2 = find_horizontal_indexes(img, offset)
    y1, y2 = find_vertical_indexes(img, offset)

    # The finders leave their sentinels (huge start, zero end) untouched when
    # nothing exceeds the threshold; slicing with those would give an empty
    # image that cannot be written to disk.
    if x1 > x2 or y1 > y2:
        return img

    # x2 / y2 are the indexes of the last bright column / row, so the slice
    # end has to be one past them to keep that column / row.
    return img[y1:y2 + 1, x1:x2 + 1, :]

In [None]:
# Function for Data Augmentation
def DataAugmentor(Image_path, Save_image_path, Image_name, Aug_per_image):
    """Write randomly augmented JPEG copies of one image into a directory.

    The augmentations come from a Keras ``ImageDataGenerator`` configured with
    ``rotation_range=20``, vertical and horizontal flips, and constant fill
    (``cval=0``) for pixels exposed by the transform.

    Args:
        Image_path: Path of the image to augment.
        Save_image_path: Directory the augmented files are written to; it is
            created if it does not exist.
        Image_name: Prefix used for the generated file names.
        Aug_per_image: Number of augmented copies to write. Values of zero or
            less write nothing (previously one copy was always written).

    Returns:
        None. The function is used for its side effect of writing files.
    """
    if Aug_per_image <= 0:
        return

    os.makedirs(Save_image_path, exist_ok=True)

    # Augmentation parameters are passed to the generator's constructor.
    datagen = ImageDataGenerator(
        rotation_range=20,
        vertical_flip=True,
        horizontal_flip=True,
        fill_mode='constant',
        cval=0,
    )

    # Load the image and add a leading batch axis: (1, height, width, channels).
    img = load_img(Image_path)
    x = img_to_array(img)
    x = x.reshape((1,) + x.shape)

    # The array holds a single image, so every batch contains exactly one
    # augmented sample whatever the batch size; 1 states that explicitly.
    flow = datagen.flow(
        x,
        batch_size=1,
        save_to_dir=Save_image_path,
        save_prefix=Image_name,
        save_format='jpeg',
    )

    # The iterator is endless; each step saves one file, so stop after
    # Aug_per_image steps.
    generated = 0
    for _ in flow:
        generated += 1
        if generated >= Aug_per_image:
            break
        


In [None]:
def grey_clahe(img, clipLimitVal, tileGridSizeVal):
    """Return the CLAHE-filtered greyscale version of a BGR image.

    Args:
        img: Image passed to ``cv2.cvtColor`` with ``cv2.COLOR_BGR2GRAY``.
        clipLimitVal: Forwarded as ``clipLimit`` to ``cv2.createCLAHE``.
        tileGridSizeVal: Forwarded as ``tileGridSize`` to ``cv2.createCLAHE``.

    Returns:
        The result of applying the CLAHE object to the greyscale image.
    """
    # Greyscale conversion comes first, then the CLAHE object is built and applied.
    grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return cv2.createCLAHE(
        clipLimit=clipLimitVal,
        tileGridSize=tileGridSizeVal,
    ).apply(grey)




def maxmin(img, minVal, maxVal):
    """Linearly rescale ``img`` so that its values span ``[minVal, maxVal]``.

    The smallest input value maps to ``minVal`` and the largest to ``maxVal``.

    Args:
        img: Array-like of numbers.
        minVal: Value the input minimum is mapped to.
        maxVal: Value the input maximum is mapped to.

    Returns:
        Floating-point array of the same shape as ``img``. Integer inputs are
        converted to ``float64``; floating inputs keep their dtype. When every
        input value is identical the result is filled with ``minVal``.
    """
    values = np.asarray(img)
    # Integer images (e.g. uint8) are promoted before any arithmetic, otherwise
    # multiplying by (maxVal - minVal) can wrap around in the integer dtype.
    if not np.issubdtype(values.dtype, np.floating):
        values = values.astype(np.float64)

    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        # A constant image has no range to stretch; avoid 0/0 producing NaN.
        return np.full(values.shape, minVal, dtype=values.dtype)

    return minVal + (values - lowest) * (maxVal - minVal) / span


In [None]:
# Read the labels provided in the CSV file and move every training image
# into the folder of the class named in its label.
filename = "C:/Train/trainLabels.csv"
rows = []

# Read the CSV file; newline='' is what the csv module asks for when it is
# handed a file object.
with open(filename, 'r', newline='') as csvfile:
    csvreader = csv.reader(csvfile)
    for row in csvreader:
        rows.append(row)

train_image_dir = 'C:/Train/train/'
# Same spelling as the cropping cell, which reads "C:/Project/CategorizedData/".
categorized_dir = 'C:/Project/CategorizedData/'
known_labels = ('0', '1', '2', '3', '4')

# shutil.move does not create missing destination folders.
for label in known_labels:
    os.makedirs(categorized_dir + 'class_' + label, exist_ok=True)

# Move each image into class_<label>/.
missing_images = 0
for row in tq(rows):
    # Rows without a recognised label (blank lines, anything that is not one
    # of the five class ids) are left alone.
    if len(row) < 2 or row[1] not in known_labels:
        continue

    source_file = train_image_dir + row[0] + '.jpeg'
    if not os.path.exists(source_file):
        # Typically the file was already moved by an earlier run of this cell.
        missing_images += 1
        continue

    shutil.move(source_file, categorized_dir + 'class_' + row[1] + '/' + row[0] + '.jpeg')

if missing_images:
    print(missing_images, "labelled images were not found in", train_image_dir)
print("Transfer Completed.")

In [None]:
# Crop every categorised image to its bright region and store the result
# under CroppedData/<class>/ with the same file name.
Input_path = "C:/Project/CategorizedData/"
classes = os.listdir(Input_path)
for clas in tq(classes,"progress"):
    Image_path = Input_path+clas

    Save_image_path = "C:/Project/CroppedData/"+clas+"/"
    # cv2.imwrite only reports failure through its return value, so make sure
    # the target folder exists up front.
    os.makedirs(Save_image_path, exist_ok=True)

    for images in tq(os.listdir(Image_path),clas,leave=True):
        source_file = os.path.join(Image_path,images)
        im = cv2.imread(source_file)
        if im is None:
            # cv2.imread returns None for files it cannot decode.
            print("Skipped unreadable image:", source_file)
            continue

        imc = crop(im,4)
        if imc.size == 0:
            # Nothing brighter than the threshold was found.
            print("Skipped image with empty crop:", source_file)
            continue

        if not cv2.imwrite(Save_image_path+images,imc):
            print("Could not write:", Save_image_path+images)
        

In [None]:
# Augment the cropped images.
# The aim is to have at least 10000 images in total for each class.
# Classes that already hold 10000 or more images are skipped; for the others
# the number of missing images is divided over the existing images to get the
# number of augmented copies to create per image.
Input_path = "C:/Project/CroppedData/"
target_per_class = 10000
classes = os.listdir(Input_path)
for clas in tq(classes,"progress"):
    Image_path = Input_path+clas
    image_names = os.listdir(Image_path)
    image_count = len(image_names)

    # Decide by image count instead of by position in the directory listing,
    # whose order is not guaranteed. Empty folders would divide by zero.
    if image_count == 0 or image_count >= target_per_class:
        continue

    # ceil((target - n) / n): copies needed per image to reach the target.
    Augperimg = int(np.ceil((target_per_class - image_count) / image_count))
    Save_image_path = "C:/Project/AugmentedData/"+clas+"/"
    print(Augperimg)
    for images in tq(image_names,clas,leave=True):
        # splitext copes with file names that contain more than one dot.
        name = os.path.splitext(images)[0]

        DataAugmentor(os.path.join(Image_path,images),Save_image_path,name,Augperimg)
        

In [None]:
# Collect a random selection of up to 10000 images from each class
# for further preprocessing.

Source_path = "C:/Project/AugmentedData/"
Destination_path = "C:/Project/Data_10000/"
images_per_class = 10000

for class_name in os.listdir(Source_path):
    image_list = os.listdir(os.path.join(Source_path,class_name))

    # random.sample raises ValueError when asked for more items than exist,
    # so a class that is short of images contributes everything it has.
    sample_size = min(images_per_class, len(image_list))
    if sample_size < images_per_class:
        print(class_name, "holds only", len(image_list), "images; moving all of them.")
    img_move = random.sample(image_list,sample_size)

    # shutil.move does not create missing destination folders.
    os.makedirs(os.path.join(Destination_path,class_name), exist_ok=True)
    for image in tq(img_move,"Progress"):
        shutil.move(os.path.join(Source_path,class_name,image),os.path.join(Destination_path,class_name,image))
        

In [None]:
# Those 10000 images from each classes 
# are now going to get preprocessed in this section

Source_path = "C:/Project/Data_10000/"
Destination_path = "C:/Project/PreprossedData/"
clipLimitVal = 8
tileGridSizeVal = (8,8)
sigmaX = 30
size = (224,224)
for classes in tq(os.listdir(Source_path),"Progress:"):
    image_list = os.listdir(Source_path+classes)
    for images in tq( image_list, classes, leave= False):
        img = cv2.imread(os.path.join(Source_path,classes,images))
        try:
            image = cv2.resize(img, size)
        except:
            
            print(images)
            continue
        image=cv2.addWeighted ( image,4, cv2.GaussianBlur( image , (0,0) , sigmaX) ,-4 ,128)
        cv2.imwrite(os.path.join(Destination_path,classes,images),image)
        

In [None]:
# preprocessed data are again resized in this section
Source_path = "C:/Project/PreprossedData/"
Destination_path = "C:/Project/ResizedData/"
minVal = 0
maxVal= 1
size = (224,224)

for classes in tq(os.listdir(Source_path),"Progress:"):
    image_list = os.listdir(Source_path+classes)
    for image in tq( image_list, classes, leave= False):
        img = cv2.imread(os.path.join(Source_path,classes,image))
        imgr = cv2.resize(img,(size))
        imgm = maxmin(imgr,minVal,maxVal)
        cv2.imwrite(os.path.join(Destination_path,classes,image),imgm)


In [None]:
# Splits the Preprocessed Data into Training Set,Testing Set and Validation Set

Source_path = 'C:/Project/PreprossedData/'
Destination_path = 'D:/prog_data/'

for classes in tq(os.listdir(Source_path),"progress"):
    path_source = os.path.join(Source_path,classes)
    images = os.listdir(path_source)
    size=len(images)
    train=int(.85*size)
    val= train+ int(.10*size)
    
    
    
    random.shuffle(images)
    
    
    path_train_destination = os.path.join(Destination_path,"training",classes)
    path_test_destination = os.path.join(Destination_path,"testing",classes)
    path_val_destination = os.path.join(Destination_path,"validation",classes)
    for img in tq(images[0:train],"training", leave = False):
        shutil.move(os.path.join(path_source,img), os.path.join(path_train_destination,img))
        
    for img in tq(images[train:val],"testing", leave = False):
        shutil.move(os.path.join(path_source,img), os.path.join(path_val_destination,img))
        
    for img in tq(images[val:size],"validation",leave = False):
        shutil.move(os.path.join(path_source,img), os.path.join(path_test_destination,img))
    print(classes,"moved.")
    
