In [4]:
import create_n_crop_bounding_box as c
import matplotlib.pyplot as plt
import skimage.util as util
import skimage.io as io
import numpy as np
import os as os 
import skimage as sk
import skimage.color as cl
import skimage.filters as filt 
import scipy.ndimage.filters as fil
import skimage.morphology as morph
import skimage.segmentation as seg
import skimage.measure as measure
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
import skimage.feature as ft
from sklearn.model_selection import train_test_split
import skimage.exposure as exp

# Helper Functions for Feature Extraction

In [5]:
# Make grayscale matplotlib's default image colormap, so single-channel images
# and binary masks shown with imshow are not rendered with a colour palette.
plt.rcParams['image.cmap'] = 'gray'

def element_counter(region):
    '''
    Counts the elements of a region that are set (equal to 1 / True).

    :param region: array-like mask; boolean or numeric. Only values equal to 1 are
                   counted, so e.g. 2 or 255 do not count as set.
    :return: number of set elements as a plain Python int.
    '''
    # `True == 1` in both Python and numpy, so a single vectorised comparison covers
    # the boolean and the integer case without visiting pixels one by one.
    return int(np.count_nonzero(np.asarray(region) == 1))

def HoCS(B, min_scale, max_scale, increment, num_bins):
    '''
    Computes a histogram of curvature scale (HoCS) for the shape in the binary image B.

    For every scale s in min_scale, min_scale + increment, ... (up to and including
    max_scale) a disk of radius s is centred on each boundary pixel of the shape. The
    curvature estimate of that pixel is the fraction of the disk covered by foreground.
    The estimates of one scale are binned into a histogram over [0, 1] that is
    normalised by the number of boundary pixels.

    Pixels outside the image are treated as background, so boundary pixels that lie
    closer than s to the image border are evaluated against a full, correctly aligned
    disk.

    Note: the boundary comes from find_boundaries(mode='inner') on the labelled image,
    which appears to also mark foreground pixels that border interior holes. Fill the
    holes of B beforehand if those fragments must be ignored.

    :param B: A binary image consisting of a single foreground connected component.
              Foreground is every pixel equal to 1 / True.
    :param min_scale: smallest scale to consider (integer, minimum 1)
    :param max_scale: largest scale to consider (max_scale > min_scale)
    :param increment: positive integer step between consecutive scales
    :param num_bins: number of bins for the histogram at each scale
    :return: list of histogram values, concatenated in order of increasing scale.
    :raises ValueError: if increment is not positive (the scale loop would never end).
    '''
    if increment <= 0:
        raise ValueError("increment must be positive")

    # Neither the foreground mask nor the boundary depends on the scale: compute them once.
    foreground = np.asarray(B) == 1
    labeled = morph.label(B, connectivity=1)
    boundary_mask = seg.find_boundaries(labeled, connectivity=1, mode='inner')
    boundary_points = np.argwhere(boundary_mask > 0)  # (row, col) pairs in row-major order

    concatenated_histograms = []
    current_scale = min_scale
    while current_scale <= max_scale:
        disk = morph.disk(current_scale).astype(bool)
        disk_area = np.count_nonzero(disk)
        window = 2 * current_scale + 1

        # Zero-pad by the disk radius so that the window of original pixel (r, c) is
        # padded[r:r + window, c:c + window]. Slicing B directly with r - scale would
        # produce a negative start near the top/left border, which numpy interprets as
        # "count from the end" and therefore returns a wrong or empty window.
        padded = np.pad(foreground, current_scale, mode='constant', constant_values=False)

        kp = [
            np.count_nonzero(padded[r:r + window, c:c + window] & disk) / disk_area
            for r, c in boundary_points
        ]

        scale_histogram, _ = np.histogram(kp, bins=num_bins, range=(0.0, 1.0))
        if kp:
            scale_histogram = scale_histogram / len(kp)
        else:
            # No boundary pixels (empty shape): keep an all-zero histogram instead of
            # dividing by zero and propagating NaNs into the feature vector.
            scale_histogram = scale_histogram.astype(float)

        concatenated_histograms.extend(scale_histogram)
        current_scale = current_scale + increment

    return concatenated_histograms
 
def segment_clump(image):
    '''
    Segments an image into a boolean foreground mask.

    Pipeline:
      1. convert to grayscale, sharpen (unsharp mask), equalise the histogram and
         apply a 3x3 grayscale opening;
      2. seed a random-walker segmentation: pixels above the Otsu threshold get
         label 1, pixels with intensity <= 0.2 get label 2 (label 2 wins where both
         conditions hold because it is assigned last);
      3. keep the pixels the random walker assigned to label 1;
      4. clean the mask: 5x5 binary opening, drop objects smaller than 1/256 of the
         image area, fill holes smaller than 64 pixels.

    :param image: input image as accepted by skimage.color.rgb2gray.
    :return: 2D boolean array, True where a pixel belongs to label 1.
    '''
    gray = cl.rgb2gray(image)
    gray = filt.unsharp_mask(gray, radius=1, amount=4)
    gray = exp.equalize_hist(gray, nbins=256)
    # The footprint is passed positionally: its keyword was renamed from `selem` to
    # `footprint` in later scikit-image releases, the position is stable in both.
    gray = morph.opening(gray, morph.square(3))

    seeds = np.zeros_like(gray, dtype=np.uint)
    thresh = filt.threshold_otsu(gray)
    seeds[gray > thresh] = 1
    seeds[gray <= 0.2] = 2
    labels = seg.random_walker(gray, seeds, beta=10)

    segmented_image = labels == 1
    image_size = segmented_image.shape[0] * segmented_image.shape[1]

    segmented_image = morph.binary_opening(segmented_image, morph.square(5))
    segmented_image = morph.remove_small_objects(segmented_image, min_size=image_size/256)
    # `area_threshold` replaces the deprecated `min_size` keyword of remove_small_holes.
    segmented_image = morph.remove_small_holes(segmented_image, area_threshold=64)

    return segmented_image

def segregate_connected_components(labeled_image, total_components):
    '''
    Splits a labelled image into one binary image per connected component.

    :param labeled_image: integer array in which component i is marked with value i
                          (0 is background), e.g. the output of skimage.measure.label.
    :param total_components: highest label to extract; labels 1..total_components
                             are processed in increasing order.
    :return: list of arrays with the shape and dtype of labeled_image; entry i-1 is 1
             where labeled_image == i and 0 elsewhere. The input is not modified.
    '''
    # One vectorised comparison per label replaces a Python loop over every pixel.
    return [
        (labeled_image == label).astype(labeled_image.dtype)
        for label in range(1, total_components + 1)
    ]
                    

def regional_statistics(segmented_image):
    '''
    Builds the curvature feature vector of a segmented image.

    The mask is split into 8-connected components. For each component the HoCS
    descriptor is computed at scales 7, 14, ..., 42 with 7 bins per scale. All
    descriptor values of all components are then pooled into a single 7-bin histogram
    over [0, 1]. The number of components is printed as a side effect.

    :param segmented_image: 2D binary mask of the segmented regions.
    :return: 1D array with 7 bin counts (histogram of the pooled HoCS values).
    '''
    # connectivity=2 is 8-connectivity for a 2D image; it replaces the deprecated
    # `neighbors=8` keyword of measure.label.
    labeled_image = measure.label(segmented_image, connectivity=2)
    total_components = np.amax(labeled_image)

    connected_components = segregate_connected_components(labeled_image, total_components)
    print("Number of Connected Components: "+str(total_components))

    histogram_list = []
    for component in connected_components:
        histogram_list.extend(HoCS(component, 7, 43, 7, 7))

    histogram_curvature, _ = np.histogram(histogram_list, bins=7, range=(0.0, 1.0))

    return histogram_curvature


# Feature Extraction For Classifier Training

In [None]:
# Root folder of the data set; each immediate sub-folder is named after a class.
DATA_ROOT = 'Data/Combined_Data'
TEST_FRACTION = 0.33
SPLIT_SEED = 42  # fixed seed so the train/test split is the same on every run

features = []
classes = []

for root, dirs, files in os.walk(DATA_ROOT):
    # Sorting in place makes os.walk visit the folders in a stable order.
    dirs.sort()

    relative_root = os.path.relpath(root, DATA_ROOT)
    if relative_root == os.curdir:
        # Files directly inside DATA_ROOT have no class folder to take a label from.
        continue
    # The class is the first folder below DATA_ROOT, also for nested sub-folders.
    image_class = relative_root.split(os.sep)[0]

    for filename in sorted(files):
        cropped_image = io.imread(os.path.join(root, filename))
        print("Processed Image: "+str(filename)+" Class: "+str(image_class))
        features.append(regional_statistics(segment_clump(cropped_image)))
        classes.append(image_class)

training_features_HOCS, testing_features_HOCS, training_classes, test_classes = train_test_split(
    features, classes, test_size=TEST_FRACTION, random_state=SPLIT_SEED)

Processed Image: G0166380.JPG Class: 1
Number of Connected Components: 4


  "0.16. Use area_threshold instead.")


Processed Image: G0166381.JPG Class: 1
Number of Connected Components: 1
Processed Image: G0166382.JPG Class: 1
Number of Connected Components: 3
Processed Image: G0166383.JPG Class: 1
Number of Connected Components: 4
Processed Image: G0166384.JPG Class: 1
Number of Connected Components: 2
Processed Image: G0296469.JPG Class: 1
Number of Connected Components: 6
Processed Image: G0296472.JPG Class: 1
Number of Connected Components: 8
Processed Image: G0296473.JPG Class: 1
Number of Connected Components: 10
Processed Image: G0296474.JPG Class: 1
Number of Connected Components: 5
Processed Image: G0296476.JPG Class: 1
Number of Connected Components: 6
Processed Image: G0296477.JPG Class: 1
Number of Connected Components: 4
Processed Image: G0296478.JPG Class: 1
Number of Connected Components: 4
Processed Image: G0296481.JPG Class: 1
Number of Connected Components: 2
Processed Image: G0306486.JPG Class: 1
Number of Connected Components: 3
Processed Image: G0306487.JPG Class: 1
Number of C

Processed Image: G0306493.JPG Class: 2
Number of Connected Components: 5
Processed Image: G0306494.JPG Class: 2
Number of Connected Components: 5
Processed Image: G0336542.JPG Class: 2
Number of Connected Components: 6
Processed Image: G0336543.JPG Class: 2
Number of Connected Components: 8
Processed Image: G0336544.JPG Class: 2
Number of Connected Components: 3
Processed Image: G0336546.JPG Class: 2
Number of Connected Components: 6
Processed Image: G0336548.JPG Class: 2
Number of Connected Components: 8
Processed Image: G0336549.JPG Class: 2
Number of Connected Components: 4
Processed Image: G0336550.JPG Class: 2
Number of Connected Components: 8
Processed Image: G0346560.JPG Class: 2
Number of Connected Components: 7
Processed Image: G0346561.JPG Class: 2
Number of Connected Components: 4
Processed Image: G0346562.JPG Class: 2
Number of Connected Components: 6
Processed Image: G0346563.JPG Class: 2
Number of Connected Components: 8
Processed Image: G0346564.JPG Class: 2
Number of Co

Processed Image: G0286465.JPG Class: 3
Number of Connected Components: 10
Processed Image: G0286466.JPG Class: 3
Number of Connected Components: 8
Processed Image: G0286467.JPG Class: 3
Number of Connected Components: 5
Processed Image: G0296469.JPG Class: 3
Number of Connected Components: 4
Processed Image: G0296472.JPG Class: 3
Number of Connected Components: 4
Processed Image: G0296473.JPG Class: 3
Number of Connected Components: 4
Processed Image: G0296474.JPG Class: 3
Number of Connected Components: 9
Processed Image: G0296475.JPG Class: 3
Number of Connected Components: 13
Processed Image: G0296476.JPG Class: 3
Number of Connected Components: 8
Processed Image: G0296477.JPG Class: 3
Number of Connected Components: 8
Processed Image: G0296478.JPG Class: 3
Number of Connected Components: 10
Processed Image: G0296479.JPG Class: 3
Number of Connected Components: 8
Processed Image: G0296480.JPG Class: 3
Number of Connected Components: 11
Processed Image: G0296481.JPG Class: 3
Number o

Processed Image: G0036288.JPG Class: 4
Number of Connected Components: 9
Processed Image: G0036289.JPG Class: 4
Number of Connected Components: 11
Processed Image: G0036290.JPG Class: 4
Number of Connected Components: 9
Processed Image: G0036291.JPG Class: 4
Number of Connected Components: 12
Processed Image: G0036292.JPG Class: 4
Number of Connected Components: 13
Processed Image: G0036293.JPG Class: 4
Number of Connected Components: 15
Processed Image: G0046294.JPG Class: 4
Number of Connected Components: 13
Processed Image: G0046295.JPG Class: 4
Number of Connected Components: 8
Processed Image: G0046296.JPG Class: 4
Number of Connected Components: 6
Processed Image: G0046297.JPG Class: 4
Number of Connected Components: 9
Processed Image: G0046298.JPG Class: 4
Number of Connected Components: 12
Processed Image: G0046299.JPG Class: 4
Number of Connected Components: 12
Processed Image: G0066314.JPG Class: 4
Number of Connected Components: 10
Processed Image: G0066316.JPG Class: 4
Numb

Processed Image: IMG_4464.JPG Class: 4
Number of Connected Components: 10
Processed Image: IMG_4484.JPG Class: 4
Number of Connected Components: 14
Processed Image: IMG_4484_1.JPG Class: 4
Number of Connected Components: 12
Processed Image: IMG_4484_1_2.JPG Class: 4
Number of Connected Components: 12
Processed Image: IMG_4484_1_2_3.JPG Class: 4
Number of Connected Components: 14
Processed Image: IMG_4484_1_2_3_4.JPG Class: 4
Number of Connected Components: 10
Processed Image: IMG_4484_1_2_3_4_5.JPG Class: 4
Number of Connected Components: 19
Processed Image: IMG_4494.JPG Class: 4
Number of Connected Components: 7
Processed Image: IMG_4495.JPG Class: 4
Number of Connected Components: 6
Processed Image: IMG_4496.JPG Class: 4
Number of Connected Components: 9
Processed Image: G0016261.JPG Class: 5
Number of Connected Components: 12
Processed Image: G0016263.JPG Class: 5
Number of Connected Components: 21
Processed Image: G0016264.JPG Class: 5
Number of Connected Components: 18
Processed I

# Training and Testing KNN Classifier

In [None]:
# Fit a 5-nearest-neighbour classifier on the HoCS training features, then report the
# confusion matrix and accuracy on the held-out test split.
model = KNeighborsClassifier(n_neighbors=5).fit(training_features_HOCS, training_classes)
predicted_classes = model.predict(testing_features_HOCS)

print("HOCS Confusion Matrix", confusion_matrix(test_classes, predicted_classes), sep="\n")
print("HOCS Accuracy Score", accuracy_score(test_classes, predicted_classes), sep="\n")

# Training and Testing SVM Classifier

In [None]:
# Fit an RBF-kernel support vector classifier (C=0.001) on the HoCS training features,
# then report the confusion matrix and accuracy on the held-out test split.
svm_rbf = svm.SVC(kernel='rbf', C=0.001).fit(training_features_HOCS, training_classes)
rbf_predicted_classes = svm_rbf.predict(testing_features_HOCS)

print("\n\nHOCS Confusion Matrix", confusion_matrix(test_classes, rbf_predicted_classes), sep="\n")
print("HOCS Accuracy Score", accuracy_score(test_classes, rbf_predicted_classes), sep="\n")

# Training and Testing GaussianNB

In [None]:
# Fit a Gaussian naive Bayes classifier on the HoCS training features, then report the
# confusion matrix and accuracy on the held-out test split.
gnb = GaussianNB().fit(training_features_HOCS, training_classes)
gnb_predicted_classes = gnb.predict(testing_features_HOCS)

print("\n\nHOCS Confusion Matrix", confusion_matrix(test_classes, gnb_predicted_classes), sep="\n")
print("HOCS Accuracy Score", accuracy_score(test_classes, gnb_predicted_classes), sep="\n")

In [7]:
#cropped_images,image_labels = c.cropped_bounding_boxes('Umair','IMG_4456.JPG')
#cropped_images,image_labels = c.cropped_bounding_boxes('Data/Training','G0236424.JPG')
#print(cropped_images[0])

# plt.figure()
# plt.imshow(cropped_images[1])
# plt.show()

# plt.figure()
# plt.imshow(exp.equalize_hist(cropped_images[1],nbins=256))
# plt.show()


# plt.figure()
# plt.imshow(exp.equalize_hist(filt.unsharp_mask(cropped_images[1], radius=1, amount=1),nbins=256))
# plt.show()

# plt.figure()
# plt.imshow(filt.unsharp_mask(exp.equalize_hist(cropped_images[1],nbins=256), radius=1, amount=1))
# plt.show()

# plt.figure()
# plt.imshow(filt.unsharp_mask(cropped_images[1], radius=1, amount=4))
# plt.show()

# plt.figure()
# plt.imshow(filt.unsharp_mask(cropped_images[1], radius=2, amount=4))
# plt.show()

# plt.figure()
# plt.imshow(filt.unsharp_mask(cropped_images[1], radius=4, amount=4))
# plt.show()

#cropped_images,image_labels = c.cropped_bounding_boxes('Umair/Training','IMG_4430.JPG')
# for x in range(0,len(image_labels)):
#     plt.figure()
#     plt.title("Clump Count:"+str(image_labels[x]))
#     plt.imshow(cropped_images[x])
#     plt.figure()
#     segmented_image = segment_clump(cropped_images[x])
#     predicted_count = regional_statistics(segmented_image)
#     plt.title("Actual Clump Count:"+str(image_labels[x])+" Predicted Count:"+str(predicted_count))
#     plt.imshow(segmented_image)
#     plt.figure()
#     plt.title("Actual Clump Count:"+str(image_labels[x])+" Predicted Count:"+str(predicted_count))
#     plt.imshow(seg.mark_boundaries(cropped_images[x],segmented_image,color=(1,0,0),outline_color=(1,0,0)))
    
    
# #grey_scaled = cl.rgb2gray(cropped_images[1])
# plt.hist((cropped_images[1]).ravel(),bins=257,range=(0,256))
# plt.show()
# plt.figure()
# plt.imshow(grey_scaled)
# #print(grey_scaled[:10,:])
# plt.figure()
# plt.imshow(cropped_images[0][:,:,0])