Image segmentation 

Image segmentation is the process of partitioning an image into multiple distinct regions (or segments). The goal is to change the representation of the image into something that is more meaningful and easier to analyze.

It is an important step in image processing, as real-world images don't always contain only one object that we want to classify. For instance, for self-driving cars, the image would contain the road, cars, pedestrians, etc. So we may need to use segmentation here to separate the objects and analyze each one individually (e.g., with image classification) to determine what it is.

K-Means clustering is an unsupervised machine learning algorithm that aims to partition N observations into K clusters, in which each observation belongs to the cluster with the nearest mean. A cluster refers to a collection of data points aggregated together because of certain similarities. For image segmentation, the clusters here correspond to different image colors.


Image segmentation is an essential topic in image processing. It is the process of classifying the pixels of an image into different groups. There are many different methods, and k-means is one of the most popular.

In [None]:

from glob import glob
import numpy as np
import matplotlib.pyplot as plt

from skimage.io import imread
from skimage.color import rgb2grey
from sklearn.feature_extraction import image
from sklearn.cluster import KMeans

#https://medium.com/spinor/a-straightforward-introduction-to-image-thresholding-using-python-f1c085f02d5e
#https://datacarpentry.org/image-processing/07-thresholding/
#https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_thresholding/py_thresholding.html
#https://campus.datacamp.com/courses/introduction-to-data-visualization-in-python/analyzing-time-series-and-images?ex=13
#https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_morphological_ops/py_morphological_ops.html

from skimage.filters import rank, threshold_otsu
from skimage.morphology import closing, square, disk
from skimage import exposure as hist, data, img_as_float
from skimage.segmentation import chan_vese
from skimage.feature import canny
from skimage.color import rgb2gray
from scipy import ndimage as ndi 

In [None]:
# Collect every path matching the malignant / benign training directories.
mal_images = glob('../input/skin-cancer-malignant-vs-benign/data/train/malignant/*')
ben_images = glob('../input/skin-cancer-malignant-vs-benign/data/train/benign/*')

In [None]:
# Small preview subsets: the first five paths of each class. They are sliced
# from the lists that were already globbed, so the directories are not
# scanned a second time and the preview is always a prefix of the full lists.
mal_images_1 = mal_images[:5]
ben_images_1 = ben_images[:5]

In [None]:
# Number of malignant image paths found.
len(mal_images)

In [None]:
def binary(image):
    """Binarise ``image`` with Otsu's threshold.

    Returns the element-wise comparison ``image > t``, where ``t`` is the
    value returned by ``threshold_otsu(image)``.
    """
    otsu_level = threshold_otsu(image)
    return image > otsu_level

def equalize(image):
    """Return ``image`` after histogram equalisation (``equalize_hist``)."""
    equalized = hist.equalize_hist(image)
    return equalized

#https://homepages.inf.ed.ac.uk/rbf/HIPR2/median.htm
def mean_filter(image, raio_disk):
    """Apply ``rank.mean_percentile`` to ``image`` over a disk neighbourhood.

    Parameters:
        image: image handed to the rank filter.
        raio_disk: radius passed to ``disk`` to build the neighbourhood.

    Returns:
        The filtered image as returned by ``rank.mean_percentile``.
    """
    # The neighbourhood is passed positionally: the keyword was renamed from
    # ``selem`` to ``footprint`` in scikit-image 0.19, while the second
    # positional slot is the same on both sides of the rename.
    return rank.mean_percentile(image, disk(raio_disk))

def preenche_bords(image):
    """Fill the holes of a binary image with ``ndi.binary_fill_holes``."""
    filled = ndi.binary_fill_holes(image)
    return filled

#https://www.unioviedo.es/compnum/labs/PYTHON/intro_image.html

def load_images(paths):
    """Read every path in ``paths`` with ``imread``.

    Returns a list holding one loaded image per path, in input order.
    """
    return [imread(location) for location in paths]
    
def plot_any(arr, title = ''):
    """Show the images in ``arr`` side by side in a single row.

    A 15x25 figure is created and ``title`` is set on every subplot.
    """
    n_panels = len(arr)
    plt.figure(figsize=(15, 25))
    for position in range(1, n_panels + 1):
        plt.subplot(1, n_panels, position)
        plt.title(title)
        plt.imshow(arr[position - 1])

        
def plot_camadas(img):
    """Show the first three channels of ``img`` as grayscale panels in a row."""
    plt.figure(figsize=(15, 25))
    for panel, channel in zip((1, 2, 3), (0, 1, 2)):
        plt.subplot(1, 3, panel)
        plt.imshow(img[:, :, channel], cmap='gray')
        
def d2Kmeans(img, k):
    """Cluster the scalar values of ``img`` into ``k`` groups with k-means.

    ``img`` is flattened to a column of one-feature samples, so every array
    element (for a multi-channel image: every channel value) is clustered on
    its own.

    Parameters:
        img: array whose values are clustered.
        k: number of clusters.

    Returns:
        The cluster label of every element, reshaped to ``img.shape``.
    """
    samples = img.reshape((-1, 1))
    # ``n_jobs`` is deliberately not passed: KMeans no longer accepts it
    # (removed in scikit-learn 1.0). With a fixed ``random_state`` it does
    # not affect the labels.
    model = KMeans(n_clusters=k, init='k-means++', random_state=1)
    model.fit(samples)
    return model.labels_.reshape(img.shape)

def merge_segmented_mask_ROI(uri_img, img_kluster):
    """Return a copy of ``uri_img`` with its first three channels masked.

    Each of channels 0, 1 and 2 of the copy is multiplied in place by
    ``img_kluster``; ``uri_img`` itself is left untouched.
    """
    masked = uri_img.copy()
    for channel in (0, 1, 2):
        masked[:, :, channel] *= img_kluster
    return masked


def elbow(img, k):
    """Plot the k-means inertia of ``img`` for 1 .. ``k - 1`` clusters.

    The values of ``img`` are flattened to one-feature samples, a KMeans
    model is fitted for every cluster count in ``range(1, k)``, and the
    resulting inertias are drawn as an elbow curve.

    Parameters:
        img: array whose values are clustered.
        k: exclusive upper bound of the cluster counts to try.
    """
    samples = img.reshape((-1, 1))
    cluster_counts = range(1, k)
    # ``n_jobs`` is deliberately not passed: KMeans no longer accepts it
    # (removed in scikit-learn 1.0).
    inertias = [
        KMeans(n_clusters=n, init='k-means++', random_state=1).fit(samples).inertia_
        for n in cluster_counts
    ]

    plt.figure(figsize=(15, 8))
    plt.grid()
    plt.plot(cluster_counts, inertias, 'o-')
    plt.ylabel('Soma das distâncias quadradas')
    plt.xlabel('k clusters')
    plt.title('Elbow')
    plt.show()
    
    

In [None]:
# Load the full image sets, then the five-image preview sets.
mal, ben = load_images(mal_images), load_images(ben_images)
mal_1, ben_1 = load_images(mal_images_1), load_images(ben_images_1)

In [None]:
# Show the preview images: malignant row first, then benign row.
plot_any(mal_1)
plot_any(ben_1)


In [None]:
# Working example for the cells below: the loaded malignant image at index 1.
img_selected = mal[1]

In [None]:
# Elbow curve for cluster counts 1..5 on the selected image.
elbow(img_selected, 6)

In [None]:
# Number of k-means clusters used by the segmentation cells below.
k_klusters = 2

In [None]:
# Cluster the grayscale version of the selected image, then the raw image
# (where every channel value is clustered as an independent sample).
# rgb2gray is used instead of the rgb2grey alias, which scikit-image removed
# in 0.19; rgb2gray is already imported by this notebook.
result_gray = d2Kmeans(rgb2gray(img_selected), k_klusters)
result_img = d2Kmeans(img_selected, k_klusters)

In [None]:
# One boolean mask per cluster label of the grayscale clustering, shown side by side.
klusters_gray = [result_gray == i for i in range(k_klusters)]
plot_any(klusters_gray)

In [None]:
def select_cluster_index(clusters):
    """Return the position of the cluster with the smallest mean.

    Parameters:
        clusters: non-empty sequence of objects exposing ``.mean()``.

    Returns:
        Index into ``clusters`` of the element whose mean is lowest; the
        first one wins on ties.

    Raises:
        IndexError: if ``clusters`` is empty.
    """
    best_index = 0
    best_mean = clusters[0].mean()
    for position, cluster in enumerate(clusters[1:], start=1):
        cluster_mean = cluster.mean()
        if cluster_mean < best_mean:
            best_mean = cluster_mean
            # Record where the new minimum is, not how many minima were seen.
            best_index = position
    return best_index

In [None]:
# Choose one grayscale cluster mask via select_cluster_index, print its
# index and keep the mask itself for the following cells.
index_kluster = select_cluster_index(klusters_gray)
print(index_kluster)
selecionado = klusters_gray[index_kluster]

In [None]:
 for ch in range(3):
    img_k = []
    for K in range(k_klusters):
         img_k.append(result_img[:, :, ch] == K)
    plot_any(img_k)

In [None]:
# Boolean masks of channel 1 of the per-value clustering, one per cluster label.
clusters = [(result_img[:,:,1] == K) for K in range(k_klusters)]

In [None]:
# Display the list of channel-1 masks.
clusters

In [None]:
# Multiply the selected image's channels by the chosen cluster mask.
new_img = merge_segmented_mask_ROI(img_selected, selecionado)

In [None]:
# Show the masked image.
plot_any([new_img])

In [None]:
# Run the chosen mask through the disk mean filter (radius 20), then
# binarise the filtered result again with the Otsu threshold.
image_mean_filter = mean_filter(selecionado, 20)
test_binary = binary(image_mean_filter)

In [None]:
# Compare the raw mask, the mean-filtered mask and the re-binarised mask.
plot_any([selecionado, image_mean_filter, test_binary])

In [None]:
# Multiply the selected image's channels by the re-binarised mask.
final_result = merge_segmented_mask_ROI(img_selected ,test_binary)

In [None]:
# Display the shape of the masked image.
final_result.shape

In [None]:
# Show the mask next to both masked versions of the selected image.
plot_any([test_binary, new_img, final_result])

In [None]:
 max_mean = 0
img_gray = rgb2gray(final_result)
 img_bin  = binary(img_gray)
x, y = img_bin.shape

 limits_before = []
for i in range(x):
    for j in range(y):
        if  img_bin[i, j]:
            limits_before.append(j)
            
stop_before = ( sum(limits_before) // len(limits_before) ) // 2
img_copy = img_bin.copy()
for i in range(x):
    for j in range(stop_before):
        img_copy[i, j] = 0
        limits_after = []
for i in range(x):
     for j in range(y - 1, 0, -1):
        if  img_copy[i, j]:
            limits_after.append(j)
            
stop_after = sum(limits_after) // len(limits_after) + min(limits_after)
for i in range(x):
    for j in range(stop_after, y):
        img_copy[i, j] = 0

mean_result = mean_filter(img_copy, 15)
mean_result = binary(mean_result)
final_result = merge_segmented_mask_ROI(img_selected , mean_result)


 plot_any([mean_result, final_result])

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import cv2
# Scatter every pixel of one malignant image in RGB space.
img = cv2.imread(mal_images[1])
if img is None:
    # cv2.imread signals an unreadable file by returning None.
    raise FileNotFoundError(mal_images[1])
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
r, g, b = (channel.flatten() for channel in cv2.split(img))

# plotting
fig = plt.figure()
# Create the 3-D axes through the figure so they are attached to it;
# Axes3D(fig) is no longer added to the figure on current Matplotlib.
ax = fig.add_subplot(111, projection='3d')
ax.scatter(r, g, b)
plt.show()

Starting the classification with segmented images

In [None]:
# Apply the mask to every malignant image.
# NOTE(review): test_binary was computed from img_selected only, so the same
# mask is applied to every image here -- confirm this is intended rather
# than segmenting each image separately.
data_mal = [merge_segmented_mask_ROI(img, test_binary) for img in mal]


In [None]:
# Apply the mask to every benign image.
# NOTE(review): test_binary was computed from img_selected only, so the same
# mask is applied to every image here -- confirm this is intended rather
# than segmenting each image separately.
data_ben = [merge_segmented_mask_ROI(img, test_binary) for img in ben]

In [90]:
# Sizes of the two masked sets. In a notebook only the value of the last
# expression of a cell is echoed, so only len(data_mal) is shown.
len(data_ben)
len(data_mal)


1197

In [100]:
import cv2
import os
import io
import skimage
# Directory that receives the masked benign images (created if missing).
out_dir = "C:/Users/121/Pictures/skin_segmented"
os.makedirs(out_dir, exist_ok=True)

# Save each image inside out_dir, named after its position in data_ben.
# os.path.join supplies the path separator between folder and file name.
for c, masked_img in enumerate(data_ben):
    skimage.io.imsave(os.path.join(out_dir, str(c) + ".jpg"), masked_img)

In [None]:
# Disabled experiment kept as a bare string literal: the code inside is never executed.
"""for img in data_ben :
    
    img = img.reshape((-1, 1, 28, 28))
    
for img in data_mal :
    
    img = img.reshape((-1, 1, 28, 28))"""

In [101]:
#plot_any(data_ben[2])
#plot_any(data_mal[3])



In [None]:
# Build a labelled table of the masked images: label 0 for the benign set,
# label 1 for the malignant set.

import pandas as pd

data = [(img, 0) for img in data_ben] + [(img, 1) for img in data_mal]
data = pd.DataFrame(data, columns=['image', 'label'])

# Shape of the first stored image.
data['image'][0].shape





In [None]:
# (rows, columns) of the labelled table.
data.shape

In [None]:
# Display the labelled table.
data

In [None]:
# Shape and number of dimensions of the 'image' column itself (not of the
# arrays stored inside it).
print(data['image'].shape)
print(data['image'].ndim)



#data['image'][10].reshape([28, 28])

In [None]:
 
    
# NOTE(review): this replaces the 'image' column with an uninitialised array
# of the same shape and dtype, so the masked images stored there are no
# longer reachable through `data` afterwards -- confirm this is intended.
data['image'] = np.empty_like(data['image'])
    
# Loop bounds for the per-pixel copy below.
maxlat = 224
maxlon = 224
print(maxlat, maxlon)
# NOTE(review): iterating the column yields its values, yet k is used as an
# index into the column below -- verify.
for  k in data['image'] : 
    
    for i in range(maxlat) :
        
        for j in range(maxlon):
            # NOTE(review): `temp` is not defined anywhere in the visible
            # part of this file -- this line presumably raises NameError.
            data['image'][k][i][j] = temp[i][j]

In [None]:
from sklearn.model_selection import train_test_split
# Features are the stored images, targets are the 0/1 labels; split both
# with train_test_split's default settings.
X, y = data['image'], data['label']
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [None]:
from sklearn.svm import SVC
# Linear-kernel support vector classifier (gamma is only used by the rbf,
# poly and sigmoid kernels, so it has no effect here).
svc = SVC(kernel='linear', gamma='auto')

# SVC.fit expects a 2-D (n_samples, n_features) numeric matrix, while
# X_train holds one image array per row: flatten each image into a feature
# vector and stack them. All images must share the same shape.
# X_test needs the same flattening before it is passed to predict/score.
X_train_flat = np.stack([np.asarray(img).ravel() for img in X_train])
svc.fit(X_train_flat, y_train)