<a href="https://colab.research.google.com/github/AnhVietPham/Machine-Learning/blob/main/image-segmentation-RForest/Image_Segmentation_RForest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import cv2
import pandas as pd
import pickle
from matplotlib import pyplot as plt
from scipy import ndimage as nd
from skimage.filters import roberts, sobel, scharr, prewitt
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Mount Google Drive at /content/drive so the dataset paths used by the cells
# below resolve. `google.colab` is the Colab helper package, so this cell is
# presumably meant to run only inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def _read_grayscale(path):
    """Read the image at ``path`` with OpenCV and return it as a 2-D grayscale array.

    Raises:
        IOError: if ``cv2.imread`` could not decode the file (it returns ``None``).
        Exception: if the decoded array is neither 2-D nor a 3-channel image.
    """
    raw = cv2.imread(path)
    if raw is None:
        # Without this guard the failure surfaces later as an opaque AttributeError.
        raise IOError("Could not read image file: " + path)
    if raw.ndim == 3 and raw.shape[-1] == 3:
        return cv2.cvtColor(raw, cv2.COLOR_BGR2GRAY)
    if raw.ndim == 2:
        return raw
    raise Exception("The module works only with grayscale and RGB image!")


# ---------------------------------------------------------------------------
# Per-pixel feature table: one row per pixel, one column per filter response.
# ---------------------------------------------------------------------------
image_path = "/content/drive/MyDrive/Luận Văn Thạc Sĩ/datasets/Sandstone-Versa/train_images/"
image_frames = []
# os.listdir() returns entries in arbitrary order; sorting both directory
# listings keeps the image rows and mask rows in the same file order.
for image in sorted(os.listdir(image_path)):
    print(image)
    img = _read_grayscale(os.path.join(image_path, image))

    df = pd.DataFrame()
    # Raw intensity of every pixel, flattened row by row.
    df['Pixel_value'] = img.reshape(-1)
    df['Image_Name'] = image

    # Gabor filter bank: 2 orientations x 2 sigmas x 4 wavelengths x 2 aspect
    # ratios = 32 responses, stored as Gabor1..Gabor32.
    # NOTE(review): the first `lamda` value is 0 (a zero wavelength) and
    # cv2.CV_8UC3 is passed where filter2D expects a depth. Both look
    # suspicious but are kept unchanged so the feature set stays identical to
    # the one the saved model was trained on. Confirm before changing.
    num = 1
    for theta in range(2):
        theta = theta / 4. * np.pi
        for sigma in (1, 3):
            for lamda in np.arange(0, np.pi, np.pi / 4):
                for gamma in (0.05, 0.5):
                    gabor_label = 'Gabor' + str(num)
                    ksize = 9
                    kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
                    fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
                    df[gabor_label] = fimg.reshape(-1)
                    print(gabor_label, ': theta=', theta, ': sigma=', sigma, ': lamda=', lamda, ': gamma=', gamma)
                    num += 1

    # Canny edge map (thresholds 100 / 200).
    df['Canny Edge'] = cv2.Canny(img, 100, 200).reshape(-1)

    # Gradient-magnitude edge filters from scikit-image.
    df['Roberts'] = roberts(img).reshape(-1)
    df['Sobel'] = sobel(img).reshape(-1)
    df['Scharr'] = scharr(img).reshape(-1)
    df['Prewitt'] = prewitt(img).reshape(-1)

    # Gaussian smoothing at two scales.
    df['Gaussian s3'] = nd.gaussian_filter(img, sigma=3).reshape(-1)
    df['Gaussian s7'] = nd.gaussian_filter(img, sigma=7).reshape(-1)

    # Median filter over a 3x3 window.
    df['Median s3'] = nd.median_filter(img, size=3).reshape(-1)

    # Local variance over a 3x3 window.
    # NOTE(review): generic_filter writes into an array of the input dtype by
    # default, so variances of 8-bit images may be truncated. Confirm.
    df['Variance s3'] = nd.generic_filter(img, np.var, size=3).reshape(-1)

    image_frames.append(df)

# Concatenate once instead of DataFrame.append() inside the loop: append copied
# the whole table on every iteration and no longer exists in pandas >= 2.0.
# ignore_index=True gives unique row labels so the column-wise concat below
# aligns rows one-to-one.
image_dataset = pd.concat(image_frames, ignore_index=True) if image_frames else pd.DataFrame()

# ---------------------------------------------------------------------------
# Per-pixel label table built from the mask images.
# ---------------------------------------------------------------------------
mask_path = "/content/drive/MyDrive/Luận Văn Thạc Sĩ/datasets/Sandstone-Versa/train_masks/"
mask_frames = []
for mask in sorted(os.listdir(mask_path)):
    print(mask)
    label = _read_grayscale(os.path.join(mask_path, mask))

    df2 = pd.DataFrame()
    df2['Label_value'] = label.reshape(-1)
    df2['Mask_Name'] = mask
    mask_frames.append(df2)

mask_dataset = pd.concat(mask_frames, ignore_index=True) if mask_frames else pd.DataFrame()

# Features and labels are paired purely by row position, so a size mismatch
# would silently attach labels to the wrong pixels (or pad with NaN).
if len(image_dataset) != len(mask_dataset):
    raise ValueError(
        "Image and mask tables have different numbers of pixels: "
        + str(len(image_dataset)) + " vs " + str(len(mask_dataset))
    )

dataset = pd.concat([image_dataset, mask_dataset], axis=1)

Sandstone_Versa0000.tif
Gabor1 : theta= 0.0 : sigma= 1 : lamda= 0.0 : gamma= 0.05
Gabor2 : theta= 0.0 : sigma= 1 : lamda= 0.0 : gamma= 0.5
Gabor3 : theta= 0.0 : sigma= 1 : lamda= 0.7853981633974483 : gamma= 0.05
Gabor4 : theta= 0.0 : sigma= 1 : lamda= 0.7853981633974483 : gamma= 0.5
Gabor5 : theta= 0.0 : sigma= 1 : lamda= 1.5707963267948966 : gamma= 0.05
Gabor6 : theta= 0.0 : sigma= 1 : lamda= 1.5707963267948966 : gamma= 0.5
Gabor7 : theta= 0.0 : sigma= 1 : lamda= 2.356194490192345 : gamma= 0.05
Gabor8 : theta= 0.0 : sigma= 1 : lamda= 2.356194490192345 : gamma= 0.5
Gabor9 : theta= 0.0 : sigma= 3 : lamda= 0.0 : gamma= 0.05
Gabor10 : theta= 0.0 : sigma= 3 : lamda= 0.0 : gamma= 0.5
Gabor11 : theta= 0.0 : sigma= 3 : lamda= 0.7853981633974483 : gamma= 0.05
Gabor12 : theta= 0.0 : sigma= 3 : lamda= 0.7853981633974483 : gamma= 0.5
Gabor13 : theta= 0.0 : sigma= 3 : lamda= 1.5707963267948966 : gamma= 0.05
Gabor14 : theta= 0.0 : sigma= 3 : lamda= 1.5707963267948966 : gamma= 0.5
Gabor15 : theta= 0

In [None]:
# Display the combined feature + label table as the cell's rich output.
dataset

Unnamed: 0,Pixel_value,Image_Name,Gabor1,Gabor2,Gabor3,Gabor4,Gabor5,Gabor6,Gabor7,Gabor8,...,Roberts,Sobel,Scharr,Prewitt,Gaussian s3,Gaussian s7,Median s3,Variance s3,Label_value,Mask_Name
0,0,Sandstone_Versa0000.tif,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0,0,0,0,29,Sandstone_Versa0000.tif
1,0,Sandstone_Versa0000.tif,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0,0,0,0,29,Sandstone_Versa0000.tif
2,0,Sandstone_Versa0000.tif,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0,0,0,0,29,Sandstone_Versa0000.tif
3,0,Sandstone_Versa0000.tif,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0,0,0,0,29,Sandstone_Versa0000.tif
4,0,Sandstone_Versa0000.tif,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0,0,0,0,29,Sandstone_Versa0000.tif
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1019899,0,Sandstone_Versa0400.tif,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0,0,0,0,29,Sandstone_Versa0400.tif
1019900,0,Sandstone_Versa0400.tif,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0,0,0,0,29,Sandstone_Versa0400.tif
1019901,0,Sandstone_Versa0400.tif,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0,0,0,0,29,Sandstone_Versa0400.tif
1019902,0,Sandstone_Versa0400.tif,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0,0,0,0,29,Sandstone_Versa0400.tif


In [None]:
# Persist the combined feature/label table to Drive as CSV. No `index`
# argument is passed, so pandas' default applies and the row index is written
# as the first column.
dataset.to_csv("/content/drive/MyDrive/Luận Văn Thạc Sĩ/datasets/Sandstone-Versa/features.csv")

In [None]:
# Keep only the rows whose mask value is non-zero.
# NOTE(review): presumably 0 marks unlabelled pixels. Confirm against the masks.
dataset = dataset.loc[dataset["Label_value"] != 0]

In [None]:
# Feature matrix: every column except the two file-name columns and the target.
X = dataset.drop(columns=["Image_Name", "Mask_Name", "Label_value"])

In [None]:
# Target vector: the mask value of each remaining pixel, taken from the
# 'Label_value' column via `.values`.
Y = dataset["Label_value"].values

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=20,
)

In [None]:
# Random forest with 50 trees; random_state is fixed so training is repeatable.
model = RandomForestClassifier(n_estimators=50, random_state=42)

In [None]:
# Train the classifier on the training split. fit() is the last expression, so
# its return value is shown as the cell output.
model.fit(X_train, y_train)

RandomForestClassifier(n_estimators=50, random_state=42)

In [None]:
from sklearn import metrics

In [None]:
# Score the held-out pixels and report the fraction classified correctly.
predict_test = model.predict(X_test)
print(
    "Accuracy = ",
    metrics.accuracy_score(y_true=y_test, y_pred=predict_test),
)

Accuracy =  0.9853995036572054


In [None]:
# Serialize the trained classifier to Drive.
model_name = "/content/drive/MyDrive/Luận Văn Thạc Sĩ/datasets/Sandstone-Versa/sandstone_model"
# The context manager guarantees the handle is flushed and closed even if
# pickling fails; the bare open() left the file open until garbage collection.
with open(model_name, 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
# Reload the classifier that was pickled to Drive.
filename = "/content/drive/MyDrive/Luận Văn Thạc Sĩ/datasets/Sandstone-Versa/sandstone_model"
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only load model files that you created or otherwise trust.
# The context manager closes the handle; the bare open() leaked it.
with open(filename, 'rb') as model_file:
    load_model = pickle.load(model_file)

In [None]:
def feature_extraction(img):
    """Build the per-pixel feature table used to classify one image.

    The columns and their order mirror the training table built earlier in
    this notebook: raw intensity, 32 Gabor responses, Canny, Roberts, Sobel,
    Scharr, Prewitt, two Gaussian blurs, a median filter and a local variance.

    Args:
        img: image array to describe. The caller in this notebook passes the
            2-D grayscale result of ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.

    Returns:
        pandas.DataFrame with one row per pixel (flattened with
        ``reshape(-1)``) and one column per feature.
    """
    df = pd.DataFrame()
    # Named 'Pixel_value' (not 'Original Image') to match the column the model
    # was fitted on; scikit-learn checks feature names at predict time.
    df['Pixel_value'] = img.reshape(-1)

    # Gabor filter bank: 2 orientations x 2 sigmas x 4 wavelengths x 2 aspect
    # ratios = 32 responses, stored as Gabor1..Gabor32.
    # NOTE(review): the first `lamda` value is 0 and cv2.CV_8UC3 is passed
    # where filter2D expects a depth. Both are kept unchanged so the features
    # match the ones used for training. Confirm before changing.
    num = 1
    for theta in range(2):
        theta = theta / 4. * np.pi
        for sigma in (1, 3):
            for lamda in np.arange(0, np.pi, np.pi / 4):
                for gamma in (0.05, 0.5):
                    gabor_label = 'Gabor' + str(num)
                    ksize = 9
                    kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
                    # Filter the 2-D image, as the training code does. The
                    # previous version filtered the already-flattened array,
                    # which yields different responses than the model saw
                    # during training.
                    fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
                    df[gabor_label] = fimg.reshape(-1)
                    print(gabor_label, ': theta=', theta, ': sigma=', sigma, ': lamda=', lamda, ': gamma=', gamma)
                    num += 1

    # Canny edge map (thresholds 100 / 200).
    df['Canny Edge'] = cv2.Canny(img, 100, 200).reshape(-1)

    # Gradient-magnitude edge filters from scikit-image.
    df['Roberts'] = roberts(img).reshape(-1)
    df['Sobel'] = sobel(img).reshape(-1)
    df['Scharr'] = scharr(img).reshape(-1)
    df['Prewitt'] = prewitt(img).reshape(-1)

    # Gaussian smoothing at two scales.
    df['Gaussian s3'] = nd.gaussian_filter(img, sigma=3).reshape(-1)
    df['Gaussian s7'] = nd.gaussian_filter(img, sigma=7).reshape(-1)

    # Median filter over a 3x3 window.
    df['Median s3'] = nd.median_filter(img, size=3).reshape(-1)

    # Local variance over a 3x3 window.
    df['Variance s3'] = nd.generic_filter(img, np.var, size=3).reshape(-1)
    return df

In [None]:
# Glob pattern matching every .tif file in the training-image folder.
path = "/content/drive/MyDrive/Luận Văn Thạc Sĩ/datasets/Sandstone-Versa/train_images/*.tif"

In [None]:
import glob
from matplotlib import pyplot as plt

In [None]:
# Segment every image matched by `path` with the reloaded model and save a
# colour-mapped rendering of the predicted labels.
segmented_dir = '/content/drive/MyDrive/Luận Văn Thạc Sĩ/datasets/Sandstone-Versa/Segmented/'
# plt.imsave does not create missing folders, so make sure the target exists.
os.makedirs(segmented_dir, exist_ok=True)

# glob order is filesystem-dependent; sort for a reproducible processing order.
for file in sorted(glob.glob(path)):
    img_read = cv2.imread(file)
    if img_read is None:
        # cv2.imread returns None instead of raising when a file cannot be decoded.
        raise IOError("Could not read image file: " + file)
    img = cv2.cvtColor(img_read, cv2.COLOR_BGR2GRAY)

    X = feature_extraction(img)
    result = load_model.predict(X)
    # One predicted label per pixel -> back to the image's 2-D layout.
    segmented = result.reshape(img.shape)

    # Derive the output name from the file name only. Splitting the whole path
    # on "e_" broke whenever a directory contained "e_" or the file name did
    # not. For names like "Sandstone_Versa0000.tif" this still yields
    # "Versa0000.tif"; otherwise it falls back to the full file name.
    base_name = os.path.basename(file)
    out_name = base_name.partition("e_")[2] or base_name
    plt.imsave(os.path.join(segmented_dir, out_name), segmented, cmap='jet')
