___
# Pertemuan 11 (Feature Extraction)

- Statistical Feature 
- Gray Level Co-occurrence Matrix (GLCM)

In [None]:
# import library 

import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

___
### 0. Final Class Preprocessing

In [None]:
class Preprocessing :
    """Background-removal and cropping pipeline for a folder-per-label image dataset.

    The stages are meant to be run in this order, each one reading the list
    produced by the previous stage:
    ImageRead -> RangeTresholding -> EdgeDetection -> FindContour ->
    FilterContour -> CropByContour -> Resize -> SaveAllImage.

    Every stage rebuilds its output list from scratch, so re-running a stage
    (for example re-executing a notebook cell) does not duplicate entries and
    all per-image lists stay index-aligned with ``labels``.
    """

    # OpenCV stores the hue of 8-bit HSV images as 0..179, i.e. hue is periodic
    # with period 180. Used to unfold negative lower hue bounds.
    HUE_PERIOD = 180

    def __init__(self, DATASET_FOLDER = "Dataset_Tomat/"):
        """Create empty stage outputs and the HSV color ranges.

        DATASET_FOLDER -- root folder that holds one sub-folder per label.
        """
        self.labels = []
        self.image_list = []
        self.image_range = []
        self.image_edged = []
        self.contours_list = []
        self.filtered_contours_list = []
        self.image_croped = []
        self.image_resized = []
        self.DATASET_FOLDER = DATASET_FOLDER

        # define range of red color in HSV
        # (the negative lower hue means "10 below 0"; see _range_mask)
        self.lower_red = np.array([-10, 75, 50])
        self.upper_red = np.array([10, 255, 255])

        # define range of green color in HSV
        self.lower_green = np.array([35, 100, 50])
        self.upper_green = np.array([70, 255, 255])

        # define range of yellow color in HSV
        self.lower_yellow = np.array([10, 125, 50])
        self.upper_yellow = np.array([35, 255, 255])

        # define range of black color in HSV
        self.lower_black = np.array([0, 0, 0])
        self.upper_black = np.array([255, 255, 50])

    def _range_mask(self, hsv, lower, upper):
        """Return the 0/255 mask of pixels of ``hsv`` inside [lower, upper].

        cv2.inRange does not wrap around the hue circle, so a negative lower
        hue bound on its own only selects hue 0..upper. When the lower hue is
        negative, the missing part (HUE_PERIOD + lower hue .. HUE_PERIOD) is
        thresholded separately and OR-ed in.
        """
        mask = cv2.inRange(hsv, lower, upper)
        if lower[0] < 0:
            wrapped_lower = np.array([self.HUE_PERIOD + lower[0], lower[1], lower[2]])
            wrapped_upper = np.array([self.HUE_PERIOD, upper[1], upper[2]])
            mask = cv2.bitwise_or(mask, cv2.inRange(hsv, wrapped_lower, wrapped_upper))
        return mask

    def ImageRead(self):
        """Load every readable image below DATASET_FOLDER into ``image_list``.

        The name of the sub-folder an image lives in is stored as its label.
        Entries that are not directories and files OpenCV cannot decode
        (cv2.imread returns None for them) are skipped so that later stages
        never receive None. Folders and files are visited in sorted order to
        make the image indices reproducible.
        """
        self.image_list = []
        self.labels = []
        for folder in sorted(os.listdir(self.DATASET_FOLDER)):
            folder_path = os.path.join(self.DATASET_FOLDER, folder)
            if not os.path.isdir(folder_path):
                continue
            for file in sorted(os.listdir(folder_path)):
                img = cv2.imread(os.path.join(folder_path, file))
                if img is None:
                    continue
                self.image_list.append(img)
                self.labels.append(folder) # append label (name) of image

    def RangeTresholding(self):
        """Black out every pixel that is outside all configured HSV ranges.

        Fills ``image_range`` with one background-removed BGR image per entry
        of ``image_list``.
        """
        hsv_ranges = [
            (self.lower_green, self.upper_green),
            (self.lower_red, self.upper_red),
            (self.lower_yellow, self.upper_yellow),
            (self.lower_black, self.upper_black),
        ]
        self.image_range = []
        for img in self.image_list :
            # convert to hsv
            hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

            # union of all range masks; bitwise OR instead of uint8 addition,
            # which wraps around when masks overlap
            mask = np.zeros(hsv.shape[:2], dtype=np.uint8)
            for lower, upper in hsv_ranges:
                mask = cv2.bitwise_or(mask, self._range_mask(hsv, lower, upper))

            res = cv2.bitwise_and(img, img, mask= mask)
            self.image_range.append(res)

    def EdgeDetection(self):
        """Run Canny (thresholds 200/210) on every image of ``image_range``."""
        self.image_edged = [cv2.Canny(img, 200, 210) for img in self.image_range]

    def FindContour(self):
        """Collect the external contours of every edge image into ``contours_list``."""
        self.contours_list = []
        for img in self.image_edged:
            found = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
            # OpenCV 3 returns (image, contours, hierarchy), OpenCV 4 returns
            # (contours, hierarchy): the contours are second-to-last in both.
            self.contours_list.append(found[-2])

    def FilterContour(self, min_area=50, min_w=10, min_h=10):
        """Drop contours whose bounding box is too small.

        A contour is kept when its bounding-box area is at least ``min_area``
        and the box is at least ``min_w`` wide and ``min_h`` high. The result
        (one list per image) goes to ``filtered_contours_list``.
        """
        self.filtered_contours_list = []
        for contours in self.contours_list:
            filtered_contours = []
            for cnt in contours:
                x, y, w, h = cv2.boundingRect(cnt)
                if w * h >= min_area and w >= min_w and h >= min_h:
                    filtered_contours.append(cnt)
            self.filtered_contours_list.append(filtered_contours)

    def CropByContour(self):
        """Crop every background-removed image to the box around its kept contours.

        An image with no remaining contour is kept uncropped instead of
        crashing in np.concatenate, so ``image_croped`` stays aligned with
        ``labels``.

        Raises IndexError when FilterContour has not produced an entry for
        every image of ``image_range``.
        """
        if len(self.filtered_contours_list) < len(self.image_range):
            raise IndexError("FilterContour() must be run for every image before CropByContour()")

        self.image_croped = []
        for img, contours in zip(self.image_range, self.filtered_contours_list):
            if len(contours) == 0:
                self.image_croped.append(img)
                continue
            cnt = np.concatenate(contours, axis=0) # concate all remaining contour each image
            x, y, w, h = cv2.boundingRect(cnt)
            self.image_croped.append(img[y:y+h, x:x+w])

    def Resize(self, size=(172,172)):
        """Resize every cropped image to ``size`` (passed to cv2.resize as (size[0], size[1]))."""
        self.image_resized = [cv2.resize(img, (size[0], size[1])) for img in self.image_croped]

    def SaveAllImage(self, RESIZED_FOLDER = "resized_tomato/"):
        """Write every resized image to RESIZED_FOLDER/<label>/<label>_<index>.jpg.

        Missing folders (including nested ones) are created on demand.
        """
        os.makedirs(RESIZED_FOLDER, exist_ok=True)

        for i, img in enumerate(self.image_resized):
            label = self.labels[i]

            # check if folder exist. if not, create that folder
            folder_path = os.path.join(RESIZED_FOLDER, label)
            os.makedirs(folder_path, exist_ok=True)

            # save image
            file_name = label + "_%03d.jpg" % i
            cv2.imwrite(os.path.join(folder_path, file_name), img)

In [None]:
# Build the pipeline object with its default dataset folder ("Dataset_Tomat/").
Prepro = Preprocessing()

In [None]:
# Run the first five stages in order; each stage reads the list filled by the previous one.
Prepro.ImageRead()          # load images + labels
Prepro.RangeTresholding()   # remove background by HSV range masks
Prepro.EdgeDetection()      # Canny edges of the background-removed images
Prepro.FindContour()        # external contours of the edge images
Prepro.FilterContour()      # drop contours with a too-small bounding box

In [None]:
# show all Range Thresholded Image
# The grid grows with the number of images: a fixed 2x5 grid makes
# plt.subplot fail as soon as there are more than 10 images.

cols = 5
n_images = len(Prepro.image_range)
rows = max(2, (n_images + cols - 1) // cols)  # ceiling division, never fewer than 2 rows
plt.figure(figsize=(20, 3.5 * rows))  # 3.5 inch per row, i.e. (20, 7) for 2 rows
for i, image in enumerate(Prepro.image_range):
    plt.subplot(rows, cols, i+1)
    plt.title(Prepro.labels[i])
    plt.imshow(image[:,:,::-1])  # reverse the channel axis: BGR -> RGB

In [None]:
# show all edged Image
# The grid grows with the number of images: a fixed 2x5 grid makes
# plt.subplot fail as soon as there are more than 10 images.

cols = 5
n_images = len(Prepro.image_edged)
rows = max(2, (n_images + cols - 1) // cols)  # ceiling division, never fewer than 2 rows
plt.figure(figsize=(20, 3.5 * rows))  # 3.5 inch per row, i.e. (20, 7) for 2 rows
for i, image in enumerate(Prepro.image_edged):
    plt.subplot(rows, cols, i+1)
    plt.title(Prepro.labels[i])
    plt.imshow(image, cmap="gray")

In [None]:
# Crop each background-removed image to the bounding box of its kept contours,
# then resize every crop to the default size of 172x172.
Prepro.CropByContour()
Prepro.Resize()

In [None]:
# show all Resized Image
# The grid grows with the number of images: a fixed 2x5 grid makes
# plt.subplot fail as soon as there are more than 10 images.

cols = 5
n_images = len(Prepro.image_resized)
rows = max(2, (n_images + cols - 1) // cols)  # ceiling division, never fewer than 2 rows
plt.figure(figsize=(20, 3.5 * rows))  # 3.5 inch per row, i.e. (20, 7) for 2 rows
for i, image in enumerate(Prepro.image_resized):
    plt.subplot(rows, cols, i+1)
    plt.title(Prepro.labels[i])
    plt.imshow(image[:,:,::-1])  # reverse the channel axis: BGR -> RGB

In [None]:
# Write the resized images to the default output folder ("resized_tomato/"), one sub-folder per label.
Prepro.SaveAllImage()

___

## Install library scipy & scikit-image
`conda install scipy`\
`conda install scikit-image`

___
### 1. Statistical Feature


#### 1.1 Color Mean
- The color mean is extracted from each color channel.<br>
<img src="resource/Color Mean.png" ></img><br>
<img src="resource/mean.gif" ></img>

In [None]:
# Read the sample image; the following cells index its channels as 0=blue, 1=green, 2=red.
# NOTE(review): cv2.imread presumably yields None when the file cannot be read — confirm the path exists.
img = cv2.imread("Tomat.jpg")

In [None]:
# Split the image into its three 2-D channel planes.
# channel index 0 -> blue, 1 -> green, 2 -> red
img_blue, img_green, img_red = (img[:, :, ch] for ch in range(3))

In [None]:
# Show the three channel planes side by side (1 row x 3 columns), each as a grayscale image.
plt.figure(figsize=(20,6))
plt.subplot(1,3,1)
plt.title("Blue channel")
plt.imshow(img_blue, cmap="gray")

plt.subplot(1,3,2)
plt.title("Green channel")
plt.imshow(img_green, cmap="gray")

plt.subplot(1,3,3)
plt.title("Red channel")
plt.imshow(img_red, cmap="gray")

In [None]:
# calculate color mean
# one arithmetic mean per channel plane, taken over all pixels of that plane
color_mean_blue, color_mean_green, color_mean_red = (
    plane.mean() for plane in (img_blue, img_green, img_red)
)

print("Color Mean -> B: %.2f, G: %.2f, R: %.2f" % (color_mean_blue, color_mean_green, color_mean_red))

### 1.2 Standard Deviation
- The standard deviation is extracted from each color channel. \
<img src="resource/Standard Deviation.png" > </img><br>
<img src="resource/std.png" ></img>

In [None]:
# calculate standard deviation
# one standard deviation per channel plane, taken over all pixels of that plane
std_blue, std_green, std_red = (
    plane.std() for plane in (img_blue, img_green, img_red)
)

print("Color Standard Deviation -> B: %.2f, G: %.2f, R: %.2f" % (std_blue, std_green, std_red))

### 1.3 Skewness
- The skewness is extracted from each color channel. <br>
<img src="resource/Skewness.png" > </img><br>
<img src="resource/skew.png" > </img>

In [None]:
from scipy.stats import skew

# calculate skewness
# skew() works along one axis, so every 2-D channel plane is flattened to 1-D first
skew_blue, skew_green, skew_red = (
    skew(plane.ravel()) for plane in (img_blue, img_green, img_red)
)

print("Color Skewness -> B: %.2f, G: %.2f, R: %.2f" % (skew_blue, skew_green, skew_red))

- **note** : `.reshape(-1)` akan mengubah matrix 2D menjadi matrix 1D

### 1.4 Calculate Mean, STD & Skewness from Background Removed Image (Range Thresholding)

In [None]:
# HSV bounds used by the range thresholding cell below, each given as [hue, saturation, value].
# NOTE(review): OpenCV's 8-bit hue appears to span 0..179 and inRange does not wrap, so the
# negative lower hue of the red range would act like 0 instead of covering the 170..179 reds — confirm.

# define range of red color in HSV
lower_red = np.array([-10, 50, 50])
upper_red = np.array([10, 255, 255])

# define range of green color in HSV
lower_green = np.array([30, 25, 25])
upper_green = np.array([70, 255, 255])

# define range of black color in HSV
lower_black = np.array([0, 0, 0])
upper_black = np.array([255, 255, 50])

In [None]:
# load image
img = cv2.imread('Tomat.jpg')
if img is None:
    # cv2.imread reports a missing or undecodable file by returning None
    raise FileNotFoundError("Tomat.jpg could not be read")

# convert to hsv
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# apply range thresholding
mask_green = cv2.inRange(hsv, lower_green, upper_green)
mask_red = cv2.inRange(hsv, lower_red, upper_red)
mask_black = cv2.inRange(hsv, lower_black, upper_black)

# 8-bit hue is periodic with period 180 and inRange does not wrap around, so a
# negative lower hue only selects 0..upper hue. Add the missing part of the red
# range that lies just below 180.
if lower_red[0] < 0:
    wrapped_lower = np.array([180 + lower_red[0], lower_red[1], lower_red[2]])
    wrapped_upper = np.array([180, upper_red[1], upper_red[2]])
    mask_red = cv2.bitwise_or(mask_red, cv2.inRange(hsv, wrapped_lower, wrapped_upper))

# union of the masks; bitwise OR instead of uint8 addition, which wraps around on overlap
mask = cv2.bitwise_or(cv2.bitwise_or(mask_green, mask_red), mask_black)
res = cv2.bitwise_and(img, img, mask= mask)

In [None]:
# Show the background-removed image; the last axis is reversed to turn B,G,R order into R,G,B for display.
plt.imshow(res[:,:,::-1])

In [None]:
# Split the background removed image into its three 2-D channel planes.
# channel index 0 -> blue, 1 -> green, 2 -> red
img_blue, img_green, img_red = (res[:, :, ch] for ch in range(3))

In [None]:
# calculate color mean from background removed image
# one arithmetic mean per channel plane, taken over all pixels of that plane
color_mean_blue, color_mean_green, color_mean_red = (
    plane.mean() for plane in (img_blue, img_green, img_red)
)

print("Color Mean -> B: %.2f, G: %.2f, R: %.2f" % (color_mean_blue, color_mean_green, color_mean_red))

In [None]:
# calculate standard deviation from background removed image
# one standard deviation per channel plane, taken over all pixels of that plane
std_blue, std_green, std_red = (
    plane.std() for plane in (img_blue, img_green, img_red)
)

print("Color Standard Deviation -> B: %.2f, G: %.2f, R: %.2f" % (std_blue, std_green, std_red))

In [None]:
# calculate skewness from background removed image
# every 2-D channel plane is flattened to 1-D before it is handed to skew()
skew_blue, skew_green, skew_red = (
    skew(plane.ravel()) for plane in (img_blue, img_green, img_red)
)

print("Color Skewness -> B: %.2f, G: %.2f, R: %.2f" % (skew_blue, skew_green, skew_red))

### 1.5 Create class **FeatureExtraction** to calculate Statistical Feature for All Image
- Statistical Feature saved as list of dictionary data 
```
[
    {
    'b' : {
            'mean' : xxx,
            'std' : xxx,
            'skewness' : xxx
        },
    'g' : {
            'mean' : xxx,
            'std' : xxx,
            'skewness' : xxx
        },
    'r' : {
            'mean' : xxx,
            'std' : xxx,
            'skewness' : xxx
        }
    },
    .
    .
    .
]
```

In [None]:
class FeatureExtraction :
    """Statistical color features for a folder-per-label set of preprocessed images.

    Call ImageRead() first, then CalcStatisticalFeature(). The result is a
    list with one dictionary per image, laid out as
    ``{'b': {'mean': ..., 'std': ..., 'skewness': ...}, 'g': {...}, 'r': {...}}``.
    """

    def __init__(self, PREPROCESSED_DATASET_FOLDER = "resized_tomato/"):
        """Create empty containers.

        PREPROCESSED_DATASET_FOLDER -- root folder that holds one sub-folder per label.
        """
        self.labels = []
        self.image_list = []
        self.statistical_features = []
        self.color_ch = ['b', 'g', 'r']  # channel names, in the channel index order 0, 1, 2
        self.PREPROCESSED_DATASET_FOLDER = PREPROCESSED_DATASET_FOLDER

    def ImageRead(self):
        """Load every readable image below PREPROCESSED_DATASET_FOLDER.

        The sub-folder name is stored as the label of each image. Entries that
        are not directories and files OpenCV cannot decode (cv2.imread returns
        None for them) are skipped. Both lists are rebuilt on every call and
        filled in sorted order, so indices are reproducible.
        """
        self.image_list = []
        self.labels = []
        for folder in sorted(os.listdir(self.PREPROCESSED_DATASET_FOLDER)):
            folder_path = os.path.join(self.PREPROCESSED_DATASET_FOLDER, folder)
            if not os.path.isdir(folder_path):
                continue
            for file in sorted(os.listdir(folder_path)):
                img = cv2.imread(os.path.join(folder_path, file))
                if img is None:
                    continue
                self.image_list.append(img)
                self.labels.append(folder) # append label (name) of image

    def CalcStatisticalFeature(self):
        """Compute mean, standard deviation and skewness per channel of every image.

        The statistics are taken from the image currently being iterated and
        the channel names come from ``self.color_ch`` (not from notebook-level
        variables). ``statistical_features`` is rebuilt on every call so it
        stays index-aligned with ``image_list``.
        """
        self.statistical_features = []
        for img in self.image_list:
            feature_ch = {}
            for i, ch in enumerate(self.color_ch):
                plane = img[:, :, i]
                feature_ch[ch] = {
                    'mean' : plane.mean(),
                    'std' : plane.std(),
                    'skewness' : skew(plane.reshape(-1))  # skew() needs the plane flattened to 1-D
                }

            self.statistical_features.append(feature_ch)

In [None]:
# Build the extractor with its default input folder ("resized_tomato/").
Feature = FeatureExtraction()

In [None]:
# Load the preprocessed images, then compute the per-channel statistics for them.
Feature.ImageRead()
Feature.CalcStatisticalFeature()

In [None]:
# statistical features of the first image (all channels)
Feature.statistical_features[0]

In [None]:
# statistical features of the blue channel of the first image
Feature.statistical_features[0]['b']

In [None]:
# mean of the blue channel of the first image
Feature.statistical_features[0]['b']['mean']

In [None]:
# skewness of the blue channel of the first image
Feature.statistical_features[0]['b']['skewness']

___
# 2. Gray Level Co-occurrence Matrix

Original Source : [Feature Extraction : Gray Level Co-occurrence Matrix (GLCM)](https://yunusmuhammad007.medium.com/feature-extraction-gray-level-co-occurrence-matrix-glcm-10c45b6d46a1)

- Ilustrasi tekstur pada citra dengan distribusi 50% hitam dan 50% putih. <br>
<img src="resource/texture.png"></img><br>
- Kalkulasi statistik seperti *mean, median, maupun standar deviasi* tidak akan mampu membedakan ke-3 gambar diatas.
- Gray-Level Co-occurrence matrix (GLCM) merupakan teknik analisis **tekstur** pada citra. 
- GLCM merepresentasikan **hubungan antara 2 pixel** yang bertetanggaan (neighboring pixels) yang memiliki **intensitas keabuan** (grayscale intensity), **jarak** dan **sudut**. 
- Terdapat 8 sudut yang dapat digunakan pada GLCM, diantaranya sudut 0°, 45°, 90°, 135°, 180°, 225°, 270°, atau 315°.<br>
<img src="resource/glcm_angel.png"></img><br>

#### Langkah Pembuatan GLCM Matrix
- Pembuatan *framework matrix*
- Pembuatan *co-occurrence matrix* (mengisi framework matrix)
- Pembuatan *symmetric matrix* (penjumlahan co-occurrence matrix dengan transpose matrix)
- *Matrix normalization* yang akan menghasilkan nilai matrix antara 0–1

**1. Pembuatan framework matrix**
- Misalkan kita memiliki **grayscale matrix** dengan ukuran 3x3, dengan **gray tone 0–3** seperti berikut,<br>
<img src="resource/gray_matrix.png"></img><br>
- Define **framework matrix** bernilai 0 dengan dimensi 4x4, ukuran ini didapatkan dari **quantization level** matrix diatas,<br>
`Quantization level = count(gray tone)`
- Sehingga untuk gray tone 0–3 kita mendapatkan **quantization level = 4**, dengan demikian kita harus membuat framework matrix dengan size 4x4,<br>
<img src="resource/framework_matrix.png"></img><br>
- Tiap posisi pada *framework matrix* merupakan kombinasi nilai pixel pada matrix input 3x3,<br>
<img src="resource/framework_matrix_pad.png"></img><br>
- Pada **citra digital 8-bit** akan memiliki **quantization level 256**, mengingat gray tone-nya antara 0-255.

**2. Pembuatan co-occurrence matrix (mengisi framework matrix)**
- Selanjutnya kita akan menggunakan **distance = 1** dan **angle = 0°**, untuk membentuk *co-occurrence matrix*,<br>
<img src="resource/gif_glcm.gif"></img><br>

**3. Pembuatan symmetric matrix (penjumlahan co-occurrence matrix dengan transpose matrix)**
- Setelah itu buatlah *symmetric matrix* dengan **menjumlahkan** GLCM matrix dengan hasil **transpose-nya**,<br>
<img src="resource/sym_matrix.png"></img><br>
- Hasil penjumlahan GLCM matrix dan hasil transpose-nya,<br>
<img src="resource/re_sym_matrix.png"></img><br>

**4. Matrix normalization yang akan menghasilkan nilai matrix antara 0–1**
- lakukan *matrix normalization* pada *symmetric matrix* dengan formula,<br>
<img src="resource/norm_matrix.png"></img><br>
- Sehingga dihasilkan matrix hasil normalisasinya sebagai berikut,<br>
<img src="resource/norm_matrix_2.png"></img><br>

### 2.1 GLCM Texture Feature
- GLCM texture feature :
    - contrast,
    - correlation, 
    - energy,
    - homogeneity
- The four texture features are extracted from **each color channel** using gray level co-occurrence matrix (GLCM) of the image.<br>
<img src="resource/GLCM_metric.png" style="width:400px"></img><br>

### 2.2 GLCM implementation using Scikit-Image
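- Scikit-Image menyediakan perhitungan GLCM pada module `skimage.feature`, yaitu fungsi `greycomatrix` untuk mendapatkan GLCM matrix, dan fungsi `greycoprops` untuk menghitung texture feature pada GLCM (pada scikit-image versi 0.19 ke atas kedua fungsi ini bernama `graycomatrix` dan `graycoprops`),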

In [None]:
# scikit-image 0.19 renamed greycomatrix/greycoprops to graycomatrix/graycoprops and
# later releases dropped the old spellings. Try the new names first and bind them to
# the old identifiers, so the rest of the notebook runs on either version.
try:
    from skimage.feature import graycomatrix as greycomatrix, graycoprops as greycoprops
except ImportError:
    from skimage.feature import greycomatrix, greycoprops

- simple GLCM calculation for 3x3 grayscale image with gray tone 0-3

In [None]:
# 3x3 toy image with gray tones 0..3
input_matrix = np.array([0, 0, 1,
                         1, 2, 3,
                         2, 3, 2]).reshape(3, 3)

# symmetric, normalised GLCM for pixel distance 1 at 0, 45, 90 and 135 degrees,
# counted over 4 gray levels
glcm = greycomatrix(
    input_matrix,
    [1],
    [0, np.pi/4, np.pi/2, 3*np.pi/4],
    levels=4,
    symmetric=True,
    normed=True,
)

In [None]:
# GLCM matrix for distance index 0 and angle index 0 (0 degrees)

print(glcm[:, :, 0, 0])

In [None]:
# GLCM matrix for distance index 0 and angle index 1 (45 degrees)

print(glcm[:, :, 0, 1])

In [None]:
# GLCM matrix for distance index 0 and angle index 2 (90 degrees)

print(glcm[:, :, 0, 2])

In [None]:
# GLCM matrix for distance index 0 and angle index 3 (135 degrees)

print(glcm[:, :, 0, 3])

- Calculate GLCM feature from image

In [None]:
img = cv2.imread("Tomat.jpg")

# channel index 0 is the blue plane
blue = img[..., 0]

# symmetric, normalised GLCM of the blue plane for pixel distance 1 at
# 0, 45, 90 and 135 degrees, counted over the 256 levels of an 8 bit image
glcm_img = greycomatrix(
    blue,
    [1],
    [0, np.pi/4, np.pi/2, 3*np.pi/4],
    levels=256,
    symmetric=True,
    normed=True,
)

In [None]:
# GLCM matrix of the image for distance index 0 and angle index 0 (0 degrees)

print(glcm_img[:, :, 0, 0])

In [None]:
# size of one GLCM slice; the matrix was built with levels=256
glcm_img[:, :, 0, 0].shape

### 2.3 GLCM Texture Feature using Scikit-Image

In [None]:
# One entry per texture property, in the order of texture_feature_labels.
# Row 0 of the greycoprops result is kept (the single distance used for glcm_img).
texture_feature_labels = ['correlation', 'homogeneity', 'contrast', 'energy']
texture_feature_outputs = [
    greycoprops(glcm_img, prop_name)[0] for prop_name in texture_feature_labels
]

In [None]:
# all four texture features, in the order correlation, homogeneity, contrast, energy
texture_feature_outputs

In [None]:
# GLCM texture correlation feature for the angles 0, 45, 90, 135 degrees
texture_feature_outputs[0]

In [None]:
# GLCM texture homogeneity feature for the angles 0, 45, 90, 135 degrees
texture_feature_outputs[1]

### 2.4 Calculate GLCM Matrix Feature for All Image

In [None]:
# load cropped, background removed images
# Entries that are not directories and files OpenCV cannot decode (cv2.imread
# returns None for them) are skipped; sorted order makes the indices reproducible.
image_list = []
labels = []
PREPROCESSED_DATASET_FOLDER = "resized_tomato/"

for folder in sorted(os.listdir(PREPROCESSED_DATASET_FOLDER)):
    folder_path = os.path.join(PREPROCESSED_DATASET_FOLDER, folder)
    if not os.path.isdir(folder_path):
        continue
    for file in sorted(os.listdir(folder_path)):
        img = cv2.imread(os.path.join(folder_path, file))
        if img is None:
            continue
        image_list.append(img)
        labels.append(folder) # append label (name) of image

- GLCM Matrix saved as list of dictionary data 
```
[
    {
        'b' : <256x256 glcm matrix>,
        'g' : <256x256 glcm matrix>,
        'r' : <256x256 glcm matrix>
    },
    {
        'b' : <256x256 glcm matrix>,
        'g' : <256x256 glcm matrix>,
        'r' : <256x256 glcm matrix>
    },
    .
    .
    .
]
```

In [None]:
# Build one dictionary per image that maps a channel name to the GLCM of that channel.
color_ch = ['b', 'g', 'r']
glcm_matrix_list = []

for img in image_list:
    matrix_ch = {}
    for i, ch_name in enumerate(color_ch):
        # symmetric, normalised GLCM of channel i for pixel distance 1 at
        # 0, 45, 90 and 135 degrees, counted over the 256 levels of an 8 bit image
        glcm_img = greycomatrix(
            img[:, :, i],
            [1],
            [0, np.pi/4, np.pi/2, 3*np.pi/4],
            levels=256,
            symmetric=True,
            normed=True,
        )
        matrix_ch[ch_name] = glcm_img

    glcm_matrix_list.append(matrix_ch)

In [None]:
# GLCM matrices (all channels) of the first image
glcm_matrix_list[0]

In [None]:
# GLCM matrix of the red channel of the first image
glcm_matrix_list[0]['r']

### 2.5 Calculate GLCM Texture Feature for All GLCM Matrix

- GLCM Texture Feature saved as list of dictionary data 
```
[
    {
    'b' : {
            'correlation' : <feature output in angle 0, 45, 90, 135 degree>,
            'homogeneity' : <feature output in angle 0, 45, 90, 135 degree>,
            'contrast' : <feature output in angle 0, 45, 90, 135 degree>,
            'energy' : <feature output in angle 0, 45, 90, 135 degree>
        },
    'g' : {
            'correlation' : <feature output in angle 0, 45, 90, 135 degree>,
            'homogeneity' : <feature output in angle 0, 45, 90, 135 degree>,
            'contrast' : <feature output in angle 0, 45, 90, 135 degree>,
            'energy' : <feature output in angle 0, 45, 90, 135 degree>
        },
    'r' : {
            'correlation' : <feature output in angle 0, 45, 90, 135 degree>,
            'homogeneity' : <feature output in angle 0, 45, 90, 135 degree>,
            'contrast' : <feature output in angle 0, 45, 90, 135 degree>,
            'energy' : <feature output in angle 0, 45, 90, 135 degree>
        }
    },
    .
    .
    .
]
```

In [None]:
# Turn every GLCM matrix into its texture features:
# one dict per image -> one dict per channel -> one entry per texture property.
# Row 0 of each greycoprops result is kept (the matrices hold a single distance).
color_ch = ['b', 'g', 'r']
texture_feature_labels = ['correlation', 'homogeneity', 'contrast', 'energy']
glcm_feature_list = [
    {
        ch_name: {
            prop_name: greycoprops(per_image[ch_name], prop_name)[0]
            for prop_name in texture_feature_labels
        }
        for ch_name in color_ch
    }
    for per_image in glcm_matrix_list
]

In [None]:
# all GLCM texture features of the first image (every channel)

glcm_feature_list[0]

In [None]:
# all GLCM texture features of the blue channel of the first image

glcm_feature_list[0]['b']

In [None]:
# GLCM correlation feature of the blue channel of the first image (one value per angle)

glcm_feature_list[0]['b']['correlation']

In [None]:
# GLCM correlation feature of the blue channel of the first image at one specific angle
# (index 1 -> 45 degrees, following the angle order 0, 45, 90, 135 used to build the GLCM)

glcm_feature_list[0]['b']['correlation'][1] # correlation feature at the 45 degree angle

### Task
- Tambahkan method **CalcGLCMMatrix** ke class FeatureExtraction

In [None]:
class FeatureExtraction :
    """Statistical color features and GLCM matrices for preprocessed images.

    Call ImageRead() first, then CalcStatisticalFeature() and/or
    CalcGLCMMatrix().

    ``statistical_features`` holds one dictionary per image, laid out as
    ``{'mean': {'b': ..., 'g': ..., 'r': ...}, 'std': {...}, 'skewness': {...}}``.
    ``glcm_matrix_list`` holds one dictionary per image that maps each channel
    name of ``color_ch`` to the GLCM of that channel.
    """

    def __init__(self, PREPROCESSED_DATASET_FOLDER = "resized_tomato/"):
        """Create empty containers.

        PREPROCESSED_DATASET_FOLDER -- root folder that holds one sub-folder per label.
        """
        self.labels = []
        self.image_list = []
        self.statistical_features = []
        self.glcm_matrix_list = []
        self.color_ch = ['b', 'g', 'r']  # channel names, in the channel index order 0, 1, 2
        self.PREPROCESSED_DATASET_FOLDER = PREPROCESSED_DATASET_FOLDER

    def ImageRead(self):
        """Load every readable image below PREPROCESSED_DATASET_FOLDER.

        The sub-folder name is stored as the label of each image. Entries that
        are not directories and files OpenCV cannot decode (cv2.imread returns
        None for them) are skipped. Both lists are rebuilt on every call and
        filled in sorted order, so indices are reproducible.
        """
        self.image_list = []
        self.labels = []
        for folder in sorted(os.listdir(self.PREPROCESSED_DATASET_FOLDER)):
            folder_path = os.path.join(self.PREPROCESSED_DATASET_FOLDER, folder)
            if not os.path.isdir(folder_path):
                continue
            for file in sorted(os.listdir(folder_path)):
                img = cv2.imread(os.path.join(folder_path, file))
                if img is None:
                    continue
                self.image_list.append(img)
                self.labels.append(folder) # append label (name) of image

    def CalcStatisticalFeature(self):
        """Compute mean, standard deviation and skewness per channel of every image.

        The statistics are taken from the image currently being iterated (not
        from a notebook-level variable). ``statistical_features`` is rebuilt on
        every call so it stays index-aligned with ``image_list``.
        """
        self.statistical_features = []
        for img in self.image_list:
            # blue, green, red channel planes of the current image
            img_blue = img[:,:,0]
            img_green = img[:,:,1]
            img_red = img[:,:,2]

            feature = {}

            # color mean of each channel
            feature['mean'] = {
                'b' : img_blue.mean(),
                'g' : img_green.mean(),
                'r' : img_red.mean()
            }

            # standard deviation of each channel
            feature['std'] = {
                'b' : img_blue.std(),
                'g' : img_green.std(),
                'r' : img_red.std()
            }

            # skewness of each channel (skew() needs the plane flattened to 1-D)
            feature['skewness'] = {
                'b' : skew(img_blue.reshape(-1)),
                'g' : skew(img_green.reshape(-1)),
                'r' : skew(img_red.reshape(-1))
            }

            self.statistical_features.append(feature)

    def CalcGLCMMatrix(self):
        """Compute the GLCM of every channel of every image.

        Uses the same settings as the notebook-level GLCM loop: pixel
        distance 1, angles 0, 45, 90 and 135 degrees, 256 gray levels,
        symmetric and normalised. ``greycomatrix`` is the function imported
        from skimage.feature earlier in this file. ``glcm_matrix_list`` is
        rebuilt on every call.
        """
        angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]
        self.glcm_matrix_list = []
        for img in self.image_list:
            matrix_ch = {}
            for i, ch in enumerate(self.color_ch):
                matrix_ch[ch] = greycomatrix(img[:, :, i],
                                             distances=[1],
                                             angles=angles,
                                             levels=256,
                                             symmetric=True,
                                             normed=True)

            self.glcm_matrix_list.append(matrix_ch)

In [None]:
# Build the extractor with its default input folder ("resized_tomato/").
Feature = FeatureExtraction()

In [None]:
# Load the preprocessed images, then compute the per-channel statistics for them.
Feature.ImageRead()
Feature.CalcStatisticalFeature()

In [None]:
# Fill Feature.glcm_matrix_list (one entry appended per loaded image).
Feature.CalcGLCMMatrix()

In [None]:
# shape of the GLCM stored under key 'g' (green channel) for the first image
Feature.glcm_matrix_list[0]['g'].shape

### Home Work
- Tambahkan method **CalcGLCMTextureFeature** ke class FeatureExtraction