In [3]:
import cv2
from PIL import Image
import numpy as np 
from matplotlib import pyplot as plt
import os
import pandas as pd
from datetime import datetime
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [4]:
def slice_roi(img):
    # convert 0 to 1 , 1 to 0 to find border
    img2 = np.where(img==0, 255, 0)
    
    #axis=0 refer to column, axis=1 refer to row
    
    #find column with value >0 to find left/right border
    img2_col = np.sum(img2, axis = 0)
    col_with_value = np.where(img2_col > 0)[0]
    
    #find row with value >0 to find top/bottom border
    img2_row = np.sum(img2, axis = 1)
    row_with_value = np.where(img2_row > 0)[0]
    
    #get left/right border
    start_col = min(col_with_value)
    end_col = max(col_with_value)
    
    #get top/bottom border
    start_row = min(row_with_value)
    end_row = max(row_with_value)
    
    #get ROI width & height
    roi_width = end_col - start_col
    roi_height = end_row - start_row
    
    #get ROI area
    img = img[start_row:end_row, start_col:end_col]
    
    if roi_width > roi_height:
        diff = roi_width - roi_height
        extra_pad = int(diff/2)
        img = np.pad(img, ((extra_pad, extra_pad), (0,0)), 'maximum')
    else:
        diff = roi_height - roi_width
        extra_pad = int(diff/2)
        img = np.pad(img, ((0,0), (extra_pad, extra_pad)), 'maximum')
        
    return img

In [5]:
def split_rename(imgString, separator = '_', indexSplit = 2):
    sp = imgString.split(separator)
    return sp[indexSplit]

In [6]:
def loadImage(folder, imgType = ['.jpeg', '.jpg'], IMGSIZE = 64, scale=1.0, binarization=False):
    images = []
    label = []
    # traverse root directory, and list directories as dirs and files as files
    for root, dirs, files in os.walk(folder):
        for filename in os.listdir(root):
            if any([filename.endswith(x) for x in imgType]):
                
                # baca image dan ubah ke grayscale
                img_gray = cv2.imread(os.path.join(root, filename), cv2.IMREAD_GRAYSCALE)
                                
                # gunakan ROI untuk crop
                img_roi = slice_roi(img_gray)
                
                # Normalisasi (binarization image)
                if binarization:
                    ret, img_roi = cv2.threshold(img_roi, 128, 1, cv2.THRESH_BINARY) # batasnya 128, yg dimana diatas 128 dikasi nilai 1 
                
                # Resize ukuran citra
                img = cv2.resize(img_roi, (IMGSIZE, IMGSIZE))
                
                
                # jika real image
                if img is not None:
                    # adding image
                    images.append(img)
                    # adding label
                    label.append(split_rename(filename, separator='.', indexSplit = 0))
    
    return np.array(images, dtype=np.uint8), np.array(label)

In [7]:
def own_augment_v2(datagen, img_array, label, limit_augs=3, binary=False):
    dataset = []
    labels = []
    for index, img in enumerate(img_array):        
        x = img.reshape( (1, ) + img.shape + (1,))
        
        dataset.append(x)
        labels.append(label[index])
        
        i = 0
        for batch in datagen.flow(x,batch_size=3):
            if binary:
                mean_batch = np.mean(batch)
                batch = np.where(batch > mean_batch, 1, 0)
            dataset.append(batch)
            labels.append(label[index])
            i += 1
            if i >= limit_augs:
                break
    data = np.array(dataset)
    return np.squeeze(data), np.array(labels)

In [145]:
# source : https://www.statology.org/numpy-ndarray-object-has-no-attribute-append/
def combine_dataset(data1, data2):
    return np.concatenate((data1, data2))

In [277]:
# Base for Augment
IMAGE_PATH = '../Dataset/Dico/'
img_extension = ['.jpeg', '.jpg']

In [278]:
img_base, label_base = loadImage(IMAGE_PATH, binarization=False)

In [279]:
img_base.shape

(450, 64, 64)

In [280]:
# own augment v2

cVAL = 255

datagen = ImageDataGenerator(
    rotation_range= 45,
    zoom_range=[0.9, 0.8],
    fill_mode='constant',
    cval=cVAL
)

img_base, label_base = own_augment_v2(datagen, img_base, label_base, limit_augs=14)

In [281]:
img_base.shape

(6750, 64, 64)

# ====================================================================================================

In [282]:
# Base data for increment
IMAGE_PATH = '../Dataset'
img_extension = ['.jpeg', '.jpg']

In [283]:
img, label = loadImage(IMAGE_PATH, binarization=False)

In [284]:
img.shape

(2250, 64, 64)

In [285]:
img_aug = combine_dataset(img, img_base)
label_aug = combine_dataset(label, label_base)

In [286]:
img_aug.shape

(9000, 64, 64)

# Moment Invariant

In [22]:
#moment inv
def momentInvariantFeature(img):
    moments = cv2.moments(img)
    huMoment = cv2.HuMoments(moments)
    
    return huMoment.flatten().tolist()

In [23]:
# looping for entire image
def feature_extraction(img_array):
    feature = []
    for i in range(len(img_array)):
        feature.append(momentInvariantFeature(img_array[i]))
    return feature

In [25]:
# Create function to concate another feature
def concatenate_feature(df1, df2, axis=0, rename_index=True):
    # 0 vertical (menurun)
    # 1 horizonal (kesamping)
    return pd.concat([df1, df2], axis=axis, ignore_index=rename_index)

In [26]:
def oneHotEncoder(label):
    # label with one hot encoding
    label = np.array(label).reshape(-1,1) # ubah ke bentuk (-1,1) sesuai yang diinginkan oleh onehotencoder
    onehot_encoder = OneHotEncoder()
    y = onehot_encoder.fit_transform(label).toarray()
    # onehot_encoder.categories_
    df_label = pd.DataFrame(data=y, columns=[str(onehot_encoder.categories_[0][i]) for i in range(np.array(y).shape[1])])
    
    return df_label

In [27]:
# add label in end of feature
def mInvariant_mixin_feature_and_label(feature, label, encoder=False):
    # feaature
    dt = pd.DataFrame(data=feature, columns=["M"+str(i+1) for i in range(np.array(feature).shape[1])], dtype=np.float64)
    if(encoder == False):
        dt['label'] = label
        return dt
    else:
        df_label = oneHotEncoder(label)
        # gabungin dataframenya
        df = pd.concat([dt, df_label], axis=1)
        return df

In [34]:
# function feature to csv with label
def save_to_csv(data, name):
    dt_string = f'{name}_feature.csv'
    data.to_csv(dt_string, header=True, index = False)

In [287]:
feature_im = np.array(feature_extraction(img_aug.astype(np.float32)))

In [288]:
# moment invariant
data_im = mInvariant_mixin_feature_and_label(feature_im, label_aug, encoder=False)

In [290]:
save_to_csv(data_im, f"{img_aug.shape[0]}")

In [289]:
data_im

Unnamed: 0,M1,M2,M3,M4,M5,M6,M7,label
0,0.000811,3.754003e-09,1.012610e-13,3.743107e-13,7.259493e-26,1.615691e-17,6.365473e-27,a
1,0.000775,2.039180e-09,1.341664e-13,2.927303e-13,-4.693105e-26,1.139503e-17,-3.410212e-26,ba
2,0.000740,9.906478e-10,1.631860e-14,2.249429e-13,1.352651e-26,6.312464e-18,1.664667e-27,be
3,0.000792,1.942616e-09,1.251477e-13,8.969757e-14,-7.816695e-27,3.319659e-18,-5.405118e-27,bi
4,0.000777,9.830001e-10,3.430836e-13,3.653126e-13,-7.862973e-27,9.189068e-18,1.290901e-25,bo
...,...,...,...,...,...,...,...,...
8995,0.000966,1.999966e-08,6.309168e-13,1.047117e-11,-9.475208e-24,-1.351760e-15,-2.519099e-23,yu
8996,0.000946,1.475190e-08,6.610021e-13,7.285443e-12,6.457513e-25,-7.644746e-16,-1.597463e-23,yu
8997,0.000956,1.145065e-08,1.566326e-12,6.950745e-12,2.237815e-23,-7.056287e-16,-5.020679e-24,yu
8998,0.000961,2.136639e-08,9.495136e-13,9.816790e-12,-1.877307e-23,-1.313314e-15,-2.336338e-23,yu
