<a href="https://colab.research.google.com/github/Lilly1025/My_Projects/blob/main/%E0%B8%AA%E0%B8%B3%E0%B9%80%E0%B8%99%E0%B8%B2%E0%B8%82%E0%B8%AD%E0%B8%87_holybasilVSsweet_basil.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Clone DATASET

In [None]:
!git clone https://github.com/TAUTOLOGY-EDUCATION/DATASET/

Cloning into 'DATASET'...
remote: Enumerating objects: 1301, done.[K
remote: Counting objects: 100% (16/16), done.[K
remote: Compressing objects: 100% (12/12), done.[K
remote: Total 1301 (delta 4), reused 12 (delta 3), pack-reused 1285[K
Receiving objects: 100% (1301/1301), 1.96 GiB | 33.58 MiB/s, done.
Resolving deltas: 100% (12/12), done.
Filtering content: 100% (3/3), 5.02 GiB | 56.57 MiB/s, done.


# Unzip

In [None]:
!unzip "/content/DATASET/HorapaVsKaprao/horapa-01.zip" -d "/content/DATASET/HorapaVsKaprao"

In [None]:
!unzip "/content/DATASET/HorapaVsKaprao/horapa-02.zip" -d "/content/DATASET/HorapaVsKaprao"

In [None]:
!unzip "/content/DATASET/HorapaVsKaprao/kaprao.zip" -d "/content/DATASET/HorapaVsKaprao"

# Import Module


In [32]:
import numpy as np
import pickle
import cv2
from os import listdir

from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import MultiLabelBinarizer

from sklearn.model_selection import train_test_split

from sklearn.utils import class_weight

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import img_to_array

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization, Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dense, Dropout, Flatten, Activation

from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix
import seaborn as sns

# Training

Define constant

In [33]:
EPOCHS = 100
INIT_LR  = 1e-3
BS = 32

width = 256
height = 256

default_image_size = tuple((width, height))
depth = 3

directory_root = './DATASET/HorapaVsKaprao/'

Convert Image to Array with resizing to square

In [34]:
def convert_image_to_array(image_dir):
    try:
        image = cv2.imread(image_dir)
        if image is not None :
            height, width, channels = image.shape

            if height > width:
                image = image[height//2 - width//2:height//2 + width//2, 0:width]
            else:
                image = image[0:height, width//2 - height//2:width//2 + height//2]

            image = cv2.resize(image, default_image_size)

            return img_to_array(image)
        else:
            return np.array([])
    except Exception as e:
        print(f"Error : {e}")
        return None

Load image to our code

In [None]:
image_list, label_list = [], []

try:
    print("[INFO] Loading images ...")
    root_dir = listdir(directory_root)

    for plant_name in root_dir :
        print(f"[INFO] Processing {plant_name} ...")

        plant_name_list = listdir(f"{directory_root}/{plant_name}")

        for image in plant_name_list :
            image_path = f"{directory_root}/{plant_name}/{image}"
            if image_path.endswith(".jpg") or image_path.endswith(".JPG"):
                image_list.append(convert_image_to_array(image_path))
                label_list.append(plant_name)
        print("[INFO] Image loading success")

except Exception as e:
    print(f"Error : {e}")


[INFO] Loading images ...
[INFO] Processing .ipynb_checkpoints ...
[INFO] Image loading success
[INFO] Processing kapao ...
[INFO] Image loading success
[INFO] Processing horapa ...


Labeling

In [None]:
label_binarizer = LabelBinarizer()
image_labels = label_binarizer.fit_transform(label_list)
pickle.dump(label_binarizer,open('label_tranform.pkl', 'wb'))
n_classes = len(label_binarizer.classes_)

print(f"There are {n_classes} classes which is {', '.join(label_binarizer.classes_)}")

There are 2 classes which is horapa, kapao


Preprocessing image

In [None]:
np_image_list = np.array(image_list, dtype=np.float16)

Show sample of dataset

In [None]:
plt.figure(figsize = (10, 10))

for i in range(6):
    if i  % 2 == 0:
        ax = plt.subplot(3 , 2, i + 1)
        plt.imshow(cv2.cvtColor(image_list[460+i] / 255., cv2.COLOR_BGR2RGB))
        plt.title(label_binarizer.classes_[i%2])
        plt.axis("off")
    else:
        ax = plt.subplot(3 , 2, i + 1)
        plt.imshow(cv2.cvtColor(image_list[i] / 255., cv2.COLOR_BGR2RGB))
        plt.title(label_binarizer.classes_[i%2])
        plt.axis("off")


Spliting data

In [None]:
print("[INFO] Spliting data to train, validate and test")
x_train, x_test, y_train, y_test = train_test_split(np_image_list, image_labels, test_size=0.2, random_state = 2)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state = 2)

print(f"[INFO] All dataset: {len(image_list)}")
print(f"[INFO] Training dataset: {len(y_train)}")
print(f"[INFO] Validation dataset: {len(y_val)}")
print(f"[INFO] Testing dataset: {len(y_test)}")
print(f"[INFO] Training dataset: {len(x_train)}")

[INFO] Spliting data to train, validate and test
[INFO] All dataset: 1115
[INFO] Training dataset: 713
[INFO] Validation dataset: 179
[INFO] Testing dataset: 223
[INFO] Training dataset: 713


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Class Weighting

In [None]:
print(f"[INFO] Class weighting ...")
class_weights = class_weight.compute_class_weight('balanced',
                                            classes=np.unique(np.ravel(y_train, order='C'))
                                            ,y=np.ravel(y_train, order='C'))

class_weight_dict = dict(enumerate(class_weights))

print(class_weight_dict)


Data Augmentator กรณีข้อมูลแบบ imbalanced

In [None]:
aug = ImageDataGenerator(
    rotation_range=25,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest"
)

In [None]:
plt.figure(figsize=(10, 10))

image_flow = aug.flow(x_train, y_train, batch_size=BS)

for i in range(9):
    img, label = image_flow.next()
    ax = plt.subplot(3,3,i+1)
    plt.imshow(cv2.cvtColor(img[0] / 255., cv2.COLOR_BGR2RGB ))
    plt.title(label_binarizer.classes_[label[0]])
    plt.axis("off")

# **MAKE MODEL**

In [None]:
print("[INFO] Making model...")
inputShape = (height,width,depth)
ChanDim = -1
if K.image_data_format() == "channels_first":
    inputShape = (depth,height,width)
    ChanDim = 1

model = Sequential()
model.add(Conv2D(32, (3, 3), padding="same", input_shape=inputShape, activation='relu'))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding="same", activation='relu'))
model.add(Conv2D(64, (3, 3), padding="same", activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, (3, 3), padding="same", activation='relu'))
model.add(Conv2D(128, (3, 3), padding="same", activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(1024, activation='relu'))
model.add(Dense(1, activation="sigmoid"))

model.summary()

opt = Adam(learning_rate=INIT_LR, beta_1=INIT_LR / EPOCHS)
model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])

[INFO] Making model...
Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_28 (Conv2D)          (None, 256, 256, 32)      896       
                                                                 
 max_pooling2d_17 (MaxPooli  (None, 85, 85, 32)        0         
 ng2D)                                                           
                                                                 
 dropout_17 (Dropout)        (None, 85, 85, 32)        0         
                                                                 
 conv2d_29 (Conv2D)          (None, 85, 85, 64)        18496     
                                                                 
 conv2d_30 (Conv2D)          (None, 85, 85, 64)        36928     
                                                                 
 max_pooling2d_18 (MaxPooli  (None, 42, 42, 64)        0         
 ng2D)                         

Early Stopping

In [None]:
es = EarlyStopping(
                            monitor='val_accuracy',
                            verbose=1,
                            patience=20,
                            mode='max',
                            restore_best_weights=True
)

Training the model

In [None]:
print("[INFO] Training ...")

history = model.fit(
                aug.flow(x_train, y_train, batch_size=BS),
                validation_data = (x_val, y_val),
                steps_per_epoch=len(x_train) // BS,
                epochs=EPOCHS,
                verbose=1,
                callbacks=[es],
                class_weight=class_weight_dict)

Show training graph accuracy

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs =  range(1, len(acc)+1)

#Train and Validation accuracy
plt.plot(epochs, acc, 'b', label='Training accuracy')
plt.plot(epochs, val_acc, 'r', label='Validation accuracy')
plt.title('Training and Validation accuracy')
plt.legend()

plt.figure()
#Train and Validation loss
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and Validation loss')
plt.legend()
plt.show()

# Testing the model

In [None]:
print("[INFO] Calculating model accuracy")
train_scores = model.evaluate(x_train, y_train)
test_scores = model.evaluate(x_test, y_test)
all_scores = model.evaluate(np_image_list, image_labels)
print(f"Test Accuracy (on train dataset): {train_scores[1]*100}")
print(f"Test Accuracy (on test dataset): {test_scores[1]*100}")
print(f"Test Accuracy (on all dataset): {all_scores[1]*100}")


In [None]:
def plot_cm(labels, predictions, p =0.5):
    cm = confusion_matrix(labels, prediction > p)
    plt.figure(figsize=(5, 5))
    sns.heatmap(cm, annot=True, fmt="d")
    plt.title('confusion matrix')
    plt.xlabel('Actual label')
    plt.ylabel('Predicted label')

    print(f'Actual is "{label_binarizer.classes_[0]}" and Prediction is "{label_binarizer.classes_[0]}": ',cm[0][0])
    print(f'Actual is "{label_binarizer.classes_[0]}" and Prediction is "{label_binarizer.classes_[1]}": ',cm[0][1])
    print(f'Actual is "{label_binarizer.classes_[1]}" and Prediction is "{label_binarizer.classes_[0]}": ',cm[1][0])
    print(f'Actual is "{label_binarizer.classes_[1]}" and Prediction is "{label_binarizer.classes_[1]}": ',cm[1][1])

test_predictions_baseline = model.predict(x_test, batch_size=BS)

plot_cm(y_test, test_predictions_baseline)

In [None]:
#save the model to disk
print("[INFO] Saving model ...")
model.save('save_model/model.h5')