In [None]:
%pip install tensorflow

In [None]:
%pip install --user matplotlib==3.5.0

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sn
import os
from sklearn.metrics import confusion_matrix, classification_report

from tensorflow.keras.optimizers import Adadelta
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint


##############################################
learning_rate = 0.1  # initial learning rate
min_learning_rate = 0.00001  # once the learning rate reaches this value it is not reduced any further
learning_rate_reduction_factor = 0.5  # factor used when reducing the learning rate -> learning_rate *= learning_rate_reduction_factor
patience = 3  # how many epochs to wait before reducing the learning rate when the loss stops improving
verbose = 1  # controls how much is reported during training and evaluation: 0 - none, 1 - report metrics after each batch, 2 - report metrics after each epoch
image_size = (100, 100)  # image height and width
input_shape = (100, 100, 3)  # expected input shape for the trained models; Fruit-360 images are 100 x 100 RGB images, so this is the required input shape

use_label_file = False  # set this to True to load the label names from a file; the file must contain the names of the labels used, one label per line
label_file = 'labels.txt'
base_dir = 'E:\\files for study\\AI\\ML\\Assignment2\\Fruit-Images-Dataset-master' # path to the folder containing the fruit dataset
test_dir = os.path.join(base_dir, 'Test')
train_dir = os.path.join(base_dir, 'Training')
output_dir = 'E:\\files for study\\AI\\ML\\Assignment2\\Ketqua'  # root folder in which output files are stored
##############################################

# exist_ok avoids the check-then-create race and makes re-running the cell safe.
os.makedirs(output_dir, exist_ok=True)

if use_label_file:
    with open(label_file, "r") as f:
        # Skip blank lines (e.g. a trailing newline) so they do not become an empty class name.
        labels = [line.strip() for line in f if line.strip()]
else:
    # One class per sub-directory of the training folder. Keep directories only
    # (stray files are not classes) and sort them, because os.listdir returns
    # entries in arbitrary order and the list order defines the class indices.
    labels = sorted(entry for entry in os.listdir(train_dir)
                    if os.path.isdir(os.path.join(train_dir, entry)))
num_classes = len(labels)

# Draw two charts, one for accuracy and one for the loss, to show how both metrics evolve during training
def plot_model_history(model_history, out_path=""):
    """Plot the accuracy and loss curves of a training run side by side.

    Args:
        model_history: object exposing a ``history`` dict that maps metric
            names to per-epoch value lists. The keys 'accuracy' and 'loss'
            are required; 'val_accuracy' and 'val_loss' are plotted only
            when present.
        out_path: directory in which the figure is saved as "acc_loss.png".
            Nothing is saved when it is empty.

    Raises:
        KeyError: if 'accuracy' or 'loss' is missing from the history.
    """
    history = model_history.history
    fig, axs = plt.subplots(1, 2, figsize=(15, 5))
    # Left panel: accuracy, right panel: loss.
    for ax, metric, title in zip(axs, ('accuracy', 'loss'), ('Accuracy', 'Loss')):
        train_values = history[metric]
        ax.plot(range(1, len(train_values) + 1), train_values, label='train')
        # The validation series only exists when fit() was given validation data.
        val_values = history.get('val_' + metric)
        if val_values is not None:
            ax.plot(range(1, len(val_values) + 1), val_values, label='val')
        ax.set_title('Model ' + title)
        ax.set_ylabel(title)
        ax.set_xlabel('Epoch')
        ax.legend(loc='best')
    if out_path:
        # Save through the figure handle rather than pyplot's implicit current figure.
        fig.savefig(os.path.join(out_path, "acc_loss.png"))
    plt.show()


# Build a confusion matrix to show which images were assigned the wrong label
def plot_confusion_matrix(y_true, y_pred, classes, out_path=""):
    """Draw the confusion matrix of a classifier as an annotated heatmap.

    Args:
        y_true: true class indices, one per sample.
        y_pred: predicted class indices, one per sample.
        classes: class names; position ``i`` names class index ``i``.
        out_path: directory in which the figure is saved as
            "confusion_matrix.png". Nothing is saved when it is empty.

    Returns:
        The axes object returned by the seaborn heatmap call.

    Note:
        ``y_true`` and ``y_pred`` are assumed to hold integer indices into
        ``classes``, which is how the caller in this file uses the function.
    """
    class_names = list(classes)
    # Pass the full label set explicitly: otherwise a class that never occurs in
    # y_true/y_pred is dropped from the matrix and its shape no longer matches
    # the index/columns given to the DataFrame below.
    cm = confusion_matrix(y_true, y_pred, labels=list(range(len(class_names))))
    df_cm = pd.DataFrame(cm, index=class_names, columns=class_names)
    plt.figure(figsize=(40, 40))
    ax = sn.heatmap(df_cm, annot=True, square=True, fmt="d", linewidths=.2, cbar_kws={"shrink": 0.8})
    if out_path:
        plt.savefig(os.path.join(out_path, "confusion_matrix.png"))  # the matrix is saved to a file called "confusion_matrix.png"
    return ax


# Randomly change the hue and saturation of the image to simulate varying lighting conditions
def augment_image(x):
    """Apply a random saturation change followed by a random hue shift.

    The saturation factor is drawn from [0.9, 1.2] and the hue delta is
    bounded by 0.02, as passed to the ``tf.image`` helpers below.

    Args:
        x: image passed in by the data generator.

    Returns:
        The colour-jittered image.
    """
    import tensorflow as tf
    saturated = tf.image.random_saturation(x, 0.9, 1.2)
    return tf.image.random_hue(saturated, 0.02)

# Given the paths to the training folder and the test folder, and the fraction of data kept for validation, build 3 generators
#  - the training generator uses (100 - validation_percent) percent of the images from the training set
#    it applies random horizontal and vertical flips to augment the data and produces randomly shuffled batches
#  - the validation generator uses the remaining validation_percent of the images taken from the training set
#    it does not produce shuffled batches, because the model is not trained on this data
#    accuracy and loss are monitored on the validation data so that the learning rate can be updated if the model reaches a local optimum
#  - the test generator uses the test set without any form of augmentation
#    once training is finished, the final values of accuracy and loss are computed on it
def build_data_generators(train_folder, test_folder, validation_percent, labels=None, image_size=(100, 100), batch_size=50):
    """Create the training, validation and test image generators.

    Args:
        train_folder: directory with one sub-directory per class; it is the
            source of both the training and the validation subset.
        test_folder: directory with one sub-directory per class, used for
            the test generator.
        validation_percent: value passed as ``validation_split``, i.e. the
            fraction of ``train_folder`` held out for validation.
        labels: optional list of class sub-directory names, passed as
            ``classes`` to ``flow_from_directory``.
        image_size: target size every image is resized to.
        batch_size: number of images per batch.

    Returns:
        Tuple ``(train_gen, validation_gen, test_gen)``. Only ``train_gen``
        is shuffled and augmented.
    """
    train_datagen = ImageDataGenerator(
        width_shift_range=0.0,
        height_shift_range=0.0,
        zoom_range=0.0,
        horizontal_flip=True,
        vertical_flip=True,  # random flips
        preprocessing_function=augment_image,
        validation_split=validation_percent)  # fraction of the training folder held out for validation

    # The validation subset is read through its own generator without augmentation:
    # validation metrics drive the learning-rate schedule and checkpoint selection,
    # so they should not depend on random flips or colour jitter. Using the same
    # validation_split keeps the two subsets complementary.
    validation_datagen = ImageDataGenerator(validation_split=validation_percent)

    test_datagen = ImageDataGenerator()

    # Settings shared by all three generators.
    flow_kwargs = dict(target_size=image_size, class_mode='sparse', batch_size=batch_size, classes=labels)

    train_gen = train_datagen.flow_from_directory(train_folder, shuffle=True, subset='training', **flow_kwargs)
    validation_gen = validation_datagen.flow_from_directory(train_folder, shuffle=False, subset='validation', **flow_kwargs)
    test_gen = test_datagen.flow_from_directory(test_folder, shuffle=False, subset=None, **flow_kwargs)
    return train_gen, validation_gen, test_gen


# This function performs every step: setting up the data, training and testing the model, and plotting the results
# model is any trainable model; the input shape and the number of output classes depend on the dataset used,
# in this case the input is a 100 x 100 RGB image and the output is a softmax layer with 118 probabilities
# name is used when saving the classification report containing the model's f1 scores, the plots showing loss and accuracy, and the confusion matrix
# batch_size determines how many images are passed through the CNN at once; the number of steps per epoch is derived from it (total number of images in the set / batch size, rounded up to cover the last partial batch)
def train_and_evaluate_model(model, name="", epochs=25, batch_size=50, verbose=verbose, useCkpt=False):
    """Train ``model``, evaluate it and write all reports to disk.

    Outputs go to ``<output_dir>/<name>``: the best checkpoint ("model.h5"),
    the accuracy/loss plot, the confusion matrix plot and
    "classification_report.txt".

    Args:
        model: Keras model to train; it is compiled here with Adadelta and
            sparse categorical cross-entropy.
        name: sub-directory of ``output_dir`` used for this model's outputs.
        epochs: number of training epochs.
        batch_size: number of images per batch for all three generators.
        verbose: verbosity passed to Keras. The default is the value of the
            module-level ``verbose`` at the time this function is defined.
        useCkpt: when True, load weights from an existing "model.h5" in the
            output directory before training.
    """
    model.summary()  # summary() prints itself and returns None
    model_out_dir = os.path.join(output_dir, name)
    os.makedirs(model_out_dir, exist_ok=True)
    checkpoint_path = os.path.join(model_out_dir, "model.h5")
    if useCkpt:
        model.load_weights(checkpoint_path)

    trainGen, validationGen, testGen = build_data_generators(train_dir, test_dir, validation_percent=0.1, labels=labels, image_size=image_size, batch_size=batch_size)
    # `learning_rate` is the supported keyword; the old `lr` alias is deprecated.
    optimizer = Adadelta(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', patience=patience, verbose=verbose,
                                                factor=learning_rate_reduction_factor, min_lr=min_learning_rate)
    # The deprecated `period=1` argument is dropped; checkpointing every epoch is the default.
    save_model = ModelCheckpoint(filepath=checkpoint_path, monitor='val_accuracy', verbose=verbose,
                                 save_best_only=True, save_weights_only=False, mode='max')

    # len(generator) is the number of batches per pass, partial last batch included.
    # `n // batch_size + 1` would add a surplus batch whenever n is a multiple of batch_size.
    history = model.fit(trainGen,
                        epochs=epochs,
                        steps_per_epoch=len(trainGen),
                        validation_data=validationGen,
                        validation_steps=len(validationGen),
                        verbose=verbose,
                        callbacks=[learning_rate_reduction, save_model])

    # Evaluate the best checkpoint, not the weights of the last epoch.
    model.load_weights(checkpoint_path)

    validationGen.reset()
    loss_v, accuracy_v = model.evaluate(validationGen, steps=len(validationGen), verbose=verbose)
    loss, accuracy = model.evaluate(testGen, steps=len(testGen), verbose=verbose)
    print("Validation: accuracy = %f  ;  loss_v = %f" % (accuracy_v, loss_v))
    print("Test: accuracy = %f  ;  loss = %f" % (accuracy, loss))
    plot_model_history(history, out_path=model_out_dir)

    testGen.reset()
    # Exactly one pass over the test set, so predictions line up with testGen.classes.
    y_pred = model.predict(testGen, steps=len(testGen), verbose=verbose)
    predicted_classes = y_pred.argmax(axis=-1)
    # The test generator is built with shuffle=False, so samples come in `classes` order.
    y_true = testGen.classes
    plot_confusion_matrix(y_true, predicted_classes, labels, out_path=model_out_dir)
    # Pass every class index so the report keeps one row per name in `labels`
    # even when a class does not occur in the test data.
    class_report = classification_report(y_true, predicted_classes,
                                         labels=list(range(len(labels))), target_names=labels)

    with open(os.path.join(model_out_dir, "classification_report.txt"), "w") as text_file:
        text_file.write("%s" % class_report)


# Sanity check: show the class names and how many classes were found.
print(labels, num_classes, sep="\n")

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, Activation, Dropout, Lambda


# Create a custom layer that converts the original image from
# RGB to HSV and to grayscale and concatenates the results,
# producing an input of size 100 x 100 x 4
def convert_to_hsv_and_grayscale(x):
    """Concatenate the HSV and grayscale versions of an RGB image tensor.

    Args:
        x: RGB image tensor with the colour channels on the last axis.

    Returns:
        Tensor holding the HSV conversion followed by the grayscale
        conversion, joined along the last axis.
    """
    import tensorflow as tf
    converted = [tf.image.rgb_to_hsv(x), tf.image.rgb_to_grayscale(x)]
    return tf.concat(converted, axis=-1)


def network(input_shape, num_classes):
    """Build the convolutional classifier.

    Architecture: HSV+grayscale conversion, four blocks of 5x5 convolution,
    ReLU and 2x2 max pooling (16, 64, 64 and 128 filters), then two dense
    layers of 1024 and 256 units, each followed by 20% dropout, and a
    softmax output layer.

    Args:
        input_shape: shape of one input image.
        num_classes: number of units in the softmax output layer.

    Returns:
        The uncompiled Keras ``Model``.
    """
    img_input = Input(shape=input_shape, name='data')
    features = Lambda(convert_to_hsv_and_grayscale)(img_input)

    # Convolutional blocks conv1..conv4; only the filter count differs between them.
    for stage, filters in enumerate((16, 64, 64, 128), start=1):
        features = Conv2D(filters, (5, 5), strides=(1, 1), padding='same', name='conv%d' % stage)(features)
        features = Activation('relu', name='conv%d_relu' % stage)(features)
        features = MaxPooling2D((2, 2), strides=(2, 2), padding='valid', name='pool%d' % stage)(features)

    features = Flatten()(features)

    # Fully connected head, each dense layer followed by dropout.
    for units, layer_name in ((1024, 'fcl1'), (256, 'fcl2')):
        features = Dense(units, activation='relu', name=layer_name)(features)
        features = Dropout(0.2)(features)

    predictions = Dense(num_classes, activation='softmax', name='predictions')(features)
    return Model(inputs=img_input, outputs=predictions)


# Build the CNN for the configured input shape and class count, then run the
# whole train / evaluate / report pipeline under the name "fruit-360 model".
model = network(num_classes=num_classes, input_shape=input_shape)
train_and_evaluate_model(model=model, name="fruit-360 model")