# Install TensorFlow 2.2 (GPU build) and show the installed Keras package info


In [None]:
# Environment setup: pin the GPU build of TensorFlow to release 2.2, then
# print pip's metadata for the installed keras package.
# (Disabled alternative: show the metadata of tensorflow-gpu instead.)
# !pip show tensorflow-gpu
!pip install tensorflow-gpu==2.2
!pip show keras 

# Augmenting a single image with `ImageDataGenerator.flow`



In [0]:
import os

import cv2
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.utils import to_categorical

%matplotlib inline


# Random augmentation pipeline for the single-image demo: rotation, shifts,
# shear, zoom and flips; pixels exposed by a transform are filled with the
# constant `cval`, and the result is rescaled by 1/255.
datagen = ImageDataGenerator(
    rotation_range=360,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=10.0,
    zoom_range=0.2,
    fill_mode="constant",
    cval=0,
    horizontal_flip=True,
    vertical_flip=True,
    rescale=1/255,
    validation_split=0.2,
    dtype=None)

# Load the sample image (resized to target_size) and preview the original.
image = load_img("yaleB11_P00A+000E+00.jpg", target_size=(240, 320))
plt.figure(0, figsize=(16, 9))
plt.imshow(image)
plt.show()

# flow() consumes a batch, so prepend a batch axis of length 1 to the array.
image = img_to_array(image)
image = image.reshape((1,) + image.shape)
# print(image.shape)

# Start value
start_num = 0
# End value (number of augmented images to generate)
end_num = 3

# Every generated batch is written into save_to_dir. Nothing else in this cell
# creates that folder (and the cleanup cell at the end of the notebook removes
# it), so make sure it exists before the first image is saved.
os.makedirs("data_augumentation", exist_ok=True)

augmented_batches = datagen.flow(image,
                                 batch_size=1,
                                 save_prefix='',
                                 save_format='png',
                                 save_to_dir="data_augumentation")

# The generator never stops on its own; zip() with range() bounds it to exactly
# `end_num` batches, each drawn on its own figure.
for start_num, batch in zip(range(1, end_num + 1), augmented_batches):
    plt.figure(start_num - 1, figsize=(16, 9))
    augu_image = batch[0]
    # Convert the image to float32 if needed:
    # augu_image = batch[0].astype('float32')
    implot = plt.imshow(augu_image)
plt.show()

# Augmenting an image directory with `ImageDataGenerator.flow_from_directory`


In [0]:
import os
import cv2
import time
import tarfile
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img



def _generate_subset(datagen, directory, subset, count, image_size, color_mode,
                     save_prefix, save_format, save_to_dir, show_image):
    """Pull exactly ``count`` single-image batches of ``subset`` from ``datagen``.

    Each batch requested from ``flow_from_directory`` is written into
    ``save_to_dir`` by the generator itself; the yielded array is only used
    here for the optional preview. Nothing is generated when ``count`` <= 0.
    """
    count = int(count)
    if count <= 0:
        return

    flow = datagen.flow_from_directory(
        directory=directory,
        batch_size=1,
        target_size=image_size,
        color_mode=color_mode,
        save_prefix=save_prefix,
        save_format=save_format,
        # Works together with ImageDataGenerator's validation_split argument.
        subset=subset,
        # class_mode="binary",
        save_to_dir=save_to_dir)

    # The generator is endless. zip() consults range() first, so it stops
    # after `count` batches without requesting (and saving) an extra one.
    for _index, (image, _label) in zip(range(count), flow):
        if show_image:
            # Drop the batch axis; single-channel frames also drop the channel
            # axis so imshow can render them with a gray colormap.
            frame = image[0]
            fig = plt.figure(figsize=(16, 9))
            if frame.shape[-1] == 1:
                plt.imshow(frame[..., 0], cmap="gray")
            else:
                plt.imshow(frame)
            plt.show()
            # Release the figure so long runs do not accumulate open figures.
            plt.close(fig)


def data_augumentation(
                       directory="training_data",
                       train_num=80,
                       test_num=20, 
                       image_size=(256, 256),
                       color_mode="rgb",
                       train_save_prefix="",
                       test_save_prefix="",
                       save_format="png",
                       output_dir="./",
                       show_image=False,
                       validation_split=0.2):
    """Generate augmented train/test images from a class-per-folder dataset.

    Images under ``directory`` are randomly augmented and written to
    ``data_augumentation/train`` and ``data_augumentation/test``; everything
    under ``data_augumentation/`` is then packed into
    ``<output_dir>/dataset.tar.gz``.

    Args:
        directory: Source dataset folder, one sub-folder per class. It is
            created if missing, but must contain more than one entry.
        train_num: Number of augmented images to generate from the
            "training" subset.
        test_num: Number of augmented images to generate from the
            "validation" subset.
        image_size: ``target_size`` passed to ``flow_from_directory``.
        color_mode: ``color_mode`` passed to ``flow_from_directory``.
        train_save_prefix: File-name prefix for generated training images.
        test_save_prefix: File-name prefix for generated testing images.
        save_format: Image format of the generated files.
        output_dir: Folder that receives ``dataset.tar.gz``.
        show_image: When true, display every generated image.
        validation_split: Fraction of the source images reserved for the
            "validation" subset.

    Raises:
        AssertionError: If ``directory`` does not contain more than one entry.
    """
    # Create the dataset directory if needed, then check that it holds data.
    if not os.path.exists(directory):
        os.makedirs(directory)
        print("[INFO] Builed %s/" % directory)

    data_num = os.listdir(directory)
    assert len(data_num) > 1, "Not any training data, you need to upload training data to training_data/xxx，one class has one drirectory !"

    # Create the augmentation output directories. Each one is checked on its
    # own so a partially existing tree (e.g. emptied by a cleanup) is repaired.
    output_dirs = (
        ("data_augumentation", "data_augumentation/"),
        ("data_augumentation/train", "data_augumentation/train"),
        ("data_augumentation/test", "data_augumentation/test"),
    )
    for path, shown_name in output_dirs:
        if not os.path.isdir(path):
            os.makedirs(path)
            print("[INFO] Builed %s" % shown_name)

    datagen = ImageDataGenerator(
                                rotation_range=360,
                                width_shift_range=0.2,
                                height_shift_range=0.2,
                                shear_range=10.0,
                                zoom_range=0.2,
                                fill_mode="constant",
                                # Fill value for pixels exposed by a transform:
                                # 0 (black) ~ 255 (white).
                                cval=0,
                                horizontal_flip=True,
                                vertical_flip=True,
                                rescale=1/255,
                                # validation_split determines what the `subset`
                                # argument of flow_from_directory selects.
                                validation_split=validation_split,
                                dtype=None)

    print("[INFO] Generating training data...")

    # Record start time
    train_start_time = time.time()

    # Training data
    _generate_subset(datagen, directory, "training", train_num, image_size,
                     color_mode, train_save_prefix, save_format,
                     "data_augumentation/train", show_image)

    # Record end time
    train_end_time = time.time()
    print("[INFO] Generating training data costs %.4f secs" % (train_end_time - train_start_time))
    print("[INFO] Generate %d training images." % train_num)

    print("[INFO] Generating testing data...")
    test_start_time = time.time()

    # Testing data: counted independently of the training images.
    _generate_subset(datagen, directory, "validation", test_num, image_size,
                     color_mode, test_save_prefix, save_format,
                     "data_augumentation/test", show_image)

    test_end_time = time.time()
    print("[INFO] Generating testing data costs %.4f secs" % (test_end_time - test_start_time))
    print("[INFO] Generate %d testing images." % test_num)

    # Build the archive path, creating the destination folder when necessary.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    file_name = os.path.join(output_dir, "dataset.tar.gz")

    # Pack every generated file; the context manager closes the archive even
    # if adding a file fails.
    with tarfile.open(file_name, "w:gz") as tar:
        for root, _dirs, files in os.walk("data_augumentation/"):
            for file in files:
                tar.add(os.path.join(root, file))
    print("[INFO] Generate dataset.tar.gz file.")

In [89]:
# Build the dataset: 8000 training and 2000 testing grayscale images, with the
# source images split 80/20 between the two subsets; previews are disabled.
data_augumentation(
    color_mode="grayscale",
    validation_split=0.2,
    train_num=8000,
    train_save_prefix="train",
    test_num=2000,
    test_save_prefix="test",
    show_image=False,
)

[INFO] Generating training data...
Found 7 images belonging to 2 classes.
[INFO] Generating training data costs 144.0582 secs
[INFO] Generate 8000 training images.
[INFO] Generating testing data...
Found 1 images belonging to 2 classes.
[INFO] Generating testing data costs 36.4703 secs
[INFO] Generate 2000 testing images.
[INFO] Generate dataset.tar.gz file.


In [0]:
# Clean up the generated images: empty the test/ and train/ sub-folders, then
# delete the data_augumentation folder itself (recursively, ignoring errors).
# NOTE(review): the last command alone already removes the sub-folders too.
!rm -rf data_augumentation/test/*
!rm -rf data_augumentation/train/*
!rm -rf data_augumentation