### Library Import
코드를 실행하기 위한 모든 라이브러리들 import 과정

In [118]:
import tensorflow as tf
import datetime
import os
import operator
import shutil

from tensorflow.keras import backend as K
from tensorflow.keras import layers as L
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img

import matplotlib.pyplot as plt
import numpy as np
import cv2
from PIL import Image

### Environment Variables
노트북 전체에서 사용하는 설정값들 (이미지 크기, 데이터 경로, 체크포인트 경로, 디버그 플래그)

In [172]:
# Side length (in pixels) that images are resized to
image_size = 36

# Number of data samples to generate per element
data_size = 256

# Data directory, relative to the current working directory
data_path = f'{os.getcwd()}/../data/'

# Directory where the per-element images are stored
element_path = f'{data_path}alphebet/'

# Directory that holds the example images
example_path = f'{data_path}ex/'

# Checkpoint file
model_path = './ckpt.h5'

# Debug switches
DEBUG_DONT_ERASE_TEMP = False
DEBUG_PRINT_ALL_PREDICTIONS = False

### Create Folder
폴더를 생성해주는 함수

In [120]:
def create_folder(directory):
    """Create ``directory`` (including missing parent directories).

    Args:
        directory: Path of the directory to create.

    Returns:
        True when this call created the directory, False when something
        already exists at that path.
    """
    try:
        # Attempt the creation directly instead of checking for existence
        # first: a separate check leaves a window in which the directory can
        # appear before makedirs runs, which would make makedirs raise.
        os.makedirs(directory)
    except FileExistsError:
        return False
    return True

create_folder(element_path)

# Class names are the entries of the example directory, in sorted order.
elements = sorted(os.listdir(example_path))

# Drop hidden entries (names starting with '.'). The loop walks a snapshot of
# the list: removing items from the list that is being iterated skips the
# entry that follows each removal, so consecutive hidden entries would survive.
for element in list(elements):
    if element.startswith('.'):
        elements.remove(element)

print(tf.__version__)
print(os.getcwd())
print(data_path)
print(elements)

2.12.0
/Users/naburang/Desktop/Metal/tensorflow_venv.nosync/code
/Users/naburang/Desktop/Metal/tensorflow_venv.nosync/code/../data/
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'zz']


### 데이터 늘리기
약간의 위치 변화나 회전을 적용해 학습 데이터를 늘리는 과정

In [121]:
# Augmentation settings used when generating the training images:
# small random rotation, horizontal/vertical shift, shear and zoom.
# NOTE(review): cval is passed together with fill_mode='nearest'; per the Keras
# documentation cval only applies to fill_mode='constant' - confirm which one
# was intended.
augmentation_options = {
    'rotation_range': 10,
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'shear_range': 0.05,
    'zoom_range': 0.05,
    'fill_mode': 'nearest',
    'cval': 255,
}
datagen = ImageDataGenerator(**augmentation_options)


def generate_data(file_name):
    """Generate the images of every element from one example image per element.

    For each name in ``elements`` the example
    ``example_path + element + '/' + file_name + '.png'`` is converted to
    grayscale, run through Canny edge detection and written to
    ``data_path + 'canny/'``. The 'zz' element is copied ``data_size`` times
    without augmentation; every other element is augmented with ``datagen``
    and the results are saved as JPEG files under ``element_path + element``.

    Args:
        file_name: Base name (without extension) of the example image to use.

    Raises:
        FileNotFoundError: if an example image cannot be read.
    """
    # The Canny output folder is the same for every element: create it once.
    canny_path = data_path + 'canny/'
    create_folder(canny_path)

    for element in elements:
        source_file = example_path + element + '/' + file_name + '.png'
        canny_file = canny_path + element + file_name + '.png'

        # Apply Canny edge detection to the example image
        cv_image = cv2.imread(source_file)
        if cv_image is None:
            # cv2.imread reports an unreadable file by returning None
            raise FileNotFoundError('Could not read example image: ' + source_file)
        cv_image = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
        canny = cv2.Canny(cv_image, 100, 200)
        cv2.imwrite(canny_file, canny)

        # Both branches below write into the class folder, so make sure it
        # exists before either of them runs.
        class_dir = element_path + element
        create_folder(class_dir)

        if element == 'zz':
            # 'zz' is not augmented: the edge image is copied data_size times.
            # NOTE(review): the copy names do not contain file_name, so a call
            # with a different file_name overwrites these files - confirm
            # that this is intended.
            for copy_idx in range(data_size):
                shutil.copy(canny_file,
                            class_dir + '/' + element + str(copy_idx) + '.jpg')
            continue

        # Read the edge image back and add a leading batch axis for datagen.flow
        image = load_img(canny_file)
        x = img_to_array(image)
        x = x.reshape((1,) + x.shape)

        # Draw augmented batches (saved to class_dir by save_to_dir) and stop
        # once more than (data_size / 4) * 5 batches have been drawn.
        i = 0
        for batch in datagen.flow(x, batch_size=1,
                                  save_to_dir=class_dir,
                                  save_prefix=element + file_name,
                                  save_format='jpg'):
            i += 1
            if i > (data_size / 4) * 5:
                break


# Generate the dataset only when the folder of the last class did not exist yet:
# create_folder returns True only if it had to create the directory.
# The class name is taken from the list explicitly rather than from a loop
# variable left over from another cell, and nothing is generated when the
# class list is empty.
if elements and create_folder(element_path + elements[-1]):
    generate_data('0')
    generate_data('1')

### Load Data

In [122]:
def load_data(size):
    """Build the training and validation iterators over ``element_path``.

    Both iterators come from one ImageDataGenerator configured with
    rotation_range=10, shear_range=5 and validation_split=0.2.

    Args:
        size: Images are resized to ``(size, size)``.

    Returns:
        Tuple ``(train_generator, val_generator)``.
    """
    reader = ImageDataGenerator(validation_split=0.2,
                                shear_range=5,
                                rotation_range=10)

    def subset_flow(subset_name):
        # Iterator over one subset ('training' or 'validation') of the split
        return reader.flow_from_directory(element_path,
                                          target_size=(size, size),
                                          subset=subset_name)

    train_flow = subset_flow('training')
    val_flow = subset_flow('validation')
    return train_flow, val_flow

### Make Model

In [123]:
def make_model(train, val, class_count, size = 36):
    """Build, compile and train the classifier.

    The network is three SeparableConv2D stages (64, 128, 256 filters) with
    max pooling, followed by Dense(256), Dense(64) and a softmax layer with
    ``class_count`` outputs. During training ModelCheckpoint writes the best
    model to ``model_path``; the model object itself is not returned.

    Args:
        train: Training data passed to ``model.fit``.
        val: Validation data passed to ``model.fit``.
        class_count: Number of output classes.
        size: Input images have shape ``(size, size, 3)``.

    Returns:
        The History object returned by ``model.fit``.
    """
    K.clear_session()

    # Callbacks: keep the best checkpoint, reduce the learning rate on a
    # plateau, and stop early when training stops improving.
    model_ckpt = ModelCheckpoint(model_path, save_best_only=True)
    reduce_lr = ReduceLROnPlateau(patience=8, verbose=1)
    early_stop = EarlyStopping(patience=20, verbose=2)

    entry = L.Input(shape=(size,size,3))
    x = L.SeparableConv2D(64,(3,3),activation='relu',padding ='same')(entry)
    x = L.MaxPooling2D((2,2))(x)

    x = L.SeparableConv2D(128,(3,3),activation='relu',padding ='same')(x)
    x = L.MaxPooling2D((2,2))(x)

    x = L.SeparableConv2D(256,(2,2),activation='relu',padding ='same')(x)
    x = L.GlobalMaxPooling2D()(x)

    x = L.Dense(256)(x)
    x = L.LeakyReLU()(x)
    x = L.Dense(64,kernel_regularizer=l2(2e-4))(x)
    x = L.LeakyReLU()(x)
    x = L.Dense(class_count,activation='softmax')(x)

    model = Model(entry,x)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    # No batch_size is passed here: the Keras documentation states that
    # batch_size must not be specified when the data comes from generators,
    # because they produce the batches themselves.
    # epochs=999 is only an upper bound; EarlyStopping ends training earlier.
    history = model.fit(train,
                        validation_data=val,
                        epochs=999,
                        callbacks=[model_ckpt, reduce_lr, early_stop],
                        verbose=1)
    return history

### Show Losses
학습 과정에서의 loss 그래프 출력

In [124]:
def show_losses(hist):
    """Plot the loss and accuracy curves of a training run.

    Loss (train/validation) is drawn on the left y-axis and accuracy
    (train/validation) on a twin right y-axis, both against the epoch index.

    Args:
        hist: Object whose ``history`` mapping holds the 'loss', 'val_loss',
            'accuracy' and 'val_accuracy' series.
    """
    _, loss_ax = plt.subplots(figsize=(10, 5))
    acc_ax = loss_ax.twinx()

    loss_ax.plot(hist.history['loss'], 'y', label='train loss')
    loss_ax.plot(hist.history['val_loss'], 'r', label='val loss')
    acc_ax.plot(hist.history['accuracy'], 'b', label='train acc')
    acc_ax.plot(hist.history['val_accuracy'], 'g', label='val acc')

    loss_ax.set_xlabel('epoch')
    loss_ax.set_ylabel('loss')
    acc_ax.set_ylabel('accuracy')
    loss_ax.legend(loc='upper left')
    acc_ax.legend(loc='lower left')

### Divide Image
이미지 쪼개기

In [162]:
def divide_image(img_name, shape):
    """Split the Canny edge map of an image into a grid of tiles on disk.

    Tile ``k`` (row-major index ``i * shape[1] + j``) is saved as
    ``data_path + 'tmp/' + str(k) + '/data/' + str(k) + '.png'``.

    Args:
        img_name: File name of the image, relative to ``data_path``.
        shape: ``(rows, columns)`` of the grid.

    Returns:
        The number of tiles, ``shape[0] * shape[1]``.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    source_file = data_path + img_name
    canny_file = data_path + 'tmp_canny.png'

    # Canny edge detection on the grayscale image
    cv_image = cv2.imread(source_file)
    if cv_image is None:
        # cv2.imread reports an unreadable file by returning None
        raise FileNotFoundError('Could not read image: ' + source_file)
    cv_image = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
    canny = cv2.Canny(cv_image, 80, 150)
    cv2.imwrite(canny_file, canny)

    # Image.open reads the pixel data lazily, so load it into memory before
    # the temporary file is deleted; the crops below then no longer depend on
    # the file (and the open handle does not block the removal).
    image = Image.open(canny_file)
    image.load()
    if not DEBUG_DONT_ERASE_TEMP:
        os.remove(canny_file)

    # Size of one tile (may be fractional)
    width, height = image.size[0], image.size[1]
    single_width = width / shape[1]
    single_height = height / shape[0]

    # Create the output folder
    create_folder(data_path + 'tmp/')

    for i in range(shape[0]):
        for j in range(shape[1]):
            # Crop box of tile (row i, column j): (left, upper, right, lower)
            area = (j * single_width, i * single_height,
                    single_width * (j + 1), (i + 1) * single_height)

            tile_index = str(i * shape[1] + j)
            path = data_path + 'tmp/' + tile_index + '/data/'
            create_folder(path)
            cropped_img = image.crop(area)
            cropped_img.save(path + tile_index + '.png')        # save the tile

    return shape[0] * shape[1]

### Remove Temporary Images
임시로 생성된 이미지 제거

In [163]:
def remove_tmp_imgs():
    """Delete the ``data_path + 'tmp'`` directory tree.

    Failure to delete is reported with a printed warning instead of raising.
    """
    try:
        shutil.rmtree(data_path + 'tmp')
    except OSError:
        # Only filesystem errors are treated as a failed cleanup; anything
        # else (e.g. KeyboardInterrupt) propagates instead of being swallowed.
        print('Warning: failed to delete temporary splited images')

### Model Train

In [164]:
train_generator, val_generator = load_data(image_size)

with tf.device('/GPU:0'):
    try:
        # Only checks that the checkpoint can be loaded; the result is discarded.
        load_model(model_path)
    except Exception as load_error:
        # Exception instead of a bare except, so KeyboardInterrupt/SystemExit
        # are not mistaken for a missing checkpoint. The reason is printed
        # because any load failure (not only a missing file) ends up here.
        print('Could not find checkpoint, creating new model')
        print('Reason:', load_error)
        hist = make_model(train_generator,
                          val_generator,
                          len(elements),
                          image_size)
        show_losses(hist)

Found 13366 images belonging to 27 classes.
Found 3329 images belonging to 27 classes.


### Integer To ASCII(Char)

In [165]:
def itoa(num):
    """Convert a class index to its character.

    Index 26 maps to a space; any other index ``n`` maps to ``chr(n + 97)``,
    so 0 is 'a' and 25 is 'z'.
    """
    return ' ' if num == 26 else chr(num + 97)

### Predict

In [166]:
def predict(model, image_name, shape):
    """Recognize the characters of an image, one grid tile at a time.

    The image is split with ``divide_image``; each tile is read back from
    ``data_path + 'tmp/'``, classified with ``model`` and the highest-scoring
    class is converted to a character with ``itoa``. The best score and
    character of every tile, and the final string, are printed.

    Args:
        model: Model whose ``predict`` method is used on each tile.
        image_name: File name of the image, relative to ``data_path``.
        shape: Grid shape forwarded to ``divide_image``.

    Returns:
        The recognized string, one character per tile.
    """
    # Split the image into tiles
    tile_count = divide_image(image_name, shape)
    chars = []

    reader = ImageDataGenerator()
    tiles_root = data_path + 'tmp/'

    for tile_idx in range(tile_count):
        # Load the tile from its own folder
        tile_flow = reader.flow_from_directory(tiles_root + str(tile_idx),
                                               target_size=(image_size, image_size))

        # Scores of the first (only) image in the folder
        scores = model.predict(tile_flow)[0]
        if DEBUG_PRINT_ALL_PREDICTIONS:
            print(scores)

        # Pick the best class
        best_idx = np.argmax(scores)
        best_char = itoa(best_idx)
        chars.append(best_char)

        print('Best prediction:', scores[best_idx], best_char)

    result = ''.join(chars)
    print('\nResult:', result)
    if not DEBUG_DONT_ERASE_TEMP:
        remove_tmp_imgs()

    return result

### Model 불러오기

In [167]:
# Load the checkpoint and evaluate it on the validation generator.
# Element 1 of the evaluate() result is reported as the accuracy
# (presumably [loss, accuracy], matching metrics=['accuracy'] - confirm for
# checkpoints produced elsewhere).
model = load_model(model_path)
acc = model.evaluate(val_generator)[1]
print('Model accuracy: {:.2f}'.format(acc))

2023-06-04 15:35:22.196649: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
2023-06-04 15:35:22.317764: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Model accuracy: 1.00


### Predict Image

In [175]:
# Recognize the text in data_path + 'hello.jpg', split into a grid of
# 1 row x 5 columns (one tile per character).
predicted = predict(model, 'hello.jpg', (1, 5))

Found 1 images belonging to 1 classes.
Best prediction: 0.96740806 b
Found 1 images belonging to 1 classes.
Best prediction: 0.62546486 e
Found 1 images belonging to 1 classes.
Best prediction: 0.82064915 h
Found 1 images belonging to 1 classes.


2023-06-04 15:37:32.840013: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
2023-06-04 15:37:32.908550: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
2023-06-04 15:37:32.977216: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Best prediction: 0.49368364 c
Found 1 images belonging to 1 classes.
Best prediction: 0.9999851 k
Found 1 images belonging to 1 classes.
Best prediction: 0.99232507 o
Found 1 images belonging to 1 classes.


2023-06-04 15:37:33.047406: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
2023-06-04 15:37:33.118985: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
2023-06-04 15:37:33.196621: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Best prediction: 0.9433239 t
Found 1 images belonging to 1 classes.
Best prediction: 0.4834451 y

Result: behckoty


2023-06-04 15:37:33.269369: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
2023-06-04 15:37:33.338362: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


In [132]:
from TTS.api import TTS
from pydub import AudioSegment
from pydub.playback import play

# Synthesize the predicted text to a temporary WAV file and play it.
output_file = "output.wav"

tts_model = TTS.list_models()[0]
tts = TTS(tts_model)
try:
    tts.tts_to_file(text=predicted, speaker=tts.speakers[0], language='en', file_path=output_file)

    audio = AudioSegment.from_wav(output_file)
    play(audio)
finally:
    # Remove the temporary file even when synthesis or playback fails.
    if os.path.exists(output_file):
        os.remove(output_file)

No API token found for 🐸Coqui Studio voices - https://coqui.ai 
Visit 🔗https://app.coqui.ai/account to get one.
Set it as an environment variable `export COQUI_STUDIO_TOKEN=<token>`

 > tts_models/multilingual/multi-dataset/your_tts is already downloaded.
 > Model's license - CC BY-NC-ND 4.0
 > Check https://creativecommons.org/licenses/by-nc-nd/4.0/ for more info.
 > Using model: vits
 > Setting up Audio Processor...
 | > sample_rate:16000
 | > resample:False
 | > num_mels:80
 | > log_func:np.log10
 | > min_level_db:0
 | > frame_shift_ms:None
 | > frame_length_ms:None
 | > ref_level_db:None
 | > fft_size:1024
 | > power:None
 | > preemphasis:0.0
 | > griffin_lim_iters:None
 | > signal_norm:None
 | > symmetric_norm:None
 | > mel_fmin:0
 | > mel_fmax:None
 | > pitch_fmin:None
 | > pitch_fmax:None
 | > spec_gain:20.0
 | > stft_pad_mode:reflect
 | > max_norm:1.0
 | > clip_norm:True
 | > do_trim_silence:False
 | > trim_db:60
 | > do_sound_norm:False
 | > do_amp_to_db_linear:True
 | > do_am

Input #0, wav, from '/var/folders/72/lgrwqgmx38gdffsjlwlxyzgm0000gn/T/tmpguu7c_5d.wav':
  Duration: 00:00:01.12, bitrate: 256 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 16000 Hz, 1 channels, s16, 256 kb/s
   0.95 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


