라이브러리, 마운트
================================

In [None]:
import tensorflow as tf
import keras
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os,cv2,glob,shutil
from tqdm import tqdm # progress bar display

# Mount Google Drive at /content/drive (Colab-specific); the dataset paths
# used in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


기본 데이터 정의
============

In [None]:
# Random seed passed to train_test_split so the train/validation split is reproducible
SEED = 909

# Batch size
BATCH_SIZE = 16

# Number of epochs
NUM_OF_EPOCHS = 25

# Train/validation ratio (used as test_size in train_test_split)
validation_ratio=0.2

# Originally (480,720) (may be changed as long as the image features are not lost)
# Originally (540,960)
IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224
IMG_SIZE = (IMAGE_HEIGHT, IMAGE_WIDTH)

# Dataset paths
data_dir = '/content/drive/MyDrive/newstudy/ocean_trash_classfication'
data_dir_train = os.path.join(data_dir, 'train/images')
data_dir_test = os.path.join(data_dir, 'test/images')

# Class names are the sorted entry names directly under the training image
# directory; a label index is the position of the name in this list.
# NOTE(review): os.listdir returns every entry, so any stray file or hidden
# folder in that directory would also become a "class" - confirm the folder is clean.
class_name = sorted(os.listdir(data_dir_train))

print(class_name)


['bundle of ropes', 'circular fish trap', 'eel fish trap', 'fish net', 'other objects', 'rectangular fish trap', 'rope', 'spring fish trap', 'tire', 'wood']


출력함수
==============================

In [None]:
def predict_img(img,get_last=False):
  """Predict the class index of a single image with the global ``model``.

  Args:
    img: One image without a batch axis; a leading axis of size 1 is added
      before it is passed to ``model.predict``.
    get_last: Only relevant when ``model.output`` is a list (several
      outputs). If true, the last output's prediction is used; otherwise
      the predictions of all outputs are averaged.

  Returns:
    The argmax over the last axis of the first (and only) batch entry of
    the selected prediction.
  """
  prediction = model.predict(np.expand_dims(img, axis=0))

  if isinstance(model.output, list):
    # Several outputs: either keep the last one or average all of them.
    num_of_output = len(model.output)
    x = prediction[num_of_output-1] if get_last else sum(prediction) / num_of_output
  else:
    x = prediction

  # Index of the highest score for the single image in the batch.
  return np.argmax(x[0], axis=-1)

def display_dataset(ds,num=1,predict=False,test=False):
  """Show the first image of each of the first ``num`` batches of ``ds``.

  Args:
    ds: Iterable yielding ``(images, labels)`` batches; labels are indexed
      per sample and reduced with argmax to a class index.
    num: Number of batches to visit before stopping.
    predict: If true, the class predicted by ``predict_img`` is appended
      to the plot title.
    test: If true, the title shows 'No Named' instead of the class name.
  """
  for count, (images, labels) in enumerate(ds):
    if count == num:
      break

    first_image = images[0]
    pixels = (first_image*255).numpy()
    label_idx = np.argmax(labels[0])
    print(label_idx,labels[0])

    title = 'True:' + ('No Named' if test else class_name[label_idx])
    if predict:
      title += ',Predict:' + class_name[predict_img(first_image)]

    plt.title(title)
    plt.imshow(tf.keras.preprocessing.image.array_to_img(pixels))
    plt.show()

def display_test(img,num=1):
  """Plot the first ``num`` images of ``img`` titled with their predicted class.

  Args:
    img: Indexable collection of image file paths (each is passed to
      ``read_image``).
    num: How many entries, starting at index 0, to load, classify and show.
  """
  for idx in range(num):
    image = read_image(img[idx])
    predicted = predict_img(image)
    plt.title(class_name[predicted])
    # read_image returns values divided by 255, so scale back for display.
    plt.imshow(tf.keras.preprocessing.image.array_to_img(image*255))
    plt.show()

# Display and save the model's training history
def display_history(history, dir, num,save_pic=True,save_csv=True,save_best_infor=True):
  """Plot training curves and optionally save the figure and CSV summaries.

  For every output of the global ``model`` one row of four subplots is
  drawn: accuracy, validation accuracy, loss and validation loss.

  Args:
    history: Per-epoch metric values indexed by metric name. Either a
      ``dict`` or another keyed table such as a ``pandas.DataFrame``; for
      non-dict input the first key is skipped in the best-epoch summary.
    dir: Directory the output files are written to.
    num: Run number embedded in the output file names.
    save_pic: Save the figure as ``Graph{num}.png``.
    save_csv: Save the full history as ``History{num}.csv``.
    save_best_infor: Save the metrics of the epoch with the lowest
      ``val_loss`` as ``Best{num}.Infor.csv``.
  """
  titles = ['Accuracy','Loss']
  metric_keys = ['accuracy','loss']
  legend_sides = ['lower','upper']

  multi_output = isinstance(model.output, list)
  num_of_output = len(model.output) if multi_output else 1

  plt.figure(figsize=(24,3*num_of_output))

  for i in range(num_of_output):
    # With several outputs the history keys carry the output name as prefix.
    name = model.output_names[i]+'_' if multi_output else ''

    for j in range(4):
      metric = j//2        # 0 -> accuracy, 1 -> loss
      is_val = j%2 == 1    # odd columns show the validation curve

      plt.subplot(num_of_output,4,i*4+j+1)
      if is_val:
        plt.plot(history[f'val_{name}{metric_keys[metric]}'], 'g-', label= 'val_'+metric_keys[metric])
        if i==0:
          plt.title('Val_'+titles[metric])
      else:
        plt.plot(history[f'{name}{metric_keys[metric]}'], 'g-', label= metric_keys[metric])
        if i==0:
          plt.title(titles[metric])

      if j==0:
        plt.ylabel(f'Output[{i}]')
      # Independent of the ylabel check so the bottom-left subplot also
      # gets its x-axis label.
      if i== num_of_output-1 :
        plt.xlabel('Epoch')
      plt.legend(loc= legend_sides[metric]+' right')
      plt.grid(True, axis='both',alpha=0.5)

  if save_pic:
    plt.savefig(f'{dir}/Graph{num}.png')

  if save_csv:
    # Let pandas open the file itself so newline handling is correct on
    # every platform.
    pd.DataFrame(history).to_csv(f'{dir}/History{num}.csv')

  if save_best_infor:
    idx = np.argmin(history['val_loss'])
    keys = list(history.keys())
    if not isinstance(history, dict):
      # Non-dict input (e.g. a DataFrame read back from History*.csv):
      # skip the first column, as before.
      keys = keys[1:]
    result = [['Epoch'] + keys,
              [idx+1] + [history[key][idx] for key in keys]]
    pd.DataFrame(result).to_csv(f'{dir}/Best{num}.Infor.csv')

  plt.show()

데이터 전처리
====================

In [None]:
def read_image(x):
  """Load an image file as an RGB float32 array with values divided by 255.

  Args:
    x: Path of the image file.

  Returns:
    The image converted from BGR to RGB, resized to
    (IMAGE_WIDTH, IMAGE_HEIGHT) and cast to ``np.float32``.

  Raises:
    ValueError: If OpenCV could not read or decode the file.
  """
  image = cv2.imread(x, cv2.IMREAD_COLOR)
  if image is None:
    # cv2.imread reports failure by returning None; fail here with the
    # offending path instead of an obscure error inside cvtColor.
    raise ValueError(f'Could not read image: {x}')

  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  # cv2.resize expects the target size as (width, height).
  image = cv2.resize(image, (IMAGE_WIDTH,IMAGE_HEIGHT))
  return (image / 255.0).astype(np.float32)


def tf_dataset(x,y, batch_size=1):
  """Build an endlessly repeating, shuffled and batched tf.data pipeline.

  Args:
    x: Sequence of image file paths.
    y: Sequence of class indices aligned with ``x``.
    batch_size: Number of samples per batch.

  Returns:
    A ``tf.data.Dataset`` whose elements are produced by ``preprocess``
    and then batched, repeated and prefetched.
  """
  autotune = tf.data.experimental.AUTOTUNE
  return (
    tf.data.Dataset.from_tensor_slices((x,y))
    .shuffle(buffer_size=1000)
    .map(preprocess,num_parallel_calls=autotune)
    .batch(batch_size)
    .repeat()  # never exhausts; callers bound iteration themselves
    .prefetch(buffer_size=autotune)
  )

def preprocess(x,y):
  """Turn a (path, class index) pair into an (image, one-hot label) pair.

  Args:
    x: Tensor holding the image file path.
    y: Tensor holding the class index.

  Returns:
    ``(image, label)`` where ``image`` is float32 with static shape
    [IMAGE_HEIGHT, IMAGE_WIDTH, 3] and ``label`` is an int32 one-hot
    vector of length ``len(class_name)``.
  """
  num_classes = len(class_name)

  def load(path, target):
    # The path arrives as bytes here, hence the decode before reading.
    return read_image(path.decode()), target

  image, label = tf.numpy_function(load, [x,y], [tf.float32, tf.int32])
  label = tf.one_hot(label, num_classes, dtype=tf.int32)

  # Static shapes are set explicitly after numpy_function.
  image.set_shape([IMAGE_HEIGHT, IMAGE_WIDTH, 3])
  label.set_shape(num_classes)
  print(image.shape,label.shape)
  return image, label

# Every file one directory level below the training root; the name of the
# parent directory is the class, stored as its index in class_name.
train_x = sorted(glob.glob(os.path.join(data_dir_train, '*/*')))
train_y = [class_name.index(os.path.basename(os.path.dirname(path))) for path in train_x]

test = sorted(glob.glob(os.path.join(data_dir_test,'*')))

# Split paths and labels in ONE call so they are guaranteed to stay aligned
# (previously two separate calls relied on using the same seed).
train_x, valid_x, train_y, valid_y = train_test_split(
    train_x, train_y, test_size=validation_ratio, random_state=SEED)

print(f"Dataset: Train: {len(train_x)} - Valid: {len(valid_x)} - Test: {len(test)}")

train_dataset = tf_dataset(train_x,train_y, batch_size=BATCH_SIZE)
valid_dataset = tf_dataset(valid_x,valid_y, batch_size=BATCH_SIZE)


In [None]:
NUM_TRAIN = len(train_x)

NUM_VALID = len(valid_x)

# Number of whole batches per epoch. The datasets repeat endlessly, so the
# step counts bound each epoch; clamp to at least one step so a split
# smaller than BATCH_SIZE does not yield zero steps.
EPOCH_STEP_TRAIN = max(1, NUM_TRAIN // BATCH_SIZE)
EPOCH_STEP_VALID = max(1, NUM_VALID // BATCH_SIZE)

In [None]:
# Preview the first image of 10 training batches with its true label (no prediction).
display_dataset(train_dataset,10,False)

ResNet50 모델
======================

In [None]:
def conv(input,chennel,kernel_sizes,strides=2,padding='valid',is_relu=True,is_bn=True):
  """Apply Conv2D, optionally followed by batch normalization and ReLU.

  Args:
    input: Tensor the convolution is applied to.
    chennel: Number of convolution filters.
    kernel_sizes: Kernel size passed to ``Conv2D``.
    strides: Stride passed to ``Conv2D``.
    padding: Padding mode passed to ``Conv2D``.
    is_relu: Apply ReLU as the last step.
    is_bn: Apply ``BatchNormalization`` after the convolution.

  Returns:
    The resulting tensor.
  """
  out = keras.layers.Conv2D(chennel,kernel_sizes,strides,padding=padding)(input)
  out = keras.layers.BatchNormalization()(out) if is_bn else out
  return keras.activations.relu(out) if is_relu else out

def resnet50(classes=1000,initial_features=64):
  """Build a ResNet-50 style classifier out of bottleneck residual blocks.

  Args:
    classes: Number of units of the final softmax layer.
    initial_features: Filters of the stem convolution; the widths of all
      stages are derived from it.

  Returns:
    An uncompiled ``keras.Model`` taking [IMAGE_HEIGHT, IMAGE_WIDTH, 3]
    inputs, named ``ResNet50-F{initial_features}-C{classes}``.
  """
  inputs = keras.Input(shape=[IMAGE_HEIGHT,IMAGE_WIDTH,3])

  # Stem: 7x7 stride-2 convolution followed by 3x3 stride-2 max pooling.
  x = conv(inputs,initial_features,7,2,'same')
  x = keras.layers.MaxPool2D(3,2,'same')(x)

  blocks_per_stage = [3,4,6,3]
  for stage, num_blocks in enumerate(blocks_per_stage):
    width = initial_features*2**stage             # filters inside the bottleneck
    out_channels = initial_features*2**(stage+2)  # filters leaving the block

    for block in range(num_blocks):
      if block == 0:
        # First block of a stage: project the shortcut to the new channel
        # count; every stage except the first also halves the resolution.
        strides = 1 if stage == 0 else 2
        shortcut = conv(x,out_channels,1,strides,'valid',False)
      else:
        # Remaining blocks: identity shortcut.
        strides = 1
        shortcut = x

      x = conv(x,width,1,strides,'valid')
      x = conv(x,width,3,1,'same')
      x = conv(x,out_channels,1,1,'valid',False)  # ReLU is applied after the add
      x = keras.layers.add([x,shortcut])
      x = keras.activations.relu(x)

  # Keras names this layer GlobalAveragePooling2D; `GlobalAveragePool2D`
  # is not a Keras layer.
  x = keras.layers.GlobalAveragePooling2D()(x)
  x = keras.layers.Dense(classes,activation='softmax')(x)

  return keras.Model(inputs=[inputs], outputs=[x], name=f'ResNet50-F{initial_features}-C{classes}')

# Build the network with one softmax unit per class and compile it for
# one-hot labels (categorical cross-entropy) with the Adam optimizer.
model = resnet50(len(class_name))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

Model: "ResNet50-F64-C10"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_41 (InputLayer)          [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d_1663 (Conv2D)           (None, 109, 109, 64  9472        ['input_41[0][0]']               
                                )                                                                 
                                                                                                  
 batch_normalization_1663 (Batc  (None, 109, 109, 64  256        ['conv2d_1663[0][0]']            
 hNormalization)                )                                                  

학습
================


In [None]:
# Training section: print the run configuration first
print(f'Train Data:{NUM_TRAIN}')
print(f'Valid Data:{NUM_VALID}')
print(f'Epochs:{NUM_OF_EPOCHS}')
print(f'Batch Size:{BATCH_SIZE}')
print(f'Num Class:{len(class_name)}')
print(f'Model Name:{model.name}')
print(f'Loss:{model.loss}')
print(f'Dataset Name:{data_dir.split("/")[-1]}')
print(f'I/O Image Size (H * W):{IMAGE_HEIGHT} * {IMAGE_WIDTH}')

# Path of the checkpoint file (a file path despite the "dir" in the name)
cp_dir = f'{data_dir}/{model.name}.h5'

# Checkpoint callback with save_best_only=True; no `monitor` is given, so
# the callback's default monitored quantity applies.
mc = tf.keras.callbacks.ModelCheckpoint(filepath=cp_dir, 
                                        save_best_only=True,
                                        verbose=1
                                        )

# Both datasets repeat endlessly, so the step counts define the epoch length.
history = model.fit(train_dataset,
                    steps_per_epoch=EPOCH_STEP_TRAIN,
                    validation_data=valid_dataset,
                    validation_steps=EPOCH_STEP_VALID,
                    epochs=NUM_OF_EPOCHS,
                    callbacks=[mc]
                    )

In [None]:
# Output directory encodes model name, input size and batch size
save_dir = f'{data_dir}/model/{model.name}-H{IMAGE_HEIGHT}-W{IMAGE_WIDTH}-{BATCH_SIZE}-globalavgpool'

# exist_ok=True replaces the separate existence check before creating the directory
os.makedirs(save_dir, exist_ok=True)

# Use the first run number that does not have a saved model yet
new_model_num = 1
while os.path.exists(f'{save_dir}/Model{new_model_num}.h5'):
  new_model_num += 1

# Save the final model and, if a checkpoint was written during training,
# move it next to it under the same run number
model.save(f'{save_dir}/Model{new_model_num}.h5')
if os.path.exists(cp_dir):
  shutil.move(cp_dir,f'{save_dir}/Best{new_model_num}.h5')
display_history(history.history,save_dir,new_model_num,True,True,True)

In [None]:
# Show every test image titled with its predicted class.
display_test(test,len(test))

In [None]:
# Show the first image of 100 training batches with true and predicted class.
display_dataset(train_dataset,100,True)

모델 불러오기
===========================

In [None]:
# Load a previously saved model and its training history
load_dir = f'{data_dir}/model/ResNet50-F64-C10-H224-W224-16-globalavgpool'
model_num = 1

# True loads the 'Best' file of the run, False the 'Model' file
sel_best = False
model_name = 'Best' if sel_best else 'Model'

model_path = f'{load_dir}/{model_name}{model_num}.h5'
if not os.path.exists(model_path):
  # Message: "There is no model."
  print('모델이 없습니다.')
else:
  model = tf.keras.models.load_model(model_path)

history_path = f'{load_dir}/History{model_num}.csv'
if not os.path.exists(history_path):
  # Message: "There is no information about the training process."
  print("학습과정에 대한 정보가 없습니다.")
else:
  history = pd.read_csv(history_path)
  # Re-draw the curves; the history CSV itself is not written again.
  display_history(history,load_dir,model_num,True,False,True)