In [1]:
import tensorflow as tf
from tensorflow import keras
from keras import utils
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPool2D, Input, Dense, Flatten, Concatenate
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.applications.vgg16 import VGG16

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings 
from IPython.display import Image

import os
import random
from copy import deepcopy
import pickle
import time

In [2]:
# Seed every RNG the notebook draws from so that shuffling, the random layer
# mask and the weight initialisation are repeatable on Restart & Run All.
SEED = 5
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Load dataset

# Raw string: the previous literal mixed escaped and unescaped backslashes
# ('\G', '\A', '\d' are invalid escape sequences and trigger a warning on
# newer Python versions). The resulting path value is unchanged.
load_path = r'D:\GH\Audio\dataset\UrbanSound8K\audio\np_data'

# Reference notes:
#   loaded = np.load('<file name>.npz', allow_pickle=True)
#   per-sample shape after preprocessing below: (128, 173, 1)

train_path = os.path.join(load_path, 'fold1train.npy')
test_path = os.path.join(load_path, 'fold1test.npy')

# NOTE(review): allow_pickle=True can execute arbitrary code while unpickling;
# only load .npy files that were produced locally or are otherwise trusted.
train_data = np.load(train_path, allow_pickle=True)
test_data = np.load(test_path, allow_pickle=True)

# Per row: the last column is used as the feature array, column 1 as the label.
x_train = train_data[:, -1]
y_train = train_data[:, 1]
x_test = test_data[:, -1]
y_test = test_data[:, 1]

# Turn the object array of per-sample arrays into one dense array.
x_train = np.stack(x_train, axis=0)
x_test = np.stack(x_test, axis=0)

# Append a trailing channel axis and flatten the labels to 1-D.
x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]
y_train = y_train.reshape((-1,))
y_test = y_test.reshape((-1,))

# Build an index array with one entry per sample, then shuffle it in place.
idx_train = np.arange(x_train.shape[0])
idx_test = np.arange(x_test.shape[0])
np.random.shuffle(idx_train)
np.random.shuffle(idx_test)

# Apply the same permutation to features and labels so pairs stay aligned.
x_train_shuffle = x_train[idx_train]
y_train_shuffle = y_train[idx_train]
x_test_shuffle = x_test[idx_test]
y_test_shuffle = y_test[idx_test]

# Convert to dtypes TensorFlow can consume. float32 is Keras' default float
# type, so storing float64 only doubled the memory footprint of the features.
x_train = np.asarray(x_train_shuffle).astype(np.float32)
y_train = np.asarray(y_train_shuffle).astype(np.int64)
x_test = np.asarray(x_test_shuffle).astype(np.float32)
y_test = np.asarray(y_test_shuffle).astype(np.int64)

# Features and labels must stay paired one-to-one.
assert x_train.shape[0] == y_train.shape[0]
assert x_test.shape[0] == y_test.shape[0]

print('Train/Test Dataset Shape')
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

Train/Test Dataset Shape
(7859, 128, 173, 1)
(7859,)
(873, 128, 173, 1)
(873,)


In [32]:
# Input shape handed to the pretrained backbone: (128, 173) plus 3 channels.
IMG_SHAPE = (128, 173, 3)
# Alternative backbone, kept for reference:
# base_model = VGG16(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')
# ResNet50 without its classification head, initialised with ImageNet weights.
base_model = tf.keras.applications.resnet.ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=IMG_SHAPE,
)

In [33]:
# Draw one random True/False flag per backbone layer.
sample_arr = [True, False]
bool_arr = np.random.choice(sample_arr, size=len(base_model.layers))
print(bool_arr)

# Unfreeze the backbone as a whole, then overwrite each layer's trainable
# flag with its entry from the random mask (True = train, False = frozen).
base_model.trainable = True
for layer, is_trainable in zip(base_model.layers, bool_arr):
    layer.trainable = is_trainable

[False False False False False  True  True  True False  True  True False
  True  True False  True  True  True  True  True False False False False
  True False False False  True  True False False False False False False
  True False  True False False  True  True  True  True  True  True  True
  True False False False False False False  True  True  True  True  True
 False  True  True False  True False False False False False False  True
  True  True  True  True False False False False  True  True  True  True
 False  True  True False False  True  True False  True False  True  True
 False  True  True False False False False  True  True False  True False
 False  True False False False False  True False  True  True  True False
 False  True  True False  True  True False  True  True  True False False
 False False  True  True False False False False  True False  True  True
 False False  True False  True  True False  True False False  True  True
  True False  True  True False  True False  True Fa

In [19]:
# base_model.trainable = False		# Freeze every layer of the ResNet model.
# base_model.trainable = True		  # Train every layer of the ResNet model.

# base_model.trainable = True		    # Unfreeze the base model,
# for i in base_model.layers[:10]:	# then take the first 10 layers
#   i.trainable = False				      # and freeze them.

In [34]:
# Single-channel input; a 1x1 convolution with 3 filters expands it to the
# 3 channels the pretrained backbone was built for.
inputs = Input(shape=(128, 173, 1))
first_conv_layer = Conv2D(filters=3, kernel_size=1, padding='same', activation=None)(inputs)

# The backbone is called with training=False, then its feature map is
# flattened and fed to a 10-way softmax classifier.
x = base_model(first_conv_layer, training=False)
x = Flatten()(x)
outputs = Dense(units=10, activation='softmax')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs, name="UrbanSound8K__model")

# The sparse loss is used with integer labels. 'categorical_crossentropy' is
# for one-hot encoded labels instead, e.g. y[0] = [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
# y[1] = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0].
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss="sparse_categorical_crossentropy",
    metrics=['accuracy'],
)

In [35]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the assembled model.
model.summary()

Model: "UrbanSound8K__model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_10 (InputLayer)       [(None, 128, 173, 1)]     0         
                                                                 
 conv2d_5 (Conv2D)           (None, 128, 173, 3)       6         
                                                                 
 resnet50 (Functional)       (None, 4, 6, 2048)        23587712  
                                                                 
 flatten_5 (Flatten)         (None, 49152)             0         
                                                                 
 dense_5 (Dense)             (None, 10)                491530    
                                                                 
Total params: 24,079,248
Trainable params: 17,448,336
Non-trainable params: 6,630,912
_________________________________________________________________


In [36]:
# Early-stopping callback: watches val_loss with a patience of 5 epochs.
early = tf.keras.callbacks.EarlyStopping(patience=5, monitor='val_loss')

# Checkpoint callback: evaluated every epoch, writes weights only, and keeps
# just the checkpoint with the best val_accuracy seen so far.
checkpoint_best_path = 'model_Freeze10_checkpoints_best/checkpoint'
checkpoint_best = ModelCheckpoint(
    filepath=checkpoint_best_path,
    monitor='val_accuracy',
    save_best_only=True,
    save_weights_only=True,
    save_freq='epoch',
    verbose=1,
)

In [37]:
# Train for up to 50 epochs. The EarlyStopping callback `early` was created
# but never handed to fit(), so training could only be cut short by
# interrupting the kernel; it is now registered next to the checkpoint.
# NOTE(review): validation_data is the test split, so early stopping and
# best-checkpoint selection are tuned on test data — confirm this is intended.
hist = model.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    epochs=50,
    batch_size=32,
    verbose=1,
    callbacks=[early, checkpoint_best],
)

Epoch 1/50
Epoch 1: val_accuracy improved from -inf to 0.91180, saving model to model_Freeze10_checkpoints_best\checkpoint
Epoch 2/50
Epoch 2: val_accuracy improved from 0.91180 to 0.96220, saving model to model_Freeze10_checkpoints_best\checkpoint
Epoch 3/50
Epoch 3: val_accuracy did not improve from 0.96220
Epoch 4/50
Epoch 4: val_accuracy improved from 0.96220 to 0.98167, saving model to model_Freeze10_checkpoints_best\checkpoint
Epoch 5/50
Epoch 5: val_accuracy improved from 0.98167 to 0.98282, saving model to model_Freeze10_checkpoints_best\checkpoint
Epoch 6/50
Epoch 6: val_accuracy did not improve from 0.98282
Epoch 7/50

KeyboardInterrupt: 