In [7]:
import numpy as np
import tensorflow as tf

In [8]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Conv2D, Bidirectional
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.layers import Reshape, MaxPooling2D
from tensorflow.keras.applications import VGG16

## Data Load

### 1. generate train, validation

### 1. CNN + LSTM

In [4]:
def build_CNN_LSTM(input_shape=(30,20,64,64,3)):
    BZ = input_shape[0]
    frame = input_shape[1]
    model = tf.keras.Sequential()
    
    
    # cnn
    model.add(Conv2D(32, 
                        kernel_size=(5,5),
                        strides=(1,1),
                        activation='relu',
                        input_shape=input_shape))
    model.add(Conv2D(64, 
                        kernel_size=(5,5),
                        strides=(1,1),
                        activation='relu'))
    model.add(Conv2D(128, 
                        kernel_size=(5,5),
                        strides=(1,1),
                        activation='relu'))

    # reshape(flatten 대신)
    model.add(Reshape((frame, -1)))
    
    # lstm
    model.add(LSTM(256, activation = 'relu', return_sequences = False))

    # dense
    model.add(Dense(units=16, activation='softmax'))
              
    # argMax는 fit 후에 적용할 것
    return model

In [None]:
input_shape = (30, 20, 64, 64, 3)
model = build_CNN_LSTM(input_shape)
model.summary()

### 2. Deep Layered CNN + LSTM

In [3]:
# cnn 2개 추가
# lstm 양방향
# cnn 레이어 후 드롭 아웃, 배치 정규화
# 드롭 아웃 확률 0.2
# 맥스 풀링 2x2 -> cnn 사이 보폭 2?
def build_Deep_CNN_LSTM(input_shape=(150,20,64,64,3)):
    BZ = input_shape[0]
    frame = input_shape[1]
    model = tf.keras.Sequential()
    
    # cnn + max pooling + drop out + batch nomalization
    for i in range(3) :
        model.add(Conv2D(filters=3,
                            kernel_size=(5,5),
                            strides=(1,1),
                            activation='relu',
                            input_shape=input_shape))
        model.add(MaxPooling2D(pool_size=(2,2)))
        model.add(Dropout(0.2))
        model.add(BatchNormalization())
        
    # bidirectional lstm
    model.add(Bidirectional(LSTM(256, activation = 'relu', return_sequences = True),
                           merge_mode = 'concat'))
    
    # reshape(flatten 대신)
    model.add(Reshape((frame, -1)))

    # dense
    model.add(tf.keras.layers.Dense(units=16, activation='softmax'))
    
    return model

In [10]:
input_shape = (30, 20, 64, 64, 3)
model = build_Deep_CNN_LSTM(input_shape)
model.summary()

ValueError: Input 0 of layer max_pooling2d is incompatible with the layer: expected ndim=4, found ndim=6. Full shape received: (None, 30, 20, 60, 60, 3)

### 3. ImageNet Pretrained VGG-16 Features + LSTM

In [4]:
def build_Frozen_VGG_LSTM(input_shape=(150,20,64,64,3)):
    model = tf.keras.Sequential()
    
    # vgg16
    vgg16 = VGG16(weights='imagenet',
                        include_top=False,
                        input_shape=input_shape)
    vgg16.trainable = False
    model.add(vgg16)
    
    # bidirectional lstm
    model.add(Bidirectional(LSTM(256, activation = 'relu', return_sequences = True),
                           merge_mode = 'concat'))
    
    # reshape(flatten 대신)
    model.add(Reshape((frame, -1)))

    # dense
    model.add(tf.keras.layers.Dense(units=10, activation='softmax'))
    
    return model

### 4. Fine-tunes VGG-16 + LSTM

In [5]:
# 마지막 컨볼루션 블록(512 3x3 커널이 있는 3개의 cnn) 고정 해제하고 양방향 LSTM과 dense 레이어와 함께 훈련
# 이미지의 더 복잡한 특징 캡처 의도
def build_FineTuned_VGG_LSTM(input_shape=(150,20,64,64,3)):
    model = tf.keras.Sequential()
    
    # vgg16
    vgg16 = VGG16(weights='imagenet',
                        include_top=False,
                        input_shape=input_shape)
    vgg16.trainable = False
    # Fine tuning을 위해 일부 레이어의 Freeze 해제
    set_trainable = False
    for layer in vgg16.layers:
        if layer.name == 'block5_conv1':
            set_trainable = True
        if layer.name == 'block5_conv2':
            set_trainable = True
        if layer.name == 'block5_conv3':
            set_trainable = True
        if set_trainable:
            layer.trainable = True
        else:
            layer.trainable = False
    model.add(vgg16)
    
    # bidirectional lstm
    model.add(Bidirectional(LSTM(256, activation = 'relu', return_sequences = True),
                           merge_mode = 'concat'))
    
    # reshape(flatten 대신)
    model.add(Reshape((frame, -1)))

    # dense
    model.add(tf.keras.layers.Dense(units=10, activation='softmax'))
    
    return model

In [None]:
if __name__ == '__main__':
    