# 첫번째 모델
### CNN+LSTM
- input shape: (image 갯수, frame 수, image크기)

In [1]:
import keras
from keras import models, layers
import tensorflow as tf
import numpy as np

In [2]:
class CNN_LSTM(keras.Model):
    def __init__(self, input_shape, num_classes):
        super(CNN_LSTM, self).__init__()
        self.BZ = input_shape[0]
        self.frame = input_shape[1]
        
        self.conv1 = layers.Conv2D(32, kernel_size=(5,5), activation= 'relu', input_shape = input_shape)
        self.conv2 = layers.Conv2D(64, kernel_size=(5,5), activation= 'relu')
        self.conv3 = layers.Conv2D(128, kernel_size=(5,5), activation= 'relu')
        self.lstm = layers.LSTM(256, return_sequences = False)
        #self.lstm2 = layers.LSTM(128, return_sequences = True)
        #self.lstm3= layers.LSTM(256, return_sequences = False)
        self.dense = layers.Dense(16, activation = 'softmax')
        #self.max_pool= layers.MaxPooling2D(pool_size= (2,2))
        
        #self.flatten = layers.Flatten()
    
    def call(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = tf.reshape(x,(self.BZ, self.frame, -1))
        x = self.lstm(x)
        x = self.dense(x)
        return x

In [3]:
input_shape = (32, 20, 64, 64, 3)
x = tf.random.normal(input_shape)
model = CNN_LSTM(input_shape, 16)
y = model(x)
label = []
for i in range(len(y)):
    label.append(np.argmax(y[i]))
print(len(label))
print(label)

32
[1, 4, 13, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 13, 1, 1, 1, 1, 1, 1, 4]


# 두번째 모델
### Deep Layered CNN+LSTM

In [4]:
class deep_CNN_LSTM(keras.Model):
    def __init__(self, input_shape, num_classes):
        super(deep_CNN_LSTM, self).__init__()
        self.BZ = input_shape[0]
        self.frame = input_shape[1]
        
        self.conv1 = layers.Conv2D(32, kernel_size=(5,5), activation= 'relu', input_shape = input_shape)
        self.conv2 = layers.Conv2D(64, kernel_size=(5,5), activation= 'relu')
        self.conv3 = layers.Conv2D(128, kernel_size=(5,5), activation= 'relu')
        #self.lstm = layers.LSTM(256, return_sequences = False)
        self.bilstm = layers.Bidirectional(layers.LSTM(256), merge_mode = 'concat')
        #self.lstm2 = layers.LSTM(128, return_sequences = True)
        #self.lstm3= layers.LSTM(256, return_sequences = False)
        self.dense = layers.Dense(16, activation = 'softmax')
        self.max_pool= layers.MaxPooling2D(pool_size= (2,2))
        self.dropout = layers.Dropout(0.2)
        #self.flatten = layers.Flatten()
    
    def call(self, input):
        fm = []
        for x in input:
            x = self.conv1(x)
            x = self.dropout(self.max_pool(x))
            x = self.conv2(x)
            x = self.dropout(self.max_pool(x))
            x = self.conv3(x)
            x = self.dropout(self.max_pool(x))
            fm.append(x)
        x = tf.reshape(fm,(self.BZ, self.frame, -1))
        print(x.shape)
            
        x = self.bilstm(x)
        print(x.shape)
        x = self.dense(x)
        return x

In [5]:
input_shape = (32, 20, 64, 64, 3)
x = tf.random.normal(input_shape)
model = deep_CNN_LSTM(input_shape, 16)
y = model(x)
label = []
for i in range(len(y)):
    label.append(np.argmax(y[i]))
print(len(label))
print(label)

(32, 20, 2048)
(32, 512)
32
[6, 4, 4, 6, 6, 6, 6, 6, 6, 4, 4, 6, 6, 6, 6, 6, 6, 4, 4, 6, 4, 6, 4, 4, 6, 6, 6, 6, 6, 4, 6, 4]


# 세번째 모델
### Pretrained VGG-16(ImageNet) + LSTM

In [6]:
from keras.applications import VGG16

class VGG_LSTM(keras.Model):
    def __init__(self, input_shape, num_classes):
        super(VGG_LSTM, self).__init__()
        self.BZ = input_shape[0]
        self.frame = input_shape[1]
        
        self.vgg = VGG16(weights = 'imagenet', include_top = False, input_shape = (input_shape[2], input_shape[3], input_shape[4]))
        self.vgg.trainable = False
            
        #self.lstm = layers.LSTM(256, return_sequences = False)
        self.bilstm = layers.Bidirectional(layers.LSTM(256), merge_mode = 'concat')
        #self.lstm2 = layers.LSTM(128, return_sequences = True)
        #self.lstm3= layers.LSTM(256, return_sequences = False)
        self.dense = layers.Dense(16, activation = 'softmax')
        self.max_pool= layers.MaxPooling2D(pool_size= (2,2))
        self.dropout = layers.Dropout(0.2)
        #self.flatten = layers.Flatten()
    
    def call(self, input):
        print(input.shape)
        fm = []
        for x in input:
            x = self.vgg(x)
            #print(x.shape)
            fm.append(x)
        x = tf.reshape(fm,(self.BZ, self.frame, -1)) # (32, 20, 2048)
        x = self.bilstm(x)
        x = self.dense(x)
        return x

In [7]:
input_shape = (32, 20, 64, 64, 3)
x = tf.random.normal(input_shape)
model = VGG_LSTM(input_shape, 16)
y = model(x)
label = []
for i in range(len(y)):
    label.append(np.argmax(y[i]))
print(len(label))
print(label)

(32, 20, 64, 64, 3)
32
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10]


# 네번째 모델
### Fine-Tuned VGG-16 + LSTM

In [8]:
from keras.applications import VGG16

class VGG_LSTM(keras.Model):
    def __init__(self, input_shape, num_classes):
        super(VGG_LSTM, self).__init__()
        self.BZ = input_shape[0]
        self.frame = input_shape[1]
        
        self.vgg = VGG16(weights = 'imagenet', include_top = False, input_shape = (input_shape[2], input_shape[3], input_shape[4]))
        self.vgg.trainable = True
            
        #self.lstm = layers.LSTM(256, return_sequences = False)
        self.bilstm = layers.Bidirectional(layers.LSTM(256), merge_mode = 'concat')
        #self.lstm2 = layers.LSTM(128, return_sequences = True)
        #self.lstm3= layers.LSTM(256, return_sequences = False)
        self.dense = layers.Dense(16, activation = 'softmax')
        self.max_pool= layers.MaxPooling2D(pool_size= (2,2))
        self.dropout = layers.Dropout(0.2)
        #self.flatten = layers.Flatten()
    
    def call(self, input):
        print(input.shape)
        fm = []
        for x in input:
            x = self.vgg(x)
            #print(x.shape)
            fm.append(x)
        x = tf.reshape(fm,(self.BZ, self.frame, -1)) # (32, 20, 2048)
        x = self.bilstm(x)
        x = self.dense(x)
        return x

In [9]:
input_shape = (32, 20, 64, 64, 3)
x = tf.random.normal(input_shape)
model = VGG_LSTM(input_shape, 16)
y = model(x)
label = []
for i in range(len(y)):
    label.append(np.argmax(y[i]))
print(len(label))
print(label)

(32, 20, 64, 64, 3)
32
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10]


## TRAIN

In [10]:
BZ = 32
frame_size = 20
input_shape = (BZ, frame_size, 64, 64, 3)
num_classes = 16
model = CNN_LSTM(input_shape, num_classes)

loss = keras.losses.CategoricalCrossentropy(from_logits = True)
optimizer = keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name = 'train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name = 'test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

In [11]:
def train(epochs):
    for epoch in range(epochs):
        train_loss.reset_states()
        train_accuracy.reset_states()
        test_loss.reset_states()
        test_accuracy.reset_states()
        
        for images, label in train_dataset:
            y_hat = model(images)
            y = []
            for i in range(len(y_hat)):
                y.append(np.argmax(y_hat[i]))

In [12]:
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

## dataset
x = np.random.random((32, 20, 64, 64, 3))
y = np.random.randint((32,))
#model.fit(x,epochs = 3)