# 첫번째 모델
### CNN+LSTM
- input shape: (image 갯수, frame 수, image크기)

In [2]:
import keras
from keras import models, layers, activations
import tensorflow as tf
import numpy as np

In [35]:
class CNN_LSTM(keras.Model):
    def __init__(self, input_shape, num_classes):
        super(CNN_LSTM, self).__init__()
        self.BZ = input_shape[0]
        self.frame = input_shape[1]
        
        self.conv1 = layers.Conv2D(32, kernel_size=(5,5), activation= 'relu', input_shape = input_shape)
        self.conv2 = layers.Conv2D(64, kernel_size=(5,5), activation= 'relu')
        self.conv3 = layers.Conv2D(128, kernel_size=(5,5), activation= 'relu')
        self.lstm = layers.LSTM(256, return_sequences = False)
        #self.lstm2 = layers.LSTM(128, return_sequences = True)
        #self.lstm3= layers.LSTM(256, return_sequences = False)
        self.dense = layers.Dense(16, activation = 'softmax')
        
        #self.compile(loss = keras.losses.categorical_crossentropy, optimizer = 'adam', metircs = ['accuracy'])
        
    def call(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = tf.reshape(x,(self.BZ, self.frame, -1))
        x = self.lstm(x)
        x = self.dense(x)
        print(type(x))
        label = []
        for i  in range(len(x)):
            label.append(np.argmax(x[i]))
        #print(label)
        return tf.convert_to_tensor(label)

In [36]:
input_shape = (32, 20, 64, 64, 3)
x = tf.random.normal(input_shape)
model = CNN_LSTM(input_shape, 16)
y = model(x)
# label = []
# for i in range(len(y)):
#     label.append(np.argmax(y[i]))
# print(len(label))
# print(label)
print(y)

<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor([7 3 7 3 7 3 7 9 3 3 7 7 7 7 7 3 7 3 3 7 7 7 7 3 3 7 7 7 6 3 3 3], shape=(32,), dtype=int32)


# 두번째 모델
### Deep Layered CNN+LSTM

In [34]:
class deep_CNN_LSTM(keras.Model):
    def __init__(self, input_shape, num_classes):
        super(deep_CNN_LSTM, self).__init__()
        self.BZ = input_shape[0]
        self.frame = input_shape[1]
        
        self.conv1 = layers.Conv2D(32, kernel_size=(5,5), activation = 'relu', input_shape = input_shape)
        self.conv2 = layers.Conv2D(64, kernel_size=(5,5), activation = 'relu')
        self.conv3 = layers.Conv2D(128, kernel_size=(5,5), activation = 'relu')
        
        self.bilstm = layers.Bidirectional(layers.LSTM(256), merge_mode = 'concat')
        self.dense = layers.Dense(num_classes, activation = 'softmax')
        self.max_pool= layers.MaxPooling2D(pool_size= (2,2))
        self.dropout = layers.Dropout(0.2)
    
    def call(self, input):
        fm = []
        for x in input:
            x = self.max_pool(self.dropout(layers.BatchNormalization()(self.conv1(x))))
            x = self.max_pool(self.dropout(layers.BatchNormalization()(self.conv2(x))))
            x = self.max_pool(self.dropout(layers.BatchNormalization()(self.conv3(x))))
            fm.append(x)
        x = tf.reshape(fm,(self.BZ, self.frame, -1))
            
        x = self.bilstm(x)
        x = self.dense(x)
        return x

In [60]:
input_shape = (32, 20, 64, 64, 3)
x = tf.random.normal(input_shape)
model = deep_CNN_LSTM(input_shape, 16)
y = model(x)
label = []
for i in range(len(y)):
    label.append(np.argmax(y[i]))
print(len(label))
print(label)

(32, 20, 2048)
(32, 512)
32
[7, 5, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7]


# 세번째 모델
### Pretrained VGG-16(ImageNet) + LSTM

In [None]:
from keras.applications import VGG16

class VGG_LSTM(keras.Model):
    def __init__(self, input_shape, num_classes):
        super(VGG_LSTM, self).__init__()
        self.BZ = input_shape[0]
        self.frame = input_shape[1]
        
        self.vgg = VGG16(weights = 'imagenet', include_top = False, input_shape = (input_shape[2], input_shape[3], input_shape[4]))
        self.vgg.trainable = False

        self.bilstm = layers.Bidirectional(layers.LSTM(256), merge_mode = 'concat')
        self.dense = layers.Dense(num_classes, activation = 'softmax')
        self.max_pool= layers.MaxPooling2D(pool_size= (2,2))
        self.dropout = layers.Dropout(0.2)
    
    def call(self, input):
        print(input.shape)
        fm = []
        for x in input:
            x = self.vgg(x)
            #print(x.shape)
            fm.append(x)
        x = tf.reshape(fm,(self.BZ, self.frame, -1)) # (32, 20, 2048)
        x = self.bilstm(x)
        x = self.dense(x)
        return x

In [None]:
input_shape = (32, 20, 64, 64, 3)
x = tf.random.normal(input_shape)
model = VGG_LSTM(input_shape, 16)
y = model(x)
label = []
for i in range(len(y)):
    label.append(np.argmax(y[i]))
print(len(label))
print(label)

# 네번째 모델
### Fine-Tuned VGG-16 + LSTM

In [63]:
from keras.applications import VGG16

class VGG_LSTM(keras.Model):
    def __init__(self, input_shape, num_classes):
        super(VGG_LSTM, self).__init__()
        self.BZ = input_shape[0]
        self.frame = input_shape[1]
        
        self.vgg = VGG16(weights = 'imagenet', include_top = False, input_shape = (input_shape[2], input_shape[3], input_shape[4]))
        self.vgg.trainable = False
        
        set_trainable = False
        for layer in self.vgg.layers:
            if layer.name == 'block5_conv1' or layer.name == 'block5_conv2' or layer.name== 'block5_conv3':
                set_trainable = True
            if set_trainable:
                layer.trainable = True
            else:
                layer.trainable = False

        #self.lstm = layers.LSTM(256, return_sequences = False)
        self.bilstm = layers.Bidirectional(layers.LSTM(256), merge_mode = 'concat')
        #self.lstm2 = layers.LSTM(128, return_sequences = True)
        #self.lstm3= layers.LSTM(256, return_sequences = False)
        self.dense = layers.Dense(num_classes, activation = 'softmax')
        self.max_pool= layers.MaxPooling2D(pool_size= (2,2))
        self.dropout = layers.Dropout(0.2)
        #self.flatten = layers.Flatten()
    
    def call(self, input):
        print(input.shape)
        fm = []
        for x in input:
            x = self.vgg(x)
            #print(x.shape)
            fm.append(x)
        x = tf.reshape(fm,(self.BZ, self.frame, -1)) # (32, 20, 2048)
        x = self.bilstm(x)
        x = self.dense(x)
        label = []
        for i  in range(len(x)):
            label.append(np.argmax(x[i]))
        return label

In [64]:
input_shape = (32, 20, 64, 64, 3)
x = tf.random.normal(input_shape)
model = VGG_LSTM(input_shape, 16)
y = model(x)
label = []
for i in range(len(y)):
    label.append(np.argmax(y[i]))
print(len(label))
print(label)

(32, 20, 64, 64, 3)
32
[14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14]


## TRAIN

In [5]:
# parameter 설정
BZ = 32
frame_size = 20
input_shape = (BZ, frame_size, 64, 64, 3)
num_classes = 16

In [6]:
model = CNN_LSTM(input_shape, num_classes)

loss = keras.losses.CategoricalCrossentropy(from_logits = True)
optimizer = keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name = 'train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name = 'test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

In [8]:
def train(epochs):
    for epoch in range(epochs):
        train_loss.reset_states()
        train_accuracy.reset_states()
        test_loss.reset_states()
        test_accuracy.reset_states()
        
        for images, label in train_dataset:
            y_hat = model(images)
            y = []
            for i in range(len(y_hat)):
                y.append(np.argmax(y_hat[i]))

In [9]:
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

## dataset
x = np.random.random((32, 20, 64, 64, 3))
y = np.random.randint((32,))
#model.fit(x,epochs = 3)

In [10]:
x_data = np.load('D:/git/LipService/x_save.npy') # x_save.npy
y_data = np.load('D:/git/LipService/y_data.npy') # y_data.npy

In [11]:
x_data.shape

(300, 20, 64, 64, 3)

In [12]:
y_data.shape

(300,)

In [13]:
model = CNN_LSTM(input_shape, num_classes)
y = model(x_data[:32])

In [15]:
y.shape

TensorShape([32, 16])

In [37]:
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

model.fit(x_data[:32], y_data[:32],epochs = 3)

Epoch 1/3
<class 'tensorflow.python.framework.ops.Tensor'>


NotImplementedError: in user code:

    C:\Users\hwj43\anaconda3\envs\JolpJJang\lib\site-packages\tensorflow\python\keras\engine\training.py:805 train_function  *
        return step_function(self, iterator)
    <ipython-input-35-f4ac3cda6c4e>:27 call  *
        label.append(np.argmax(x[i]))
    <__array_function__ internals>:5 argmax  **
        
    C:\Users\hwj43\anaconda3\envs\JolpJJang\lib\site-packages\numpy\core\fromnumeric.py:1188 argmax
        return _wrapfunc(a, 'argmax', axis=axis, out=out)
    C:\Users\hwj43\anaconda3\envs\JolpJJang\lib\site-packages\numpy\core\fromnumeric.py:55 _wrapfunc
        return _wrapit(obj, method, *args, **kwds)
    C:\Users\hwj43\anaconda3\envs\JolpJJang\lib\site-packages\numpy\core\fromnumeric.py:44 _wrapit
        result = getattr(asarray(obj), method)(*args, **kwds)
    C:\Users\hwj43\anaconda3\envs\JolpJJang\lib\site-packages\numpy\core\_asarray.py:83 asarray
        return array(a, dtype, copy=False, order=order)
    C:\Users\hwj43\anaconda3\envs\JolpJJang\lib\site-packages\tensorflow\python\framework\ops.py:852 __array__
        raise NotImplementedError(

    NotImplementedError: Cannot convert a symbolic Tensor (cnn_lstm_8/strided_slice:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported


In [22]:
x_data[:32].shape

(32, 20, 64, 64, 3)

In [23]:
y_data[:32].shape

(32,)