In [None]:
import tensorflow as tf
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from tensorflow import keras
from keras.models import Input, Model
from keras.layers import TimeDistributed, LSTM
from keras.layers import ConvLSTM2D
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, LeakyReLU, BatchNormalization
from keras.layers import Dense, Flatten, GlobalMaxPooling2D
from keras.layers import MaxPooling3D
from keras.layers import concatenate
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split

In [None]:
# Number of frames kept per clip (see select_frame and the model's Input layer).
frames = 15
# Target frame size in pixels, used by preprocess() and the model's Input layer.
Width = 256
Height = 256

In [None]:
def load_video_names(path, train_fraction=0.7):
  """Split the videos under ``path`` into training and validation lists.

  ``path`` is expected to contain one sub-directory per category, each holding
  that category's video files. Within every category the first
  ``train_fraction`` of the (sorted) files go to the training lists and the
  remainder to the validation lists.

  Args:
    path: Root directory with one folder per category.
    train_fraction: Share of each category's files assigned to training.

  Returns:
    (videos, labels, videos_val, labels_val): parallel lists of file paths and
    category names for the training and the validation subset.
  """
  videos, labels = [], []
  videos_val, labels_val = [], []

  # Sorted listings make the split reproducible; os.listdir order is arbitrary.
  for category in sorted(os.listdir(path)):
    category_dir = path + "/" + category
    if not os.path.isdir(category_dir):
      continue  # a stray file next to the category folders is not a class

    clip_paths = [
        category_dir + "/" + name
        for name in sorted(os.listdir(category_dir))
        if os.path.isfile(category_dir + "/" + name)
    ]

    # The count is taken from the very listing being split, so it can never
    # get out of step with the category it belongs to.
    n_train = int(round(len(clip_paths) * train_fraction))
    n_train = min(max(n_train, 0), len(clip_paths))

    train_paths = clip_paths[:n_train]
    val_paths = clip_paths[n_train:]

    videos.extend(train_paths)
    labels.extend([category] * len(train_paths))
    videos_val.extend(val_paths)
    labels_val.extend([category] * len(val_paths))

  return videos, labels, videos_val, labels_val

# Rescale pixel values so that 0 maps to -1 and 255 maps to 1.
def preprocess(frame):
  """Resize ``frame`` to (Width, Height) and shift/scale its values by 127.5."""
  resized = cv2.resize(frame, (Width, Height))
  return (resized - 127.5) / 127.5


def load_video(video_path):
  """Decode a video file and return its sampled, preprocessed frames.

  All frames are read, ``select_frame`` picks the subset to keep, and only
  that subset is passed through ``preprocess``.

  Args:
    video_path: Path of the video file to open with OpenCV.

  Returns:
    ``np.array`` built from the preprocessed frames. When the clip yields a
    number of frames different from ``frames`` a 'short_video' line is printed
    and the (shorter) array is still returned.
  """
  raw_frames = []
  cap = cv2.VideoCapture(video_path)
  try:
    while True:
      ret, frame = cap.read()
      if not ret:
        break
      raw_frames.append(frame)  # keep every decoded frame for sampling
  finally:
    # Release the capture handle even if decoding raises.
    cap.release()

  # Sample first, then preprocess: resizing and float conversion are only paid
  # for the frames that are actually kept.
  video_frames = [preprocess(frame) for frame in select_frame(raw_frames)]
  if len(video_frames) != frames:
    print('short_video ', video_path, len(video_frames))

  return np.array(video_frames)


def select_frame(video_frames, num_frames=None):
  """Pick up to ``num_frames`` frames from a clip at a fixed stride.

  Args:
    video_frames: Sequence of frames (anything supporting ``len`` and slicing).
    num_frames: How many frames to keep. Defaults to the notebook-level
      ``frames`` setting when omitted.

  Returns:
    The input sequence itself when it has ``num_frames`` frames or fewer.
    Otherwise the frames at indices 0, step, 2*step, ... (with
    ``step = len(video_frames) // num_frames``), truncated to ``num_frames``.
  """
  if num_frames is None:
    num_frames = frames

  if len(video_frames) <= num_frames:
    return video_frames

  step = len(video_frames) // num_frames
  # step >= 1 here, and (num_frames - 1) * step < len(video_frames), so the
  # strided slice always holds at least num_frames entries before truncation.
  return video_frames[::step][:num_frames]


def create_dataset(videos, labels):
  """Load the clip named by the first entry of ``videos``.

  ``videos`` must expose ``.numpy()``; its first element, converted to ``str``,
  is the path handed to ``load_video``. ``labels`` is returned untouched.
  """
  video_path = videos.numpy().astype(str)[0]
  return load_video(video_path), labels

In [None]:
def onehot_encode_label(labels):
  """One-hot encode a 1-D collection of class labels.

  Classes are the sorted unique values of ``labels``; column ``k`` of the
  result corresponds to the k-th class in that order.

  Args:
    labels: Sequence of hashable, sortable labels (e.g. category names).

  Returns:
    float64 array of shape (len(labels), number_of_classes) with a single 1.0
    per row. Empty input yields an empty (0, 0) array.
  """
  # return_inverse gives, for every label, the index of its class in the
  # sorted unique list — exactly the integer code a label encoder would assign.
  classes, codes = np.unique(np.asarray(labels).ravel(), return_inverse=True)
  # Row i of the identity matrix is the one-hot vector for class i.
  return np.eye(len(classes))[codes]

In [None]:
def res_block(model, filters):
  """Append a Conv2D -> BatchNorm -> LeakyReLU branch and merge it with its input.

  Note that the branch output is concatenated with the block input (channel
  count grows); it is not added to it.
  """
  branch = Conv2D(filters = filters, kernel_size = 3, padding = 'same')(model)
  branch = BatchNormalization(momentum = 0.9)(branch)
  branch = LeakyReLU(0.2)(branch)
  return concatenate([model, branch])


def create_model(num_classes=None):
  """Build and compile the ConvLSTM + convolutional classifier.

  Args:
    num_classes: Size of the softmax output. When omitted it is derived from
      the notebook-level ``labels`` list (number of unique labels), which must
      then already be defined.

  Returns:
    A compiled Keras ``Model`` taking clips of shape
    (frames, Height, Width, 3) and producing ``num_classes`` probabilities.
  """
  if num_classes is None:
    num_classes = len(np.unique(labels))

  # cv2.resize(frame, (Width, Height)) yields arrays laid out as
  # (Height, Width, channels), so the input is declared in that order.
  input_layer = Input(shape = (frames, Height, Width, 3))

  # return_sequences=False collapses the time axis: from here on the tensor is
  # 4-D (batch, rows, cols, channels).
  x = ConvLSTM2D(32, 3, padding = 'same', return_sequences= False)(input_layer)
  x = BatchNormalization(momentum = 0.9)(x)
  x = LeakyReLU(0.2)(x)

  filters = 64
  for _ in range(6):
    x = res_block(x, filters)
    # The tensor is 4-D, so 2-D pooling is the only applicable pooling layer.
    x = MaxPooling2D((2, 2))(x)
    if filters < 512:
      filters *= 2

  x = Flatten()(x)
  outputs = Dense(num_classes, activation = 'softmax')(x)

  model = Model(input_layer, outputs)

  # Targets are one-hot vectors (see onehot_encode_label), hence the
  # non-sparse categorical cross-entropy.
  model.compile(optimizer = Adam(learning_rate = 1e-4), loss = 'categorical_crossentropy', metrics = ['accuracy'])
  return model

In [None]:
# Collect the training / validation video paths and their category labels
# from the hmdb51 directory.
videos, labels, videos_val, labels_val = load_video_names('/content/drive/MyDrive/HAR/AR_CNN_1/hmdb51')

In [None]:
# Build and compile the network (create_model reads the global `labels`).
classifier = create_model()

In [None]:
# Show the layer-by-layer architecture and parameter counts.
classifier.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 15, 256, 256 0                                            
__________________________________________________________________________________________________
conv_lst_m2d_2 (ConvLSTM2D)     (None, 256, 256, 32) 40448       input_3[0][0]                    
__________________________________________________________________________________________________
batch_normalization_14 (BatchNo (None, 256, 256, 32) 128         conv_lst_m2d_2[0][0]             
__________________________________________________________________________________________________
leaky_re_lu_14 (LeakyReLU)      (None, 256, 256, 32) 0           batch_normalization_14[0][0]     
____________________________________________________________________________________________

In [None]:
# Work from the project folder so that relative paths (e.g. model_name) resolve there.
%cd /content/drive/MyDrive/HAR/AR_CNN_1

/content/drive/MyDrive/HAR/AR_CNN_1


In [None]:
# File used for checkpoints and for loading / saving the classifier weights.
model_name = 'AR_1.h5'

In [None]:
# Write the model to `model_name` at the end of an epoch whenever
# val_accuracy improves on the best value seen so far.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    model_name,
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    save_freq='epoch',
)

# Stop after 10 epochs without improvement and roll back to the best weights.
# NOTE(review): no `monitor` is passed, so the Keras default metric is used —
# confirm that is intended given the checkpoint tracks val_accuracy.
early_stopping_cb = keras.callbacks.EarlyStopping(
    patience=10,
    restore_best_weights=True,
)

In [None]:
def _fixup_shape(images, labels, num_classes=12):
    """Re-attach the static shapes that are lost after ``tf.py_function``.

    Args:
      images: Batched clip tensor; declared as
        (batch, frames, Height, Width, 3) using the notebook-level settings.
      labels: Batched one-hot label tensor; declared as (batch, num_classes).
      num_classes: Width of the one-hot labels. Defaults to 12, the value this
        notebook was written for.

    Returns:
      The same ``(images, labels)`` pair with their static shapes set.
    """
    # Use the config constants rather than repeating their literal values, so
    # the declared shape follows any change to frames / Height / Width.
    images.set_shape([None, frames, Height, Width, 3])
    labels.set_shape([None, num_classes])
    return images, labels

In [None]:
def reader_dataset(videos, labels, repeat = 1, n_parse_threads = 5, batch_size = 5, shuffle_buffer_size = None):
  """Build the batched ``tf.data`` input pipeline for a list of video files.

  Args:
    videos: List of video file paths.
    labels: Category label for each entry of ``videos``.
    repeat: Number of times the (path, label) pairs are repeated.
    n_parse_threads: Parallelism used when decoding videos.
    batch_size: Clips per batch; incomplete final batches are dropped.
    shuffle_buffer_size: Size of the shuffle buffer. ``None`` (default) uses
      ``len(videos)`` so a whole pass over the data fits in the buffer.

  Returns:
    A dataset of ``(clips, one_hot_labels)`` batches with static shapes set
    by ``_fixup_shape`` and a prefetch of one batch.
  """
  y = onehot_encode_label(labels)
  print(y.shape)

  # The path list arrives grouped by category. A buffer smaller than the
  # dataset only shuffles locally and produces near single-class batches.
  # Shuffling happens before decoding, so the buffer holds only path strings
  # and label vectors and is cheap even at full dataset size.
  if shuffle_buffer_size is None:
    shuffle_buffer_size = max(len(videos), 1)

  dataset = tf.data.Dataset.from_tensor_slices((videos, y)).repeat(repeat)
  dataset = dataset.shuffle(shuffle_buffer_size)

  def _decode(path, label):
    # Video decoding runs in Python (OpenCV), hence the py_function wrapper.
    return tf.py_function(create_dataset, [[path], label], [tf.float64, tf.float64])

  dataset = dataset.map(_decode, num_parallel_calls = n_parse_threads)
  dataset = dataset.batch(batch_size, drop_remainder = True).map(_fixup_shape)
  return dataset.prefetch(1)

In [None]:
# Build the input pipelines; each call prints the shape of its one-hot label matrix.
train_set = reader_dataset(videos, labels)
valid_set = reader_dataset(videos_val, labels_val)

(1428, 12)
(601, 12)


In [None]:
# Inspect the element shapes and dtypes of the training pipeline.
print(train_set)

<PrefetchDataset shapes: ((None, 15, 256, 256, 3), (None, 12)), types: (tf.float64, tf.float64)>


In [None]:
# Resume from previously saved weights when a checkpoint file is present;
# otherwise training simply starts from the freshly initialised model.
if os.path.exists(model_name):
  try:
    classifier.load_weights(model_name)
    print("success")
  except Exception as err:
    # Still best-effort, but report why loading failed instead of hiding it
    # (e.g. a checkpoint written for a different architecture).
    print("could not load weights from", model_name, "-", err)
else:
  print("no checkpoint found at", model_name)

In [None]:
# Train for up to 30 epochs with checkpointing and early stopping.
history = classifier.fit(train_set, epochs=30, validation_data = valid_set, callbacks=[checkpoint_cb, early_stopping_cb])

Epoch 1/30

Epoch 00001: val_accuracy improved from -inf to 0.21667, saving model to AR_1.h5
Epoch 2/30

Epoch 00002: val_accuracy improved from 0.21667 to 0.27333, saving model to AR_1.h5
Epoch 3/30

Epoch 00003: val_accuracy did not improve from 0.27333
Epoch 4/30

Epoch 00004: val_accuracy did not improve from 0.27333
Epoch 5/30

Epoch 00005: val_accuracy did not improve from 0.27333
Epoch 6/30

Epoch 00006: val_accuracy did not improve from 0.27333
Epoch 7/30

Epoch 00007: val_accuracy did not improve from 0.27333
Epoch 8/30

Epoch 00008: val_accuracy did not improve from 0.27333
Epoch 9/30

Epoch 00009: val_accuracy did not improve from 0.27333
Epoch 10/30

Epoch 00010: val_accuracy did not improve from 0.27333
Epoch 11/30

Epoch 00011: val_accuracy did not improve from 0.27333
Epoch 12/30

Epoch 00012: val_accuracy did not improve from 0.27333
Epoch 13/30

Epoch 00013: val_accuracy did not improve from 0.27333
Epoch 14/30

Epoch 00014: val_accuracy did not improve from 0.27333
Ep

In [None]:
# Persist the classifier's current weights.
# NOTE(review): this writes to the same file that checkpoint_cb saves to, so
# the checkpoint selected on val_accuracy is overwritten — confirm intended.
classifier.save_weights(model_name)

## Test: inspect the data pipeline

In [None]:
# The one-hot labels must exist before the dataset below is built from them,
# otherwise this cell fails with a NameError on a fresh kernel.
y = onehot_encode_label(labels)

dataset_X = tf.data.Dataset.from_tensor_slices((videos, y)).repeat(2)
dataset_X = dataset_X.map(lambda path, label : tf.py_function(create_dataset, [[path], label], [tf.float64, tf.float64]))

dataset_y = tf.data.Dataset.from_tensor_slices(y).repeat(2)

# Peek at a single decoded sample rather than decoding and printing every clip.
for clip, label in dataset_X.take(1):
  print(clip.shape, label.numpy())

(<tf.Tensor: shape=(15, 256, 256, 3), dtype=float64, numpy=
array([[[[-0.29411765,  0.01176471, -0.03529412],
         [-0.29411765,  0.01176471, -0.03529412],
         [-0.29411765,  0.01176471, -0.03529412],
         ...,
         [-0.39607843, -0.09019608, -0.1372549 ],
         [-0.39607843, -0.09019608, -0.1372549 ],
         [-0.39607843, -0.09019608, -0.1372549 ]],

        [[-0.28627451,  0.01960784, -0.02745098],
         [-0.28627451,  0.01960784, -0.02745098],
         [-0.28627451,  0.01960784, -0.02745098],
         ...,
         [-0.39607843, -0.09019608, -0.1372549 ],
         [-0.39607843, -0.09019608, -0.1372549 ],
         [-0.39607843, -0.09019608, -0.1372549 ]],

        [[-0.29411765,  0.01176471, -0.03529412],
         [-0.29411765,  0.01176471, -0.03529412],
         [-0.29411765,  0.01176471, -0.03529412],
         ...,
         [-0.39607843, -0.09019608, -0.1372549 ],
         [-0.39607843, -0.09019608, -0.1372549 ],
         [-0.39607843, -0.09019608, -0.13725

In [None]:
# Keep only the first three samples for a quick experiment.
# NOTE: this rebinds the global lists; anything run afterwards that reads
# `videos` / `labels` (e.g. create_model) sees only these three entries.
videos = videos[:3]
labels = labels[:3]

In [None]:
# One-hot encode the labels of the reduced sample.
y = onehot_encode_label(labels)

In [None]:
# Display the selected video paths.
videos

['/content/drive/MyDrive/HAR/AR_CNN_1/hmdb51/draw_sword/19th_Century_Cavalry_Sabre_Draw_draw_sword_u_nm_np1_fr_bad_0.avi',
 '/content/drive/MyDrive/HAR/AR_CNN_1/hmdb51/draw_sword/19th_Century_Cavalry_Sabre_Draw_draw_sword_u_nm_np1_fr_bad_1.avi',
 '/content/drive/MyDrive/HAR/AR_CNN_1/hmdb51/draw_sword/A_point_about_drawing_swords_draw_sword_u_nm_np1_fr_med_0.avi']

In [None]:
# Build a dataset from the selected paths.
# NOTE(review): list_files interprets its argument as file patterns — confirm
# that is the intended way to wrap an explicit list of paths.
dataset = tf.data.Dataset.list_files(videos)