# 라이브러리 불러오기

In [None]:
import cv2
import numpy as np
import pandas as pd
from google.colab import drive
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.models import Sequential
from tensorflow.keras.applications import VGG16, ResNet50
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Mount Google Drive so the dataset and model files under /content/drive are reachable
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Global settings shared by the preprocessing and training cells
IMG_SIZE = 224  # frames are resized to IMG_SIZE x IMG_SIZE pixels
MAX_SEQ_LENGTH = 20  # upper bound on the number of frames taken from each video
BATCH_SIZE = 32  # batch size used by the training and evaluation generators

# 데이터 전처리

In [None]:
# Load the train/test index CSVs; later cells read their 'video_name' and 'tag' columns
train_df = pd.read_csv('/content/drive/MyDrive/모듈프로젝트4_3조/16.모듈프로젝트04/workspace/train.csv')
test_df = pd.read_csv('/content/drive/MyDrive/모듈프로젝트4_3조/16.모듈프로젝트04/workspace/test.csv')

In [None]:
# Label encoding: the integer codes overwrite 'tag' in place instead of going
# into a separate 'label' column.
# {CricketShot: 0, Punch: 1, TennisSwing: 2}
# The encoder is fitted on the training tags only and then reused for the test
# tags, so both splits share the same class-to-integer mapping.
encoder = LabelEncoder()
train_df['tag'] = encoder.fit_transform(train_df['tag'])
test_df['tag'] = encoder.transform(test_df['tag'])

In [None]:
# Return the centered square region of a video frame
def crop_center_square(frame):
    """Crop ``frame`` to the largest square centered in the image.

    Args:
        frame: array whose first two axes are (height, width); any further
            axes are kept untouched.

    Returns:
        A slice of ``frame`` whose height and width both equal the shorter
        of the two original sides.
    """
    height, width = frame.shape[0], frame.shape[1]
    side = min(height, width)
    # Offsets of the square's top-left corner; zero along the shorter axis.
    top = height // 2 - side // 2
    left = width // 2 - side // 2
    return frame[top : top + side, left : left + side]

In [None]:
# Read a video file and return its frames as images
def load_video(path, max_frames=MAX_SEQ_LENGTH, resize=(IMG_SIZE, IMG_SIZE)):
    """Decode up to ``max_frames`` frames from the start of a video.

    Every frame is center-cropped to a square, resized to ``resize`` and has
    its first three channels reversed (OpenCV decodes to BGR, so this yields
    RGB order).

    Args:
        path: path of the video file handed to ``cv2.VideoCapture``.
        max_frames: reading stops once this many frames have been collected.
        resize: target size passed to ``cv2.resize``.

    Returns:
        The collected frames stacked with ``np.array``; an empty array when
        no frame could be read.
    """
    collected = []
    capture = cv2.VideoCapture(path)
    try:
        while True:
            ok, image = capture.read()
            if not ok:
                # End of stream, or the file could not be decoded.
                break
            squared = crop_center_square(image)
            resized = cv2.resize(squared, resize)
            collected.append(resized[:, :, [2, 1, 0]])
            if len(collected) == max_frames:
                break
    finally:
        # Always release the capture handle, even if a frame operation raised.
        capture.release()
    return np.array(collected)

In [None]:
# Return the frames and one-hot labels of every video
def prepare_all_videos(video_name, label, root_dir, num_classes=None):
    """Load every listed video and return its frames with one-hot labels.

    Each frame becomes an independent sample that carries the label of the
    video it was read from.

    Args:
        video_name: pandas Series of video file names relative to ``root_dir``.
        label: pandas Series of integer class ids aligned with ``video_name``.
        root_dir: directory that contains the video files.
        num_classes: width of the one-hot encoding. When ``None`` it is
            inferred by ``to_categorical`` from the largest label present.

    Returns:
        ``(x, y)``: ``x`` is a float32 array of shape
        ``(n_frames, IMG_SIZE, IMG_SIZE, 3)`` and ``y`` the matching one-hot
        label matrix, where ``n_frames`` counts the frames actually read.

    Raises:
        ValueError: if no frame could be read from any video.
    """
    import os

    num_samples = len(video_name)
    video_paths = video_name.values.tolist()
    # Flat label vector so each lookup yields a scalar rather than a (1,) array.
    labels = label.values.reshape(-1)

    # Allocate for the worst case (every video yields MAX_SEQ_LENGTH frames).
    x = np.zeros(shape=(num_samples * MAX_SEQ_LENGTH, IMG_SIZE, IMG_SIZE, 3), dtype='float32')
    y = np.zeros(shape=(num_samples * MAX_SEQ_LENGTH), dtype='float32')

    index = 0
    for video_index, video_path in enumerate(video_paths):
        print(f'\rprepare all videos : {video_index + 1} / {num_samples}', end='')
        frames = load_video(os.path.join(root_dir, video_path))
        count = len(frames)
        if count == 0:
            # Unreadable or empty video: contributes no samples.
            continue
        x[index:index + count] = frames
        y[index:index + count] = labels[video_index]
        index += count
    print()

    if index == 0:
        raise ValueError('no frames could be read from the given videos')

    # Drop the unused tail of the buffers: videos shorter than MAX_SEQ_LENGTH
    # would otherwise leave all-zero frames labelled as class 0 in the data.
    return x[:index], to_categorical(y[:index], num_classes=num_classes)

In [None]:
# Build the train and test datasets: per-frame image arrays plus one-hot labels
x_train, y_train = prepare_all_videos(train_df['video_name'], train_df['tag'], '/content/drive/MyDrive/모듈프로젝트4_3조/16.모듈프로젝트04/workspace/train/')
x_test, y_test = prepare_all_videos(test_df['video_name'], test_df['tag'], '/content/drive/MyDrive/모듈프로젝트4_3조/16.모듈프로젝트04/workspace/test/')

prepare all videos : 356 / 356
prepare all videos : 137 / 137


# 데이터 분석
세 모델(base_model, vgg16_model, resnet50_model)이 동일한 데이터 전처리 결과를 사용하므로, 전처리를 반복하지 않도록 한 노트북에 함께 작성했습니다.

## 공통 부분

In [None]:
# Image data generators
# Training: random horizontal flips as augmentation, pixel values multiplied by 1/255.
train_image_data_generator = ImageDataGenerator(
    horizontal_flip=True,
    rescale=1 / 255,
)

# Evaluation: the same rescaling only, no augmentation.
test_image_data_generator = ImageDataGenerator(
    rescale=1 / 255,
)

In [None]:
# Stop training when the monitored result stops improving
early_stopping = EarlyStopping(
    monitor='loss',               # quantity to watch (the training loss)
    min_delta=1e-4,               # smallest change that counts as an improvement; anything smaller is treated as no improvement
    patience=5,                   # stop after this many epochs without improvement
)

## 베이스 모델

In [None]:
# Baseline model definition (base_model)
# Three Conv2D + MaxPooling2D stages (32, 64, 128 filters); each pooling halves
# the spatial size (224 -> 112 -> 56 -> 28). A dropout-regularised dense head
# with a 3-way softmax output follows.
base_model = Sequential([
    Conv2D(32, kernel_size=(3, 3), input_shape=(IMG_SIZE, IMG_SIZE, 3), activation='relu', padding='SAME'),
    MaxPooling2D(pool_size=2, padding='SAME'),
    Conv2D(64, kernel_size=(3, 3), activation='relu', padding='SAME'),
    MaxPooling2D(pool_size=2, padding='SAME'),
    Conv2D(128, kernel_size=(3, 3), activation='relu', padding='SAME'),
    MaxPooling2D(pool_size=2, padding='SAME'),
    Dropout(0.5),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.8),  # NOTE(review): drops 80% of the dense activations during training - confirm this rate is intended
    Dense(3, activation='softmax')  # one output per class
])

# Categorical cross-entropy on the one-hot labels, Adam optimizer, accuracy reported as 'acc'
base_model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=1e-4), metrics=['acc'])

# Print the layer-by-layer summary
base_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 224, 224, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 112, 112, 32)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 112, 112, 64)      18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 56, 56, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 56, 56, 128)       73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 28, 28, 128)       0         
_________________________________________________________________
dropout (Dropout)            (None, 28, 28, 128)       0

In [None]:
# Train the baseline model on batches produced by the training generator
base_model.fit(
    train_image_data_generator.flow(x_train, y_train, batch_size=BATCH_SIZE),
    epochs=50,  # upper bound; the early-stopping callback may end training sooner
    callbacks=[early_stopping],
    steps_per_epoch=len(x_train) // BATCH_SIZE,  # whole batches only (integer division)
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
 15/222 [=>............................] - ETA: 11:22 - loss: 0.0043 - acc: 1.0000

In [None]:
# Evaluate the baseline model on the test frames (rescaled only, no augmentation)
base_model.evaluate(
    test_image_data_generator.flow(x_test, y_test, batch_size=BATCH_SIZE)
)

In [None]:
# Save the trained baseline model to Drive, then drop the Python reference
# (presumably to free memory before the next model is built).
base_model.save('/content/drive/MyDrive/모듈프로젝트4_3조/김남준/models/base_model.h5')
del base_model

## VGG16 모델
노트북 셀 구성(모델 정의 → 학습 → 평가 → 저장)은 베이스 모델과 동일합니다.

In [None]:
# VGG16 model definition (vgg16_model)
# Reference: https://eremo2002.tistory.com/57?category=779320
# The ImageNet-pretrained VGG16 is used as a convolutional feature extractor
# (include_top=False), so the new dense head is trained on its feature maps
# rather than on VGG16's 1000-way ImageNet softmax output.
vgg16_model = Sequential([
    VGG16(weights='imagenet', include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3)),
    Flatten(),
    Dense(4096, activation='relu'),
    Dense(2048, activation='relu'),
    Dense(1024, activation='relu'),
    Dense(3, activation='softmax'),  # one output per class
])

# Categorical cross-entropy on the one-hot labels, Adam optimizer, accuracy reported as 'acc'
vgg16_model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=1e-4), metrics=['acc'])

# Print the layer-by-layer summary
vgg16_model.summary()

In [None]:
# Train the VGG16-based model on batches produced by the training generator
vgg16_model.fit(
    train_image_data_generator.flow(x_train, y_train, batch_size=BATCH_SIZE),
    epochs=50,  # upper bound; the early-stopping callback may end training sooner
    callbacks=[early_stopping],
    steps_per_epoch=len(x_train) // BATCH_SIZE,  # whole batches only (integer division)
)

In [None]:
# Evaluate the VGG16-based model on the test frames (rescaled only, no augmentation)
vgg16_model.evaluate(
    test_image_data_generator.flow(x_test, y_test, batch_size=BATCH_SIZE)
)

In [None]:
# Save the trained VGG16-based model to Drive, then drop the Python reference.
# NOTE(review): the file name repeats "_model" (vgg16_model_model.h5), unlike the
# other saved models - confirm whether anything loads this exact name before renaming.
vgg16_model.save('/content/drive/MyDrive/모듈프로젝트4_3조/김남준/models/vgg16_model_model.h5')
del vgg16_model

## ResNet50 모델
노트북 셀 구성(모델 정의 → 학습 → 평가 → 저장)은 베이스 모델과 동일합니다.

In [None]:
# ResNet50 model definition (resnet50_model)
# Reference: https://brillante-scene.tistory.com/94
# Trained from scratch (weights=None) with the built-in 3-class classification head.
# No `pooling` argument is passed: Keras only uses it when include_top=False, and
# it expects a string such as 'max', not the Python builtin `max`.
resnet50_model = ResNet50(include_top=True, weights=None, input_shape=(IMG_SIZE, IMG_SIZE, 3), classes=3)

# Categorical cross-entropy on the one-hot labels, Adam optimizer, accuracy reported as 'acc'
resnet50_model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=1e-4), metrics=['acc'])

# Print the layer-by-layer summary
resnet50_model.summary()

In [None]:
# Train the ResNet50 model on batches produced by the training generator
resnet50_model.fit(
    train_image_data_generator.flow(x_train, y_train, batch_size=BATCH_SIZE),
    epochs=50,  # upper bound; the early-stopping callback may end training sooner
    callbacks=[early_stopping],
    steps_per_epoch=len(x_train) // BATCH_SIZE,  # whole batches only (integer division)
)

In [None]:
# Evaluate the ResNet50 model on the test frames (rescaled only, no augmentation)
resnet50_model.evaluate(
    test_image_data_generator.flow(x_test, y_test, batch_size=BATCH_SIZE)
)

In [None]:
# Save the trained ResNet50 model to Drive, then drop the Python reference
resnet50_model.save('/content/drive/MyDrive/모듈프로젝트4_3조/김남준/models/resnet50_model.h5')
del resnet50_model