## MediaPipe

- holistic 모델의 처리 결과(results) 객체 속성:
- face_landmarks = 얼굴 랜드마크
- left_hand_landmarks = 왼쪽 손 랜드마크
- right_hand_landmarks = 오른쪽 손 랜드마크
- pose_landmarks = pose 랜드마크
- [mediapipe-documentation](https://google.github.io/mediapipe/getting_started/python.html)



 ## 1. Install dependencies

In [4]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

## 2. Keypoints using MP Holistic

In [7]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [8]:
mp_drawing.draw_landmarks

<function mediapipe.python.solutions.drawing_utils.draw_landmarks(image: numpy.ndarray, landmark_list: mediapipe.framework.formats.landmark_pb2.NormalizedLandmarkList, connections: Union[List[Tuple[int, int]], NoneType] = None, landmark_drawing_spec: Union[mediapipe.python.solutions.drawing_utils.DrawingSpec, Mapping[int, mediapipe.python.solutions.drawing_utils.DrawingSpec]] = DrawingSpec(color=(0, 0, 255), thickness=2, circle_radius=2), connection_drawing_spec: Union[mediapipe.python.solutions.drawing_utils.DrawingSpec, Mapping[Tuple[int, int], mediapipe.python.solutions.drawing_utils.DrawingSpec]] = DrawingSpec(color=(224, 224, 224), thickness=2, circle_radius=2))>

In [9]:
# MediaPipe detection helper
def mediapipe_detection(image, model):
    """Run ``model`` on one OpenCV frame.

    Args:
        image: Frame in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before being handed to the model).
        model: Object exposing ``process(rgb_frame)``.

    Returns:
        Tuple ``(image, results)``: the frame converted back to BGR and
        whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is flagged read-only only for the duration of the model call.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

In [10]:
# Draw the detected landmarks with the default drawing style
def draw_landmarks(image, results):
    """Draw face, pose and both hand landmark groups onto ``image`` in place.

    Args:
        image: Frame that ``mp_drawing.draw_landmarks`` draws on.
        results: Detection results exposing ``face_landmarks``,
            ``pose_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks``.
    """
    landmark_groups = (
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION),   # face
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),       # pose
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),  # left hand
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS), # right hand
    )
    for landmark_list, connections in landmark_groups:
        mp_drawing.draw_landmarks(image, landmark_list, connections)

In [11]:
# Draw the landmarks with custom colours, line thickness and circle radius
def draw_styled_landmarks(image, results):
    """Draw face, pose and hand landmarks onto ``image`` with custom styling.

    Each landmark group gets two ``DrawingSpec`` objects: the first styles the
    landmark points, the second styles the connections between them.

    Args:
        image: Frame that ``mp_drawing.draw_landmarks`` draws on (in place).
        results: Detection results exposing ``face_landmarks``,
            ``pose_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks``.
    """
    spec = mp_drawing.DrawingSpec
    styled_groups = (
        # Face. The connection colour previously used 256 for one channel,
        # which is outside the 0-255 range of an 8-bit channel.
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
         spec(color=(80, 110, 10), thickness=1, circle_radius=1),
         spec(color=(80, 255, 121), thickness=1, circle_radius=1)),
        # Pose
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         spec(color=(80, 22, 10), thickness=2, circle_radius=4),
         spec(color=(80, 44, 121), thickness=2, circle_radius=2)),
        # Left hand
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(121, 22, 76), thickness=2, circle_radius=4),
         spec(color=(121, 44, 250), thickness=2, circle_radius=2)),
        # Right hand
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(245, 117, 66), thickness=2, circle_radius=4),
         spec(color=(245, 66, 230), thickness=2, circle_radius=2)),
    )
    for landmark_list, connections, landmark_spec, connection_spec in styled_groups:
        mp_drawing.draw_landmarks(
            image, landmark_list, connections, landmark_spec, connection_spec
        )

In [None]:
file_num = '0569'
ear_format = f'data/ear/KETI_SL_000000{file_num}.avi'

In [None]:
# Measure the number of frames in the sample video.
cap = cv2.VideoCapture(ear_format)
length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# Only the frame count is needed here; release the handle so it is not left
# open when `cap` is rebound by a later cell.
cap.release()
print(length)

In [None]:
# Play the sample video file (not a webcam) and run landmark detection on
# up to 30 evenly spaced frames, showing each processed frame.

cap = cv2.VideoCapture(ear_format)
length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # total number of frames
print(f'length : {length}')
# Process every `frame_jump`-th frame. Clamp to 1: for very short clips
# round(length / 30) is 0, which would make the modulo below divide by zero.
frame_jump = max(1, round(length / 30))
print(frame_jump)

frame_cnt = 0      # frames read so far
saved_frames = 0   # frames sampled (processed) so far
prev = None        # most recently read frame
try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():

            # Read the next frame.
            ret, frame = cap.read()

            # Nothing left to read: report the slots that would need padding
            # to reach 30 frames, then stop.
            if not ret:
                while saved_frames < 30:
                    print(f"new frame {saved_frames} added")
                    saved_frames += 1
                break

            frame_cnt += 1
            prev = frame

            if saved_frames < 30 and frame_cnt % frame_jump == 0:
                print(f"saved {saved_frames}")
                saved_frames += 1

                # Make detections
                image, results = mediapipe_detection(frame, holistic)

                # Draw landmarks
                draw_styled_landmarks(image, results)

                # Show the processed frame
                cv2.imshow("WebCam", image)

            # Press 'q' to stop early
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the capture and close the window, even if detection raised.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
len(results.face_landmarks.landmark)

In [None]:
results

In [None]:
draw_landmarks(prev, results)

In [None]:
prev.shape

In [None]:
plt.imshow(cv2.cvtColor(prev, cv2.COLOR_BGR2RGB))

## 3. Extract Keypoint Values

In [None]:
results.pose_landmarks.landmark[0].visibility

In [None]:
pose = []
for res in results.pose_landmarks.landmark:
    # Extract this landmark's coordinates and visibility
    test = np.array([res.x, res.y, res.z, res.visibility])
    pose.append(test)

In [None]:
# Flatten the pose landmarks (x, y, z, visibility) into a 1-D array; use 132 zeros when no pose was detected.
pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(132)

# Flatten the face landmarks (x, y, z) into a 1-D array; use 1404 zeros when no face was detected.
face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(1404)

# Flatten the left-hand landmarks (x, y, z); use 21*3 zeros when left_hand_landmarks is missing.
lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)

# Flatten the right-hand landmarks (x, y, z); use 21*3 zeros when right_hand_landmarks is missing.
rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)

In [None]:
# Report the shape of each flattened landmark group.
print(f'right hand shape : {rh.shape}')
print(f'left hand shape : {lh.shape}')
print(f'pose shape : {pose.shape}')
print(f'face shape : {face.shape}')
# Derive the total from the arrays themselves so it cannot drift from them.
print(f'all mediapipe shape : {pose.size + face.size + lh.size + rh.size}')

In [None]:
# Flattening the face landmarks yields (number of landmarks) * 3 values (x, y, z per landmark); fall back to 1404 zeros when no face was detected.
face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() \
if results.face_landmarks \
else np.zeros(1404)


In [12]:
# Keypoint extraction helper
def extract_keypoints(results):
    """Flatten all landmark groups of ``results`` into one 1-D array.

    The output is the concatenation, in this order, of pose
    (x, y, z, visibility), face (x, y, z), left hand (x, y, z) and right hand
    (x, y, z). A group that is missing (falsy) is replaced by zeros sized for
    33, 468, 21 and 21 landmarks respectively.

    Args:
        results: Object exposing ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``; each present
            group has a ``landmark`` iterable.

    Returns:
        1-D ``numpy.ndarray`` with the concatenated values.
    """
    def _flatten(group, fields, count):
        # Zero placeholder when the group was not detected.
        if not group:
            return np.zeros(count * len(fields))
        rows = [[getattr(point, field) for field in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ("x", "y", "z")
    parts = [
        _flatten(results.pose_landmarks, xyz + ("visibility",), 33),
        _flatten(results.face_landmarks, xyz, 468),
        _flatten(results.left_hand_landmarks, xyz, 21),
        _flatten(results.right_hand_landmarks, xyz, 21),
    ]
    return np.concatenate(parts)

In [None]:
# skelleton의 keypoints 추출
extract_keypoints(results).shape

In [None]:
extract_keypoints(results)[:-10]

## 4. Setup Folders for Collection

In [13]:
# Root folder where the extracted keypoint arrays (.npy) are stored
DATA_PATH = os.path.join("Suhwa_Data")

# Actions (sign words) to detect
actions = np.array(['ear', 'bridge', 'snow'])

# Number of videos (sequences) per action
no_sequences = 20

# Number of frames kept per video
sequence_length = 30

In [14]:
ear_path = "data/ear"
bridge_path = "data/bridge"
snow_path = "data/snow"

In [None]:
# hello
## 0
## 1
## 2
## ...
## 29

# thanks

# I love you

In [15]:
# Create one output folder per (action, sequence) pair.
for action in actions:
    for sequence in range(no_sequences):
        # exist_ok=True tolerates folders left by a previous run while still
        # surfacing real failures (e.g. permission errors) that a bare
        # `except: pass` would hide.
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)


In [None]:
result_test = extract_keypoints(results)

In [None]:
result_test

In [None]:
# 각프레임 데이터를 numpy 데이터 형태로 저장.
'''
현재 hello 단어의 프레임이 0부터 ~ 29 까지 인데
예시로 hello의 각 프레임마다 numpy 형태로 데이터가 저장되어서
해당 단어를 인식한다.    
'''
np.save('0', result_test)

In [None]:
# 저장한 numpy 형태의 배열 파일을 불러온다.
np_result = np.load('0.npy')

In [None]:
np_result.shape

## 5. Collect Keypoint Sequences

In [None]:
file_num = '0569'
ear_format = f'data/ear/KETI_SL_000000{file_num}.avi'

In [16]:
ear_path = "data/ear/"
bridge_path = "data/bridge/"
snow_path = "data/snow/"

In [17]:
ear_list = os.listdir(ear_path)
bridge_list = os.listdir(bridge_path)
snow_list = os.listdir(snow_path)

In [18]:
# Build the full path of every video, one list per action.
# Each list is built from its own directory listing, so folders holding a
# different number of files no longer raise IndexError, and os.path.join
# handles the separator whether or not the folder path ends with one.
ear_file = [os.path.join(ear_path, name) for name in ear_list]
snow_file = [os.path.join(snow_path, name) for name in snow_list]
bridge_file = [os.path.join(bridge_path, name) for name in bridge_list]

In [19]:
actions

array(['ear', 'bridge', 'snow'], dtype='<U6')

In [20]:
# Map each action name to the list of its video paths.
action_videos = {
    'ear': ear_file,
    'snow': snow_file,
    'bridge': bridge_file,
}
video_file = dict()
for action in actions:
    # Actions without a known video list are skipped.
    if action in action_videos:
        video_file[action] = action_videos[action]

In [21]:
video_file

{'ear': ['data/ear/KETI_SL_0000000150.avi',
  'data/ear/KETI_SL_0000000569.avi',
  'data/ear/KETI_SL_0000000983.avi',
  'data/ear/KETI_SL_0000001402.avi',
  'data/ear/KETI_SL_0000001730.avi',
  'data/ear/KETI_SL_0000002149.avi',
  'data/ear/KETI_SL_0000002664.avi',
  'data/ear/KETI_SL_0000003083.avi',
  'data/ear/KETI_SL_0000003497.avi',
  'data/ear/KETI_SL_0000003918.avi',
  'data/ear/KETI_SL_0000004338.avi',
  'data/ear/KETI_SL_0000004760.avi',
  'data/ear/KETI_SL_0000005176.avi',
  'data/ear/KETI_SL_0000005592.avi',
  'data/ear/KETI_SL_0000006016.avi',
  'data/ear/KETI_SL_0000006435.avi',
  'data/ear/KETI_SL_0000006853.avi',
  'data/ear/KETI_SL_0000007272.avi',
  'data/ear/KETI_SL_0000007689.avi',
  'data/ear/KETI_SL_0000008108.avi'],
 'bridge': ['data/bridge/KETI_SL_0000000178.avi',
  'data/bridge/KETI_SL_0000000597.avi',
  'data/bridge/KETI_SL_0000001012.avi',
  'data/bridge/KETI_SL_0000001431.avi',
  'data/bridge/KETI_SL_0000001762.avi',
  'data/bridge/KETI_SL_0000002181.avi',
  

In [22]:
# Collect the keypoint data: for every action video, sample `sequence_length`
# evenly spaced frames and save each frame's keypoints as
# <DATA_PATH>/<action>/<sequence>/<frame index>.npy.

# Loop through actions, e.g. ['ear', 'bridge', 'snow']
for action in actions:

    for sequence in range(no_sequences):  # one video per sequence
        cap = cv2.VideoCapture(video_file[action][sequence])
        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # total number of frames
        # Sample every `frame_jump`-th frame. Clamp to 1: for very short clips
        # the rounded ratio is 0, which would make the modulo below divide by
        # zero.
        frame_jump = max(1, round(length / sequence_length))

        frame_cnt = 0      # frames read so far
        saved_frames = 0   # frames saved so far; also the next .npy file name
        prev = None        # detection results of the last sampled frame

        try:
            # Set mediapipe model
            with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

                print(f'sequence : {sequence}')

                # Cut the original video down to `sequence_length` frames.
                while cap.isOpened():

                    # Read the next frame.
                    ret, frame = cap.read()

                    # Nothing left to read.
                    if not ret:
                        break

                    frame_cnt += 1

                    if saved_frames < sequence_length and frame_cnt % frame_jump == 0:

                        # Make detections
                        image, results = mediapipe_detection(frame, holistic)

                        prev = results

                        # Draw landmarks
                        draw_styled_landmarks(image, results)

                        # Extract this frame's keypoints and save them as a
                        # numpy array under the folder for this sequence.
                        keypoints = extract_keypoints(results)
                        npy_path = os.path.join(DATA_PATH, action, str(sequence), str(saved_frames))

                        print(f'npy_path : {npy_path}')

                        np.save(npy_path, keypoints)

                        print(f"saved {saved_frames}")
                        saved_frames += 1

                        # Show to screen
                        cv2.imshow('OpenCV Feed', image)

                    # Press 'q' to stop reading this video early
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        break

                # Pad short videos by repeating the last sampled frame's
                # keypoints. If no frame was sampled at all (unreadable or
                # empty video), pad with zeros laid out like
                # extract_keypoints' output instead of calling it with None.
                if prev is not None:
                    padding = extract_keypoints(prev)
                else:
                    padding = np.zeros(33 * 4 + 468 * 3 + 2 * 21 * 3)

                while saved_frames < sequence_length:
                    npy_path = os.path.join(DATA_PATH, action, str(sequence), str(saved_frames))
                    print(f"new frame {saved_frames} added")
                    np.save(npy_path, padding)
                    saved_frames += 1
        finally:
            # Always free the capture and close the window, even on errors.
            cap.release()
            cv2.destroyAllWindows()

sequence : 0
npy_path : Suhwa_Data\ear\0\0
saved 0
npy_path : Suhwa_Data\ear\0\1
saved 1
npy_path : Suhwa_Data\ear\0\2
saved 2
npy_path : Suhwa_Data\ear\0\3
saved 3
npy_path : Suhwa_Data\ear\0\4
saved 4
npy_path : Suhwa_Data\ear\0\5
saved 5
npy_path : Suhwa_Data\ear\0\6
saved 6
npy_path : Suhwa_Data\ear\0\7
saved 7
npy_path : Suhwa_Data\ear\0\8
saved 8
npy_path : Suhwa_Data\ear\0\9
saved 9
npy_path : Suhwa_Data\ear\0\10
saved 10
npy_path : Suhwa_Data\ear\0\11
saved 11
npy_path : Suhwa_Data\ear\0\12
saved 12
npy_path : Suhwa_Data\ear\0\13
saved 13
npy_path : Suhwa_Data\ear\0\14
saved 14
npy_path : Suhwa_Data\ear\0\15
saved 15
npy_path : Suhwa_Data\ear\0\16
saved 16
npy_path : Suhwa_Data\ear\0\17
saved 17
npy_path : Suhwa_Data\ear\0\18
saved 18
npy_path : Suhwa_Data\ear\0\19
saved 19
npy_path : Suhwa_Data\ear\0\20
saved 20
npy_path : Suhwa_Data\ear\0\21
saved 21
npy_path : Suhwa_Data\ear\0\22
saved 22
npy_path : Suhwa_Data\ear\0\23
saved 23
npy_path : Suhwa_Data\ear\0\24
saved 24
npy_pat

In [None]:
cap.release()
cv2.destroyAllWindows()

## 6. Preprocess Data and Create Labels and Features

In [23]:
# train, test 데이터 분할
from sklearn.model_selection import train_test_split

'''
클래스 벡터(정수)를 이진 클래스 행렬로 변환한다.
ex) 클래스 (0, 1, 2)인 label 데이터가 있다고 가정.
'0' 클래스인 경우 => [1, 0 ,0]
'1' 클래스인 경우 => [0, 1, 0]
'2' 클래스인 경우 => [0, 0, 1]
'''
from tensorflow.keras.utils import to_categorical

In [24]:
# actions 단어들에 labeling
label_map = {label:num for num, label in enumerate(actions)}

In [25]:
'''
단어마다 30개의 프레임이 존재하는 
총 90 개의 넘파이 배열에서
keypoints를 나타내는 총 1662개의 특정 값들이 필요함.
'''
label_map

{'ear': 0, 'bridge': 1, 'snow': 2}

In [26]:
# Build the feature sequences and their integer labels.
#
# Saved layout: <DATA_PATH>/<action>/<sequence>/<frame_num>.npy
# `sequences` collects one window (a list of per-frame arrays) per video and
# `labels` the matching class index, e.g. with
# label_map = {'ear': 0, 'bridge': 1, 'snow': 2}:
#   labels = [0, 0, 0, ..., 1, 1, 1, ..., 2, 2, 2, ...]
sequences, labels = [], []

for action in actions:
    # One pass per video of this action
    for sequence in range(no_sequences):
        sequence_dir = os.path.join(DATA_PATH, action, str(sequence))
        window = []

        # Load every saved frame of this video, in frame order
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(sequence_dir, "{}.npy".format(frame_num)))
            window.append(res)

        # Store the whole window together with its class label
        sequences.append(window)
        labels.append(label_map[action])

In [27]:
'''
1. sequences의 배열에는 각 단어마다 30개의 sequence
총 90개의 sequence를 가진다.

2. 각 시퀀스(프레임)마다 30번의 keypoints 추출 작업을 거친다.
3. 총 1662개의 key 포인트를 가지는 numpy 배열을 가진다.
sequences.shape = (90, 30, 1662)

labels_map = { 'hello': 0, 'thanks': 1, 'iloveyou': 2}
            labels =[
                0, 0, 0, 0, 0 ....
                1, 1, 1, 1, 1 ....
                2, 2, 2, 2, 2 ....
            ]
'''
print(f'sequences의 shape : {np.array(sequences).shape}')
print(f'sequences의 길이 : {len(sequences)}')
print(f'labels의 shape : {np.array(labels).shape}')
print(f'labels의 길이 : {len(labels)}')

sequences의 shape : (60, 30, 1662)
sequences의 길이 : 60
labels의 shape : (60,)
labels의 길이 : 60


In [28]:
X = np.array(sequences)

In [29]:
X.shape

(60, 30, 1662)

In [30]:
y = to_categorical(labels).astype(int)

In [31]:
y

array([[1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0,

In [32]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.05)

In [33]:
print(f'X_train : {X_train.shape}')
print(f'train_label : {y_train.shape}')
print(f'X_test : {X_test.shape}')
print(f'test_label : {y_test.shape}')

X_train : (57, 30, 1662)
train_label : (57, 3)
X_test : (3, 30, 1662)
test_label : (3, 3)


## 7. Build and Train LSTM Neural Network

In [34]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard # 로깅

In [35]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [50]:
# LSTM neural network: three stacked LSTM layers followed by dense layers.
#
# The final softmax layer outputs one probability per action, e.g.
#   res = [0.2, 0.7, 0.1]
#   np.argmax(res) == 1             -> index of the most probable class
#   actions[np.argmax(res)]         -> name of the predicted action
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(30, 1662)),  # (frames, keypoints)
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(actions.shape[0], activation='softmax'),  # one unit per action
])

In [37]:
X.shape

(60, 30, 1662)

In [38]:
res = [.2, 0.7, 0.1]

In [39]:
np.argmax(res)

1

In [40]:
actions[np.argmax(res)]

'bridge'

In [51]:
'''
다중 클래스 분류 시 일반적인 손실함수 : categorical_crossentropy
다중 클래스 분류 시 일반적인 정확도 측정 함수 : categorical_accuracy
'''
model.compile(
    optimizer='adam', 
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy']
)

In [42]:
# To view the training logs in TensorBoard, go into the Logs\train folder and
# start TensorBoard from there:
#   step 1. cd suhwa_dataset\Logs\train
#   step 2. tensorboard --logdir=.
# (Kept as comments rather than a non-raw string, where the "\t" in
# "Logs\train" was interpreted as a tab character.)

'\n    Logs\train 폴더에 들어간뒤\n    tensorboard --logdir=. 을 입력하면 \n    Tensorbaord화면이 나온다.\n    ex) \n    1step. cd suhwa_dataset\\Logs\train\n    2step. tesorboard --logdir=.\n'

In [43]:
model.fit(X_train, y_train, epochs=1000, callbacks=[tb_callback])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<tensorflow.python.keras.callbacks.History at 0x1f5bf81d488>

In [44]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 30, 64)            442112    
_________________________________________________________________
lstm_1 (LSTM)                (None, 30, 128)           98816     
_________________________________________________________________
lstm_2 (LSTM)                (None, 64)                49408     
_________________________________________________________________
dense (Dense)                (None, 64)                4160      
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 99        
Total params: 596,675
Trainable params: 596,675
Non-trainable params: 0
__________________________________________________

## 8. Make Predictions

In [45]:
res = model.predict(X_test)

In [46]:
# 예측.
actions[np.argmax(res[0])]

'snow'

In [47]:
actions[np.argmax(y_test[0])]

'snow'

## 9. Save Weights

In [48]:
# model 저장
model.save('complete.h5')

In [49]:
del model

In [52]:
# Restore the trained model.
# `model` was deleted in the previous cell, so calling `model.load_weights(...)`
# here raises NameError unless the build and compile cells are re-run first.
# 'complete.h5' was written with `model.save(...)`, so load the saved model
# directly instead of rebuilding it by hand.
from tensorflow.keras.models import load_model

model = load_model('complete.h5')

## 10. Evaluation using Confusion Matrix and Accuracy

In [53]:
'''
    Confusion Matrix를 통한 정확도 검증
'''
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [54]:
# NOTE(review): these predictions are made on X_train, so the confusion matrix
# and accuracy computed from `yhat` in the following cells measure fit to the
# training data rather than held-out performance; consider evaluating on
# X_test as well.
yhat = model.predict(X_train)

In [55]:
ytrue = np.argmax(y_train, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [56]:
multilabel_confusion_matrix(ytrue,yhat)

array([[[38,  0],
        [ 0, 19]],

       [[37,  0],
        [ 0, 20]],

       [[39,  0],
        [ 0, 18]]], dtype=int64)

In [57]:
accuracy_score(ytrue, yhat)

1.0

## 11. Test in Real Time

In [58]:
colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    """Overlay one horizontal probability bar per class on a copy of a frame.

    Args:
        res: Iterable of per-class probabilities; ``res[i]`` belongs to
            ``actions[i]``.
        actions: Indexable collection of class names used as bar labels.
        input_frame: Frame to annotate; it is copied, not modified.
        colors: Non-empty sequence of colour tuples. Colours are reused
            cyclically when there are more classes than colours.

    Returns:
        The annotated copy of ``input_frame``.
    """
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        # Cycle through the palette so extra classes do not raise IndexError.
        bar_color = colors[num % len(colors)]
        top = 60 + num * 40  # each bar occupies its own 40-pixel row
        cv2.rectangle(output_frame, (0, top), (int(prob*100), top + 30), bar_color, -1)
        cv2.putText(output_frame, actions[num], (0, top + 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)

    return output_frame

In [None]:
plt.figure(figsize=(18,18))
plt.imshow(prob_viz(res, actions, image, colors))

In [62]:
# 1. New detection variables
sequence = []    # rolling window of the most recent keypoint vectors
sentence = []    # most recently recognised actions (at most 5 are kept)
threshold = 0.8  # minimum probability required to accept a prediction

cap = cv2.VideoCapture(0)
try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():

            # Read feed
            ret, frame = cap.read()

            # Stop when no frame could be read instead of passing an invalid
            # frame on to the detection step.
            if not ret:
                break

            # Make detections
            image, results = mediapipe_detection(frame, holistic)
            print(results)

            # Draw landmarks
            draw_styled_landmarks(image, results)

            # 2. Prediction logic: keep only the latest 30 keypoint vectors
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-30:]

            if len(sequence) == 30:
                # Add a batch axis: (30, 1662) => (1, 30, 1662)
                res = model.predict(np.expand_dims(sequence, axis=0))[0]
                best = np.argmax(res)
                print(actions[best])

                # 3. Viz logic: append a confident prediction unless it merely
                # repeats the last recognised action
                if res[best] > threshold:
                    if len(sentence) > 0:
                        if actions[best] != sentence[-1]:
                            sentence.append(actions[best])
                    else:
                        sentence.append(actions[best])

                if len(sentence) > 5:
                    sentence = sentence[-5:]

                # Viz probabilities
                image = prob_viz(res, actions, image, colors)

            cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
            cv2.putText(image, ' '.join(sentence), (3,30),
                           cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Show to screen
            cv2.imshow('OpenCV Feed', image)

            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if an error occurred.
    cap.release()
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

In [None]:
res[np.argmax(res)] > threshold

In [None]:
# Add a leading batch axis to a single sample so it matches the input layout
# the model predicts on, and show the resulting shape. (Previously the shape
# tuple itself was wrapped instead of the sample.)
np.expand_dims(X_test[0], axis=0).shape

In [None]:
model.predict(np.expand_dims(X_test[0], axis=0))