In [12]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import csv
import copy
import mediapipe as mp

##### Hyperparameter

In [13]:
# MediaPipe sub-modules used by the cells below: landmark drawing helpers and
# the holistic (pose + hands) solution.
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

# Frame size in pixels; used for the keypoint normalisation and the video writer.
width, height = 640, 480
# The capture size is not set explicitly: 640x480 is presumed to already be
# OpenCV's default (TODO confirm for the camera in use).
# cap = cv2.VideoCapture(0)

In [14]:
def mediapipe_detection(image, model):
    """Run ``model`` on a BGR frame and return ``(bgr_image, results)``.

    Args:
        image: Frame in OpenCV's BGR channel order.
        model: Object exposing ``process(rgb_image)`` (here a MediaPipe
            Holistic instance).

    Returns:
        A tuple of the frame converted back to BGR and whatever
        ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # the model is fed RGB
    # Lock the buffer while the model runs (presumably lets MediaPipe avoid a
    # copy), then unlock it again.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)  # back to OpenCV order
    return bgr_frame, results
def draw_landmarks(image, results):
    """Draw the pose, left-hand and right-hand skeletons of ``results`` onto ``image``.

    Args:
        image: Image that ``mp_drawing.draw_landmarks`` draws on.
        results: Object exposing ``pose_landmarks``, ``left_hand_landmarks``
            and ``right_hand_landmarks``.
    """
    overlays = (
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
    )
    for landmark_list, connections in overlays:
        mp_drawing.draw_landmarks(image, landmark_list, connections)

In [15]:
# Detection result of the previous frame (None when there is none). It is read
# here and in extract_keypoint to fill in a hand that is missing in the current
# frame, and is assigned by the collection loop.
last = None


def update_mpresult(res, results):
    """Write the x/y values of ``res`` back into the landmarks of ``results``.

    ``res`` is consumed in the order pose (33 slots), left hand (21 slots),
    right hand (21 slots). A missing group only advances the read position.
    For a missing hand, the hand stored in ``last`` (if any) is deep-copied
    into ``results``; its coordinates are NOT overwritten from ``res``.

    Args:
        res: Indexable sequence whose items expose ``[0]`` (x) and ``[1]`` (y).
        results: Detection result whose landmark lists are modified in place.

    Returns:
        The same ``results`` object.
    """
    def _write_back(landmark_list, start):
        # Copy res[start:...] into the landmarks and return the next free index.
        for idx, point in enumerate(landmark_list.landmark, start):
            point.x = res[idx][0]
            point.y = res[idx][1]
        return start + len(landmark_list.landmark)

    cursor = 0

    if results.pose_landmarks:
        cursor = _write_back(results.pose_landmarks, cursor)
    else:
        cursor += 33

    if results.left_hand_landmarks:
        cursor = _write_back(results.left_hand_landmarks, cursor)
    else:
        if last and last.left_hand_landmarks:
            results.left_hand_landmarks = copy.deepcopy(last.left_hand_landmarks)
        cursor += 21

    if results.right_hand_landmarks:
        cursor = _write_back(results.right_hand_landmarks, cursor)
    else:
        if last and last.right_hand_landmarks:
            results.right_hand_landmarks = copy.deepcopy(last.right_hand_landmarks)
        cursor += 21

    return results

def normalize_keypoint(res, img=None, frame_width=None, frame_height=None):
    """Centre the keypoints on landmarks 11/12 and rescale them, in place.

    The midpoint of ``res[11]`` and ``res[12]`` (presumably the two shoulders
    of the pose model -- confirm) is moved to (0.5, 0.5), and all points are
    then scaled about (0.5, 0.5) so that the pixel distance between those two
    landmarks becomes roughly ``200 * frame_width / 640``. Points whose x and y
    are both 0 are treated as "not detected" and left untouched.

    If the two reference landmarks are missing or coincide, their distance is 0
    and no meaningful scale exists. In that case ``res`` is returned unchanged
    instead of being filled with inf/nan.

    Args:
        res: List of numpy arrays ``[x, y, ...]`` with at least 13 entries;
            modified in place.
        img: Optional image on which the two reference landmarks are marked
            with small circles (best effort).
        frame_width: Frame width in pixels; defaults to the notebook-level
            ``width``.
        frame_height: Frame height in pixels; defaults to the notebook-level
            ``height``.

    Returns:
        The same ``res`` list.
    """
    # Fall back to the notebook-level frame size when none is given.
    if frame_width is None:
        frame_width = width
    if frame_height is None:
        frame_height = height

    # Reference landmarks in pixel coordinates.
    x1, y1 = res[11][0] * frame_width, res[11][1] * frame_height
    x2, y2 = res[12][0] * frame_width, res[12][1] * frame_height

    if img is not None:
        try:
            cv2.circle(img, (int(x1), int(y1)), 4, (0, 255, 255), -1)
            cv2.circle(img, (int(x2), int(y2)), 4, (0, 255, 255), -1)
        except (cv2.error, TypeError, ValueError, OverflowError):
            # The markers are only a visual aid; never abort normalisation for them.
            pass

    dis = np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)
    if not np.isfinite(dis) or dis == 0:
        # No usable reference distance: dividing by it would produce inf/nan.
        return res

    x_cen = (res[11][0] + res[12][0]) / 2
    y_cen = (res[11][1] + res[12][1]) / 2
    vector = [0.5 - x_cen, 0.5 - y_cen]        # translation that centres the midpoint
    scale = (200 * frame_width / 640) / dis

    for point in res:
        if point[0] == 0 and point[1] == 0:
            continue                            # placeholder for an undetected landmark
        point[0] = vector[0] + point[0]
        point[1] = vector[1] + point[1]
        point[0] = 0.5 + (point[0] - 0.5) * scale
        point[1] = 0.5 + (point[1] - 0.5) * scale
    return res


def extract_keypoint(results):
    """Collect the pose and hand landmarks of ``results`` into a list of arrays.

    The list is ordered pose, left hand, right hand:

    * pose: one ``[x, y, z, visibility]`` array per landmark, or 33 arrays of
      four zeros when no pose was detected;
    * each hand: one ``[x, y, z]`` array per landmark. A hand missing from
      ``results`` is taken from the global ``last`` when that has it, otherwise
      it is filled with 21 arrays of three zeros.

    NOTE(review): the collection loop stores ``last`` after update_mpresult has
    overwritten its x/y with normalised values, so carried-over hand points
    appear to be normalised a second time downstream -- confirm this is intended.

    Args:
        results: Detection result exposing ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.

    Returns:
        list of numpy arrays as described above.
    """
    global last

    if results.pose_landmarks:
        res = [np.array([p.x, p.y, p.z, p.visibility])
               for p in results.pose_landmarks.landmark]
    else:
        res = [np.array([0, 0, 0, 0]) for _ in range(33)]

    for side in ("left_hand_landmarks", "right_hand_landmarks"):
        detected = getattr(results, side)
        if detected:
            source = detected
        elif last is not None and getattr(last, side):
            source = getattr(last, side)      # reuse the previous frame's hand
        else:
            source = None

        if source is None:
            res.extend(np.array([0, 0, 0]) for _ in range(21))
        else:
            res.extend(np.array([p.x, p.y, p.z]) for p in source.landmark)
    return res

def extract_keypoints_flatten(result, img=None):
    """Main entry point: turn one detection result into a flat feature vector.

    Extracts the keypoints, normalises them, writes the normalised x/y back
    into ``result`` (so ``result`` is modified in place), and concatenates all
    per-landmark arrays into a single 1-D array.

    Args:
        result: Detection result passed on to extract_keypoint / update_mpresult.
        img: Optional image forwarded to normalize_keypoint.

    Returns:
        1-D numpy array with all keypoint values.
    """
    keypoints = normalize_keypoint(extract_keypoint(result), img)
    update_mpresult(keypoints, result)
    return np.concatenate(keypoints)

def numpy_to_filecsv(data, filename):
    """Write ``data`` to ``filename`` as comma-separated rows.

    Args:
        data: Object with a ``tolist()`` method returning an iterable of rows
            (here a 2-D numpy array).
        filename: Path of the CSV file; it is created or overwritten.
    """
    # newline="" leaves line-ending handling to the csv module.
    with open(filename, "w", newline="") as out_file:
        rows = data.tolist()
        csv.writer(out_file, delimiter=",").writerows(rows)

def filecsv_to_numpy(filename,data):
    """Placeholder for the inverse of numpy_to_filecsv; not implemented yet.

    Both arguments are currently ignored and the function returns None.
    """
    return None

In [16]:
# testing mediapipe in a frame
# with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
#     ret, frame = cap.read()
#     print(ret)
#     frame,result = mediapipe_detection(frame,holistic)
#     res = extract_keypoints_flatten(result)
#     draw_landmarks(frame,result)
#     frame = cv2.flip(frame,1)
#     cap.release()
#     img = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
#     plt.imshow(img)
#     print(res.shape)
#     cv2.destroyAllWindows()

In [17]:

def draw_landmarks(image, results):
    """Draw the pose, left-hand and right-hand skeletons of ``results`` onto ``image``.

    NOTE: this redefines (and shadows) the identical ``draw_landmarks`` from the
    helper cell earlier in the notebook.

    Args:
        image: Image that ``mp_drawing.draw_landmarks`` draws on.
        results: Object exposing ``pose_landmarks``, ``left_hand_landmarks``
            and ``right_hand_landmarks``.
    """
    for landmark_list, connections in (
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
    ):
        mp_drawing.draw_landmarks(image, landmark_list, connections)


#### Mặc kệ đoạn code ở trên, bắt đầu thu từ đây

- Điều chỉnh lựa chọn folder thu:
    - Lỗi lỏ của python nên điền số tạm nhé
    - Chạy 2 cell bên dưới để biết nên điền số nào
- Khi cửa sổ hiện lên, bắt đầu thu theo quy tắc là:
    - Chữ STARTING COLLECTION sẽ đứng yên trong khoảng 2s (theo `delay_ms`), hãy vào tư thế chuẩn bị thu
    - Quá trình thu diễn ra trong 30 frame, điều chỉnh một động tác sao cho nó khớp đúng 30 frame
    - Nhấn q để huỷ quá trình thu


In [18]:

# Load the label list: one label per line, surrounding whitespace removed.
with open("group_tu.txt", "r", encoding='utf-8') as ip:
    all_actions = np.array([line.strip() for line in ip])

In [19]:
# Print the labels with their indices, five per row, so the index range to
# record can be looked up. Slicing in chunks also prints the final row when the
# number of labels is not a multiple of five (it used to be dropped).
ACTIONS_PER_ROW = 5
for row_start in range(0, len(all_actions), ACTIONS_PER_ROW):
    row = all_actions[row_start:row_start + ACTIONS_PER_ROW]
    print("".join(f"{idx}. {name}   " for idx, name in enumerate(row, row_start)))

0. Ban ngày   1. Ban đêm   2. Bố   3. Cười   4. Cảm ơn   
5. Khóc   6. Mẹ   7. Sách   8. Xin chào   9. Ăn   
10. Viết   11. Xem   12. Xin lỗi   13. Đi học   14. Đi   
15. Chơi   16. Tôi   17. Bạn   18. Mỗi ngày   19. Làm việc   


In [20]:
def collected_actions(x,y):
    """Return the labels of ``all_actions`` from index ``x`` through index ``y``.

    Convenience helper for recording only part of the label list, e.g. to
    resume after a recording session was interrupted. Both ends are inclusive.
    Scanning stops as soon as index ``y`` is reached, so nothing is returned
    when ``y`` comes before ``x``.

    Args:
        x: Index of the first label to include.
        y: Index of the last label to include.

    Returns:
        list of the selected labels.
    """
    selected = []
    started = False
    for idx, label in enumerate(all_actions):
        started = started or idx == x
        if started:
            selected.append(label)
        if idx == y:
            break
    return selected

In [60]:
# --- Recording configuration --------------------------------------------------
# Path components are joined separately so the folders are nested correctly on
# every OS (a hard-coded backslash is only a separator on Windows).
DATA_PATH = os.path.join('Pending upload', 'AnhNguyen_data')
Video_Data_Path = os.path.join('Pending upload', 'AnhNguyen_data_video')
start_vid_num = 120   # index of the first video to record
no_sequences = 20     # number of videos per label
sequence_length = 30  # number of frames per video
delay_ms = 2000       # approximate pause (ms) between two consecutive videos

# NOTE: this rebinds `all_actions` from "group_tu2.txt", a different file than
# the one used for the index listing earlier in the notebook.
with open("group_tu2.txt","r",encoding='utf-8') as ip:
    all_actions = [x.split("\n")[0].strip() for x in ip.readlines()]
all_actions = np.array(all_actions)

cap = cv2.VideoCapture(0)
# cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
# cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
try:
    with mp_holistic.Holistic(min_detection_confidence=0.3, min_tracking_confidence=0.3) as holistic:
        print("starting... in 2s")
        # NOTE(review): cv2.waitKey is documented to work only while a HighGUI
        # window exists; none has been created yet, so this may return
        # immediately instead of waiting 2 s -- confirm.
        cv2.waitKey(2000)
        stop = False
        actions = collected_actions(0,0)
        print(actions)
        for action in actions:
            if stop:
                break
            folder_path = os.path.join(DATA_PATH, action)
            vid_path = os.path.join(Video_Data_Path, action)
            os.makedirs(folder_path, exist_ok=True)
            os.makedirs(vid_path, exist_ok=True)
            for sequence in range(start_vid_num, start_vid_num + no_sequences):
                if stop:
                    break
                # One sample = one video; its keypoints go to
                # DATA_PATH/<action>/<sequence>.csv
                file_path = os.path.join(folder_path, str(sequence) + ".csv")
                if os.path.exists(file_path):
                    continue  # this sample was already recorded
                # NOTE(review): the writer is opened for (width, height) frames
                # but the capture size is not set above -- confirm the camera
                # really delivers that size.
                video_res = cv2.VideoWriter(os.path.join(vid_path, str(sequence) + ".mp4"),
                                            cv2.VideoWriter_fourcc(*'MP4V'),
                                            10.0, (width, height))
                seq_list = []
                try:
                    for frame_num in range(sequence_length + 1):
                        ret, frame = cap.read()
                        if not ret or frame is None:
                            # Without this check a failed read crashes later on a None frame.
                            print("Could not read a frame from the camera; stopping collection.")
                            stop = True
                            break
                        if frame_num == 0:
                            # Frame 0 is only the "get ready" banner; it is not recorded.
                            frame = cv2.flip(frame, 1)
                            cv2.putText(frame, 'STARTING COLLECTION', (120,200),
                                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                            cv2.putText(frame, f'{action} video Number {sequence}', (30,30),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2, cv2.LINE_AA)
                            print('Collecting frames for {} Video Number {}'.format(action, sequence))
                            cv2.imshow('OpenCV Feed', frame)
                            if cv2.waitKey(delay_ms) & 0xFF == ord('q'):
                                stop = True
                                break
                            continue

                        video_res.write(frame)  # raw (unflipped, unannotated) frame
                        image, results = mediapipe_detection(frame, holistic)
                        res = extract_keypoints_flatten(results)
                        draw_landmarks(image, results)
                        image = cv2.flip(image, 1)  # mirror view for the preview only
                        cv2.putText(image, f'{action} video Number {sequence}',
                                    (30,30), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2, cv2.LINE_AA)
                        cv2.imshow('OpenCV Feed', image)

                        seq_list.append(res)
                        # Remember this frame so a hand missing in the next one can be filled in.
                        last = copy.deepcopy(results)
                        if frame_num == sequence_length and not stop:
                            # The CSV is written only once the sequence is complete.
                            numpy_to_filecsv(np.array(seq_list), file_path)
                            seq_list = []

                        # Hold Q to stop; remember to delete the last (partial) video.
                        if cv2.waitKey(20) & 0xFF == ord('q'):
                            stop = True
                            break
                finally:
                    # Never carry hand landmarks over into the next video, and
                    # always close the writer, even if an error occurred.
                    last = None
                    video_res.release()
finally:
    # Free the camera and close the preview window on errors as well.
    cap.release()
    cv2.destroyAllWindows()

starting... in 2s
['Đi học']
qCollecting frames for Đi học Video Number 60
qCollecting frames for Đi học Video Number 61
qCollecting frames for Đi học Video Number 62
qCollecting frames for Đi học Video Number 63
qCollecting frames for Đi học Video Number 64
qCollecting frames for Đi học Video Number 65
qCollecting frames for Đi học Video Number 66
qCollecting frames for Đi học Video Number 67
qCollecting frames for Đi học Video Number 68
qCollecting frames for Đi học Video Number 69
qCollecting frames for Đi học Video Number 70
qCollecting frames for Đi học Video Number 71
qCollecting frames for Đi học Video Number 72
qCollecting frames for Đi học Video Number 73
qCollecting frames for Đi học Video Number 74
qCollecting frames for Đi học Video Number 75
qCollecting frames for Đi học Video Number 76
qCollecting frames for Đi học Video Number 77
qCollecting frames for Đi học Video Number 78
qCollecting frames for Đi học Video Number 79
qCollecting frames for Đi học