In [1]:
import torch
import mediapipe as mp
import cv2
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import time
import os
from ultralytics import YOLO

In [2]:
# Report whether PyTorch can see a CUDA device in this kernel.
torch.cuda.is_available()

True

In [3]:
# Module-level YOLO detector built from the trained weights file; the
# detection functions in this notebook read it as the global `model`.
model = YOLO('./YOLOV8-anchor-colab/runs/detect/train/weights/best.pt')  # load a custom model

In [2]:
# Short aliases for the MediaPipe solution modules.
# `mp_holistic` is read as a global by ret_landmark.
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

In [8]:
def ret_landmark(img):
    """Run MediaPipe Holistic on one BGR frame and return its face-mesh landmarks.

    Args:
        img: A single image in OpenCV's BGR channel order. An unusable input
            (for example ``None`` from a failed ``cap.read()``) is tolerated and
            reported as "no face found".

    Returns:
        When a face is detected: an array of shape ``(num_landmarks, 3)`` with
        one ``[x, y, z]`` row per face landmark.
        Otherwise: a flat zero vector of length ``468 * 3``. The differing shape
        is what ``anchor_detect`` checks to recognise a failed detection, so it
        is kept as-is.

    Note:
        A new ``Holistic`` instance is created (and closed) on every call.
    """
    # Stays None if conversion or inference fails, so the fallback below is
    # reached instead of referencing an unbound name.
    results = None
    with mp_holistic.Holistic(model_complexity=0,
                              min_detection_confidence=0.6,
                              min_tracking_confidence=0.6,
                              refine_face_landmarks=True,
                              smooth_landmarks=True) as holistic:
        try:
            image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False
            results = holistic.process(image)
        except Exception as e:
            # Best effort: log the problem and fall through to the zero vector.
            print(e)

    if results is None or not results.face_landmarks:
        return np.zeros(468 * 3)
    return np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark])

In [39]:
def gen_ldms_data(video_path,threshold=0.76):
    """Extract face-mesh landmarks for every confident detection in a video.

    Each frame is passed to the global YOLO ``model``. For every detected box
    whose score (column 4 of ``boxes.data``) exceeds ``threshold``, the whole
    frame is passed to ``ret_landmark`` and the result is collected.

    Args:
        video_path: Path of the video file to read.
        threshold: Minimum detection score for a box to trigger landmark
            extraction.

    Returns:
        ``np.ndarray`` built from the collected per-detection landmark arrays.
        The same array is saved to ``output/mesh_sequence_<file name>.npy``.
    """
    cap = cv2.VideoCapture(video_path)
    face_seq = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure: there is no frame to process.
                break

            results = model(frame)[0]
            for result in results.boxes.data.tolist():
                if result[4] > threshold:
                    face_seq.append(ret_landmark(frame))
    finally:
        cap.release()

    face_seq = np.array(face_seq)

    # Use only the file name: a path with directory separators would otherwise
    # point np.save at a directory that does not exist.
    os.makedirs('output', exist_ok=True)
    np.save(f'output/mesh_sequence_{os.path.basename(video_path)}.npy', face_seq)
    return face_seq


In [53]:
def anchor_detect(video_path,video_path_out,threshold=0.76,max_frames=275):
    """Detect the anchor in the leading frames of a video and collect face meshes.

    For every detected box whose score (column 4 of ``boxes.data``) exceeds
    ``threshold``, the frame's landmarks are extracted with ``ret_landmark``,
    the frame is written to ``./frames/img_<det_frame>-<frame_count>.jpg`` and
    its timestamp (``frame_count / fps``) is recorded.

    Args:
        video_path: Path of the video file to read.
        video_path_out: Currently unused; kept so existing callers keep working.
        threshold: Minimum detection score for a box to count as a detection.
        max_frames: The loop runs while ``frame_count <= max_frames``.

    Returns:
        ``(face_seq, time_stamp)``: the list of landmark arrays and the list of
        timestamps in seconds, one entry per accepted detection.
    """
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    time_stamp = []
    face_seq = []
    frame_count = 0
    det_frame = 0

    # cv2.imwrite does not create missing directories, so make sure it exists.
    os.makedirs('./frames', exist_ok=True)

    try:
        while cap.isOpened() and frame_count <= max_frames:
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure: stop instead of feeding None
                # to the detector.
                break
            frame_count += 1

            results = model(frame)[0]
            print("current frame : ", frame_count)
            for result in results.boxes.data.tolist():
                if result[4] > threshold:
                    sec = frame_count / fps
                    det_frame += 1
                    face_arr = ret_landmark(frame)
                    # ret_landmark returns a flat (1404,) zero vector when no
                    # face mesh was found.
                    if face_arr.shape == (1404,):
                        print("Mistake facemesh: ", frame_count, det_frame)
                    face_seq.append(face_arr)
                    cv2.imwrite(f'./frames/img_{det_frame}-{frame_count}.jpg',frame)
                    time_stamp.append(sec)
    finally:
        cap.release()
    return face_seq, time_stamp


In [13]:
# Collect per-frame face landmarks for the "prinsa" clip.
# NOTE: prinsa_detect is defined in a later cell of this notebook, so that
# cell has to be executed before this one on a fresh kernel.
vid_path = "./videos/prinsa.mp4"
mesh = prinsa_detect(video_path=vid_path)

current frame :  1
current frame :  2
current frame :  3
current frame :  4
current frame :  5
current frame :  6
current frame :  7
current frame :  8
current frame :  9
current frame :  10
current frame :  11
current frame :  12
current frame :  13
current frame :  14
current frame :  15
current frame :  16
current frame :  17
current frame :  18
current frame :  19
current frame :  20
current frame :  21
current frame :  22
current frame :  23
current frame :  24
current frame :  25
current frame :  26
current frame :  27
current frame :  28
current frame :  29
current frame :  30
current frame :  31
current frame :  32
current frame :  33
current frame :  34
current frame :  35
current frame :  36
current frame :  37
current frame :  38
current frame :  39
current frame :  40
current frame :  41
current frame :  42
current frame :  43
current frame :  44
current frame :  45
current frame :  46
current frame :  47
current frame :  48
current frame :  49
current frame :  50
current f

In [10]:
# Collect face landmarks for the "sajjan" clip.
# NOTE: sajjan_detect is defined in a later cell of this notebook, so that
# cell has to be executed before this one on a fresh kernel. This also
# overwrites the `mesh` variable assigned by the previous cell.
video_path = './videos/sajjan_1.mp4'
mesh = sajjan_detect(video_path=video_path)

current frame :  1
current frame :  2
current frame :  3
current frame :  4
current frame :  5
current frame :  6
current frame :  7
current frame :  8
current frame :  9
current frame :  10
current frame :  11
current frame :  12
current frame :  13
current frame :  14
current frame :  15
current frame :  16
current frame :  17
current frame :  18
current frame :  19
current frame :  20
current frame :  21
current frame :  22
current frame :  23
current frame :  24
current frame :  25
current frame :  26
current frame :  27
current frame :  28
current frame :  29
current frame :  30
current frame :  31
current frame :  32
current frame :  33
current frame :  34
current frame :  35
current frame :  36
current frame :  37
current frame :  38
current frame :  39
current frame :  40
current frame :  41
current frame :  42
current frame :  43
current frame :  44
current frame :  45
current frame :  46
current frame :  47
current frame :  48
current frame :  49
current frame :  50
current f

In [9]:
def sajjan_detect(video_path,threshold=0.76,max_frames=1500):
    """Collect face-mesh landmarks from every second frame of a video.

    Frames are numbered from 1; only even-numbered frames are passed to
    ``ret_landmark``. No YOLO detection is performed here.

    Args:
        video_path: Path of the video file to read.
        threshold: Unused; kept so the signature matches the other detectors.
        max_frames: The loop runs while ``frame_count <= max_frames``.

    Returns:
        List of the arrays returned by ``ret_landmark``, one per sampled frame.
    """
    cap = cv2.VideoCapture(video_path)
    face_seq = []
    frame_count = 0
    try:
        while cap.isOpened() and frame_count <= max_frames:
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure: no frame to analyse.
                break
            frame_count += 1
            print("current frame : ", frame_count)

            if frame_count % 2 == 0:
                face_seq.append(ret_landmark(frame))
    finally:
        cap.release()
    return face_seq


In [11]:
def prinsa_detect(video_path,threshold=0.76,max_frames=1500):
    """Collect face-mesh landmarks from every frame of a video.

    Each frame that is read is passed to ``ret_landmark``. No YOLO detection
    is performed here.

    Args:
        video_path: Path of the video file to read.
        threshold: Unused; kept so the signature matches the other detectors.
        max_frames: The loop runs while ``frame_count <= max_frames``.

    Returns:
        List of the arrays returned by ``ret_landmark``, one per frame read.
    """
    cap = cv2.VideoCapture(video_path)
    face_seq = []
    frame_count = 0
    try:
        while cap.isOpened() and frame_count <= max_frames:
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure: no frame to analyse.
                break
            frame_count += 1
            print("current frame : ", frame_count)

            face_seq.append(ret_landmark(frame))
    finally:
        cap.release()
    return face_seq


In [11]:
# Stack the list of per-frame landmark arrays in `mesh` into one ndarray.
npy_out = np.array(mesh)

In [12]:
# Inspect the dimensions of the stacked landmark array.
npy_out.shape

(750, 478, 3)

In [9]:
# Persist the stacked landmarks to disk.
# np.save does not create directories, so ./npy_data must already exist.
np.save('./npy_data/mesh_sajjan.npy',npy_out)

In [35]:
# Shell out to ffmpeg to encode the numbered JPEGs in ./frames into
# anchor_video.mp4 (-r 25 sets the frame rate, -y overwrites existing output).
# NOTE(review): the input pattern img_%01d.jpg expects one running number per
# file, while anchor_detect writes img_{det_frame}-{frame_count}.jpg — confirm
# that the frames on disk actually match this pattern.
os.system('ffmpeg -r 25 -i ./frames/img_%01d.jpg -vcodec mpeg4 -y -vb 40M anchor_video.mp4')


0

In [26]:
# NOTE(review): `stamps` is not assigned anywhere in the visible notebook;
# presumably it is the time_stamp list returned by anchor_detect — confirm.
print(stamps)

[2.8, 2.84, 2.88, 3.56, 3.6, 3.64, 3.68, 3.72, 3.88, 3.92, 3.96, 4.0, 4.04, 4.08, 4.12, 4.16, 4.2, 4.24, 4.28, 4.32, 4.36, 4.4, 4.44, 4.48, 4.52, 4.56, 4.6, 4.64, 4.68, 4.76, 4.8, 4.84, 4.88, 4.92, 4.96, 5.0, 5.04, 5.08, 5.12, 5.16, 5.2, 5.24, 5.28, 5.32, 5.36, 5.4, 5.44, 5.48, 5.56, 5.6, 5.64, 5.68, 5.72, 5.76, 5.8, 5.84, 5.88, 5.92, 5.96, 6.0, 6.04, 6.08, 6.12, 6.16, 6.2, 6.24, 6.28, 6.32, 6.36, 6.4, 6.44, 6.48, 6.52, 6.56, 6.6, 6.64, 6.68, 6.72, 6.76, 6.8, 6.84, 6.88, 6.92, 6.96, 7.0, 7.04, 7.8, 7.84, 7.88, 7.92, 7.96, 8.0, 8.04, 8.08, 8.12, 8.16, 8.2, 8.24, 8.28, 8.32, 8.36, 8.4, 8.44, 8.48, 8.52]


In [None]:
# NOTE(review): the path literal below looks like a placeholder rather than a
# real file — set it to the video to process before running this cell.
video_path = 'yesma path rakha'
mesh_arr = gen_ldms_data(video_path=video_path, threshold=0.78)

In [4]:
# Draw the detector's boxes and class labels on every frame of a video and
# write the annotated frames to "<input path>_out.mp4".
VIDEOS_DIR = os.path.join('.', 'videos')

video_path = "D:/AI-Anchor/videodataset/English.mp4"
video_path_out = '{}_out.mp4'.format(video_path)

# Minimum detection score for a box to be drawn.
threshold = 0.74

cap = cv2.VideoCapture(video_path)
ret, frame = cap.read()
if not ret:
    # Without a first frame the output size is unknown; fail with a clear
    # message instead of an attribute error on None.
    cap.release()
    raise IOError('Could not read a frame from {}'.format(video_path))

H, W, _ = frame.shape
# Pass the source frame rate through unrounded so fractional rates
# (e.g. 29.97) do not change the playback speed of the output.
out = cv2.VideoWriter(video_path_out, cv2.VideoWriter_fourcc(*'MP4V'), cap.get(cv2.CAP_PROP_FPS), (W, H))

try:
    while ret:

        results = model(frame)[0]

        for result in results.boxes.data.tolist():
            x1, y1, x2, y2, score, class_id = result

            if score > threshold:
                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 4)
                cv2.putText(frame, results.names[int(class_id)].upper(), (int(x1), int(y1 - 10)),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 255, 0), 3, cv2.LINE_AA)

        out.write(frame)
        ret, frame = cap.read()
finally:
    # Release the capture and finalise the output file even if inference fails.
    cap.release()
    out.release()
    cv2.destroyAllWindows()


0: 384x640 1 anchor, 46.9ms
Speed: 6.5ms preprocess, 46.9ms inference, 172.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 anchor, 43.8ms
Speed: 15.6ms preprocess, 43.8ms inference, 3.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 anchor, 46.9ms
Speed: 0.0ms preprocess, 46.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 anchor, 47.3ms
Speed: 0.0ms preprocess, 47.3ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 anchor, 31.3ms
Speed: 0.0ms preprocess, 31.3ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 anchor, 7.0ms
Speed: 0.0ms preprocess, 7.0ms inference, 15.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 anchor, 15.6ms
Speed: 0.0ms preprocess, 15.6ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 anchor, 15.6ms
Speed: 0.0ms preprocess, 15.6ms inference, 0.0ms postprocess per image at shape (1, 3, 