In [10]:
# TODO: Plot only one person's face position across all of the videos
import cv2
import numpy as np
import csv
import os


def load_csv_data(file_paths):
    """Collect face bounding boxes from one or more detection CSV files.

    Every file is read with ``csv.reader``. The first row is treated as a
    header and skipped; each remaining row is unpacked as
    ``frame, x, y, w, h, eye_point1, eye_point2`` and only the four box
    fields are kept.

    Args:
        file_paths: Iterable of CSV file paths, read in the given order.

    Returns:
        A flat list of ``(x, y, w, h)`` integer tuples, concatenated across
        all files.

    Raises:
        OSError: If a file cannot be opened.
        ValueError: If a non-blank row does not have exactly seven columns
            or a box field is not an integer literal.
    """
    all_face_positions = []
    for file_path in file_paths:
        # newline="" is what the csv module asks for so that it can handle
        # line endings (and quoted embedded newlines) itself.
        with open(file_path, "r", newline="") as csvfile:
            csvreader = csv.reader(csvfile)
            # Skip the header; the default keeps an empty file from raising
            # StopIteration.
            next(csvreader, None)
            for row in csvreader:
                if not row:
                    # csv.reader yields [] for blank lines (e.g. a trailing
                    # newline); there is nothing to unpack.
                    continue
                _frame, x, y, w, h, _eye_point1, _eye_point2 = row
                all_face_positions.append((int(x), int(y), int(w), int(h)))
    return all_face_positions


def draw_face_centers_on_white_background(
    face_positions,
    image_size=(1080, 1920),
    output_image_path="output_with_face_centers_and_lines.jpg",
):
    """Plot face-box centres and four guide lines on a white canvas and save it.

    Args:
        face_positions: Iterable of ``(x, y, w, h)`` boxes. Boxes whose width
            or height is not positive (e.g. the ``(0, 0, 0, 0)`` placeholder)
            are skipped.
        image_size: ``(height, width)`` of the canvas in pixels.
        output_image_path: Where the image is written.

    Raises:
        IOError: If OpenCV reports that the image could not be written.
    """
    height, width = image_size[0], image_size[1]

    # White BGR canvas.
    white_background = np.full((height, width, 3), 255, dtype=np.uint8)

    # Guide lines at 35% / 65% of the width and 25% / 60% of the height.
    line1_x = int(width * 0.35)
    line2_x = int(width * 0.65)
    line1_y = int(height * 0.25)
    line2_y = int(height * 0.6)

    # OpenCV colours are BGR: (235, 64, 52) renders as blue (#3440EB).
    line_color = (235, 64, 52)
    cv2.line(white_background, (line1_x, 0), (line1_x, height), line_color, 2)
    cv2.line(white_background, (line2_x, 0), (line2_x, height), line_color, 2)
    cv2.line(white_background, (0, line1_y), (width, line1_y), line_color, 2)
    cv2.line(white_background, (0, line2_y), (width, line2_y), line_color, 2)

    # BGR (66, 135, 245) renders as orange (#F58742).
    circle_color = (66, 135, 245)
    for x, y, w, h in face_positions:
        if w > 0 and h > 0:  # Only draw for valid positions
            center = (x + w // 2, y + h // 2)
            # Thickness -1 draws a filled disc of radius 5.
            cv2.circle(white_background, center, 5, circle_color, -1)

    # imwrite signals failure through its return value rather than raising,
    # so check it before claiming success.
    if not cv2.imwrite(output_image_path, white_background):
        raise IOError(f"Failed to write image to {output_image_path}")
    print(f"Image saved to {output_image_path}")


# Example usage: plot the boxes stored in the six per-clip CSV files.
# Replace the generated names with your own CSV file paths.
csv_files = [f"output_ive_baddie_{clip_index}.csv" for clip_index in range(1, 7)]

face_positions = load_csv_data(csv_files)
draw_face_centers_on_white_background(face_positions)

Image saved to output_with_face_centers_and_lines.jpg


In [3]:
# TODO: Plot the face points of every face across all of the videos
import cv2
import numpy as np
import os
from tqdm import tqdm
from insightface.app import FaceAnalysis


def process_video(video_path):
    """Detect faces on every 5th frame of a video and return their boxes.

    Args:
        video_path: Path to the video file.

    Returns:
        A tuple ``(face_positions, fps, total_frames, duration)`` where
        ``face_positions`` has one entry per sampled frame: a list of
        ``(x, y, w, h)`` boxes for every detected face, or the placeholder
        ``[(0, 0, 0, 0)]`` when no face was found. ``fps`` and
        ``total_frames`` are the values reported by OpenCV and ``duration``
        is ``total_frames / fps`` (0 when fps is reported as 0).

    Raises:
        FileNotFoundError: If ``video_path`` does not exist.
        Exception: If OpenCV cannot open the file.
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"No file found at {video_path}")

    cap = cv2.VideoCapture(video_path)
    # try/finally so the capture handle is released even when processing is
    # interrupted (e.g. KeyboardInterrupt in a notebook) or detection raises.
    try:
        if not cap.isOpened():
            raise Exception("Failed to open video file.")

        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = total_frames / fps if fps else 0

        app = FaceAnalysis(
            allowed_modules=["detection", "landmark_2d_106"],
            providers=["CUDAExecutionProvider"],
        )
        app.prepare(ctx_id=0, det_size=(640, 640))

        frame_count = 0
        face_positions = []

        # Only every 5th frame is analysed, hence total_frames // 5 steps.
        with tqdm(total=total_frames // 5, desc=os.path.basename(video_path)) as pbar:
            while frame_count < total_frames:
                ret, frame = cap.read()
                if not ret:
                    break

                frame_count += 1
                if frame_count % 5 != 0:
                    continue

                current_frame_positions = []
                for face in app.get(frame):
                    # bbox is (x1, y1, x2, y2); store it as (x, y, w, h).
                    bbox = face.bbox.astype(int)
                    current_frame_positions.append(
                        (bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1])
                    )

                if current_frame_positions:
                    face_positions.append(current_frame_positions)
                else:
                    # Placeholder keeps one entry per sampled frame.
                    face_positions.append([(0, 0, 0, 0)])

                pbar.update(1)
    finally:
        cap.release()

    return face_positions, fps, total_frames, duration


def load_video_data(video_paths):
    """Run ``process_video`` on each path and concatenate the per-frame boxes.

    The fps, frame count and duration returned for each video are discarded;
    only the per-frame face position lists are kept, in input order.
    """
    combined = []
    for path in video_paths:
        per_frame_boxes, _fps, _total_frames, _duration = process_video(path)
        combined += per_frame_boxes
    return combined


def draw_face_centers_on_white_background(
    face_positions,
    image_size=(1080, 1920),
    output_image_path="output_with_face_centers_and_lines_2.jpg",
    draw_boxes=False,
):
    """Plot the centre of every detected face on a white canvas and save it.

    Args:
        face_positions: Iterable with one entry per frame, each entry being
            an iterable of ``(x, y, w, h)`` boxes. Boxes whose width or
            height is not positive (e.g. the ``(0, 0, 0, 0)`` placeholder)
            are skipped.
        image_size: ``(height, width)`` of the canvas in pixels.
        output_image_path: Where the image is written.
        draw_boxes: When true, also outline each box in black.

    Raises:
        IOError: If OpenCV reports that the image could not be written.
    """
    height, width = image_size[0], image_size[1]

    # White BGR canvas.
    white_background = np.full((height, width, 3), 255, dtype=np.uint8)

    # Guide lines at 35% / 65% of the width and 25% / 60% of the height.
    line1_x = int(width * 0.35)
    line2_x = int(width * 0.65)
    line1_y = int(height * 0.25)
    line2_y = int(height * 0.6)

    # OpenCV colours are BGR: (235, 64, 52) renders as blue.
    line_color = (235, 64, 52)
    cv2.line(white_background, (line1_x, 0), (line1_x, height), line_color, 2)
    cv2.line(white_background, (line2_x, 0), (line2_x, height), line_color, 2)
    cv2.line(white_background, (0, line1_y), (width, line1_y), line_color, 2)
    cv2.line(white_background, (0, line2_y), (width, line2_y), line_color, 2)

    circle_color = (66, 135, 245)  # BGR -> orange
    rect_color = (0, 0, 0)  # black
    for frame_faces in face_positions:
        for x, y, w, h in frame_faces:
            if w > 0 and h > 0:
                center = (x + w // 2, y + h // 2)
                # Thickness -1 draws a filled disc of radius 5.
                cv2.circle(white_background, center, 5, circle_color, -1)
                if draw_boxes:
                    cv2.rectangle(
                        white_background, (x, y), (x + w, y + h), rect_color, 2
                    )

    # imwrite signals failure through its return value rather than raising,
    # so check it before claiming success.
    if not cv2.imwrite(output_image_path, white_background):
        raise IOError(f"Failed to write image to {output_image_path}")
    print(f"Image saved to {output_image_path}")


# Example usage: detect faces in the six clips, then plot every face centre.
video_files = [
    f"C:/Users/hancomtst/Desktop/Nvidiaproject/data/ive_baddie_{clip_index}.mp4"
    for clip_index in range(1, 7)
]

face_positions = load_video_data(video_files)
draw_face_centers_on_white_background(face_positions)

Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: C:\Users\hancomtst/.insightface\models\buffalo_l\1k3d68.onnx landmark_3d_68
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\hancomtst/.insightface\models\buffalo_l\2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\hancomtst/.insightface\models\buffalo_l\det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: C:\Users\hancomtst/.insightface\models\buffalo_l\genderage.onnx genderage
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: C:\Users\hancomtst/.insightface\models\buffalo_l\w600k_r50.onnx recognition
set det-size: (640, 640)


ive_baddie_1.mp4:   1%|▏         | 14/963 [00:02<02:51,  5.53it/s]


KeyboardInterrupt: 

In [2]:
import cv2
import numpy as np
import os
import csv
from tqdm import tqdm
from insightface.app import FaceAnalysis


def process_video(video_path):
    """Annotate a video and record, per frame, the face closest to the centre.

    Every detected face is outlined in blue. The face whose box centre has
    the smallest Manhattan distance to the frame centre is outlined in red,
    its 106 2D landmarks are drawn, and its data is recorded. The annotated
    frames are shown in a window (press ``q`` to stop early) and written to
    ``video_output_<input name>.mp4`` in the working directory.

    Args:
        video_path: Path to the input video file.

    Returns:
        A tuple ``(face_positions, eye_endpoint, face_recognitions, fps,
        total_frames, duration)``. The three lists hold exactly one
        single-element list per processed frame:

        * ``face_positions``: ``[(x, y, w, h)]``, or ``[(0, 0, 0, 0)]`` when
          no face was detected.
        * ``eye_endpoint``: ``[(point_35, point_93)]`` taken from landmark
          indices 35 and 93, or ``[((0, 0), (0, 0))]`` when unavailable.
        * ``face_recognitions``: ``[landmarks]`` as a nested list, or
          ``[[]]`` when unavailable.

    Raises:
        FileNotFoundError: If ``video_path`` does not exist.
        Exception: If OpenCV cannot open the file.
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"No file found at {video_path}")

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise Exception("Failed to open video file.")

    face_positions = []
    face_recognitions = []
    eye_endpoint = []
    out = None

    # try/finally so the capture, writer and preview window are always
    # released, even if detection raises or the run is interrupted.
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = total_frames / fps if fps else 0

        app = FaceAnalysis(
            allowed_modules=["detection", "landmark_2d_106"],
            providers=["CUDAExecutionProvider"],
        )
        app.prepare(ctx_id=0, det_size=(640, 640))

        window_name = "Video"
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)

        # Faces are ranked by distance to the frame centre.
        target_x = width // 2
        target_y = height // 2

        input_file_name = os.path.splitext(os.path.basename(video_path))[0]
        # NOTE(review): an XVID fourcc is paired with an .mp4 container here;
        # confirm the local OpenCV backend accepts this combination.
        fourcc = cv2.VideoWriter_fourcc(*"XVID")
        output_video_path = f"video_output_{input_file_name}.mp4"
        out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

        # Read until the stream ends. The reported frame count is not used
        # as a loop bound, because it was never compared against a running
        # counter and a reported count of 0 would skip the whole video.
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            display_frame = frame.copy()

            best_face = None
            best_distance = None
            for face in app.get(frame):
                bbox = face.bbox.astype(int)
                face_center_x = bbox[0] + (bbox[2] - bbox[0]) // 2
                face_center_y = bbox[1] + (bbox[3] - bbox[1]) // 2

                # Thin blue (BGR) box around every detection.
                cv2.rectangle(
                    display_frame,
                    (bbox[0], bbox[1]),
                    (bbox[2], bbox[3]),
                    (255, 0, 0),
                    2,
                )

                distance = abs(face_center_x - target_x) + abs(
                    face_center_y - target_y
                )
                # Strict "<" keeps the first face on ties.
                if best_face is None or distance < best_distance:
                    best_face = face
                    best_distance = distance

            # Defaults double as the "nothing detected" placeholders, so each
            # frame contributes exactly one record to every list.
            position = (0, 0, 0, 0)
            eye_points = ((0, 0), (0, 0))
            landmarks = []

            if best_face is not None:
                bbox = best_face.bbox.astype(int)
                # Thick red (BGR) box around the selected face.
                cv2.rectangle(
                    display_frame,
                    (bbox[0], bbox[1]),
                    (bbox[2], bbox[3]),
                    (0, 0, 255),
                    4,
                )
                position = (bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1])

                if "landmark_2d_106" in best_face:
                    lmk = best_face.landmark_2d_106.astype(np.int64)
                    for point in lmk:
                        cv2.circle(
                            display_frame,
                            tuple(point),
                            2,
                            (0, 0, 255),
                            -1,
                            cv2.LINE_AA,
                        )
                    landmarks = lmk.tolist()
                    eye_points = (tuple(lmk[35]), tuple(lmk[93]))
                # Without landmarks the box is still recorded, with
                # placeholder eye points and landmarks, so the lists stay
                # aligned with frame indices.

            face_positions.append([position])
            eye_endpoint.append([eye_points])
            face_recognitions.append([landmarks])

            out.write(display_frame)

            cv2.imshow(window_name, display_frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

    return (
        face_positions,
        eye_endpoint,
        face_recognitions,
        fps,
        total_frames,
        duration,
    )


# Example usage: annotate each pose-synced clip in turn.
# Extend the tuple of clip numbers (or the list itself) to add more videos.
video_files = [
    f"C:/Users/hancomtst/Desktop/Nvidiaproject/data/pose_sync_ive_baddie_{clip_index}.mp4"
    for clip_index in (1, 2)
]

for video_file in video_files:
    process_video(video_file)

Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: C:\Users\hancomtst/.insightface\models\buffalo_l\1k3d68.onnx landmark_3d_68
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\hancomtst/.insightface\models\buffalo_l\2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\hancomtst/.insightface\models\buffalo_l\det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: C:\Users\hancomtst/.insightface\models\buffalo_l\genderage.onnx genderage
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: C:\Users\hancomtst/.insightface\models\buffalo_l\w600k_r50.onnx recognition
set det-size: (640, 640)
Applied providers: ['CPUExecution

In [4]:
# TODO: Variant that selects the person with the largest face
import cv2
import numpy as np
import os
import csv
from tqdm import tqdm
from insightface.app import FaceAnalysis


def process_video(video_path):
    """Annotate a video and record, per frame, the largest detected face.

    Every detected face is outlined in blue. The face with the largest
    bounding-box area is outlined in red, its 106 2D landmarks are drawn,
    and its data is recorded. The annotated frames are shown in a window
    (press ``q`` to stop early) and written to
    ``bigface_output_<input name>.mp4`` in the working directory.

    Args:
        video_path: Path to the input video file.

    Returns:
        A tuple ``(face_positions, eye_endpoint, face_recognitions, fps,
        total_frames, duration)``. The three lists hold exactly one
        single-element list per processed frame:

        * ``face_positions``: ``[(x, y, w, h)]``, or ``[(0, 0, 0, 0)]`` when
          no face was selected.
        * ``eye_endpoint``: ``[(point_35, point_93)]`` taken from landmark
          indices 35 and 93, or ``[((0, 0), (0, 0))]`` when unavailable.
        * ``face_recognitions``: ``[landmarks]`` as a nested list, or
          ``[[]]`` when unavailable.

    Raises:
        FileNotFoundError: If ``video_path`` does not exist.
        Exception: If OpenCV cannot open the file.
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"No file found at {video_path}")

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise Exception("Failed to open video file.")

    face_positions = []
    face_recognitions = []
    eye_endpoint = []
    out = None

    # try/finally so the capture, writer and preview window are always
    # released, even if detection raises or the run is interrupted.
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = total_frames / fps if fps else 0

        app = FaceAnalysis(
            allowed_modules=["detection", "landmark_2d_106"],
            providers=["CUDAExecutionProvider"],
        )
        app.prepare(ctx_id=0, det_size=(640, 640))

        window_name = "Video"
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)

        input_file_name = os.path.splitext(os.path.basename(video_path))[0]
        # NOTE(review): an XVID fourcc is paired with an .mp4 container here;
        # confirm the local OpenCV backend accepts this combination.
        fourcc = cv2.VideoWriter_fourcc(*"XVID")
        output_video_path = f"bigface_output_{input_file_name}.mp4"
        out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

        # Read until the stream ends. The reported frame count is not used
        # as a loop bound, because it was never compared against a running
        # counter and a reported count of 0 would skip the whole video.
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            display_frame = frame.copy()

            largest_face = None
            largest_face_size = 0  # Area of the biggest box seen so far
            for face in app.get(frame):
                bbox = face.bbox.astype(int)
                face_size = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                # Strict ">" keeps the first face on ties and ignores boxes
                # with non-positive area.
                if face_size > largest_face_size:
                    largest_face_size = face_size
                    largest_face = face

                # Thin blue (BGR) box around every detection.
                cv2.rectangle(
                    display_frame,
                    (bbox[0], bbox[1]),
                    (bbox[2], bbox[3]),
                    (255, 0, 0),
                    2,
                )

            # Defaults double as the "nothing selected" placeholders, so each
            # frame contributes exactly one record to every list.
            position = (0, 0, 0, 0)
            eye_points = ((0, 0), (0, 0))
            landmarks = []

            if largest_face is not None:
                bbox = largest_face.bbox.astype(int)
                # Thick red (BGR) box around the selected face.
                cv2.rectangle(
                    display_frame,
                    (bbox[0], bbox[1]),
                    (bbox[2], bbox[3]),
                    (0, 0, 255),
                    4,
                )
                position = (bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1])

                if "landmark_2d_106" in largest_face:
                    lmk = largest_face.landmark_2d_106.astype(np.int64)
                    for point in lmk:
                        cv2.circle(
                            display_frame,
                            tuple(point),
                            2,
                            (0, 0, 255),
                            -1,
                            cv2.LINE_AA,
                        )
                    landmarks = lmk.tolist()
                    eye_points = (tuple(lmk[35]), tuple(lmk[93]))
                # Without landmarks the box is still recorded, with
                # placeholder eye points and landmarks, so the lists stay
                # aligned with frame indices.

            face_positions.append([position])
            eye_endpoint.append([eye_points])
            face_recognitions.append([landmarks])

            out.write(display_frame)

            cv2.imshow(window_name, display_frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

    return (
        face_positions,
        eye_endpoint,
        face_recognitions,
        fps,
        total_frames,
        duration,
    )


# Example usage: run the largest-face variant on each pose-synced clip.
# Extend the tuple of clip numbers (or the list itself) to add more videos.
video_files = [
    f"C:/Users/hancomtst/Desktop/Nvidiaproject/data/pose_sync_ive_baddie_{clip_index}.mp4"
    for clip_index in (1, 2)
]

for video_file in video_files:
    process_video(video_file)

Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: C:\Users\hancomtst/.insightface\models\buffalo_l\1k3d68.onnx landmark_3d_68
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\hancomtst/.insightface\models\buffalo_l\2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\hancomtst/.insightface\models\buffalo_l\det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: C:\Users\hancomtst/.insightface\models\buffalo_l\genderage.onnx genderage
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: C:\Users\hancomtst/.insightface\models\buffalo_l\w600k_r50.onnx recognition
set det-size: (640, 640)
Applied providers: ['CPUExecution

In [6]:
import cv2
import numpy as np
import os
import csv
from tqdm import tqdm
from insightface.app import FaceAnalysis


def process_video(video_path):
    """Annotate a video and record, per frame, the most central face in a band.

    Two vertical guide lines are drawn at 35% and 65% of the frame width.
    Every detected face is outlined in blue. Among faces whose box centre
    lies horizontally between the guide lines (inclusive), the one closest
    to the horizontal centre of the frame is outlined in red, its 106 2D
    landmarks are drawn, and its data is recorded. The annotated frames are
    shown in a window (press ``q`` to stop early) and written to
    ``xlines_output_<input name>.mp4`` in the working directory.

    Args:
        video_path: Path to the input video file.

    Returns:
        A tuple ``(face_positions, eye_endpoint, face_recognitions, fps,
        total_frames, duration)``. The three lists hold exactly one
        single-element list per processed frame:

        * ``face_positions``: ``[(x, y, w, h)]``, or ``[(0, 0, 0, 0)]`` when
          no face qualified.
        * ``eye_endpoint``: ``[(point_35, point_93)]`` taken from landmark
          indices 35 and 93, or ``[((0, 0), (0, 0))]`` when unavailable.
        * ``face_recognitions``: ``[landmarks]`` as a nested list, or
          ``[[]]`` when unavailable.

    Raises:
        FileNotFoundError: If ``video_path`` does not exist.
        Exception: If OpenCV cannot open the file.
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"No file found at {video_path}")

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise Exception("Failed to open video file.")

    face_positions = []
    face_recognitions = []
    eye_endpoint = []
    out = None

    # try/finally so the capture, writer and preview window are always
    # released, even if detection raises or the run is interrupted.
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = total_frames / fps if fps else 0

        app = FaceAnalysis(
            allowed_modules=["detection", "landmark_2d_106"],
            providers=["CUDAExecutionProvider"],
        )
        app.prepare(ctx_id=0, det_size=(640, 640))

        window_name = "Video"
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)

        # Candidates must sit between the two vertical lines; they are
        # ranked by horizontal distance to the frame centre.
        target_x = width // 2
        line1_x = int(width * 0.35)
        line2_x = int(width * 0.65)

        input_file_name = os.path.splitext(os.path.basename(video_path))[0]
        # NOTE(review): an XVID fourcc is paired with an .mp4 container here;
        # confirm the local OpenCV backend accepts this combination.
        fourcc = cv2.VideoWriter_fourcc(*"XVID")
        output_video_path = f"xlines_output_{input_file_name}.mp4"
        out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

        # Read until the stream ends. The reported frame count is not used
        # as a loop bound, because it was never compared against a running
        # counter and a reported count of 0 would skip the whole video.
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            display_frame = frame.copy()

            # Draw the two vertical guide lines (BGR (255, 255, 0) = cyan).
            cv2.line(display_frame, (line1_x, 0), (line1_x, height), (255, 255, 0), 2)
            cv2.line(display_frame, (line2_x, 0), (line2_x, height), (255, 255, 0), 2)

            best_face = None
            smallest_distance = float("inf")
            for face in app.get(frame):
                bbox = face.bbox.astype(int)
                face_center_x = bbox[0] + (bbox[2] - bbox[0]) // 2

                if line1_x <= face_center_x <= line2_x:
                    distance_to_center = abs(face_center_x - target_x)
                    # Strict "<" keeps the first face on ties.
                    if distance_to_center < smallest_distance:
                        smallest_distance = distance_to_center
                        best_face = face

                # Thin blue (BGR) box around every detection.
                cv2.rectangle(
                    display_frame,
                    (bbox[0], bbox[1]),
                    (bbox[2], bbox[3]),
                    (255, 0, 0),
                    2,
                )

            # Defaults double as the "nothing selected" placeholders, so each
            # frame contributes exactly one record to every list.
            position = (0, 0, 0, 0)
            eye_points = ((0, 0), (0, 0))
            landmarks = []

            if best_face is not None:
                bbox = best_face.bbox.astype(int)
                # Thick red (BGR) box around the selected face.
                cv2.rectangle(
                    display_frame,
                    (bbox[0], bbox[1]),
                    (bbox[2], bbox[3]),
                    (0, 0, 255),
                    4,
                )
                position = (bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1])

                if "landmark_2d_106" in best_face:
                    lmk = best_face.landmark_2d_106.astype(np.int64)
                    for point in lmk:
                        cv2.circle(
                            display_frame,
                            tuple(point),
                            2,
                            (0, 0, 255),
                            -1,
                            cv2.LINE_AA,
                        )
                    landmarks = lmk.tolist()
                    eye_points = (tuple(lmk[35]), tuple(lmk[93]))
                # Without landmarks the box is still recorded, with
                # placeholder eye points and landmarks, so the lists stay
                # aligned with frame indices.

            face_positions.append([position])
            eye_endpoint.append([eye_points])
            face_recognitions.append([landmarks])

            out.write(display_frame)

            cv2.imshow(window_name, display_frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

    return (
        face_positions,
        eye_endpoint,
        face_recognitions,
        fps,
        total_frames,
        duration,
    )


# Example usage: run the guide-line variant on each pose-synced clip.
# Extend the tuple of clip numbers (or the list itself) to add more videos.
video_files = [
    f"C:/Users/hancomtst/Desktop/Nvidiaproject/data/pose_sync_ive_baddie_{clip_index}.mp4"
    for clip_index in (1, 2)
]

for video_file in video_files:
    process_video(video_file)

Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: C:\Users\hancomtst/.insightface\models\buffalo_l\1k3d68.onnx landmark_3d_68
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\hancomtst/.insightface\models\buffalo_l\2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\hancomtst/.insightface\models\buffalo_l\det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: C:\Users\hancomtst/.insightface\models\buffalo_l\genderage.onnx genderage
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: C:\Users\hancomtst/.insightface\models\buffalo_l\w600k_r50.onnx recognition
set det-size: (640, 640)
Applied providers: ['CPUExecution