In [1]:
import sys

# The detector package is resolved relative to the parent directory, so the
# search path has to be extended before the project import below.
sys.path.append('..')

from landmarks_detector.landmarks_detector import LandmarksDetector

import torchvision.transforms.functional as tvf

import torch
import imageio
import dlib
import cv2 as cv

import numpy as np

# Prefer CUDA when torch reports it as available; fall back to the CPU otherwise.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 3D landmark model: built from the checkpoint path and moved to DEVICE in one step.
landmark_detector = LandmarksDetector('./pretrained/phase1_wpdc_vdc.pth.tar').to(DEVICE)

# dlib objects used for the 2D landmarks: a frontal face detector plus the
# shape predictor loaded from the 68-landmark model file.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("./pretrained/shape_predictor_68_face_landmarks.dat")

# Decode the whole input video into memory.
filename = './Obama.mp4'
vid = imageio.get_reader(filename, 'ffmpeg')
try:
    # iter_data() yields one frame per iteration; materialise them all here so
    # the reader can be released immediately afterwards.
    frame_list = list(vid.iter_data())
finally:
    # Always close the reader, even if decoding fails part-way through.
    # NOTE: `vid` is closed from here on; reopen it if more reads are needed.
    vid.close()

if not frame_list:
    raise RuntimeError(f"No frames could be read from {filename!r}")

# The landmark steps below operate on a single frame: the last one of the video.
frame = frame_list[-1]

# 2D landmark detection
# Frames decoded by imageio are in RGB channel order, so the grayscale
# conversion must use RGB2GRAY; BGR2GRAY would swap the red/blue weights.
gray = cv.cvtColor(frame, cv.COLOR_RGB2GRAY)

faces = detector(gray)
if len(faces) == 0:
    # Fail with a clear message instead of a NameError on `landmarks` below.
    raise RuntimeError("No face detected in the selected frame; cannot extract 2D landmarks")

# If several faces are detected, the last detection is the one that is used.
face = faces[len(faces) - 1]
landmarks = predictor(gray, face)

# Number of points read from the shape predictor result.
NUM_LANDMARKS_2D = 68

# Collect the (x, y) pixel coordinates of every landmark.
landmark_list = [
    [landmarks.part(n).x, landmarks.part(n).y]
    for n in range(NUM_LANDMARKS_2D)
]

# Array of shape (NUM_LANDMARKS_2D, 2), one (x, y) row per landmark.
lnds_2D = np.array(landmark_list)

# 3D landmark detection
# Scale the frame into [0, 1] on the target device (assumes uint8 input — TODO confirm).
face_tensor = torch.from_numpy(frame).to(DEVICE) / 255.

# Move the last axis to the front, reverse that axis, and add a leading batch
# dimension. Presumably this is HWC RGB -> CHW BGR for the detector — verify
# against LandmarksDetector's expected input.
detector_input = face_tensor.permute(2, 0, 1).flip(-3).unsqueeze(0)
s = landmark_detector(detector_input)

# # Image resolution
# image_width = 450  # e.g., 1920 pixels
# image_height = 450  # e.g., 1080 pixels

# # Initial camera intrinsics guess (focal length in pixels)
# focal_length = max(image_width, image_height)
# cx, cy = image_width / 2, image_height / 2
# camera_matrix = np.array([[focal_length, 0, cx],
#                           [0, focal_length, cy],
#                           [0, 0, 1]])

# # Distortion coefficients (assuming none)
# dist_coeffs = np.zeros((4, 1))

# # Solve PnP to estimate the head pose (rotation and translation); the camera intrinsics above are an assumed input, not an output
# success, rotation_vector, translation_vector = cv.solvePnP(
#     lnds_3D, lnds_2D, camera_matrix, dist_coeffs
# )

# # Compute horizontal and vertical FoV
# hfov = 2 * np.arctan(image_width / (2 * focal_length)) * (180 / np.pi)
# vfov = 2 * np.arctan(image_height / (2 * focal_length)) * (180 / np.pi)

# print(f"Horizontal FoV: {hfov:.2f} degrees")

# print(f"Vertical FoV: {vfov:.2f} degrees")

In [None]:
import numpy as np
from scipy.optimize import least_squares

# Synthetic 3DMM landmark points (replace with actual 3DMM landmarks)
np.random.seed(42)
num_points = 68  # Typical number of landmarks for a face
camera_distance = 5.0  # Depth offset that places the point cloud in front of the camera

X_3DMM = np.random.normal(0, 1, (num_points, 3))  # 3D coordinates of landmarks
# A pinhole projection divides by Z, so every point needs a strictly positive
# depth. Zero-mean depths would put about half of the points behind the camera
# and make points with Z close to 0 project to enormous pixel coordinates.
X_3DMM[:, 2] += camera_distance
assert (X_3DMM[:, 2] > 0).all(), "all synthetic landmarks must lie in front of the camera"

# Synthetic 2D landmarks (replace with actual image points)
true_focal_length = 800
true_principal_point = [320, 240]
# Ground-truth pinhole projection: (X/Z, Y/Z) * f + (cx, cy).
X_2D = (X_3DMM[:, :2] / X_3DMM[:, 2, None]) * true_focal_length + true_principal_point  # 2D image points

# Initial estimates for camera intrinsics
initial_focal_length = 1000  # Initial guess for focal length
initial_principal_point = [320, 240]  # Initial guess for the principal point

def project_points(X_3D, f, cx, cy):
    """Project 3D points to 2D with a simplified pinhole camera model.

    Each point (X, Y, Z) is mapped to (f * X / Z + cx, f * Y / Z + cy).
    No rotation, translation or lens distortion is applied.

    Parameters
    ----------
    X_3D : array-like, shape (..., 3)
        3D points; the last axis holds (X, Y, Z). Nested lists are accepted,
        as are a single point of shape (3,) and batched inputs with extra
        leading dimensions.
    f : float
        Focal length, applied to both image axes.
    cx, cy : float
        Principal point offsets added to the horizontal and vertical
        coordinates respectively.

    Returns
    -------
    numpy.ndarray, shape (..., 2)
        Projected (u, v) coordinates, with the same leading dimensions as
        the input.

    Notes
    -----
    Points with Z == 0 produce inf/nan through the division, following
    NumPy's floating-point rules; callers should keep points in front of
    the camera (Z > 0).
    """
    # asarray lets plain Python sequences be indexed like arrays, without
    # copying or changing the dtype of inputs that are already ndarrays.
    points = np.asarray(X_3D)

    # Ellipsis indexing selects along the last axis, whatever the batch shape.
    X = points[..., 0]
    Y = points[..., 1]
    Z = points[..., 2]

    u = f * (X / Z) + cx
    v = f * (Y / Z) + cy

    return np.stack([u, v], axis=-1)

def reprojection_error(params, X_3D, X_2D):
    """Return the flattened residuals between projected 3D points and 2D targets.

    `params` unpacks into (focal length, cx, cy). The 3D points are projected
    with `project_points`, the 2D targets are subtracted, and the resulting
    per-coordinate differences are returned as a 1-D array, which is the
    residual layout `least_squares` expects.
    """
    focal, center_x, center_y = params
    residuals = project_points(X_3D, focal, center_x, center_y) - X_2D
    return residuals.ravel()

# Starting point for the solver, ordered as [focal_length, cx, cy]
initial_params = [
    initial_focal_length,
    initial_principal_point[0],
    initial_principal_point[1],
]

# Fit the intrinsics by minimising the reprojection residuals
result = least_squares(
    reprojection_error,
    initial_params,
    args=(X_3DMM, X_2D),
)

# Unpack and report the fitted intrinsics
f_est, cx_est, cy_est = result.x
print("Estimated focal length:", f_est)
print("Estimated principal point:", (cx_est, cy_est))

# Residuals at the solution, reshaped to one (du, dv) row per landmark
final_error = reprojection_error(result.x, X_3DMM, X_2D).reshape(-1, 2)
per_point_distance = np.linalg.norm(final_error, axis=1)
print("Mean reprojection error (pixels):", np.mean(per_point_distance))