In [2]:
import os
import cv2
import torch
from pathlib import Path
from fastai.vision.all import *

  from .autonotebook import tqdm as notebook_tqdm


### Load the Model

In [2]:
# Root folder of the labelled images; passed to `dblock.dataloaders` below.
trn_path = r"./dataset/trainVal"

def get_leftEyeR(file_path):
    """Return the left-eye row label encoded in an image filename.

    The label is the underscore-separated field at index 1 of the filename
    (index 0 is the image ID).

    Args:
        file_path: ``str`` or ``Path`` pointing at the image file.

    Returns:
        The field parsed as ``float``.

    Raises:
        IndexError: the filename has fewer than two ``_``-separated fields.
        ValueError: the field is not a valid number.
    """
    # Parse only the bare filename (no directories, no extension). Splitting
    # the full path string shifts the field index as soon as any parent
    # directory contains an underscore, and slicing a fixed 4 characters
    # assumes a three-letter extension.
    stem = Path(file_path).stem
    return float(stem.split("_")[1])

def get_leftEyeC(file_path):
    """Return the left-eye column label encoded in an image filename.

    The label is the underscore-separated field at index 2 of the filename
    (index 0 is the image ID).

    Args:
        file_path: ``str`` or ``Path`` pointing at the image file.

    Returns:
        The field parsed as ``float``.

    Raises:
        IndexError: the filename has fewer than three ``_``-separated fields.
        ValueError: the field is not a valid number.
    """
    # Parse only the bare filename (no directories, no extension). Splitting
    # the full path string shifts the field index as soon as any parent
    # directory contains an underscore, and slicing a fixed 4 characters
    # assumes a three-letter extension.
    stem = Path(file_path).stem
    return float(stem.split("_")[2])

def get_rightEyeR(file_path):
    """Return the right-eye row label encoded in an image filename.

    The label is the underscore-separated field at index 3 of the filename
    (index 0 is the image ID).

    Args:
        file_path: ``str`` or ``Path`` pointing at the image file.

    Returns:
        The field parsed as ``float``.

    Raises:
        IndexError: the filename has fewer than four ``_``-separated fields.
        ValueError: the field is not a valid number.
    """
    # Parse only the bare filename (no directories, no extension). Splitting
    # the full path string shifts the field index as soon as any parent
    # directory contains an underscore, and slicing a fixed 4 characters
    # assumes a three-letter extension.
    stem = Path(file_path).stem
    return float(stem.split("_")[3])

def get_rightEyeC(file_path):
    """Return the right-eye column label encoded in an image filename.

    The label is the underscore-separated field at index 4 of the filename
    (index 0 is the image ID).

    Args:
        file_path: ``str`` or ``Path`` pointing at the image file.

    Returns:
        The field parsed as ``float``.

    Raises:
        IndexError: the filename has fewer than five ``_``-separated fields.
        ValueError: the field is not a valid number.
    """
    # Parse only the bare filename (no directories, no extension). Splitting
    # the full path string shifts the field index as soon as any parent
    # directory contains an underscore, and slicing a fixed 4 characters
    # only strips the extension cleanly when it is exactly three letters.
    stem = Path(file_path).stem
    return float(stem.split("_")[4])

# Data pipeline: one image input followed by four scalar regression targets
# (left-eye row/col, then right-eye row/col, each parsed from the filename).
dblock = DataBlock(
    blocks=(ImageBlock,) + (RegressionBlock,) * 4,
    n_inp=1,
    get_items=get_image_files,
    get_y=[get_leftEyeR, get_leftEyeC, get_rightEyeR, get_rightEyeC],
    splitter=RandomSplitter(valid_pct=0.2, seed=42),  # fixed seed -> repeatable 80/20 split
    item_tfms=[RatioResize(max_sz=512)],
)

# Build the DataLoaders from the training folder; tune `bs` to the available memory.
dls = dblock.dataloaders(trn_path, bs=20)

In [3]:
def MSELossFlat(input, target):
    """Mean of the element-wise squared difference between `input` and `target`."""
    residual = input - target
    squared_error = residual ** 2
    return squared_error.mean()

def leftEye_loss(input_from_model, leftEyeR, leftEyeC, rightEyeR, rightEyeC):
    """Left-eye loss: MSE of output column 0 vs `leftEyeR` plus MSE of column 1 vs `leftEyeC`.

    The right-eye targets are accepted only so that every loss term shares
    the same signature; they are not used here.
    """
    predicted_row = input_from_model[:, 0]
    predicted_col = input_from_model[:, 1]
    row_term = MSELossFlat(predicted_row, leftEyeR)
    col_term = MSELossFlat(predicted_col, leftEyeC)
    return row_term + col_term

def rightEye_loss(input_from_model, leftEyeR, leftEyeC, rightEyeR, rightEyeC):
    """Right-eye loss: MSE of output column 2 vs `rightEyeR` plus MSE of column 3 vs `rightEyeC`.

    The left-eye targets are accepted only so that every loss term shares
    the same signature; they are not used here.
    """
    predicted_row = input_from_model[:, 2]
    predicted_col = input_from_model[:, 3]
    row_term = MSELossFlat(predicted_row, rightEyeR)
    col_term = MSELossFlat(predicted_col, rightEyeC)
    return row_term + col_term

def net_Loss(input_from_model, leftEyeR, leftEyeC, rightEyeR, rightEyeC):
    """Total training loss: left-eye loss plus right-eye loss on normalised targets.

    Row targets are divided by 2448 and column targets by 3264 before being
    compared with the four model outputs (presumably the image height and
    width in pixels, so the network regresses values in roughly [0, 1] —
    confirm against the dataset).

    Args:
        input_from_model: model output indexed as ``[:, 0..3]`` =
            left row, left col, right row, right col.
        leftEyeR, leftEyeC, rightEyeR, rightEyeC: un-normalised target tensors.

    Returns:
        ``leftEye_loss(...) + rightEye_loss(...)`` evaluated on the
        normalised targets.
    """
    # Divide out-of-place. The previous `/=` rescaled the caller's target
    # tensors in place, so anything that read the batch labels after the loss
    # call (or called the loss twice on the same batch) saw already-normalised
    # values.
    leftEyeR = leftEyeR / 2448
    rightEyeR = rightEyeR / 2448
    leftEyeC = leftEyeC / 3264
    rightEyeC = rightEyeC / 3264
    left_term = leftEye_loss(input_from_model, leftEyeR, leftEyeC, rightEyeR, rightEyeC)
    right_term = rightEye_loss(input_from_model, leftEyeR, leftEyeC, rightEyeR, rightEyeC)
    return left_term + right_term

# Backbones tried so far, with the best validation MSE recorded for each:
# architecture = 'convnext_small_in22k' #Best Validation MSE : 5509446.000000 (10 epochs) (lr = 0.1)
# architecture = 'convnext_tiny_hnf' #Best Validation MSE : 327073 (10 epochs) (lr = 0.2)
architecture = 'regnety_006'  # currently selected; its MSE / epochs / lr were never filled in

# Four outputs (left row/col, right row/col) trained with the custom loss,
# then switched to mixed precision.
learn = vision_learner(
    dls,
    architecture,
    n_out=4,
    loss_func=net_Loss,
)
learn = learn.to_fp16()

In [4]:
# Restore the saved state named 'finalModel_saved' into `learn` before inference.
# NOTE(review): where fastai looks for this file depends on the learner's
# path / model_dir settings — confirm the checkpoint sits there.
learn.load('finalModel_saved')

  state = torch.load(file, map_location=device, **torch_load_kwargs)


<fastai.learner.Learner at 0x156c85750d0>

### Create Video using model

In [11]:
# Factors that map the model's normalised outputs back to pixel units, ordered
# [left row, left col, right row, right col]; they mirror the 2448 / 3264
# divisors applied to the targets in net_Loss.
scaling_factors = torch.tensor([2448, 3264, 2448, 3264])

# Reference (row, col) positions the eyes of every frame are shifted towards.
# Any fixed positions work; these are example values.
reference_left_eye = (1081, 1444)
reference_right_eye = (1070, 1952)

# Input images and the folder that receives the aligned copies
image_dir = Path('./PhotoDiaryPrj_Images/')
output_dir = Path('./aligned_images_using_model/')
output_dir.mkdir(exist_ok=True)

# glob() yields files in arbitrary filesystem order. Sort them so the frames
# (and therefore the video built from `aligned_images`) come out in the same
# filename order as the other video cells.
image_paths = sorted(image_dir.glob('*.jpg'))

aligned_images = []

for img_path in image_paths:
    # Read first: cv2.imread returns None instead of raising on an unreadable
    # file, which previously crashed later at `img.shape`.
    img = cv2.imread(str(img_path))
    if img is None:
        print(f"Could not read {img_path}; skipping")
        continue

    # Predict eye positions and scale them back to pixel coordinates
    dl = learn.dls.test_dl([str(img_path)])
    predictions = learn.get_preds(dl=dl)
    scaled_predictions = predictions[0][0] * scaling_factors
    left_eye = scaled_predictions[:2].tolist()  # [row, col]
    right_eye = scaled_predictions[2:].tolist()  # [row, col]

    # Offset of each eye from its reference position, as (row, col)
    left_offset = (reference_left_eye[0] - left_eye[0], reference_left_eye[1] - left_eye[1])
    right_offset = (reference_right_eye[0] - right_eye[0], reference_right_eye[1] - right_eye[1])

    # Average the two offsets; columns map to x and rows map to y
    translation_x = (left_offset[1] + right_offset[1]) / 2
    translation_y = (left_offset[0] + right_offset[0]) / 2

    # Pure translation (no rotation or scaling) on a canvas of unchanged size
    rows, cols, _ = img.shape
    M = np.float32([[1, 0, translation_x], [0, 1, translation_y]])
    aligned_img = cv2.warpAffine(img, M, (cols, rows))

    # Save the aligned frame and keep it in memory for the video cell
    aligned_img_path = output_dir / img_path.name
    cv2.imwrite(str(aligned_img_path), aligned_img)
    aligned_images.append(aligned_img)

In [12]:
# Write the model-aligned frames to a video file
video_path = 'aligned_faces_video_using_model.avi'
frame_rate = 10  # Adjust as needed

# Fail with a clear message instead of a bare IndexError on aligned_images[0]
if not aligned_images:
    raise ValueError("aligned_images is empty; no frames were aligned, so there is nothing to write")

# The frame size is taken from the first aligned image
height, width, _ = aligned_images[0].shape
fourcc = cv2.VideoWriter_fourcc(*'XVID')
video = cv2.VideoWriter(video_path, fourcc, frame_rate, (width, height))

# VideoWriter does not raise when it cannot open the output (e.g. missing
# codec); without this check the cell would report success for a file that
# was never written.
if not video.isOpened():
    raise RuntimeError(f"Could not open video writer for {video_path}")

try:
    for img in aligned_images:
        video.write(img)
finally:
    # Always finalise the file, even if a write fails part-way
    video.release()
print(f"Video saved at {video_path}")

Video saved at aligned_faces_video_using_model.avi


### Create Video without model

In [13]:
# Path to the image directory
image_dir = Path('./PhotoDiaryPrj_Images/')

# Collect image paths in filename order so the frames are sequential
image_paths = sorted(image_dir.glob('*.jpg'))
if not image_paths:
    raise FileNotFoundError(f"No .jpg images found in {image_dir}")

# Read the first image to determine the frame size. cv2.imread returns None
# (it does not raise) when a file cannot be decoded.
first_image = cv2.imread(str(image_paths[0]))
if first_image is None:
    raise ValueError(f"Could not read {image_paths[0]}")
height, width, _ = first_image.shape

# Create a video
video_path = 'aligned_faces_video.avi'
frame_rate = 10  # Adjust as needed

fourcc = cv2.VideoWriter_fourcc(*'XVID')
video = cv2.VideoWriter(video_path, fourcc, frame_rate, (width, height))

# VideoWriter does not raise when it cannot open the output, so check
# explicitly before reporting success.
if not video.isOpened():
    raise RuntimeError(f"Could not open video writer for {video_path}")

try:
    # Add each readable image to the video, unmodified
    for img_path in image_paths:
        img = cv2.imread(str(img_path))
        if img is None:
            print(f"Could not read {img_path}; skipping")
            continue
        video.write(img)
finally:
    # Always finalise the file, even if a write fails part-way
    video.release()
print(f"Video saved at {video_path}")


Video saved at aligned_faces_video.avi


### Create Video using OpenCV's face detection

In [3]:
# Path to the image directory
image_dir = Path('./PhotoDiaryPrj_Images/')
output_video_path = 'opencv_aligned_faces_video.avi'

# Collect all image paths in filename order
image_paths = sorted(image_dir.glob('*.jpg'))
if not image_paths:
    raise FileNotFoundError(f"No .jpg images found in {image_dir}")

# Load the pre-trained face detector (Haar Cascade). A missing or corrupt
# cascade file yields an empty classifier rather than an exception.
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
if face_cascade.empty():
    raise RuntimeError("Could not load haarcascade_frontalface_default.xml")

# Read the first image to determine the frame size. cv2.imread returns None
# (it does not raise) when a file cannot be decoded.
first_image = cv2.imread(str(image_paths[0]))
if first_image is None:
    raise ValueError(f"Could not read {image_paths[0]}")
height, width, _ = first_image.shape

# Create a blank (black) frame for consistent borders
base_frame = np.zeros((height, width, 3), dtype=np.uint8)

# Create the VideoWriter
frame_rate = 10  # Adjust as needed
fourcc = cv2.VideoWriter_fourcc(*'XVID')
video = cv2.VideoWriter(output_video_path, fourcc, frame_rate, (width, height))

# VideoWriter does not raise when it cannot open the output, so check
# explicitly before reporting success.
if not video.isOpened():
    raise RuntimeError(f"Could not open video writer for {output_video_path}")

try:
    # Process each image
    for img_path in image_paths:
        img = cv2.imread(str(img_path))
        if img is None:
            print(f"Could not read {img_path}; skipping")
            continue

        # Convert to grayscale for face detection
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Detect faces using OpenCV's pre-trained face detector
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

        if len(faces) == 0:
            print(f"No face detected in {img_path}")
            continue

        # When several boxes come back, use the largest one instead of
        # whichever happens to be listed first; with a single detection this
        # is the same box as before.
        x, y, w, h = max(faces, key=lambda box: box[2] * box[3])

        # Crop the face region and stretch it to the full frame size
        face_img = img[y:y+h, x:x+w]
        resized_face_img = cv2.resize(face_img, (width, height), interpolation=cv2.INTER_AREA)

        # Overlay the resized face image onto the base frame
        aligned_img = base_frame.copy()
        aligned_img[:height, :width] = resized_face_img  # Ensure it matches perfectly

        # Write the aligned image to the video
        video.write(aligned_img)
finally:
    # Always finalise the file, even if processing fails part-way
    video.release()
print(f"Aligned face video saved at {output_video_path}")

Aligned face video saved at opencv_aligned_faces_video.avi
