In [4]:
import cv2
import imagehash
from PIL import Image
import numpy as np
import os

def extract_hashes(video_path, max_frames=500):
    """Compute a perceptual hash (pHash) for the leading frames of a video.

    Args:
        video_path: Path of the video file to open with OpenCV.
        max_frames: Upper bound on the number of frames to hash. Pass
            ``None`` to hash every frame that can be read.

    Returns:
        A list with one ``imagehash.phash`` value per decoded frame, in the
        order the frames were read. The list is empty when the video cannot
        be opened or no frame can be read.
    """
    cap = cv2.VideoCapture(video_path)
    hashes = []
    try:
        # Test the limit *before* reading so that no frame is decoded only
        # to be thrown away once the limit has been reached.
        while cap.isOpened() and (max_frames is None or len(hashes) < max_frames):
            ret, frame = cap.read()
            if not ret:
                break
            # Frames are converted from BGR to RGB before being handed to PIL.
            pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            hashes.append(imagehash.phash(pil_img))
    finally:
        # Release the capture even if conversion or hashing raised.
        cap.release()
    return hashes

def find_best_match(original_hashes, edited_hashes):
    """Find where the edited hash sequence best lines up inside the original.

    ``edited_hashes`` is slid across ``original_hashes`` one position at a
    time. Each alignment is scored by summing ``original - edited`` over the
    paired hashes, and the lowest total wins.

    Args:
        original_hashes: Hash sequence of the longer (source) video.
        edited_hashes: Hash sequence of the clip to locate. Elements must
            support ``original - edited`` returning a number (presumably the
            Hamming distance for ``imagehash`` values — confirm against the
            library documentation).

    Returns:
        The start index in ``original_hashes`` of the first alignment with
        the smallest total distance. Returns 0 when no alignment is possible
        (``edited_hashes`` is longer than ``original_hashes``).
    """
    window = len(edited_hashes)
    best_start = 0
    best_distance = float('inf')

    # The "+ 1" includes the final alignment, where the edited sequence ends
    # exactly at the end of the original; without it that position is never
    # scored.
    for i in range(len(original_hashes) - window + 1):
        distance = sum(
            original_hashes[i + j] - edited_hashes[j]
            for j in range(window)
        )
        if distance < best_distance:
            best_distance = distance
            best_start = i
    return best_start

def cut_video_segment(original_path, output_path, start_frame, num_frames, fps=120):
    """Copy a run of consecutive frames from a video into a new file.

    Args:
        original_path: Path of the source video.
        output_path: Path of the file to write. It is encoded with the
            ``'mp4v'`` fourcc at the source's frame width and height.
        start_frame: Frame position in the source at which copying begins.
        num_frames: Maximum number of frames to copy; copying stops early
            if the source runs out of frames.
        fps: Frame rate passed to the video writer for the output file.

    Raises:
        IOError: If the source video or the output file cannot be opened.
    """
    cap = cv2.VideoCapture(original_path)
    out = None
    try:
        if not cap.isOpened():
            raise IOError("Could not open source video: {}".format(original_path))

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            raise IOError("Could not open output video for writing: {}".format(output_path))

        # Seek to the first frame of the segment, then copy frame by frame.
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
        for _ in range(num_frames):
            ret, frame = cap.read()
            if not ret:
                break
            out.write(frame)
    finally:
        # Always free the capture and the writer, even if a read or write
        # raised part-way through.
        cap.release()
        if out is not None:
            out.release()

# Example usage: locate the edited clip inside the original recording and
# export the matching stretch of the original.
# NOTE(review): these are absolute, machine-specific paths — adjust them
# before running on another machine.
original_path = '/Users/log/Downloads/20241030_x6roux_ls_05.avi'
edited_path = '/Users/log/Downloads/20241030_x6roux_ls_05.mp4'
output_path = 'aligned_120fps.mp4'

# Frame rates assumed by this cell (original recording vs. edited clip).
ORIGINAL_FPS = 120
EDITED_FPS = 30

# NOTE(review): extract_hashes caps each video at its default max_frames, so
# only the leading frames of each file take part in the match — confirm that
# is enough to cover the region of interest.
original_hashes = extract_hashes(original_path)
edited_hashes = extract_hashes(edited_path)

# Fail fast instead of silently writing a meaningless output file when one
# of the inputs could not be read.
if not original_hashes or not edited_hashes:
    raise RuntimeError(
        "No frames could be read from one of the input videos; "
        "check original_path and edited_path."
    )

# NOTE(review): hashes are compared frame-for-frame even though the two clips
# are assumed to run at different frame rates — confirm the resulting start
# frame is accurate enough for your purposes.
start_frame = find_best_match(original_hashes, edited_hashes)

# Each edited frame spans ORIGINAL_FPS / EDITED_FPS original frames, so scale
# the edited clip's frame count to cover the same duration in the original.
num_frames = len(edited_hashes) * (ORIGINAL_FPS // EDITED_FPS)

cut_video_segment(original_path, output_path, start_frame, num_frames, fps=ORIGINAL_FPS)


ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject