# In [None]:
import cv2
import time
from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_audioclips, CompositeAudioClip

def record_foot_step_timings(video_file):
    """Play a video and record the timestamps at which Space is pressed.

    The video is shown in an OpenCV window at roughly its native frame rate.
    Every time the space bar is pressed, the timestamp of the frame currently
    on screen is recorded. Pressing ESC stops playback early.

    Timestamps are derived from the frame index (``frame_number / fps``), not
    from the wall clock, so they stay aligned with the video even if decoding
    or display makes playback run slower than real time.

    Args:
        video_file: Path of the video to play.

    Returns:
        A list of timestamps in seconds, in the order the key presses
        occurred. The list is empty if the video could not be opened or no
        key was pressed.

    Raises:
        ValueError: If the video opens but reports a non-positive (or NaN)
            frame rate, which makes frame timestamps impossible to compute.
    """
    space_key = 32
    esc_key = 27

    timings = []
    cap = cv2.VideoCapture(video_file)
    try:
        if not cap.isOpened():
            return timings

        fps = cap.get(cv2.CAP_PROP_FPS)
        # `not fps > 0` also rejects NaN, which some backends report.
        if not fps > 0:
            raise ValueError(
                f"Cannot determine frame rate of {video_file!r} (got {fps!r})"
            )

        # waitKey(0) blocks until a key is pressed, so never let the
        # per-frame delay truncate to zero for very high frame rates.
        delay_ms = max(1, int(1000 / fps))

        frame_number = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            cv2.imshow('Video', frame)
            # Keep only the low byte: some GUI backends set modifier bits
            # in the higher bits of the returned key code.
            key = cv2.waitKey(delay_ms) & 0xFF

            if key == space_key:
                timings.append(frame_number / fps)
            elif key == esc_key:
                break

            frame_number += 1
    finally:
        # Always free the capture and close the window, even on error.
        cap.release()
        cv2.destroyAllWindows()

    return timings

def add_foot_step_sounds(video_file, bgm_file, foot_step_file, output_file, timings,
                         *, bgm_start=2, bgm_volume=1.50, foot_step_volume=6.5):
    """Write a copy of a video with background music and foot-step sounds.

    The video's audio track is replaced by a mix of:

    * the background music, starting ``bgm_start`` seconds into the video and
      looped or trimmed so that it ends exactly when the video ends, and
    * one copy of the foot-step sound starting at each entry of ``timings``.

    The mixed audio is trimmed to the video's duration, so a foot step placed
    near the end cannot make the soundtrack outlast the picture.

    Args:
        video_file: Path of the source video.
        bgm_file: Path of the background-music audio file.
        foot_step_file: Path of the foot-step sound effect.
        output_file: Path the resulting video is written to.
        timings: Iterable of start times (seconds) for the foot-step sound.
            Entries that fall outside the video are ignored.
        bgm_start: Delay in seconds before the background music begins.
        bgm_volume: Gain factor applied to the background music.
        foot_step_volume: Gain factor applied to the foot-step sound.

    Raises:
        ValueError: If ``bgm_start`` is negative.
    """
    if bgm_start < 0:
        raise ValueError("bgm_start must be non-negative")

    # Only file-backed clips own an ffmpeg reader; derived clips (volumex,
    # subclip, set_start, ...) share it, so closing the sources is enough.
    opened = []
    try:
        video_clip = VideoFileClip(video_file)
        opened.append(video_clip)
        bgm_source = AudioFileClip(bgm_file)
        opened.append(bgm_source)
        foot_step_source = AudioFileClip(foot_step_file)
        opened.append(foot_step_source)

        video_duration = video_clip.duration
        layers = []

        # Skip the music entirely when the video ends before it would start;
        # a negative subclip end would be interpreted relative to the END of
        # the music and produce a soundtrack much longer than the video.
        bgm_length = video_duration - bgm_start
        if bgm_length > 0:
            bgm = bgm_source.volumex(bgm_volume)
            if bgm.duration < bgm_length:
                # Repeat the track often enough to cover the remaining video.
                n_loops = int(bgm_length // bgm.duration) + 1
                bgm = concatenate_audioclips([bgm] * n_loops)
            layers.append(bgm.subclip(0, bgm_length).set_start(bgm_start))

        foot_step_clip = foot_step_source.volumex(foot_step_volume)
        layers.extend(
            foot_step_clip.set_start(t)
            for t in timings
            if 0 <= t < video_duration
        )

        if layers:
            # Cap the mix at the video length so trailing foot steps are cut
            # off instead of extending the output's audio stream.
            final_audio = CompositeAudioClip(layers).set_duration(video_duration)
            video_with_audio = video_clip.set_audio(final_audio)
        else:
            # Nothing to mix: the original audio is still dropped, as before.
            video_with_audio = video_clip.without_audio()

        video_with_audio.write_videofile(output_file, codec='libx264', audio_codec='aac')
    finally:
        for clip in reversed(opened):
            clip.close()

# Input assets: the video to annotate, the background track and the
# foot-step sound effect that is mixed in at every recorded timestamp.
video_file, bgm_file, foot_step_file = (
    "data/output_slowed_video8.mp4",
    "data/Bgm_2.mp3",
    "data/Foot_Step.mp3",
)
# Destination of the video with the new soundtrack.
output_file = "data/bgm_with_foot_2.mp4"

# Step 1: play the video and let the user tap Space on every foot step.
timings = record_foot_step_timings(video_file)

# Step 2: render the video with background music plus the tapped foot steps.
add_foot_step_sounds(
    video_file=video_file,
    bgm_file=bgm_file,
    foot_step_file=foot_step_file,
    output_file=output_file,
    timings=timings,
)