In [7]:
from moviepy.editor import VideoFileClip
import librosa
import numpy as np
from datetime import timedelta, datetime
import cv2
import json
import time
import os

def extract_audio(video_file_path, output_audio_path):
    video = VideoFileClip(video_file_path)

    audio = video.audio

    audio.write_audiofile(output_audio_path, codec='pcm_s16le')

    video.close()
    audio.close()

def find_best_match(input_features, query_features):
    best_match = {'score': np.inf, 'index': 0}
    num_frames = input_features.shape[1] - query_features.shape[1] + 1

    for i in range(num_frames):
        current_window = input_features[:, i:i + query_features.shape[1]]
        distance = np.linalg.norm(query_features - current_window)

        if distance < best_match['score']:
            best_match['score'] = distance
            best_match['index'] = i

    return best_match['index'], best_match['score']

# Function to convert seconds into hh:mm:ss.sss format
def seconds_to_timestamp(seconds):
    if(seconds == 0.0):
        return "00:00:00.000"
    td = timedelta(seconds=seconds)
    str_time = str(td)
    hours, minutes, seconds = str_time.split(':')
    seconds, microseconds = seconds.split('.')
    milliseconds = f"{int(microseconds):03d}"[:3]
    return f"{hours}:{minutes}:{seconds}.{milliseconds}"

# functions to convert the timestamp into frame number in the original video
def get_fps(video_path):
    video = cv2.VideoCapture(video_path)
    fps = video.get(cv2.CAP_PROP_FPS)
    video.release()
    return fps

def parse_timecode(time_str):
    return datetime.strptime(time_str, "%H:%M:%S.%f")

def timecode_to_frames(timecode, fps):
    time_obj = parse_timecode(timecode)
    total_seconds = time_obj.hour * 3600 + time_obj.minute * 60 + time_obj.second + time_obj.microsecond / 1e6
    frame_number = int(round(total_seconds * fps))
    return frame_number

def write_to_json(data, filename, key):
    if os.path.exists(filename):
        with open(filename, 'r') as file:
            existing_data = json.load(file)
    else:
        existing_data = {}

    # Round the values to four decimal places
    rounded_data = [[round(val, 4) for val in row] for row in data]

    existing_data[key] = rounded_data

    with open(filename, 'w') as file:
        json.dump(existing_data, file, indent=4)

original_video_path = '../../dataset/originals/joker.mp4'
query_video_path = '../../dataset/queries/joker_query.mp4'
extract_audio(original_video_path, 'output_original_audio.wav')
extract_audio(query_video_path, 'output_query_audio.wav')

original_audio, original_sampling_rate = librosa.load('output_original_audio.wav')
query_audio, query_sampling_rate = librosa.load('output_query_audio.wav')

input_mfcc = librosa.feature.mfcc(y=original_audio, sr=original_sampling_rate, n_mfcc=13)
query_mfcc = librosa.feature.mfcc(y=query_audio, sr=original_sampling_rate, n_mfcc=13)

# print("Input MFCC: ", len(input_mfcc), len(input_mfcc[0]))
# print("Query MFCC: ", query_mfcc[0][0])

start_time = time.time()

start_index, similarity_score = find_best_match(input_mfcc, query_mfcc)

end_time = time.time()

print("Time taken: ", seconds_to_timestamp(end_time - start_time))

HOP_LENGTH = 512
fps = get_fps(original_video_path)

start_time_seconds = start_index * HOP_LENGTH / original_sampling_rate
query_duration_seconds = len(query_audio) / original_sampling_rate
end_time_seconds = start_time_seconds + query_duration_seconds

start_time = seconds_to_timestamp(start_time_seconds)
end_time = seconds_to_timestamp(end_time_seconds)
start_frame = timecode_to_frames(start_time, fps)
end_frame = timecode_to_frames(end_time, fps)

print(f"Start Time: {start_time} seconds")
print(f"End Time: {end_time} seconds")
print(f"Start Frame: {start_frame}")
print(f"End Frame: {end_frame}")

# np.savez('mfcc_arrays.npz', video1=input_mfcc)

# existing_data = npays.load('mfcc_arrays.npz')
# updated_data = dict(existing_data)
# updated_data['video20'] = input_mfcc
# np.savez('mfcc_arr.npz', **updated_data)

MoviePy - Writing audio in output_original_audio.wav


                                                                     

MoviePy - Done.
MoviePy - Writing audio in output_query_audio.wav


                                                                   

MoviePy - Done.
Time taken:  0:00:00.305
Start Time: 0:00:07.124 seconds
End Time: 0:00:25.724 seconds
Start Frame: 171
End Frame: 617


In [15]:
from moviepy.editor import VideoFileClip
import librosa
import numpy as np
from datetime import timedelta, datetime
import cv2
import json
import time

def extract_audio(video_file_path, output_audio_path):
    video = VideoFileClip(video_file_path)

    audio = video.audio

    audio.write_audiofile(output_audio_path, codec='pcm_s16le')

    video.close()
    audio.close()

def find_best_match(input_features, query_features):
    best_match = {'score': np.inf, 'index': 0}
    num_frames = input_features.shape[1] - query_features.shape[1] + 1

    for i in range(num_frames):
        current_window = input_features[:, i:i + query_features.shape[1]]
        distance = np.linalg.norm(query_features - current_window)

        if distance < best_match['score']:
            best_match['score'] = distance
            best_match['index'] = i

    return best_match['index'], best_match['score']

# Function to convert seconds into hh:mm:ss.sss format
def seconds_to_timestamp(seconds):
    if(seconds == 0.0):
        return "00:00:00.000"
    td = timedelta(seconds=seconds)
    str_time = str(td)
    hours, minutes, seconds = str_time.split(':')
    seconds, microseconds = seconds.split('.')
    milliseconds = f"{int(microseconds):03d}"[:3]
    return f"{hours}:{minutes}:{seconds}.{milliseconds}"

# functions to convert the timestamp into frame number in the original video
def get_fps(video_path):
    video = cv2.VideoCapture(video_path)
    fps = video.get(cv2.CAP_PROP_FPS)
    video.release()
    return fps

def parse_timecode(time_str):
    return datetime.strptime(time_str, "%H:%M:%S.%f")

def timecode_to_frames(timecode, fps):
    time_obj = parse_timecode(timecode)
    total_seconds = time_obj.hour * 3600 + time_obj.minute * 60 + time_obj.second + time_obj.microsecond / 1e6
    frame_number = int(round(total_seconds * fps))
    return frame_number

def load_array_from_json(filename):
    with open(filename, 'r') as json_file:
        data = json.load(json_file)
        return data

# original_video_path = '../../dataset/originals/video1.mp4'
query_video_path = '../../dataset/Tests/video10_1_modified.mp4'
# extract_audio(original_video_path, 'output_original_audio.wav')
extract_audio(query_video_path, 'output_query_audio.wav')

# original_audio, original_sampling_rate = librosa.load('output_original_audio.wav')

query_audio, query_sampling_rate = librosa.load('output_query_audio.wav')
query_mfcc = librosa.feature.mfcc(y=query_audio, sr=query_sampling_rate, n_mfcc=13)

# filename = 'audioSignature.json'

# Load all arrays from JSON file
loaded_data = np.load('mfcc_arrays.npz')
# Iterate over each key-value pair in the JSON file
# for key in loaded_data.keys():

key = "video10"
    # Assuming query_audio and original_sampling_rate are defined
input_mfcc = loaded_data[key]

start_time = time.time()

start_index, similarity_score = find_best_match(input_mfcc, query_mfcc)

end_time = time.time()

print("Time taken: ", seconds_to_timestamp(end_time - start_time))

HOP_LENGTH = 512
fps = get_fps(query_video_path)

start_time_seconds = start_index * HOP_LENGTH / query_sampling_rate
query_duration_seconds = len(query_audio) / query_sampling_rate
end_time_seconds = start_time_seconds + query_duration_seconds

start_time = seconds_to_timestamp(start_time_seconds)
end_time = seconds_to_timestamp(end_time_seconds)
start_frame = timecode_to_frames(start_time, fps)
end_frame = timecode_to_frames(end_time, fps)

print(f"For key: {key}")
print(f"Start Time: {start_time} seconds")
print(f"End Time: {end_time} seconds")
print(f"Start Frame: {start_frame}")
print(f"End Frame: {end_frame}\n")

MoviePy - Writing audio in output_query_audio.wav


                                                                   

MoviePy - Done.
Time taken:  0:00:01.201
For key: video10
Start Time: 0:00:00.952 seconds
End Time: 0:00:20.972 seconds
Start Frame: 29
End Frame: 629

