In [11]:
import pandas as pd
import os

# Folder holding the two annotation workbooks. The original Windows location
# remains the default so existing runs behave the same; set the
# SAV_ANNOTATION_DIR environment variable to run the notebook on another machine.
BASE_PATH = os.environ.get(
    "SAV_ANNOTATION_DIR",
    r"D:\UK\00. 2024 QMUL\00. Course\SAV-ViolenceDetection\Annotations_Final\test",
)

FRAME_ANNOTATION_PATH = os.path.join(BASE_PATH, "use_annotation_(framelevel_refinement).xlsx")
SOUND_ANNOTATION_PATH = os.path.join(BASE_PATH, "use_annotation(sound_time unit)_ver2.xlsx")

# read_excel already returns a DataFrame, so no extra pd.DataFrame(...) wrap is needed.
video_df = pd.read_excel(FRAME_ANNOTATION_PATH)
sound_df = pd.read_excel(SOUND_ANNOTATION_PATH)

# Legacy names kept so any cell still referring to them keeps working.
file_1, file_2 = video_df, sound_df


In [3]:
# Harmonise the column names of both sheets. The sound sheet gets a "(sound)"
# suffix on its frame columns so they cannot be confused with the frame
# columns of the video sheet.
sound_df = sound_df.rename(
    columns={
        'Start frame': 'Start frame(sound)',
        'End frame': 'End frame(sound)',
        'Sound Type': 'sound_label',
    }
)

video_df = video_df.rename(
    columns={
        'Start Frame': 'Start frame',
        'End Frame': 'End frame',
        'Violence Type (Sound)': 'Violence(Sound) Type',
        'Start Time (s)': 'Start time(s)',
        'End Time (s)': 'End time(s)',
    }
)

# Video sheet: integer ids, float frame bounds.
video_df_refined = video_df.astype(
    {'Video ID': int, **dict.fromkeys(['Start frame', 'End frame'], float)}
)

# Sound sheet: drop the columns that are not used further on, then apply the
# same id / frame dtypes as for the video sheet.
sound_df_refined = (
    sound_df
    .drop(columns=[
        'Filename',
        'Duplicated moment with previous',
        'Sound Start',
        'Sound End',
        'Max frame',
        'Max Time',
    ])
    .astype(
        {'Video ID': int,
         **dict.fromkeys(['Start frame(sound)', 'End frame(sound)'], float)}
    )
)

# Working table that the following cells extend with per-segment sound slots.
combined_df = video_df_refined.copy()

In [4]:
# Number of sound slots reserved on every video segment row.
max_columns = 11

# Reserve the (initially empty) slot columns: type, label, start and end frame.
for i in range(1, max_columns + 1):
    combined_df[f'Violence(Sound) Type{i}'] = None
    combined_df[f'Sound type{i}'] = None
    combined_df[f'sound_start_frame{i}'] = None
    combined_df[f'sound_end_frame{i}'] = None

combined_df = (
    combined_df
    .sort_values(by=['Video ID', 'Start frame'])
    .reset_index(drop=True)
)


def _fill_first_free_slot(frame, row_idx, s_type, s_label, start, end, n_slots):
    """Store one sound piece in the first empty slot of row ``row_idx``.

    A slot counts as empty while its ``sound_start_frame{n}`` cell is missing.
    The start frame is used (rather than the violence type) because it is
    always set when a slot is written, whereas the type may itself be NaN.

    Returns True if a slot was written, False if all ``n_slots`` slots of the
    row were already occupied (in which case nothing is modified).
    """
    for slot in range(1, n_slots + 1):
        if pd.isna(frame.at[row_idx, f'sound_start_frame{slot}']):
            frame.at[row_idx, f'Violence(Sound) Type{slot}'] = s_type
            frame.at[row_idx, f'Sound type{slot}'] = s_label
            frame.at[row_idx, f'sound_start_frame{slot}'] = start
            frame.at[row_idx, f'sound_end_frame{slot}'] = end
            return True
    return False


# Pieces that could not be stored because the target row had no free slot:
# (Video ID, piece start, piece end).
unplaced_pieces = []

for _, sound_row in sound_df_refined.iterrows():
    s_video_id = sound_row['Video ID']
    s_type = sound_row['Violence(Sound) Type']
    s_start = sound_row['Start frame(sound)']
    s_end = sound_row['End frame(sound)']
    s_label = sound_row['sound_label']

    same_video = combined_df['Video ID'] == s_video_id

    # A sound is attached only if its start frame lies inside a video segment.
    matches = combined_df[
        same_video &
        (combined_df['Start frame'] <= s_start) &
        (combined_df['End frame'] >= s_start)
    ]
    if matches.empty:
        continue

    # Only the first containing segment is used for a given position.
    row_idx = matches.index[0]
    piece_start = s_start

    while True:
        segment_end = combined_df.at[row_idx, 'End frame']
        piece_end = min(s_end, segment_end)

        if not _fill_first_free_slot(
            combined_df, row_idx, s_type, s_label, piece_start, piece_end, max_columns
        ):
            unplaced_pieces.append((s_video_id, piece_start, piece_end))

        # Always move past the current segment, even when the row had no free
        # slot; otherwise a full row would keep the loop spinning forever.
        piece_start = segment_end + 1
        if not (piece_start <= s_end):
            break

        # The sound runs beyond this segment: continue in the segment that
        # contains the next frame, if there is one.
        following = combined_df[
            same_video &
            (combined_df['Start frame'] <= piece_start) &
            (combined_df['End frame'] >= piece_start)
        ]
        if following.empty:
            break
        row_idx = following.index[0]

if unplaced_pieces:
    print(f"Warning: {len(unplaced_pieces)} sound piece(s) dropped because all "
          f"{max_columns} slots of the target segment were already in use:")
    for vid, p_start, p_end in unplaced_pieces:
        print(f"- Video ID: {vid}, Sound Frame: {p_start} to {p_end}")

# Render every frame column as a whole-number string ('' when missing).
# NOTE: this rewrites combined_df in place (the following cell parses these
# strings), so re-running this cell requires re-running the previous cell
# first to restore numeric 'Start frame' / 'End frame' values.
for col in combined_df.columns:
    if 'frame' in col:
        combined_df[col] = combined_df[col].apply(
            lambda x: '{:.0f}'.format(x) if pd.notna(x) else ''
        )

output_path = "combined_sound_frame_annotations.xlsx"
combined_df.to_excel(output_path, index=False)

In [None]:
# Number of sound slots per segment row (must match the slot columns of combined_df).
max_columns = 11


def _merged_sound_ranges(segments, n_slots):
    """Collect the filled sound slots of ``segments`` and merge them.

    Every slot whose start and end cells are both present and non-blank
    contributes an integer ``(start, end)`` range. Ranges that overlap or are
    directly adjacent (next start == previous end + 1) are merged.

    Returns a list of ``[start, end]`` lists sorted by start.
    """
    ranges = []
    for _, seg in segments.iterrows():
        for slot in range(1, n_slots + 1):
            start_val = seg[f'sound_start_frame{slot}']
            end_val = seg[f'sound_end_frame{slot}']
            if (
                pd.notna(start_val) and pd.notna(end_val)
                and str(start_val).strip() != '' and str(end_val).strip() != ''
            ):
                ranges.append((int(float(start_val)), int(float(end_val))))

    ranges.sort()
    merged = []
    for start, end in ranges:
        if not merged or start > merged[-1][1] + 1:
            merged.append([start, end])
        else:
            merged[-1][1] = max(merged[-1][1], end)
    return merged


missing_annotations = []
# Merged ranges per Video ID, computed once per video instead of once per sound row.
merged_by_video = {}

for _, sound_row in sound_df_refined.iterrows():
    s_video_id = sound_row['Video ID']
    s_start = round(sound_row['Start frame(sound)'])
    s_end = round(sound_row['End frame(sound)'])

    if s_video_id not in merged_by_video:
        # Only rows of the same video may count as coverage; ranges from other
        # videos must not make this sound look annotated.
        video_segments = combined_df[combined_df['Video ID'] == s_video_id]
        merged_by_video[s_video_id] = _merged_sound_ranges(video_segments, max_columns)

    # The sound is covered when one merged range spans it entirely.
    fully_covered = any(
        start <= s_start and end >= s_end
        for start, end in merged_by_video[s_video_id]
    )

    if not fully_covered:
        missing_annotations.append((s_video_id, s_start, s_end))


if missing_annotations:
    print("Missing or incomplete annotations:")
    for vid, s, e in missing_annotations:
        print(f"- Video ID: {vid}, Sound Frame: {s} to {e}")
else:
    print("All sound annotations are fully covered.")
