In [2]:
import pandas as pd
import os

# Root folder that holds the annotation files. The default is the original
# local path; set the SAV_ANNOTATION_DIR environment variable to run the
# notebook on another machine without editing this cell.
ANNOTATION_DIR = os.environ.get(
    "SAV_ANNOTATION_DIR",
    r"D:\UK\00. 2024 QMUL\00. Course\SAV-ViolenceDetection\Annotations_Final",
)
# Annotation workbook to load, located in the "test" subfolder of ANNOTATION_DIR.
FINAL_ANNOTATION_PATH = os.path.join(ANNOTATION_DIR, "test", "use_combined_sound_frame_annotations_ver2.xlsx")
# Read the workbook (first sheet, pandas default) into the working frame.
df = pd.read_excel(FINAL_ANNOTATION_PATH)

In [3]:
# Drop every row whose video label is 'none' or whose video type is 'Transition'.
# .copy() turns the filtered result into an independent frame, so the column
# assignments made in later cells do not raise pandas' SettingWithCopyWarning.
# NOTE(review): the 'none' comparison is case-sensitive and runs before the label
# column is lower-cased in a later cell; confirm the raw labels are already lower-case.
df = df[~((df['Violence Type (Video)'] == 'none') | (df['Video Type'] == 'Transition'))].copy()
# Ad-hoc inspection query, kept disabled:
# df[(df['Video Type'] == 'Others') & (df['Violence Type (Video)'] == 'none')]

In [213]:
# --- Per-video duration statistics ---------------------------------------
# Duration of a video is taken as the largest segment end time recorded for it.
# NOTE(review): df was filtered in an earlier cell, so this maximum only covers
# the rows that were kept; confirm that matches the intended video length.
video_duration = df.groupby('Video ID')['End time(s)'].max()

no_videos = df['Video ID'].nunique()
min_duration, max_duration = video_duration.min(), video_duration.max()
mean_duration = video_duration.mean()
# mode() returns a Series, which can hold several values when durations tie.
mode_duration = video_duration.mode()

# --- Violent vs non-violent segment share per video ----------------------
# Normalise the label casing before counting.
df['Violence Type (Video)'] = df['Violence Type (Video)'].str.lower()

# One row per video, one column per label, cells = number of segments.
label_counts = df.groupby('Video ID')['Violence Type (Video)'].value_counts()
video_stats = label_counts.unstack(fill_value=0)

labelled_segments = video_stats['violent'] + video_stats['non-violent']
video_stats['violence_ratio'] = video_stats['violent'] / labelled_segments
video_stats['non_violence_ratio'] = 1 - video_stats['violence_ratio']

# Averages are taken over videos, not over segments.
avg_violence_ratio = video_stats['violence_ratio'].mean()
avg_non_violence_ratio = video_stats['non_violence_ratio'].mean()

def define_modality(row):
    """Return the modality label for one annotation row.

    A modality counts as present when its column holds a non-missing value:
    'Violence Type (Text)' for text, 'Violence Type (Video)' for video and
    'Violence(Sound) Type1' for sound (only the first sound slot is checked).

    Parameters
    ----------
    row : mapping or pandas.Series
        Must be indexable by the three column names above.

    Returns
    -------
    str
        'Multimodal(...)' when two or three modalities are present,
        'Unimodal(...)' when exactly one is, and 'none' when all are missing.
    """
    presence = (
        bool(pd.notna(row['Violence Type (Text)'])),
        bool(pd.notna(row['Violence Type (Video)'])),
        bool(pd.notna(row['Violence(Sound) Type1'])),
    )
    # Keys are (has_text, has_video, has_sound).
    label_for = {
        (True, True, True): 'Multimodal(Video, Sound, Text)',
        (True, True, False): 'Multimodal(Video, Text)',
        (True, False, True): 'Multimodal(Sound, Text)',
        (False, True, True): 'Multimodal(Video, Sound)',
        (True, False, False): 'Unimodal(Text)',
        (False, True, False): 'Unimodal(Video)',
        (False, False, True): 'Unimodal(Sound)',
        (False, False, False): 'none',
    }
    return label_for[presence]

# Number the rows of each video 1..k in their current order, then build the
# segment id as "<Video ID>_<position>".
segment_rank = df.groupby('Video ID').cumcount() + 1
df['Segment Position'] = segment_rank
df['Segment ID'] = df['Video ID'].astype(str) + "_" + segment_rank.astype(str)

# Tag every row with the combination of modalities that carry an annotation.
df['Modality'] = df.apply(define_modality, axis=1)

# Report the dataset summary statistics.
print(f'Total number of videos: {no_videos}')
print(f'Minimum video duration: {min_duration:.2f} seconds')
print(f'Maximum video duration: {max_duration:.2f} seconds')
print(f'Mean video duration: {mean_duration:.2f} seconds')
# mode_duration is a Series; only its first value is reported.
print(f'Mode video duration: {mode_duration.values[0]:.2f} seconds')
print(f'The average ratio of violence vs non-violence segments within a video is {avg_violence_ratio:.2f} : {avg_non_violence_ratio:.2f}\n')
# The outer f-string uses double quotes: reusing the enclosing quote character
# inside a replacement field is a SyntaxError before Python 3.12 (PEP 701).
# The printed text is unchanged.
print(f"Number of unique modalities: {df['Modality'].value_counts()}")


Total number of videos: 200
Minimum video duration: 54.35 seconds
Maximum video duration: 537.27 seconds
Mean video duration: 113.02 seconds
Mode video duration: 59.73 seconds
The average ratio of violence vs non-violence segments within a video is 0.26 : 0.74

Number of unique modalities: Modality
Unimodal(Video)                   1552
Multimodal(Video, Text)            555
Multimodal(Video, Sound)           223
Multimodal(Video, Sound, Text)      23
Name: count, dtype: int64


In [214]:
# Binary label for the video modality: 1 for 'violent', 0 for 'non-violent'.
# Values without an entry in the mapping become NaN.
label_by_type = {'violent': 1, 'non-violent': 0}
df['Violence label(video)'] = df['Violence Type (Video)'].map(label_by_type)

In [215]:
# Remove the rows that meet all four conditions at once: video label 'none',
# video type 'Others', no memo, and a video-only modality.
is_unlabelled = df['Violence Type (Video)'].eq('none')
is_other_type = df['Video Type'].eq('Others')
lacks_memo = df['Memo'].isna()
is_video_only = df['Modality'].eq('Unimodal(Video)')

df = df[~(is_unlabelled & is_other_type & lacks_memo & is_video_only)]

In [216]:
# Final column layout: identifiers and timing first, then the video/text
# annotations, then the eleven sound slots (four columns each), then the
# bookkeeping columns. reindex fills any name that is absent from df with NaN.
leading_columns = [
    'Video ID', 'Segment ID', 'Modality',
    'Start frame', 'End frame', 'Start time(s)', 'End time(s)',
    'Violence Type (Video)', 'Violence label(video)', 'Video Type',
    'Violence Type (Text)', 'Texts', 'Memo',
]
sound_columns = [
    'Violence(Sound) Type1', 'Sound type1', 'sound_start_frame1', 'sound_end_frame1',
    'Violence(Sound) Type2', 'Sound type2', 'sound_start_frame2', 'sound_end_frame2',
    'Violence(Sound) Type3', 'Sound type3', 'sound_start_frame3', 'sound_end_frame3',
    'Violence(Sound) Type4', 'Sound type4', 'sound_start_frame4', 'sound_end_frame4',
    'Violence(Sound) Type5', 'Sound type5', 'sound_start_frame5', 'sound_end_frame5',
    'Violence(Sound) Type6', 'Sound type6', 'sound_start_frame6', 'sound_end_frame6',
    'Violence(Sound) Type7', 'Sound type7', 'sound_start_frame7', 'sound_end_frame7',
    'Violence(Sound) Type8', 'Sound type8', 'sound_start_frame8', 'sound_end_frame8',
    'Violence(Sound) Type9', 'Sound type9', 'sound_start_frame9', 'sound_end_frame9',
    'Violence(Sound) Type10', 'Sound type10', 'sound_start_frame10', 'sound_end_frame10',
    'Violence(Sound) Type11', 'Sound type11', 'sound_start_frame11', 'sound_end_frame11',
]
trailing_columns = ['Filename', 'Segment Position']

df = df.reindex(columns=leading_columns + sound_columns + trailing_columns)

In [None]:
# Write the preprocessed table into ANNOTATION_DIR, omitting the index column.
preprocessed_csv_path = os.path.join(ANNOTATION_DIR, "preprocessed_dataset_1.csv")
df.to_csv(preprocessed_csv_path, index=False)

In [208]:
# Row counts per video label and per video type.
# A notebook cell only echoes its last expression, so the first dictionary is
# printed explicitly; otherwise its result would be computed and discarded.
print(df['Violence Type (Video)'].value_counts().to_dict())
df['Video Type'].value_counts().to_dict()

{'News': 1015,
 'CCTV': 484,
 'Self-filmed': 423,
 'Combination': 123,
 'Others': 105,
 'Bodycam': 94,
 'Dashcam': 80}

In [163]:
# Row counts per video type, leaving out the 'Transition' and 'Others' types.
excluded_video_types = ['Transition', 'Others']
keep_rows = ~df['Video Type'].isin(excluded_video_types)
new_df = df[keep_rows]
new_df.groupby('Video Type').size()

Video Type
Bodycam          94
CCTV            484
Combination     123
Dashcam          80
News           1015
Self-filmed     423
dtype: int64