In [1]:
import os
import numpy as np
import logging
import subprocess
import pandas as pd
import torchaudio
import torch
from tqdm import tqdm
import warnings

# Suppress every Python warning raised after this point (no category or module filter is given).
warnings.filterwarnings("ignore")

# Configuration Constants
# All paths are absolute Windows paths specific to the authoring machine; edit them before
# running elsewhere.
CONFIG = {
    # Text files listing one video file name per line (read in main()).
    'train_videos_list': r'F:\SRC_Bhuvaneswari\typpo\Crimenet\Annotations\Videolist\vtrainpadamlist.txt',
    'test_videos_list': r'F:\SRC_Bhuvaneswari\typpo\Crimenet\Annotations\Videolist\vtestpadamlist.txt',
    # Directories the listed video file names are joined onto.
    'train_videos_path': r'E:\SRC-Bhuvaneswari\VAD_XDViolence\ViVi\Dataset\XD Violence\Train',
    'test_videos_path': r'E:\SRC-Bhuvaneswari\VAD_XDViolence\ViVi\Dataset\XD Violence\Test',
    # Output directories for the WAV segments and the per-split label CSV.
    'train_save_path': r'E:\SRC-Bhuvaneswari\processed files\audio\ftrain',
    'test_save_path': r'E:\SRC-Bhuvaneswari\processed files\audio\ftest',
    # Log file; configure_logging() deletes any existing file at this path first.
    'logs': r'F:\SRC_Bhuvaneswari\typpo\Crimenet\Utilities\logs\XDViolence_aud_first.log',
    # Maximum number of lines taken from each video list.
    'num_train_videos': 4264,
    'num_test_videos': 486,
    'n_train': 320,  # Limit for training videos per label
    'n_test': 80,  # Limit for testing videos per label
    # Sample rate (Hz) passed to ffmpeg via -ar when extracting audio.
    'sample_rate': 16000,
    'segment_duration': 15  # Set to 15 seconds to match MAX_AUDIO_LENGTH in wav2vec2.py
}

# Event name -> integer class id written to the CSV 'label' column.
# Note: save_audio_with_labels() skips every video whose label is 'Abuse'.
LABEL_MAP = {'Normal': 0, 'Abuse': 1, 'Explosion': 2, 'Fighting': 3, 'Car Accident': 4, 'Shooting': 5, 'Riot': 6}
# Label code found after '_label_' in a video file name -> event name.
# assign_labels() uses only the first '-'-separated code and treats unknown codes as 'Normal'.
XD_LABEL_MAP = {'A': 'Normal', 'B1': 'Fighting', 'B2': 'Shooting', 'B4': 'Riot', 'B5': 'Abuse', 'B6': 'Car Accident', 'G': 'Explosion'}

In [2]:
def configure_logging(log_path):
    """Send root-logger output (INFO and above) to a fresh file at ``log_path``.

    Safe to call repeatedly in the same kernel: handlers already attached to
    the root logger are detached and closed first. Without that step
    ``logging.basicConfig`` silently does nothing on a second call, and on
    Windows the old log file cannot be deleted while a handler still holds it
    open.

    Args:
        log_path: Destination log file. Its parent directory is created when
            missing, and any existing file is truncated.
    """
    log_dir = os.path.dirname(log_path)
    if log_dir:  # dirname is '' for a bare file name; makedirs('') would raise
        os.makedirs(log_dir, exist_ok=True)

    root_logger = logging.getLogger()
    # Iterate over a copy: removeHandler mutates root_logger.handlers.
    for handler in list(root_logger.handlers):
        root_logger.removeHandler(handler)
        handler.close()

    # filemode='w' truncates the file, replacing the explicit os.remove().
    logging.basicConfig(
        filename=log_path,
        filemode='w',
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
    )
    logging.info("Logging configured.")

In [3]:
def assign_labels(video_name):
    """Derive the integer class id for a video from its file name.

    The label code is the text after the last ``_label_`` marker with the
    file extension removed (e.g. ``..._label_G-B2-B6.mp4`` -> ``G-B2-B6``).
    Only the first ``-``-separated code is used; it is translated through
    ``XD_LABEL_MAP`` and then ``LABEL_MAP``.

    Args:
        video_name: Video file name containing a ``_label_`` marker.

    Returns:
        The integer id from ``LABEL_MAP``. Unknown codes map to 'Normal' (0),
        and a warning is logged so they can be spotted.

    Raises:
        IndexError: If ``video_name`` contains no ``_label_`` marker.
    """
    # rpartition takes the text after the LAST marker, so a title that itself
    # contains '_label_' cannot shadow the real label code.
    _, marker, label_part = video_name.strip().rpartition('_label_')
    if not marker:
        raise IndexError(f"No '_label_' marker in video name: {video_name!r}")

    # splitext drops whatever extension is present, not only a literal '.mp4'.
    label_code = os.path.splitext(label_part)[0]
    primary_code = label_code.split('-')[0]

    if primary_code not in XD_LABEL_MAP:
        logging.warning(f"Unknown label code {primary_code!r} in {video_name!r}; treating as 'Normal'")
    primary_event_name = XD_LABEL_MAP.get(primary_code, 'Normal')
    return LABEL_MAP.get(primary_event_name, 0)

In [4]:
def _remove_files(paths):
    """Best-effort deletion of each path in ``paths``; missing files are ignored."""
    for path in paths:
        try:
            os.remove(path)
        except OSError:
            pass


def extract_audio_from_video(video_path, save_path, file_name, segment_duration):
    """Extract a video's audio with ffmpeg and save it as trimmed WAV segments.

    ffmpeg writes a mono 16-bit PCM WAV at ``CONFIG['sample_rate']`` to
    ``<save_path>/<file_name>.wav``. That file is cut into consecutive chunks
    of at most ``segment_duration`` seconds. Each chunk is narrowed to the span
    between its first and last sample whose energy exceeds 1.5x the chunk's
    mean energy, with a 1000-sample buffer, and saved as
    ``<file_name>_Part_<k>.wav``. The intermediate full-length WAV is always
    removed.

    Args:
        video_path: Path of the source video.
        save_path: Directory for the output WAV files (created if missing).
        file_name: Base name, without extension, for the output files.
        segment_duration: Maximum chunk length in seconds; must give at least
            one sample per chunk.

    Returns:
        List of paths of the saved segments, in order. An empty list is
        returned when ffmpeg fails or cannot be started, when no audio file is
        produced, or when loading/saving audio fails; in those cases any
        segments already written for this video are deleted.

    Raises:
        ValueError: If ``segment_duration`` yields fewer than one sample.
    """
    buffer_samples = 1000  # context kept around the high-energy span
    energy_factor = 1.5    # threshold = energy_factor * mean per-sample energy

    audio_file_path = os.path.join(save_path, f"{file_name}.wav")
    command = [
        'ffmpeg', '-y', '-i', video_path,
        '-ac', '1',
        '-ar', str(CONFIG['sample_rate']),
        '-acodec', 'pcm_s16le',
        audio_file_path
    ]
    part_paths = []

    try:
        os.makedirs(save_path, exist_ok=True)
        logging.info(f"Extracting audio from {video_path} to {audio_file_path}")
        # stderr is captured so ffmpeg's own message can be logged on failure;
        # stdin is detached so ffmpeg never waits on the notebook's stdin.
        subprocess.run(
            command,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            check=True,
        )

        if not os.path.exists(audio_file_path):
            logging.error(f"Failed to create audio file at {audio_file_path}")
            return []

        waveform, sample_rate = torchaudio.load(audio_file_path)
        total_samples = waveform.shape[1]
        segment_samples = int(segment_duration * sample_rate)
        if segment_samples < 1:
            raise ValueError(
                f"segment_duration={segment_duration!r} gives no samples at {sample_rate} Hz"
            )
        num_segments = int(np.ceil(total_samples / segment_samples))

        for part_num in range(num_segments):
            start = part_num * segment_samples
            end = min(start + segment_samples, total_samples)
            segment_waveform = waveform[:, start:end]

            # Apply energy-based segmentation
            energy = torch.sum(segment_waveform ** 2, dim=0)
            threshold = torch.mean(energy) * energy_factor
            high_energy_indices = torch.where(energy > threshold)[0]

            if len(high_energy_indices) > 0:
                # int() turns the 0-dim tensors into plain ints so max/min and
                # the slice below operate on ordinary Python integers.
                first_idx = int(high_energy_indices[0])
                last_idx = int(high_energy_indices[-1])
                start_idx = max(0, first_idx - buffer_samples)
                end_idx = min(segment_waveform.shape[1], last_idx + buffer_samples)
                segment_waveform = segment_waveform[:, start_idx:end_idx]

            part_file_name = f"{file_name}_Part_{part_num+1}"
            part_file_path = os.path.join(save_path, f"{part_file_name}.wav")
            torchaudio.save(part_file_path, segment_waveform, sample_rate)
            part_paths.append(part_file_path)
            logging.info(f"Saved audio segment: {part_file_path}")

        return part_paths
    except subprocess.CalledProcessError as e:
        logging.error(f"Error extracting audio from {video_path}: {e}")
        stderr_text = (e.stderr or b'').decode(errors='replace').strip()
        if stderr_text:
            # The tail of ffmpeg's output carries the actual failure reason.
            logging.error(f"ffmpeg output (tail): {stderr_text[-500:]}")
        _remove_files(part_paths)
        return []
    except (OSError, RuntimeError) as e:
        # OSError covers a missing ffmpeg executable or an unusable save
        # directory; RuntimeError is what torchaudio load/save is expected to
        # raise on unreadable audio (assumption, confirm for the installed version).
        logging.error(f"Error processing audio for {video_path}: {e}")
        _remove_files(part_paths)
        return []
    finally:
        # Never leave the full-length intermediate WAV behind, even on failure.
        if os.path.exists(audio_file_path):
            _remove_files([audio_file_path])

In [5]:
def save_audio_with_labels(video_name, video_path, save_path, label, file_list, label_counters, n, segment_duration, processed_videos):
    """Extract one video's audio segments and record them under its label.

    The video is skipped, with nothing recorded, when its label is 'Abuse',
    when ``n`` videos are already counted for the label, when the video was
    already processed for the label, when the file does not exist, or when
    extraction yields no segments.

    Args:
        video_name: File name of the video (used for naming and de-duplication).
        video_path: Full path of the video file.
        save_path: Directory that receives the WAV segments.
        label: Integer class id of the video.
        file_list: List extended in place with ``(segment_file_name,
            segment_path, label)`` tuples.
        label_counters: Dict ``label -> number of videos counted``; updated in place.
        n: Maximum number of videos to count per label.
        segment_duration: Segment length in seconds, forwarded to
            ``extract_audio_from_video``.
        processed_videos: Dict ``label -> set of video names``; updated in place.

    Returns:
        None.
    """
    if label == LABEL_MAP['Abuse']:
        logging.info(f"Skipping video {video_name} with label 'Abuse'")
        return

    if label_counters[label] >= n:
        return

    if video_name in processed_videos[label]:
        logging.info(f"Skipping duplicate video {video_name} for label {label}")
        return

    if not os.path.exists(video_path):
        logging.warning(f"Video file {video_name} does not exist, skipping.")
        return

    base_name = os.path.splitext(video_name)[0]
    audio_file_name = f"{base_name}_label_{label}"
    audio_segment_paths = extract_audio_from_video(video_path, save_path, audio_file_name, segment_duration)

    if not audio_segment_paths:
        # A failed or empty extraction must not consume a slot in the per-label
        # quota, otherwise the dataset ends up with fewer than n usable videos.
        logging.warning(f"No audio segments produced for {video_name}; not counted for label {label}")
        return

    file_list.extend(
        (os.path.basename(segment_path), segment_path, label)
        for segment_path in audio_segment_paths
    )

    label_counters[label] += 1
    processed_videos[label].add(video_name)

    if label_counters[label] == n:
        # Reverse lookup id -> name; fall back to the raw id if it is unmapped.
        label_name = next((name for name, value in LABEL_MAP.items() if value == label), label)
        print(f"Processing '{label_name}' complete")

In [6]:
def _read_video_list(list_path, limit):
    """Return up to ``limit`` non-blank, whitespace-stripped lines of ``list_path``."""
    with open(list_path, 'r') as f:
        names = [line.strip() for line in f]
    # Drop blank lines (e.g. a trailing newline) before applying the cap so
    # they neither reach assign_labels() nor count toward the limit.
    return [name for name in names if name][:limit]


def _process_split(split_name, video_files, videos_path, save_path, per_label_limit, segment_duration):
    """Extract labelled audio for one split; return its (audio, path, label) rows."""
    os.makedirs(save_path, exist_ok=True)  # also needed later for the CSV
    label_counters = {label: 0 for label in LABEL_MAP.values()}
    processed_videos = {label: set() for label in LABEL_MAP.values()}
    rows = []

    for video_name in tqdm(video_files, desc=f"Processing {split_name} videos"):
        try:
            label = assign_labels(video_name)
        except (IndexError, ValueError) as e:
            # A malformed list entry should not abort the whole run.
            logging.warning(f"Cannot derive a label for {video_name!r}, skipping: {e}")
            continue
        save_audio_with_labels(
            video_name, os.path.join(videos_path, video_name), save_path, label,
            rows, label_counters, per_label_limit,
            segment_duration, processed_videos
        )

    # Store forward-slash paths in the CSV regardless of the host OS separator.
    return [(name, path.replace('\\', '/'), label) for name, path, label in rows]


def main():
    """Extract labelled audio segments for the train and test splits.

    Reads the video lists named in ``CONFIG``, extracts and segments the audio
    of each listed video subject to the per-label limits, and writes one CSV
    per split (columns ``audio``, ``path``, ``label``) into that split's save
    directory.
    """
    configure_logging(CONFIG['logs'])

    train_video_files = _read_video_list(CONFIG['train_videos_list'], CONFIG['num_train_videos'])
    test_video_files = _read_video_list(CONFIG['test_videos_list'], CONFIG['num_test_videos'])

    logging.info(f"Processing {len(train_video_files)} training videos and {len(test_video_files)} testing videos.")

    train_data = _process_split(
        'train', train_video_files, CONFIG['train_videos_path'],
        CONFIG['train_save_path'], CONFIG['n_train'], CONFIG['segment_duration']
    )
    test_data = _process_split(
        'test', test_video_files, CONFIG['test_videos_path'],
        CONFIG['test_save_path'], CONFIG['n_test'], CONFIG['segment_duration']
    )

    train_df = pd.DataFrame(train_data, columns=['audio', 'path', 'label'])
    test_df = pd.DataFrame(test_data, columns=['audio', 'path', 'label'])

    train_csv_path = os.path.join(CONFIG['train_save_path'], 'train_audio_labels.csv')
    test_csv_path = os.path.join(CONFIG['test_save_path'], 'test_audio_labels.csv')

    train_df.to_csv(train_csv_path, index=False)
    test_df.to_csv(test_csv_path, index=False)

    logging.info(f"Saved Train and Test DataFrames to {train_csv_path} and {test_csv_path}")
    print("\nFinished saving Train and Test audio with labels.")

In [7]:
# Run the full extraction pipeline when this cell/script is executed directly.
if __name__ == "__main__":
    main()

Processing train videos:  13%|██████▉                                               | 545/4264 [03:13<16:27,  3.76it/s]

Processing 'Normal' complete


Processing train videos:  75%|███████████████████████████████████████▋             | 3197/4264 [08:53<01:18, 13.52it/s]

Processing 'Fighting' complete


Processing train videos:  82%|███████████████████████████████████████████▋         | 3512/4264 [09:20<01:19,  9.43it/s]

Processing 'Car Accident' complete


Processing train videos:  83%|███████████████████████████████████████████▊         | 3520/4264 [09:22<02:11,  5.65it/s]

Processing 'Riot' complete


Processing train videos:  96%|███████████████████████████████████████████████████  | 4110/4264 [09:42<00:12, 12.59it/s]

Processing 'Shooting' complete


Processing train videos: 100%|█████████████████████████████████████████████████████| 4264/4264 [09:46<00:00,  7.26it/s]
Processing test videos:  67%|█████████████████████████████████████▎                  | 324/486 [01:25<00:24,  6.63it/s]

Processing 'Fighting' complete


Processing test videos:  80%|████████████████████████████████████████████▋           | 388/486 [01:38<00:18,  5.18it/s]

Processing 'Riot' complete


Processing test videos:  87%|████████████████████████████████████████████████▋       | 422/486 [01:42<00:07,  9.10it/s]

Processing 'Car Accident' complete


Processing test videos:  92%|███████████████████████████████████████████████████▋    | 449/486 [01:44<00:02, 17.53it/s]

Processing 'Explosion' complete


Processing test videos: 100%|████████████████████████████████████████████████████████| 486/486 [01:45<00:00,  4.63it/s]

Processing 'Shooting' complete






Finished saving Train and Test audio with labels.
