In [1]:
import os
import cv2
import numpy as np
import logging
import gc
import shutil
from tqdm import tqdm
import warnings

In [2]:
# Install an "ignore" filter so warnings issued through the warnings module are suppressed.
warnings.filterwarnings("ignore")

# Configuration Constants
# Absolute Windows paths for this machine; adjust before running elsewhere.
CONFIG = {
    # Text files listing the video file names to process, one per line.
    'train_videos_list': r'F:\SRC_Bhuvaneswari\typpo\ViVi\experimental\Annotations\Videolist\vtrainpadamlist.txt',
    'test_videos_list': r'F:\SRC_Bhuvaneswari\typpo\ViVi\experimental\Annotations\Videolist\vtestpadamlist.txt',
    # Directories containing the source videos.
    'train_videos_path': r'F:\SRC_Bhuvaneswari\typpo\ViVi\Dataset\XD Violence\Train',
    'test_videos_path': r'F:\SRC_Bhuvaneswari\typpo\ViVi\Dataset\XD Violence\Test',
    # Output roots; main() empties these directories in 'new' mode.
    'train_save_path': r'E:\SRC-Bhuvaneswari\ftrain',
    'test_save_path': r'E:\SRC-Bhuvaneswari\ftest',
    # Annotation file: each line is a video name followed by start/end frame pairs.
    'annotations_path': r'F:\SRC_Bhuvaneswari\typpo\ViVi\experimental\Annotations\annotations.txt',
    # Log file written by configure_logging().
    'logs': r'F:\SRC_Bhuvaneswari\typpo\ViVi\experimental\Utilities\logs\XDViolence_vid_first.log',
    'num_train_videos': 1,  # Number of videos for training
    'num_test_videos': 5,    # Number of videos for testing
    # Target frame size passed to the frame reader (frames are resized to width x height).
    'width': 224,
    'height': 224,
    'channels': 3,  # NOTE(review): not read by any code in the visible cells
    'ftp': 120,  # Frames per saved batch (passed as max_frames by main); 120 frames is 5 seconds at 24 fps
    'frameskip': 4  # Save only every nth frame -- NOTE(review): not read by any code in the visible cells
}

# Event name -> integer class id used in the saved file names.
LABEL_MAP = {'Normal': 0, 'Abuse': 1, 'Explosion': 2, 'Fighting': 3, 'Car Accident': 4, 'Shooting': 5, 'Riot': 6}
# Label code found after '_label_' in a video file name -> event name (see assign_labels).
XD_LABEL_MAP = {'A': 'Normal', 'B1': 'Fighting', 'B2': 'Shooting', 'B4': 'Riot', 'B5': 'Abuse', 'B6': 'Car Accident', 'G': 'Explosion'}

In [3]:
def configure_logging(log_path):
    """Route root-logger output to a freshly truncated log file.

    Safe to call repeatedly in the same kernel: handlers already attached to
    the root logger are removed and closed first. Without that,
    ``logging.basicConfig`` would silently do nothing on the second call and
    the previous handler would keep the old file open.

    Args:
        log_path: Path of the log file. Its parent directory is created if
            missing, and any existing file is overwritten.
    """
    root_logger = logging.getLogger()
    # Copy the list: removeHandler mutates root_logger.handlers while we iterate.
    for handler in list(root_logger.handlers):
        root_logger.removeHandler(handler)
        handler.close()

    log_dir = os.path.dirname(log_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    # filemode='w' truncates an existing log, replacing the old delete-then-create step
    # (deleting a file that a live handler still holds open fails on Windows).
    logging.basicConfig(filename=log_path, filemode='w', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    logging.info("Logging configured.")

In [4]:
def count_total_frames(video_path):
    """Return the frame count that OpenCV reports for ``video_path``.

    Returns 0 (and logs an error) when the file cannot be opened. The value
    is the ``CAP_PROP_FRAME_COUNT`` property truncated to ``int``.
    """
    capture = cv2.VideoCapture(video_path)
    if capture.isOpened():
        reported_count = capture.get(cv2.CAP_PROP_FRAME_COUNT)
        capture.release()
        return int(reported_count)

    logging.error(f"Cannot open video file {video_path}")
    return 0

In [5]:
def _frames_contain_nan(frames):
    """Return True if any floating-point frame in ``frames`` contains a NaN.

    Integer frames cannot hold NaN, so they are skipped without stacking the
    whole batch into one temporary array.
    """
    return any(
        np.issubdtype(frame.dtype, np.floating) and bool(np.isnan(frame).any())
        for frame in frames
    )


def read_video_rgb_and_optical_flow(vid, width, height, resize=False, max_frames=1200, start_frame=0, video_num=0, total_videos=0):
    """Yield batches of RGB frames and matching dense optical-flow images.

    The frame at ``start_frame`` is read only as the reference for the first
    flow computation; emitted frames start with the one after it. Flow is
    computed with Farneback between consecutive grayscale frames and encoded
    as an image (hue from flow angle, value from min-max normalized
    magnitude, converted HSV -> BGR).

    Args:
        vid: Path of the video file.
        width, height: Expected frame size; frames are resized to it when
            ``resize`` is True, and only frames of shape
            ``(height, width, 3)`` are kept.
        resize: Whether to resize every frame to ``(width, height)``.
        max_frames: Maximum number of frame pairs to process in this call.
        start_frame: Index the capture is positioned at before reading.
        video_num, total_videos: Used only in the progress-bar description.

    Yields:
        ``(rgb_frames, optical_flow_frames, next_start_frame)``. A batch is
        yielded once ``max_frames`` frames were collected, with
        ``next_start_frame`` one past the loop index reached. If the loop
        ends with a partial batch, it is yielded with the video's reported
        total frame count as ``next_start_frame``.

    Nothing is yielded (an error is logged) if the video cannot be opened or
    the first frame cannot be read.
    """
    cap = cv2.VideoCapture(vid)
    # try/finally so the capture is released even when the consumer stops
    # iterating early (generator close) or an exception propagates.
    try:
        if not cap.isOpened():
            logging.error(f"Cannot open video file {vid}")
            return

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

        ret1, frame1 = cap.read()
        if not ret1:
            logging.error(f"Error reading the first frame of video {vid} starting at frame {start_frame}")
            return

        if resize:
            frame1 = cv2.resize(frame1, (width, height))

        prvs = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
        hsv = np.zeros_like(frame1)
        hsv[..., 1] = 255  # full saturation; hue and value are filled per frame

        rgb_frames = []
        optical_flow_frames = []
        frame_count = 0

        desc = f"{video_num}/{total_videos} Processing {os.path.basename(vid)}"
        for frame_index in tqdm(range(start_frame, total_frames - 1), unit="frames", desc=desc):
            # Check the limit before reading so no frame is decoded just to be discarded.
            if frame_count >= max_frames:
                break
            ret2, frame2 = cap.read()
            if not ret2:
                break
            if resize:
                frame2 = cv2.resize(frame2, (width, height))
            next_frame = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
            flow = cv2.calcOpticalFlowFarneback(prvs, next_frame, None, 0.5, 3, 15, 3, 5, 1.2, 0)
            mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
            hsv[..., 0] = ang * 180 / np.pi / 2
            hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
            optical_flow_frame = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
            if frame2.shape == (height, width, 3):
                rgb_frames.append(frame2)
            if optical_flow_frame.shape == (height, width, 3):
                optical_flow_frames.append(optical_flow_frame)
            prvs = next_frame
            frame_count += 1

            if len(rgb_frames) >= max_frames:
                if _frames_contain_nan(rgb_frames) or _frames_contain_nan(optical_flow_frames):
                    logging.error(f"NaN values detected in video frames for video {vid}")
                else:
                    yield rgb_frames, optical_flow_frames, frame_index + 1
                rgb_frames = []
                optical_flow_frames = []

        # Partial batch left when the loop ended before max_frames was reached.
        if rgb_frames:
            if _frames_contain_nan(rgb_frames) or _frames_contain_nan(optical_flow_frames):
                logging.error(f"NaN values detected in video frames for video {vid}")
            else:
                yield rgb_frames, optical_flow_frames, total_frames
    finally:
        cap.release()

In [6]:
def save_frames_as_images(rgb_frames, optical_flow_frames, labels, save_path, video_name, batch_num):
    """Write each RGB / optical-flow frame pair to disk as JPEG files.

    Files go to ``<save_path>/rgb_frames/<video_name>/`` and
    ``<save_path>/flow_frames/<video_name>/`` and are named
    ``{rgb|flow}_<video_name>_batch_<batch_num>_frame_<i>_label_<labels>.jpg``,
    where ``i`` is the frame's index within this batch.

    Args:
        rgb_frames: Sequence of RGB images.
        optical_flow_frames: Sequence of flow images, paired by position.
        labels: One label per frame; each is an integer (Python or NumPy)
            or an iterable of labels, joined with ``_`` in the file name.
        save_path: Root output directory (created if needed).
        video_name: Used for the sub-directory and the file names.
        batch_num: Batch number embedded in the file names.

    The three sequences are zipped, so extra items in a longer one are
    ignored. A failed image write is logged as an error and does not stop
    the remaining frames from being saved.
    """
    # Create directories for saving images if they don't exist
    rgb_save_dir = os.path.join(save_path, 'rgb_frames', video_name)
    flow_save_dir = os.path.join(save_path, 'flow_frames', video_name)

    os.makedirs(rgb_save_dir, exist_ok=True)
    os.makedirs(flow_save_dir, exist_ok=True)

    for i, (rgb_frame, flow_frame, label) in enumerate(zip(rgb_frames, optical_flow_frames, labels)):
        # Scalar labels (including NumPy integers) are wrapped so the join below works.
        if isinstance(label, (int, np.integer)):
            label = [label]

        label_str = "_".join(map(str, label))  # Concatenate labels to form a string
        rgb_filename = os.path.join(rgb_save_dir, f"rgb_{video_name}_batch_{batch_num}_frame_{i}_label_{label_str}.jpg")
        flow_filename = os.path.join(flow_save_dir, f"flow_{video_name}_batch_{batch_num}_frame_{i}_label_{label_str}.jpg")

        # cv2.imwrite reports failure through its return value rather than raising,
        # so check it instead of silently losing frames.
        for filename, frame in ((rgb_filename, rgb_frame), (flow_filename, flow_frame)):
            if not cv2.imwrite(filename, frame):
                logging.error(f"Failed to write image {filename}")

In [7]:
# Label assignment function for weakly supervised data
def assign_labels(video_name):
    """Map a video file name to the integer class id of its primary event.

    The label code is the text after the last ``_label_`` in the name, with
    any file extension removed (e.g. ``..._label_B1-B6-0.mp4`` -> ``B1-B6-0``).
    Only the first ``-``-separated code is used; it is translated through
    ``XD_LABEL_MAP`` and then ``LABEL_MAP``.

    Args:
        video_name: Video file name, with or without extension.

    Returns:
        The integer class id. Unknown codes map to Normal, as does a name
        without a ``_label_`` marker (logged as a warning instead of raising
        ``IndexError``).
    """
    _, marker, label_code = video_name.strip().rpartition('_label_')
    if not marker:
        logging.warning(f"No '_label_' marker in video name {video_name!r}; treating it as Normal.")
        primary_code = ''
    else:
        # splitext drops whatever extension is present, not only '.mp4'.
        primary_code = os.path.splitext(label_code)[0].split('-')[0]

    # Map the primary code to its event name, then to the label value.
    primary_event_name = XD_LABEL_MAP.get(primary_code, 'Normal')
    return LABEL_MAP.get(primary_event_name, 0)  # Defaults to 'Normal' if no match is found

In [8]:
def save_data(video_list, path_videos, save_path, max_frames=1200):
    """Extract frames from every listed training video and save them in batches.

    Each video gets a single label from ``assign_labels``. Frames are read
    in chunks through ``read_video_rgb_and_optical_flow`` (resized to the
    size in ``CONFIG``), buffered, and written with ``save_frames_as_images``
    whenever ``max_frames`` frames are buffered. Whatever remains at the end
    of a video is written as a final, shorter batch. The batch number keeps
    increasing across videos.

    Videos reporting one frame or fewer are skipped with an error log.
    """
    total_videos = len(video_list)
    batch_num = 1  # shared across all videos

    for video_num, raw_name in enumerate(video_list, start=1):
        video_name = raw_name.strip()
        video_path = os.path.join(path_videos, video_name)
        assigned_labels = assign_labels(video_name)
        total_frames = count_total_frames(video_path)

        # A video with zero or one frame has nothing to process.
        if total_frames <= 1:
            logging.error(f"Video {video_name} has {total_frames} frame(s). Skipping this video.")
            continue

        logging.info(f"Training video: {video_name}, Labels: {assigned_labels}")

        pending_rgb, pending_flow, pending_labels = [], [], []
        accumulated_frames = 0
        start_frame = 0

        while start_frame < total_frames:
            logging.info(f"Processing from frame {start_frame} to {total_frames}")
            chunk_stream = read_video_rgb_and_optical_flow(
                video_path,
                CONFIG['width'],
                CONFIG['height'],
                resize=True,
                max_frames=max_frames - accumulated_frames,
                start_frame=start_frame,
                video_num=video_num,
                total_videos=total_videos,
            )

            chunks_received = 0
            for rgb_chunk, flow_chunk, next_start_frame in chunk_stream:
                chunks_received += 1
                pending_rgb += rgb_chunk
                pending_flow += flow_chunk
                pending_labels += [assigned_labels] * len(rgb_chunk)
                accumulated_frames += len(rgb_chunk)
                start_frame = next_start_frame

                if accumulated_frames < max_frames:
                    continue

                # Buffer is full: write one batch and keep any overflow for the next.
                save_frames_as_images(pending_rgb[:max_frames], pending_flow[:max_frames], pending_labels[:max_frames], save_path, video_name, batch_num)
                del pending_rgb[:max_frames]
                del pending_flow[:max_frames]
                del pending_labels[:max_frames]
                accumulated_frames -= max_frames
                gc.collect()
                batch_num += 1

            # The reader produced nothing, so start_frame cannot advance; stop
            # here rather than loop forever.
            if chunks_received == 0:
                logging.error(f"No frames yielded for video {video_name} starting from frame {start_frame}. Skipping the rest of this video.")
                break

        # Write the leftover frames of this video as a final batch.
        if pending_rgb:
            save_frames_as_images(pending_rgb, pending_flow, pending_labels, save_path, video_name, batch_num)
            gc.collect()
            batch_num += 1

In [9]:
def get_test_videos_and_annotations(annotations_path):
    """Parse the annotation file into per-video event intervals.

    Each non-blank line is ``<video_name> <start> <end> [<start> <end> ...]``
    with whitespace-separated integer frame numbers. Blank lines are
    skipped. A trailing frame number without a partner is ignored and
    logged as a warning.

    Args:
        annotations_path: Path of the annotation text file.

    Returns:
        ``(test_videos, annotations)`` where ``test_videos`` is the set of
        video names seen and ``annotations`` maps each name to a list of
        ``(start_frame, end_frame, label)`` tuples; ``label`` comes from
        ``assign_labels(video_name)``.

    Raises:
        ValueError: If a frame field is not an integer.
    """
    test_videos = set()
    annotations = {}
    with open(annotations_path, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            parts = line.split()
            if not parts:
                continue  # blank or whitespace-only line

            video_name = parts[0]
            frames = list(map(int, parts[1:]))
            if len(frames) % 2:
                logging.warning(f"Annotation line {line_number} for {video_name} has an unpaired frame number; ignoring the last value.")

            intervals = annotations.setdefault(video_name, [])
            frame_pairs = list(zip(frames[0::2], frames[1::2]))
            if frame_pairs:
                # The label depends only on the name, so compute it once per line.
                labels = assign_labels(video_name)
                intervals.extend((start_frame, end_frame, labels) for start_frame, end_frame in frame_pairs)

            test_videos.add(video_name)

    logging.info(f"Loaded {len(test_videos)} test videos with annotations.")
    return test_videos, annotations

In [10]:
def save_test_data_with_annotations(video_list, path_videos, save_path, annotations, max_frames=1200):
    """Extract frames from test videos and save them with per-interval labels.

    For each video, the annotated event intervals (looked up by the file
    name without ``.mp4``) are interleaved with Normal intervals covering
    the gaps before, between and after them. Frames are read interval by
    interval through ``read_video_rgb_and_optical_flow``, buffered together
    with the interval's label, and written with ``save_frames_as_images``
    every time ``max_frames`` frames are buffered. Leftover frames are
    written as a final, shorter batch. The batch number keeps increasing
    across videos.

    Args:
        video_list: Video file names (surrounding whitespace is stripped).
        path_videos: Directory containing the videos.
        save_path: Root output directory.
        annotations: Mapping of video name (without ``.mp4``) to a list of
            ``(start_frame, end_frame, label)`` tuples.
        max_frames: Number of frames per saved batch; must be at least 1.

    Raises:
        ValueError: If ``max_frames`` is smaller than 1.

    Notes:
        * Videos reporting one frame or fewer are skipped.
        * When only one frame is left in an interval it is skipped (logged).
        * If the reader yields nothing for a request, the rest of that video
          is abandoned, since the read position could not advance.
    """
    if max_frames < 1:
        raise ValueError(f"max_frames must be at least 1, got {max_frames}")

    total_videos = len(video_list)
    batch_num = 1  # Initialize batch number

    for video_num, video_name in enumerate(video_list, start=1):
        video_name = video_name.strip()
        c_video_name = video_name.replace('.mp4', '')
        video_path = os.path.join(path_videos, video_name)
        logging.info(f"Processing video {video_name}.")

        frame_count = count_total_frames(video_path)
        if frame_count <= 1:
            logging.error(f"Video {video_name} has {frame_count} frame(s). Skipping this video.")
            continue
        total_frames = frame_count - 1  # index of the last frame
        events = annotations.get(c_video_name, [])

        logging.info(f"Processing video: {video_name} with total frames: {total_frames} and intervals: {events}")

        # Create a list of all intervals, including normal intervals
        all_intervals = []
        prev_end = 0
        for (event_start_frame, event_end_frame, mapped_labels) in events:
            if event_start_frame > prev_end:
                all_intervals.append((prev_end, event_start_frame, LABEL_MAP['Normal']))
            all_intervals.append((event_start_frame, event_end_frame, mapped_labels))
            prev_end = event_end_frame
        if prev_end < total_frames:
            all_intervals.append((prev_end, total_frames, LABEL_MAP['Normal']))

        logging.info(f"Intervals for video {video_name}:")
        for (start_frame, end_frame, labels) in all_intervals:
            logging.info(f"  Start: {start_frame}, End: {end_frame}, Labels: {labels}")

        rgb_frames_total = []
        optical_flow_frames_total = []
        labels_total = []
        accumulated_frames = 0
        start_frame = 0
        reader_stalled = False

        for (event_start_frame, event_end_frame, mapped_labels) in all_intervals:
            logging.info(f"Test video: {video_name}, Frame interval: {event_start_frame}-{event_end_frame}, Labels: {mapped_labels}")

            while start_frame <= event_end_frame:
                frames_left_in_interval = event_end_frame - start_frame + 1
                # Only the interval's own remainder decides whether a frame is
                # skipped. A nearly full buffer must not trigger it, otherwise
                # every later frame would be dropped one at a time.
                if frames_left_in_interval <= 1:
                    logging.error(f"Skipping single frame at {start_frame} for video {video_name}")
                    start_frame += 1
                    continue

                frames_to_read = min(max_frames - accumulated_frames, frames_left_in_interval)
                made_progress = False
                for rgb_frames, optical_flow_frames, next_start_frame in read_video_rgb_and_optical_flow(video_path, CONFIG['width'], CONFIG['height'], resize=True, max_frames=frames_to_read, start_frame=start_frame, video_num=video_num, total_videos=total_videos):
                    made_progress = True
                    rgb_frames_total.extend(rgb_frames)
                    optical_flow_frames_total.extend(optical_flow_frames)
                    labels_total.extend([mapped_labels] * len(rgb_frames))
                    accumulated_frames += len(rgb_frames)
                    start_frame = next_start_frame

                    if accumulated_frames >= max_frames:
                        save_frames_as_images(rgb_frames_total[:max_frames], optical_flow_frames_total[:max_frames], labels_total[:max_frames], save_path, video_name, batch_num)
                        rgb_frames_total = rgb_frames_total[max_frames:]
                        optical_flow_frames_total = optical_flow_frames_total[max_frames:]
                        labels_total = labels_total[max_frames:]
                        accumulated_frames -= max_frames
                        gc.collect()
                        batch_num += 1  # Increment batch number

                # The guard must sit inside the while loop: if the reader yields
                # nothing, start_frame never advances and the loop would spin forever.
                if not made_progress:
                    logging.error(f"No frames yielded for video {video_name} starting from frame {start_frame}. Skipping the rest of this video.")
                    reader_stalled = True
                    break

            if reader_stalled:
                break

        # Write whatever is still buffered for this video.
        if rgb_frames_total:
            save_frames_as_images(rgb_frames_total, optical_flow_frames_total, labels_total, save_path, video_name, batch_num)
            gc.collect()
            batch_num += 1  # Increment batch number

In [11]:
def delete_all_files(directory_path):
    """Empty ``directory_path``, removing its files and sub-directories.

    The directory itself is kept. Every removal is logged. Any exception
    (for example a missing directory) is logged as an error and swallowed,
    which also stops the clean-up at that point.
    """
    try:
        for entry_name in os.listdir(directory_path):
            entry_path = os.path.join(directory_path, entry_name)

            if os.path.isfile(entry_path):
                os.remove(entry_path)
                logging.info(f"Deleted file: {entry_name}")
                continue

            if os.path.isdir(entry_path):
                shutil.rmtree(entry_path)
                logging.info(f"Deleted folder: {entry_name}")
                continue

            # Neither a regular file nor a directory: leave it alone.
            logging.error(f"{entry_name} is neither a file nor a folder, skipping.")

        logging.info("All files and folders deleted successfully.")
    except Exception as error:
        logging.error(f"An error occurred while deleting files and folders: {error}")

In [12]:
def delete_lines_from_file(file_path, n):
    """Rewrite ``file_path`` in place without its first ``n`` lines.

    The file is read completely, then overwritten with the lines from
    index ``n`` onward (an empty file results if it has ``n`` lines or fewer).
    """
    with open(file_path, 'r') as source:
        kept_lines = source.readlines()[n:]

    with open(file_path, 'w') as target:
        target.writelines(kept_lines)

In [13]:
def _read_video_list(list_path):
    """Return the stripped lines of a video-list text file, in file order."""
    with open(list_path, 'r') as f:
        return [line.strip() for line in f.readlines()]


def _select_videos(names, limit, excluded=()):
    """Pick up to ``limit`` names not in ``excluded``, scanning in order.

    Returns ``(selected, consumed)`` where ``consumed`` is how many leading
    entries of ``names`` were examined (selected or excluded). Trimming that
    many lines from the pending list keeps it in step with what was handled.
    """
    selected = []
    consumed = 0
    for position, name in enumerate(names, start=1):
        if len(selected) >= limit:
            break
        consumed = position
        if name not in excluded:
            selected.append(name)
    return selected, consumed


def main():
    """Interactively prepare the next slice of the train/test frame datasets.

    Asks for a mode:

    * ``new`` copies both video lists to ``<list>_copy`` (the pending
      lists), empties both output directories and reads from the original
      lists.
    * ``continue`` reads from the existing ``<list>_copy`` pending lists.

    In both modes the first ``num_train_videos`` training videos (excluding
    names present in the annotation file) and the first ``num_test_videos``
    test videos are processed. The handled entries are then removed from the
    head of the pending lists, so a later ``continue`` run picks up where
    this one stopped.

    Any other input is reported on screen and in the log, and the function
    returns without doing any work. It does not call ``exit()``, which would
    terminate a notebook kernel.
    """
    configure_logging(CONFIG['logs'])
    test_videos, annotations = get_test_videos_and_annotations(CONFIG['annotations_path'])

    train_pending_list = CONFIG['train_videos_list'] + '_copy'
    test_pending_list = CONFIG['test_videos_list'] + '_copy'

    mode = input("Enter 'new' to prepare a new dataset or 'continue' to continue with the existing one: ").strip().lower()
    if mode == 'new':
        shutil.copyfile(CONFIG['train_videos_list'], train_pending_list)
        shutil.copyfile(CONFIG['test_videos_list'], test_pending_list)
        delete_all_files(CONFIG['train_save_path'])
        delete_all_files(CONFIG['test_save_path'])
        train_source, test_source = CONFIG['train_videos_list'], CONFIG['test_videos_list']
    elif mode == 'continue':
        train_source, test_source = train_pending_list, test_pending_list
    else:
        message = "Invalid input. Please enter 'new' or 'continue'."
        logging.error(message)
        print(message)
        return

    # Read train and test video file names from the text files
    train_video_files, train_consumed = _select_videos(
        _read_video_list(train_source), CONFIG['num_train_videos'], excluded=test_videos)
    test_video_files, test_consumed = _select_videos(
        _read_video_list(test_source), CONFIG['num_test_videos'])
    logging.info(f"Number of training videos: {len(train_video_files)}")
    logging.info(f"Number of testing videos: {len(test_video_files)}")

    save_data(train_video_files, CONFIG['train_videos_path'], CONFIG['train_save_path'], CONFIG['ftp'])
    print("Finished saving Train files.")
    save_test_data_with_annotations(test_video_files, CONFIG['test_videos_path'], CONFIG['test_save_path'], annotations, CONFIG['ftp'])
    print("Finished saving Test files.")

    # Trim by the number of list entries actually consumed, not by the configured
    # count: excluded names skipped during selection must be dropped too, or a
    # video behind them would be processed again on the next run.
    delete_lines_from_file(train_pending_list, train_consumed)
    delete_lines_from_file(test_pending_list, test_consumed)

In [14]:
# Entry point: start the interactive pipeline when this code runs as __main__
# (executing the cell or running the file as a script), but not on import.
if __name__ == "__main__":
    main()

Enter 'new' to prepare a new dataset or 'continue' to continue with the existing one:  new


1/1 Processing Fast.Furious.6.2013__#01-47-39_01-49-53_label_B1-B6-0.mp4:   4%| | 120/3216 [00:01<00:45, 67.63frames/s]
1/1 Processing Fast.Furious.6.2013__#01-47-39_01-49-53_label_B1-B6-0.mp4:   4%| | 120/3096 [00:01<00:43, 68.94frames/s]
1/1 Processing Fast.Furious.6.2013__#01-47-39_01-49-53_label_B1-B6-0.mp4:   4%| | 120/2976 [00:01<00:39, 71.63frames/s]
1/1 Processing Fast.Furious.6.2013__#01-47-39_01-49-53_label_B1-B6-0.mp4:   4%| | 120/2856 [00:01<00:38, 71.89frames/s]
1/1 Processing Fast.Furious.6.2013__#01-47-39_01-49-53_label_B1-B6-0.mp4:   4%| | 120/2736 [00:01<00:35, 72.96frames/s]
1/1 Processing Fast.Furious.6.2013__#01-47-39_01-49-53_label_B1-B6-0.mp4:   5%| | 120/2616 [00:01<00:33, 74.30frames/s]
1/1 Processing Fast.Furious.6.2013__#01-47-39_01-49-53_label_B1-B6-0.mp4:   5%| | 120/2496 [00:01<00:31, 75.11frames/s]
1/1 Processing Fast.Furious.6.2013__#01-47-39_01-49-53_label_B1-B6-0.mp4:   5%| | 120/2376 [00:01<00:29, 76.14frames/s]
1/1 Processing Fast.Furious.6.2013__#01-

Finished saving Train files.


1/5 Processing v=JfLYNEsrTew__#1_label_G-0-0.mp4:   7%|█▉                         | 53/749 [00:00<00:07, 91.65frames/s]
1/5 Processing v=JfLYNEsrTew__#1_label_G-0-0.mp4:  10%|██▌                        | 67/696 [00:01<00:09, 66.87frames/s]
1/5 Processing v=JfLYNEsrTew__#1_label_G-0-0.mp4:  19%|████▉                     | 120/629 [00:01<00:06, 73.87frames/s]
1/5 Processing v=JfLYNEsrTew__#1_label_G-0-0.mp4:  24%|██████▏                   | 120/509 [00:01<00:05, 72.69frames/s]
1/5 Processing v=JfLYNEsrTew__#1_label_G-0-0.mp4:  18%|████▉                      | 71/389 [00:00<00:03, 91.94frames/s]
1/5 Processing v=JfLYNEsrTew__#1_label_G-0-0.mp4:  15%|████▏                      | 49/318 [00:00<00:04, 61.16frames/s]
1/5 Processing v=JfLYNEsrTew__#1_label_G-0-0.mp4:  45%|███████████▌              | 120/269 [00:01<00:01, 74.67frames/s]
1/5 Processing v=JfLYNEsrTew__#1_label_G-0-0.mp4:  81%|████████████████████▉     | 120/149 [00:01<00:00, 69.46frames/s]
1/5 Processing v=JfLYNEsrTew__#1_label_G

Finished saving Test files.
