# Data Analysis

## Environment setup

## Development setup

In [1]:
#@title Imports libraries
from statistics import mean
from random import randint
from pprint import pprint
from pathlib import Path
import cv2 as cv
import sys
import os

In [2]:
#@title System path manager
%reload_ext autoreload
%autoreload 2

project_name = "DeepLVioDeSurVideos"

# Ask the interpreter for the current directory instead of spawning a shell
# (`echo $PWD`): this works on every platform and leaves no open pipe behind.
working_directory = os.getcwd()

# Resolve the project root as the closest path component named exactly like
# the project. This keeps the root correct when the notebook is started from
# a sub-directory of the project (e.g. <project>/notebooks).
_cwd_path = Path(working_directory)
project_root_path = next(
    (
        str(candidate)
        for candidate in (_cwd_path, *_cwd_path.parents)
        if candidate.name == project_name
    ),
    None,
)

if project_root_path is None:
    # No component matches exactly: fall back to the previous heuristic
    # (append the project name unless it already appears in the path).
    project_root_path = f"{working_directory}/{project_name}" \
        if project_name not in working_directory else working_directory

# Make project packages importable, without duplicating the entry on re-runs.
if str(project_root_path) not in sys.path:
    sys.path.insert(0, str(project_root_path))

pprint(f"Environment paths: {sys.path}")

("Environment paths: ['/workspaces/DeepLVioDeSurVideos', "
 "'/usr/lib/python311.zip', '/usr/lib/python3.11', "
 "'/usr/lib/python3.11/lib-dynload', '', "
 "'/usr/local/lib/python3.11/dist-packages', '/usr/lib/python3/dist-packages']")


In [3]:
#@title Imports project stuff
from src.data_handler.strategies.video_creator import OpenCVVideoCreator, DecordVideoCreator
from src.data_handler.strategies.class_names_finder import UniqueClassNamesFinder
from src.data_handler.strategies.file_path_finder import (
    RecursiveFilePathFinder,
    FilePathFinderByLoad
    )
from src.data_handler.data_splitter import DataSplitter
from configs import settings

## Dataset attribute analysis

In [4]:
file_extensions = ["avi", "mp4"]

file_path_finder = RecursiveFilePathFinder(file_extensions=file_extensions)
class_finder = UniqueClassNamesFinder()
# Alternative backend: video_creator = DecordVideoCreator()
video_creator = OpenCVVideoCreator()

# Collect every video file found under <DATASETS_PATH>/<DATASET_NAME>/.
file_paths = file_path_finder.finds(Path(os.path.join(settings.DATASETS_PATH, f"{settings.DATASET_NAME}/")))
total_of_videos = len(file_paths)
video_extensions = {path.suffix for path in file_paths}
video_classes = class_finder.finds(file_paths)

# A video's class is the name of its parent directory; count the videos of
# each class directly instead of materialising a filtered list per label.
video_per_class = {
    label: sum(1 for path in file_paths if path.parent.name == label)
    for label in video_classes
}

print(
    f"Total of videos: {total_of_videos}",
    # Sets have no stable iteration order: sort so the printed line is
    # identical from one kernel run to the next.
    f"Extentions: {', '.join(sorted(video_extensions))}",
    f"Video classes: {', '.join(video_classes)}",
    f"Total of classes: {len(video_classes)}",
    sep="\n"
    )

pprint(
    {"Total of videos per class": video_per_class},
)

def gets_length(path: Path) -> int:
    """Open `path` with the module-level `video_creator` and return its total length.

    Args:
        path: Location of the video file to open.

    Returns:
        Whatever `video_creator.gets_total_length()` reports for the opened
        video (used below as the amount of frames).
    """
    # The creator is stateful: the video has to be opened before it is queried.
    video_creator.opens(path)
    total_length = video_creator.gets_total_length()
    return total_length

# Open every video exactly once; all statistics below reuse these values.
frames_per_video = [gets_length(path) for path in file_paths]

# NOTE(review): despite the "frame rate" names and labels, these values are
# total lengths per video (the same list is labelled "amount of frames"
# below), not frames per second. Names and labels are kept unchanged because
# other cells and the recorded outputs may rely on them.
overall_frame_rate = round(mean(frames_per_video))
higher_amount_of_frames = max(frames_per_video)
lowest_amount_of_frames = min(frames_per_video)

# Per-class mean, reusing the lengths computed above instead of opening each
# video a second time. `frames_per_video` is aligned with `file_paths`.
overall_frame_rate_per_class = {
    label: round(
        mean(
            frame_count
            for path, frame_count in zip(file_paths, frames_per_video)
            if path.parent.name == label
        )
    )
    for label in video_classes
}

print(
    f"Overall frame rate: {overall_frame_rate}",
    ("Higher and lowest amount of frames: "
        f"{higher_amount_of_frames}, "
        f"{lowest_amount_of_frames}"),
    sep="\n"
)
pprint(
    {"Overall frame rate per class": overall_frame_rate_per_class},
)

Total of videos: 1000
Extentions: .mp4
Video classes: Abuse, Arrest, Arson, Assault, Burglary, Explosion, Fighting, Normal, RoadAccidents, Robbery, Shooting, Shoplifting, Stealing, Vandalism
Total of classes: 14
{'Total of videos per class': {'Abuse': 50,
                               'Arrest': 50,
                               'Arson': 50,
                               'Assault': 50,
                               'Burglary': 100,
                               'Explosion': 50,
                               'Fighting': 50,
                               'Normal': 50,
                               'RoadAccidents': 150,
                               'Robbery': 150,
                               'Shooting': 50,
                               'Shoplifting': 50,
                               'Stealing': 100,
                               'Vandalism': 50}}
Overall frame rate: 3791
Higher and lowest amount of frames: 141900, 104
{'Overall frame rate per class': {'Abuse': 3870,
     

## Single random sample analysis

In [5]:
# randint() includes BOTH bounds, so the upper bound must be the last valid
# index; using len(file_paths) could raise IndexError on the lookup below.
index = randint(0, len(file_paths) - 1)

# NOTE: this rebinds the module-level `video_creator` that gets_length() uses.
video_creator = DecordVideoCreator(required_length=60)
# Alternative backend: video_creator = OpenCVVideoCreator(required_length=60)

video = video_creator.creates(file_paths[index])

print(video)

Name: RoadAccidents088_x264.mp4
Length: 60
Label: RoadAccidents
Path: /workspaces/DeepLVioDeSurVideos/src/assets/violence_detection_datasets/UCF-Crime (14)/RoadAccidents/RoadAccidents088_x264.mp4
Array Shape: (60, 240, 320, 3)


## Cross validation analysis

In [6]:
# Directory holding the dataset selected in the project settings.
dataset_directory = Path(f"{settings.DATASETS_PATH}/{settings.DATASET_NAME}")

# Gather every supported video file below that directory.
file_path_finder = RecursiveFilePathFinder(settings.SUPPORTED_VIDEO_EXTENSIONS)
dataset_paths = file_path_finder.finds(dataset_directory)

# Split configured with train_size=0.8 and validation_size=0.2.
data_splitter = DataSplitter(
    dataset_paths,
    train_size=0.8,
    validation_size=0.2,
)

# Kept as the last expression so the notebook displays the result.
data_splitter.splits()