In [1]:
import cv2
import os
import pandas as pd
import numpy as np
import tqdm
import zipfile

In [2]:
def get_frames_count(input_file):
    """Return the frame count that OpenCV reports for a video file.

    Args:
        input_file: Path of the video, passed straight to ``cv2.VideoCapture``.

    Returns:
        ``CAP_PROP_FRAME_COUNT`` of the capture, converted to ``int``.
    """
    video = cv2.VideoCapture(input_file)
    try:
        return int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Release the file handle explicitly instead of relying on garbage collection.
        video.release()

In [3]:
def get_frames(input_file, output_dir, frames_list, prefix):
    """Save selected frames of a video as JPEG files.

    Every frame index in ``frames_list`` is looked up in ``input_file`` and,
    when it can be read, written to ``{output_dir}{prefix}-{index}.jpg``.
    Indices that cannot be read are skipped.

    Args:
        input_file: Path of the video, passed to ``cv2.VideoCapture``.
        output_dir: Destination prefix. It is concatenated with the file name
            as-is, so it has to end with a path separator.
        frames_list: Iterable of frame indices to export.
        prefix: Value placed in front of the frame index in the file name.
    """
    # Make sure the destination folder exists so that imwrite does not fail
    # just because the directory is missing.
    target_dir = os.path.dirname(f'{output_dir}{prefix}')
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    video = cv2.VideoCapture(input_file)
    try:
        for i in frames_list:
            # Seek to the requested frame (CAP_PROP_POS_FRAMES is property id 1).
            video.set(cv2.CAP_PROP_POS_FRAMES, i)
            retval, image = video.read()
            if retval:
                cv2.imwrite(f'{output_dir}{prefix}-{i}.jpg', image)
    finally:
        video.release()

In [4]:
def split_list(input_list):
    """Split a sequence into disjoint train / test / validation parts.

    The first 60% of the items (rounded down) form the train part, the last
    10% (rounded down) form the test part, and everything in between is the
    validation part. The three parts never overlap and together cover the
    whole input.

    Args:
        input_list: Any sliceable sequence (list, tuple, numpy array, ...).

    Returns:
        A ``(train, test, val)`` tuple of slices of ``input_list``.
    """
    total = len(input_list)
    train_count = int(total * 0.6)
    test_count = int(total * 0.1)
    # Use a positive boundary: a negative slice start of -0 would select the
    # whole sequence when test_count is 0.
    test_start = total - test_count

    train = input_list[:train_count]
    val = input_list[train_count:test_start]
    test = input_list[test_start:]
    return train, test, val

In [5]:
def generate_frames_list(n, max_n, step_treshold, start, end, endpoint=False):
    """Pick evenly spaced integer frame indices between ``start`` and ``end``.

    Starting with ``n`` samples, the sample count is increased one at a time
    until either the spacing reported by ``np.linspace`` drops below
    ``step_treshold`` or the count reaches ``max_n - 1``. The indices computed
    for that count are returned. If ``n >= max_n`` the indices for ``n``
    samples are returned directly.

    Args:
        n: Initial number of samples.
        max_n: Exclusive upper bound for the number of samples tried.
        step_treshold: Spacing below which the search stops.
        start: First frame index of the range.
        end: Last frame index of the range.
        endpoint: Whether ``end`` itself may be included (as in ``np.linspace``).

    Returns:
        A numpy integer array of frame indices.
    """
    count = n
    while True:
        frames_range, step = np.linspace(
            start, end, num=count, endpoint=endpoint, dtype=int, retstep=True
        )
        # Stop when the spacing is small enough or no larger count may be tried.
        if step < step_treshold or count + 1 >= max_n:
            return frames_range
        count += 1

In [6]:
# Read the annotation table and use its 'video' column as the index.
data = pd.read_csv('data.csv').set_index('video')

In [7]:
# Unpack the base archive and rename the extracted "data" folder to "dataset".
# This is skipped when "dataset" already exists, so the cell can be re-run
# without os.rename failing on an existing target.
if not os.path.isdir("dataset"):
    with zipfile.ZipFile("kaggle_archive.zip", 'r') as zip_ref:
        zip_ref.extractall('.')

    os.rename("data", "dataset")

# Unpack the videos archive into the dataset folder.
with zipfile.ZipFile("videos.zip", 'r') as zip_ref:
    zip_ref.extractall('dataset')

In [8]:
# Index labels of `data`, grouped by the value of the crash_happened column.
crashes = data.loc[data['crash_happened'].eq(1)].index.values
no_crashes = data.loc[data['crash_happened'].eq(0)].index.values

In [9]:
# Split both groups of video ids; split_list returns (train, test, val).
(
    (crash_train, crash_test, crash_val),
    (no_crash_train, no_crash_test, no_crash_val),
) = (split_list(video_ids) for video_ids in (crashes, no_crashes))

In [None]:
# The videos are stored in the dataset/videos folder

skip_videos = (1, 3)
# NOTE(review): `train` is not used anywhere in the visible cells - confirm before removing.
train = ()

for i in tqdm.tqdm(range(1, 51)):
    if i in skip_videos:
        continue

    # Choose the destination split; ids found in neither the train nor the
    # test lists end up in the validation folder.
    if (i in crash_train) or (i in no_crash_train):
        path = 'dataset/train/'
    elif (i in crash_test) or (i in no_crash_test):
        path = 'dataset/test/'
    else:
        path = 'dataset/val/'

    video_path = f'dataset/videos/{i}.mp4'
    video_data = data[data.index == i]
    # .item() returns the single value as a Python scalar and raises if the
    # selection does not hold exactly one row. This avoids the deprecated
    # conversion of a 1-D numpy array to a scalar.
    crash = bool(video_data['crash_happened'].item())

    if crash:
        crash_start = int(video_data['crash_start'].item())
        collision_end = int(video_data['collision_end'].item())

        # Start of the collision: take the first 10 frames, since this is the
        # most dynamic part.
        frames_list = list(range(crash_start, crash_start + 10))
        get_frames(video_path, f'{path}Accident/', frames_list, i)

        # Sample the rest of the collision: start with 10 frames, sample-count
        # limit 40, step threshold 15 frames.
        frames_list = generate_frames_list(10, 40, 15, crash_start + 10, collision_end, endpoint=True)
        get_frames(video_path, f'{path}Accident/', frames_list, i)

        # Also export the frames before the collision, if there are any.
        if crash_start > 0:
            # Start with 25 frames, sample-count limit 50, step threshold 15 frames.
            frames_list = generate_frames_list(25, 50, 15, 0, crash_start)
            get_frames(video_path, f'{path}Non Accident/', frames_list, i)

    # The video contains no accident
    else:
        video_end = int(video_data['video_end'].item())
        frames_list = generate_frames_list(25, 50, 15, 0, video_end)
        get_frames(video_path, f'{path}Non Accident/', frames_list, i)

 98%|█████████▊| 49/50 [05:12<00:06,  6.36s/it]