# Was?

**Stand: 17/06/19**

Erstellt einen DataFrame, welcher eine Liste aller zusammenhängenden Videos enthält. Soll ermöglichen, daraus eine Timeline zu erstellen. Videos werden als zusammenhängend betrachtet, wenn sie weniger als 1 Sekunde auseinander liegen.

In [1]:
%matplotlib inline

import datetime as dt
import json
import os

import iso8601
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import modules.gtimeline_wrapper as gtwrapper

Laden der Video-Filenames und Sortierung.

In [2]:
# Read one video filename per line (trailing newline removed), then sort
# the list in place.
with open("./raw_data/00_2016_video_names.txt", "r") as f:
    names = [line.strip('\n') for line in f]
names.sort()

# Parsen der Daten

In [3]:
# One row per video file; further columns are derived from 'filename' below.
df_files = pd.DataFrame(data=names, columns=['filename'])

In [4]:
def parse_start_time(row):
    """Return the start timestamp encoded in ``row['filename']``.

    The extension is stripped from the filename and the remainder is split
    on ``_``; the second and third fields are taken as the camera id and the
    time interval. The interval is split on ``--`` into a start and an end
    part, and the start part is parsed with ``iso8601.parse_date``.

    Raises:
        ValueError: if the filename does not yield exactly two fields at
            positions 1-2, or the interval does not split into exactly two
            parts on ``--``.
    """
    stem = os.path.splitext(row['filename'])[0]
    _cam_field, interval_field = stem.split('_')[1:3]
    start_field, _end_field = interval_field.split('--')
    return iso8601.parse_date(start_field)

def parse_end_time(row):
    """Return the end timestamp encoded in ``row['filename']``.

    The extension is stripped from the filename and the remainder is split
    on ``_``; the second and third fields are taken as the camera id and the
    time interval. The interval is split on ``--`` into a start and an end
    part, and the end part is parsed with ``iso8601.parse_date``.

    Raises:
        ValueError: if the filename does not yield exactly two fields at
            positions 1-2, or the interval does not split into exactly two
            parts on ``--``.
    """
    stem = os.path.splitext(row['filename'])[0]
    _cam_field, interval_field = stem.split('_')[1:3]
    _start_field, end_field = interval_field.split('--')
    return iso8601.parse_date(end_field)

def parse_cam_id(row):
    """Return the camera id encoded in ``row['filename']`` as an ``int``.

    The extension is stripped from the filename and the remainder is split
    on ``_``; the second field is the camera id, the third the time
    interval. The interval is still split on ``--`` so that a malformed
    interval is rejected here as well, even though its parts are not used.

    Raises:
        ValueError: if the filename does not yield exactly two fields at
            positions 1-2, the interval does not split into exactly two
            parts on ``--``, or the camera field is not an integer.
    """
    stem = os.path.splitext(row['filename'])[0]
    cam_field, interval_field = stem.split('_')[1:3]
    # Unpacking into two names enforces the "start--end" shape.
    _start_field, _end_field = interval_field.split('--')
    return int(cam_field)

In [5]:
# Derive one column per field parsed out of the filename (row-wise apply).
for column_name, parser in (
    ('time_start', parse_start_time),
    ('time_end', parse_end_time),
    ('cam_id', parse_cam_id),
):
    df_files[column_name] = df_files.apply(parser, axis=1)

## Bestimmen des zeitlichen Abstandes von aufeinanderfolgenden Videos

In [6]:
# Split the file table into one frame per camera id (0-3), each ordered by
# filename.
cam_0, cam_1, cam_2, cam_3 = (
    df_files.loc[df_files['cam_id'] == cam_number].sort_values(by='filename')
    for cam_number in range(4)
)

In [7]:
# Gap between a video's start and the end of the previous video of the same
# camera; the first row of each frame has no predecessor and gets a null gap.
for camera_frame in (cam_0, cam_1, cam_2, cam_3):
    previous_end = camera_frame.shift(1)['time_end']
    camera_frame['diff_time'] = camera_frame['time_start'] - previous_end

## Bestimmen von 'zusammenhängenden' Videos
Videos, welche einen zeitlichen Abstand geringer als `time_delta` zueinander haben, sollen als zusammenhängend betrachtet werden.

In [8]:
def get_time_intervalls(data_cam, **kwargs):
    """Merge consecutive videos of one camera into contiguous intervals.

    Rows are visited in the order given. A row extends the currently open
    interval when its ``diff_time`` is less than or equal to
    ``dt.timedelta(**kwargs)``; otherwise the open interval is closed and a
    new one starts at that row. The first row always opens an interval,
    whatever its ``diff_time`` is, so a frame that does not begin with a
    null gap (e.g. a slice) never yields an interval without a start.

    Args:
        data_cam: DataFrame with the columns ``filename``, ``time_start``,
            ``time_end`` and ``diff_time``.
        **kwargs: keyword arguments forwarded to ``datetime.timedelta`` to
            build the maximum allowed gap (e.g. ``seconds=1``).

    Returns:
        DataFrame with the columns ``time_start``, ``time_end``,
        ``filename_start`` and ``filename_end``, one row per interval and a
        fresh 0..n-1 index. Empty (columns only) when ``data_cam`` has no
        rows.
    """
    columns = ['time_start', 'time_end', 'filename_start', 'filename_end']
    # Build the threshold once instead of once per row.
    max_gap = dt.timedelta(**kwargs)

    intervals = []
    # Open interval as [time_start, time_end, filename_start, filename_end];
    # None while no interval has been opened yet.
    current = None
    for _, row in data_cam.iterrows():
        # A null gap compares as False here, so it starts a new interval.
        if current is not None and row['diff_time'] <= max_gap:
            current[1] = row['time_end']
            current[3] = row['filename']
            continue
        if current is not None:
            intervals.append(current)
        current = [row['time_start'], row['time_end'],
                   row['filename'], row['filename']]
    if current is not None:
        intervals.append(current)

    return pd.DataFrame(intervals, columns=columns)

In [9]:
# Keyword arguments for datetime.timedelta: the largest gap (inclusive) at
# which two consecutive videos still count as connected.
time_delta = dict(seconds=1)

In [10]:
# Contiguous-video intervals for each of the four cameras.
cam_0_intervals, cam_1_intervals, cam_2_intervals, cam_3_intervals = [
    get_time_intervalls(camera_frame, **time_delta)
    for camera_frame in (cam_0, cam_1, cam_2, cam_3)
]

In [22]:
# Per-camera frames; the list position equals the camera id.
cams_dfs = [
    cam_0,
    cam_1,
    cam_2,
    cam_3,
]

In [27]:
# One interval table per camera, in the same order as cams_dfs.
intervals_per_cam_dfs = [
    get_time_intervalls(camera_frame, **time_delta)
    for camera_frame in cams_dfs
]

In [29]:
# For every camera, turn its interval table into a list of JSON-ready dicts.
# "id" is the running position of the interval within that camera's table.
json_intervals = []
for intervals_df in intervals_per_cam_dfs:
    camera_entries = [
        {
            "id": position,
            "start_video_name": record['filename_start'],
            "end_video_name": record['filename_end'],
        }
        for position, (_, record) in enumerate(intervals_df.iterrows())
    ]
    json_intervals.append(camera_entries)

In [34]:
# Write one JSON file per camera; the list position is used as the camera
# id in the file name.
output_pattern = './derived_data/01_create_video_timeline/Cam_{id}_intervals_time.json'
for cam_index, cam_interval in enumerate(json_intervals):
    target_path = output_pattern.format(id=cam_index)
    with open(target_path, 'w') as fp:
        json.dump(cam_interval, fp)