<a href="https://colab.research.google.com/github/Harin99/Event_Dataset_Preprocess/blob/main/DSEC_Training_Dataset_Preprocess.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import argparse
from pathlib import Path
import os
import urllib
import shutil
from typing import Union
import zipfile
from requests import get
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
from pandas import Series, DataFrame

## 1. DSEC Training Dataset Download 
1. Access the dataset download page
2. Create the directories in which the dataset will be saved
3. Download and unzip the files

In [2]:
# Root URL under which every training sequence has its own sub-folder.
BASE_TRAIN_URL = 'https://download.ifi.uzh.ch/rpg/DSEC/train/'

# Names of the training sequences handled by this notebook, one per line so
# that additions and removals show up as single-line diffs.
TRAIN_SEQUENCES = [
    'thun_00_a',
    'zurich_city_01_a',
    'zurich_city_02_a',
    'zurich_city_02_c',
    'zurich_city_02_d',
    'zurich_city_02_e',
    'zurich_city_03_a',
    'zurich_city_05_a',
    'zurich_city_05_b',
    'zurich_city_06_a',
    'zurich_city_07_a',
    'zurich_city_08_a',
    'zurich_city_09_a',
    'zurich_city_10_a',
    'zurich_city_10_b',
    'zurich_city_11_a',
    'zurich_city_11_b',
    'zurich_city_11_c',
]


In [3]:
def download(url: str, filepath: Path, skip: bool=True) -> bool:
    """Download ``url`` and store it at ``filepath``.

    Args:
        url: Address of the remote file.
        filepath: Destination on disk; a ``str`` is accepted as well as a ``Path``.
        skip: When true, a non-empty file that already exists at ``filepath``
            is kept as-is and no request is made.

    Returns:
        ``True`` if the file was downloaded, ``False`` if it was skipped
        because it was already present.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    target = Path(filepath)
    # An empty file is treated as missing so that it is fetched again.
    if skip and target.is_file() and target.stat().st_size > 0:
        return False

    # Write to a sibling ".part" file and rename at the end, so an interrupted
    # transfer never leaves a truncated file under the final name.
    partial = target.with_name(target.name + '.part')
    # stream=True avoids holding the whole payload in memory at once.
    with get(url, stream=True, timeout=60) as response:
        # Fail loudly instead of saving an HTTP error page as if it were data.
        response.raise_for_status()
        with open(str(partial), 'wb') as fl:
            for chunk in response.iter_content(chunk_size=1 << 20):
                fl.write(chunk)
    partial.replace(target)
    return True

def unzip(file_path, output_path):
    """Extract a zip archive and delete the archive afterwards.

    Args:
        file_path: Path of the ``.zip`` file to extract.
        output_path: Directory that receives the archive contents.

    Returns:
        ``output_path``, unchanged.

    Raises:
        zipfile.BadZipFile: If ``file_path`` is not a valid zip archive. The
            archive is left on disk in that case.
    """
    with zipfile.ZipFile(file_path, 'r') as zip_ref:
        zip_ref.extractall(output_path)
    # Remove the archive only after its handle is closed; deleting a file that
    # is still open fails on platforms that lock open files.
    os.remove(file_path)
    return output_path

In [None]:
output_dir = '/share/data/train'

for seq_name in TRAIN_SEQUENCES:
    # Directory generation. exist_ok=True on every call so that the cell can be
    # re-run after a partial or complete earlier run.
    seq_path = output_dir + '/' + seq_name
    os.makedirs(seq_path, exist_ok=True)
    os.makedirs(seq_path + '/' + 'events_left', exist_ok=True)
    os.makedirs(seq_path + '/' + 'optical_flow', exist_ok=True)

    # Every remote file of this sequence lives under this URL prefix.
    seq_url = BASE_TRAIN_URL + seq_name + '/'

    # Forward optical-flow timestamps (text file).
    flow_path = seq_url + seq_name + '_optical_flow_forward_timestamps.txt'
    flow_file = seq_path + '/' + 'train_forward_flow_timestamps.txt'
    download(flow_path, flow_file)

    # Left event camera data (zip, extracted into events_left/).
    event_path = seq_url + seq_name + '_events_left.zip'
    event_file = seq_path + '/' + 'events_left.zip'
    download(event_path, event_file)
    unzip(event_file, seq_path + '/events_left/')

    # Image timestamps (text file).
    image_path = seq_url + seq_name + '_image_timestamps.txt'
    image_file = seq_path + '/' + 'image_timestamps.txt'
    download(image_path, image_file)

    # Forward optical flow (zip, extracted into optical_flow/).
    # flow_path is deliberately reassigned here: after the loop it holds the
    # URL of the optical-flow archive, not of the timestamp file.
    flow_path = seq_url + seq_name + '_optical_flow_forward_event.zip'
    flow_file = seq_path + '/' + 'optical_flow.zip'
    download(flow_path, flow_file)
    unzip(flow_file, seq_path + '/optical_flow/')

In [None]:
# Show the URLs built for the last processed sequence, one per line.
print(flow_path, event_path, image_path, sep='\n')

## 2. Training Dataset ( flowtimestamp.txt -> .csv ) 
- Match each flow timestamp to the file index of the image that has the same timestamp

In [None]:
train_dir = '/share/data/train/'
# os.listdir returns entries in arbitrary order and also lists plain files.
# Keep only sub-directories (each one is treated as a sequence by the cells
# that follow) and sort them so that every run processes them in the same order.
train_list = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)
# Running total of flow samples over all sequences.
count = 0

In [None]:
for name in train_list:
    flow_ts_path = train_dir + name + '/train_forward_flow_timestamps.txt'
    image_ts_path = train_dir + name + '/image_timestamps.txt'
    flow_ts = open(flow_ts_path, 'r')
    flow_ts = flow_ts.readlines()
    image_ts = np.loadtxt(image_ts_path)
    # DataFrame 생성 
    df = pd.DataFrame(columns = {'# from_timestamp_us', 'to_timestamp_us', 'file_index'})
    for i in range(len(flow_ts)-1):
        from_us = int(flow_ts[i+1].split(',')[0])
        to_us = int(flow_ts[i+1].split(',')[1])
        image_ts_list = list(image_ts)
        idx = image_ts_list.index(from_us)
        df.loc[i] = [from_us, to_us, idx]
    # 생성된 Flow dataframe -> csv 로 save하기 
    output_dir = train_dir + name + '/train_forward_flow_timestamps.csv'
    df.to_csv(output_dir, index = False)
    ## Total 개수 합계
    csv = pd.read_csv(output_dir)
    count += len(csv)

In [None]:
## csv file Colume Order Change 

for name in train_list:
    output_dir = train_dir + name + '/train_forward_flow_timestamps.csv'
    csv = pd.read_csv(output_dir)
    csv = csv.rename(columns={'# from_timestamp_us': 'file_index', 'file_index': '# from_timestamp_us'})
    csv.to_csv(output_dir, index = False)

## 참고
- 위에서 다운받은 DSEC Training dataset sequence에는 연속적이지 않은 데이터가 포함되어 있다.
- 따라서 E-RAFT Network에서 데이터셋을 불러올 때 연속적인 데이터를 불러올 수 있도록, 연속적이지 않은 sequence의 경우에는 이를 다시 세부적으로 나누어 연속적인 데이터들만 sub-sequence로 분리하고, 각 sub-sequence마다 새로운 폴더를 생성해 줄 필요가 있다.

## 3. Non-sequential Sequence Dataset Preprocess 

In [4]:
# Not released yet