In [1]:
import pandas as pd
import numpy as np
import pyedflib
import cv2
import os

In [2]:
def get_videoInfo(path):
    """Read basic stream properties of a video file through OpenCV.

    Parameters
    ----------
    path : str
        Location of the video file handed to ``cv2.VideoCapture``.

    Returns
    -------
    dict
        Keys ``'length'`` (frame count), ``'fps'``, ``'height'``, ``'width'``
        and ``'path'``. Every numeric property is truncated with ``int()``,
        so a fractional frame rate loses its decimals (29.97 -> 29).
    """
    capture = cv2.VideoCapture(path)

    # (output key, OpenCV property id) pairs, in the order the keys are stored.
    wanted = (
        ('length', cv2.CAP_PROP_FRAME_COUNT),
        ('fps', cv2.CAP_PROP_FPS),
        ('height', cv2.CAP_PROP_FRAME_HEIGHT),
        ('width', cv2.CAP_PROP_FRAME_WIDTH),
    )
    info = {key: int(capture.get(prop_id)) for key, prop_id in wanted}
    info['path'] = path

    capture.release()

    return info

def get_ecgIfno(path):
    """Read the header of the ``'EXG2'`` channel from a BDF/EDF recording.

    The file is opened with ``pyedflib.EdfReader`` and is always closed again,
    even when the channel is missing or a consistency check fails. Leaving the
    reader open would keep the file handle alive for the rest of the kernel
    session.

    Parameters
    ----------
    path : str
        Location of the recording handed to ``pyedflib.EdfReader``.

    Returns
    -------
    dict
        The dictionary returned by ``getSignalHeader`` for the channel,
        extended with ``'length'`` (value of ``samples_in_file``) and
        ``'path'``.

    Raises
    ------
    ValueError
        If the recording has no channel labelled ``'EXG2'``.
    AssertionError
        If the header label or sample rate disagrees with what the reader
        reports for the same channel.
    """
    bdf_data = pyedflib.EdfReader(path)
    try:
        labels = bdf_data.getSignalLabels()
        if 'EXG2' not in labels:
            # Same exception type as list.index(), but names the offending file.
            raise ValueError("channel 'EXG2' not found in {}".format(path))
        index = labels.index('EXG2')
        info = bdf_data.getSignalHeader(index)

        # Sanity checks: header and per-channel accessors must agree.
        assert(bdf_data.getLabel(index) == 'EXG2')
        assert(info['sample_rate'] == bdf_data.getSampleFrequency(index))

        info['length'] = bdf_data.samples_in_file(index)
        info['path'] = path
    finally:
        # Release the file handle on success and on every failure path.
        bdf_data.close()

    return info

In [3]:
# Dataset splits; each one is a sub-directory name under ./data.
names = ['train', 'val', 'test']

# Column layout of the per-video metadata table.
video_columns = ['no', 'length', 'fps', 'height', 'width', 'path', 'type']

# Column layout of the per-signal metadata table.
signal_columns = [
    'no', 'label', 'dimension', 'length', 'sample_rate', 'path', 'signal_path',
    'prefilter', 'physical_max', 'physical_min',
    'digital_max', 'digital_min', 'transducer', 'type',
]

# Empty tables that the scanning cell fills one row at a time.
video_dataset = pd.DataFrame(columns=video_columns)
signal_dataset = pd.DataFrame(columns=signal_columns)

In [4]:
# Collect the numeric folder names found in every split directory.
numbers = list()
for name in names:
    split_dir = os.path.join(os.getcwd(), 'data', name)
    for folder in os.listdir(split_dir):
        # Stray files (e.g. `.DS_Store`) cannot be recording folders.
        if not os.path.isdir(os.path.join(split_dir, folder)):
            continue
        # Non-numeric directories (e.g. `.ipynb_checkpoints`) used to crash
        # int(); skip them instead.
        try:
            numbers.append(int(folder))
        except ValueError:
            continue

# Map each folder number to its rank among all folder numbers; the rank is
# later used as the row index of the metadata tables.
no2id = {number: i for i, number in enumerate(sorted(numbers))}

In [5]:
# Walk data/<split>/<number>/ and record one metadata row per folder that
# contains both an .avi video and a .bdf recording.
for name in names:
    base = os.path.join(os.getcwd(), 'data', name)

    for folder in os.listdir(base):
        folder_path = os.path.join(base, folder)

        # Stray files (e.g. `.DS_Store`) would make os.listdir() raise below.
        if not os.path.isdir(folder_path):
            continue

        # Folder names are the recording numbers; anything non-numeric
        # (e.g. `.ipynb_checkpoints`) is not a recording.
        try:
            no = int(folder)
        except ValueError:
            continue

        video_path, ecg_path = None, None

        # If several matching files exist, the last one listed wins.
        for file in os.listdir(folder_path):
            if file.endswith('avi'):
                video_path = os.path.join(folder_path, file)
            elif file.endswith('bdf'):
                ecg_path = os.path.join(folder_path, file)

        # Incomplete folders are skipped rather than recorded partially.
        if video_path is None or ecg_path is None:
            continue

        video_info = get_videoInfo(video_path)
        ecg_info = get_ecgIfno(ecg_path)

        video_info.update({'no' : no, 'type': name})
        ecg_info.update({'no' : no, 'type': name, 'signal_path': os.path.join(folder_path, 'EXG2.npy') })

        # Rows are keyed by the folder's rank so both tables share an index.
        video_dataset.loc[no2id[no]] = video_info
        signal_dataset.loc[no2id[no]] = ecg_info

In [6]:
# Order both tables by their row index (ascending is sort_index's default).
sorted_video_dataset, sorted_signal_dataset = (
    dataset.sort_index() for dataset in (video_dataset, signal_dataset)
)

In [7]:
# Write both sorted tables next to the split folders, without the row index.
base = os.path.join(os.getcwd(), 'data')

for frame, filename in (
    (sorted_video_dataset, 'video_info.csv'),
    (sorted_signal_dataset, 'signal_info.csv'),
):
    frame.to_csv(os.path.join(base, filename), index=False)