In [1]:
from enum import Enum
from pathlib import Path
import numpy as np
import h5py
import sys 
sys.path.append('../../')

from source.helper import to_dB, print_spectogram, gen_spectogram, diff_frames, \
                          get_argmaxed_spectrogram, get_tresholded_spectogram,  \
                          get_spectrogram_metrics, plot_metrics, get_spectogram_slices

from tensorflow.keras.utils import to_categorical

class SampleType(Enum):
    """Class label attached to every recording of the dataset.

    The member value is the integer class index; the processing cells read it
    through ``.value`` when building the one-hot label rows.
    """

    BARTEK = 0   # recordings 'bartek' / 'randomowe_chodzonko_bartek'
    KUBA = 1     # recordings 'kuba' / 'randomowe_chodzonko_kuba'
    OSKAR = 2    # recordings 'oskar' / 'randomowe_chodzonko_oskar'
    RAFAL = 3    # recordings 'rafal' / 'randomowe_chodzonko_rafal'
    FAKE = 4     # 'oddychacz_*' recordings (presumably an artificial breathing target -- confirm)
    NOISE = 5    # 'background' recording
    AUTO = 6     # 'samochod' recording (Polish for "car")
    WIATRAK = 7  # 'wiatrak_*' recordings (Polish for "fan")


In [2]:
# Directory that holds the input data -- change to your project dir.
BASE_DIR = Path("D:/SIIUM/3 semestr/RADAR/radar-project/data")

# Read-only handle to the combined input file; it is best to make yourself
# a single h5 file containing the complete dataset.
# NOTE(review): no cell visible here closes this handle.
loaded_file = h5py.File(BASE_DIR.joinpath('complete_dataset.h5'), mode='r')

In [3]:
# Show the names of the recordings stored in the '2023_05_08' group
# (iterating an h5py group yields its member names).
list(loaded_file['2023_05_08'])

['background',
 'bartek',
 'kuba',
 'oddychacz_2m_o0_s100',
 'oddychacz_2m_o30_s0',
 'oddychacz_2m_o30_s100',
 'oddychacz_3m_o0_s100',
 'oddychacz_3m_o30_s0',
 'oddychacz_3m_o30_s100',
 'oddychacz_4m_o0_s100',
 'oddychacz_4m_o30_s0',
 'oddychacz_4m_o30_s100',
 'oskar',
 'rafal',
 'randomowe_chodzonko_bartek',
 'randomowe_chodzonko_kuba',
 'randomowe_chodzonko_oskar',
 'randomowe_chodzonko_rafal',
 'samochod',
 'wiatrak_2m',
 'wiatrak_3m',
 'wiatrak_4m',
 'wiatrak_foliarz_2m',
 'wiatrak_foliarz_3m',
 'wiatrak_foliarz_4m']

In [4]:
# Names of all recordings stored in the '2023_05_08' group of the input file.
data_keys = list(loaded_file['2023_05_08'].keys())

# Recording name -> class label. Several recordings may share one class.
dict_classes = {
    'background': SampleType.NOISE,
    'bartek': SampleType.BARTEK,
    'kuba': SampleType.KUBA,
    'oddychacz_2m_o0_s100': SampleType.FAKE,
    'oddychacz_2m_o30_s0': SampleType.FAKE,
    'oddychacz_2m_o30_s100': SampleType.FAKE,
    'oddychacz_3m_o0_s100': SampleType.FAKE,
    'oddychacz_3m_o30_s0': SampleType.FAKE,
    'oddychacz_3m_o30_s100': SampleType.FAKE,
    'oddychacz_4m_o0_s100': SampleType.FAKE,
    'oddychacz_4m_o30_s0': SampleType.FAKE,
    'oddychacz_4m_o30_s100': SampleType.FAKE,
    'oskar': SampleType.OSKAR,
    'rafal': SampleType.RAFAL,
    'randomowe_chodzonko_bartek': SampleType.BARTEK,
    'randomowe_chodzonko_kuba': SampleType.KUBA,
    'randomowe_chodzonko_oskar': SampleType.OSKAR,
    'randomowe_chodzonko_rafal': SampleType.RAFAL,
    'samochod': SampleType.AUTO,
    'wiatrak_2m': SampleType.WIATRAK,
    'wiatrak_3m': SampleType.WIATRAK,
    'wiatrak_4m': SampleType.WIATRAK,
    'wiatrak_foliarz_2m': SampleType.WIATRAK,
    'wiatrak_foliarz_3m': SampleType.WIATRAK,
    'wiatrak_foliarz_4m': SampleType.WIATRAK,
}

# Fail fast: the processing cells look up dict_classes[key] for every entry of
# data_keys, so a recording without a mapping would otherwise raise a KeyError
# only part-way through the (slow) processing, after the output file has
# already been partially written.
unmapped_keys = sorted(set(data_keys) - dict_classes.keys())
if unmapped_keys:
    raise KeyError(f'No SampleType assigned to recordings: {unmapped_keys}')


In [5]:
H5_FILENAME = 'slices_dataset'

h5_path = Path(H5_FILENAME + '.h5')
if not h5_path.exists():
    print('File not exist, creating file')

# Mode 'a' opens an existing file read/write and creates it when it is missing.
# Unlike the previous bare `except:` fallback to mode 'w', a file that exists
# but cannot be opened (locked, corrupted, ...) now raises instead of being
# silently truncated.
h5file = h5py.File(h5_path, 'a')

# create_dataset() raises when the name is already present, which made this
# cell fail on every re-run against an existing file. The append loop expects
# to start from empty datasets, so leftovers of a previous run are removed.
for stale_name in ('samples', 'labels'):
    if stale_name in h5file:
        del h5file[stale_name]

# Resizable along axis 0 only; starts with 0 rows so there is no
# zero-initialized first sample. The trailing (64, 128, 8) matches the shape
# of the slices printed by the processing cell.
samples_ds = h5file.create_dataset(name='samples',
                                   shape=(0, 64, 128, 8),
                                   maxshape=(None, 64, 128, 8))
# One row of width 8 per sample (one-hot rows built with num_classes=8).
labels_ds = h5file.create_dataset(name='labels',
                                  shape=(0, 8),
                                  maxshape=(None, 8))

File not exist, creating file


In [6]:
margin = 500  # number of entries dropped from both ends of axis 0 of every recording
frame_delays = [1, 2, 3, 5, 10, 25, 50, 100]  # second argument passed to diff_frames
max_delay = max(frame_delays)  # 100 for the list above

for key in data_keys:

    print(key, dict_classes[key])

    recording = loaded_file['2023_05_08'][key]
    # Trim both ends of axis 0 and keep index 0 of axes 1 and 2
    # (NOTE(review): meaning of those two axes is not visible here -- confirm).
    frames = recording[margin:-margin, 0, 0, :]
    print(frames.shape)
    slices_diff = []
    for d in frame_delays:
        # Original (Polish) note: "we subtract the last frame".
        frames_diff = diff_frames(frames, d)
        diff_spect, _unused = gen_spectogram(frames_diff)
        diff_spect = diff_spect[:64]  # 64 is max height of spectrogram

        diff_spectdb = to_dB(diff_spect)
        # get_argmaxed_spectrogram returns a spectrogram with 1 at argmax, else 0.
        # Cropping the first (max_delay - d) columns gives every delay the same
        # width so they can be stacked (presumably diff_frames shortens its
        # input by d frames -- confirm in source.helper).
        argmax_spectrogram = get_argmaxed_spectrogram(diff_spectdb)[:, max_delay - d:]
        slices_diff.append(argmax_spectrogram)
    # stack slices on new axis
    slices_diff = np.stack(slices_diff, axis=2)

    print(slices_diff.shape)
    class_id = dict_classes[key].value
    slices = get_spectogram_slices(slices_diff)
    print(slices.shape)

    add_size = slices.shape[0]
    if add_size == 0:
        # Nothing to append. The previous `ds[-add_size:]` became `ds[-0:]`
        # here, i.e. a selection of the WHOLE dataset.
        print(f'no slices produced for {key}, skipping')
        continue

    # One-hot rows as wide as the labels dataset they are written to.
    labels = np.full(add_size, class_id)
    labels = to_categorical(labels, num_classes=labels_ds.shape[1])
    print(labels.shape)

    # append to dataset: grow axis 0 only, then write into the new rows
    index = samples_ds.shape[0]
    samples_ds.resize(index + add_size, axis=0)
    labels_ds.resize(index + add_size, axis=0)
    samples_ds[index:index + add_size] = slices
    labels_ds[index:index + add_size] = labels

    print(f'dataset size after appending {dict_classes[key]}: {samples_ds.shape[0]}')


background SampleType.NOISE
(13060, 512)
(64, 12960, 8)
(201, 64, 128, 8)
(201, 8)
dataset size after appending SampleType.NOISE: 201
bartek SampleType.BARTEK
(86988, 512)
(64, 86888, 8)
(1356, 64, 128, 8)
(1356, 8)
dataset size after appending SampleType.BARTEK: 1557
kuba SampleType.KUBA
(86273, 512)
(64, 86173, 8)
(1345, 64, 128, 8)
(1345, 8)
dataset size after appending SampleType.KUBA: 2902
oddychacz_2m_o0_s100 SampleType.FAKE
(15655, 512)
(64, 15555, 8)
(242, 64, 128, 8)
(242, 8)
dataset size after appending SampleType.FAKE: 3144
oddychacz_2m_o30_s0 SampleType.FAKE
(14697, 512)
(64, 14597, 8)
(227, 64, 128, 8)
(227, 8)
dataset size after appending SampleType.FAKE: 3371
oddychacz_2m_o30_s100 SampleType.FAKE
(18259, 512)
(64, 18159, 8)
(282, 64, 128, 8)
(282, 8)
dataset size after appending SampleType.FAKE: 3653
oddychacz_3m_o0_s100 SampleType.FAKE
(14870, 512)
(64, 14770, 8)
(229, 64, 128, 8)
(229, 8)
dataset size after appending SampleType.FAKE: 3882
oddychacz_3m_o30_s0 SampleType

In [7]:
# Inspect the label row stored for the first sample.
labels_ds[0]

array([0., 0., 0., 0., 0., 1., 0., 0.], dtype=float32)

In [8]:
# Done writing: release the handle to the output HDF5 file.
h5file.close()

In [4]:
margin = 500  # number of entries dropped from both ends of axis 0 of every recording

H5_FILENAME = 'slices_dataset'


def _stack_argmax_spectrograms(frames, delays=(1, 2, 3, 5, 10, 25, 50, 100), max_height=64):
    """Build one argmax spectrogram per delay and stack them on a new last axis.

    Args:
        frames: 2-D array of one recording, as passed to ``diff_frames``.
        delays: values passed as the second argument of ``diff_frames``.
        max_height: number of leading spectrogram rows that are kept.

    Returns:
        Array of shape ``(rows, columns, len(delays))`` produced by ``np.stack``.
    """
    longest = max(delays)
    channels = []
    for d in delays:
        # Original (Polish) note: "we subtract the last frame".
        frames_diff = diff_frames(frames, d)
        diff_spect, _unused = gen_spectogram(frames_diff)
        diff_spectdb = to_dB(diff_spect[:max_height])
        # get_argmaxed_spectrogram returns a spectrogram with 1 at argmax, else 0.
        # Dropping the first (longest - d) columns gives every delay the same
        # width so that np.stack accepts them (presumably diff_frames shortens
        # its input by d frames -- confirm in source.helper).
        channels.append(get_argmaxed_spectrogram(diff_spectdb)[:, longest - d:])
    # stack slices on new axis
    return np.stack(channels, axis=2)


h5_path = Path(H5_FILENAME + '.h5')
if not h5_path.exists():
    print('File not exist, creating file')
# Mode 'a' = read/write if the file exists, create it otherwise. The previous
# bare `except:` fell back to mode 'w' on ANY failure to open the file, which
# would silently truncate an existing file.
h5file = h5py.File(h5_path, 'a')

# Sorted by class index: iterating the raw set gives an order that can change
# between interpreter runs.
for sample_type in sorted(set(dict_classes.values()), key=lambda member: member.value):
    slices_list = []
    for key in data_keys:
        if dict_classes[key] != sample_type:
            continue
        print(key, dict_classes[key])

        # Trim both ends of axis 0 and keep index 0 of axes 1 and 2.
        frames = loaded_file['2023_05_08'][key][margin:-margin, 0, 0, :]
        print(frames.shape)

        slices_diff = _stack_argmax_spectrograms(frames)
        print(slices_diff.shape)

        slices = get_spectogram_slices(slices_diff, window_size=128)
        print(slices.shape)
        slices_list.append(slices)

    if not slices_list:
        # np.concatenate raises on an empty list; happens when no recording of
        # this class is present in data_keys.
        print(f'no recordings found for {sample_type}, skipping')
        continue

    # create_dataset() raises if the name already exists, so replace the result
    # of a previous run instead of failing on re-execution.
    if sample_type.name in h5file:
        del h5file[sample_type.name]
    class_slices = np.concatenate(slices_list, axis=0)
    h5file.create_dataset(sample_type.name, data=class_slices)

    

File not exist, creating file
rafal SampleType.RAFAL
(90091, 512)
(64, 89991, 8)
(1405, 64, 128, 8)
randomowe_chodzonko_rafal SampleType.RAFAL
(59351, 512)
(64, 59251, 8)
(924, 64, 128, 8)
background SampleType.NOISE
(13060, 512)
(64, 12960, 8)
(201, 64, 128, 8)
oskar SampleType.OSKAR
(98891, 512)
(64, 98791, 8)
(1542, 64, 128, 8)
randomowe_chodzonko_oskar SampleType.OSKAR
(60749, 512)
(64, 60649, 8)
(946, 64, 128, 8)
oddychacz_2m_o0_s100 SampleType.FAKE
(15655, 512)
(64, 15555, 8)
(242, 64, 128, 8)
oddychacz_2m_o30_s0 SampleType.FAKE
(14697, 512)
(64, 14597, 8)
(227, 64, 128, 8)
oddychacz_2m_o30_s100 SampleType.FAKE
(18259, 512)
(64, 18159, 8)
(282, 64, 128, 8)
oddychacz_3m_o0_s100 SampleType.FAKE
(14870, 512)
(64, 14770, 8)
(229, 64, 128, 8)
oddychacz_3m_o30_s0 SampleType.FAKE
(15269, 512)
(64, 15169, 8)
(236, 64, 128, 8)
oddychacz_3m_o30_s100 SampleType.FAKE
(13943, 512)
(64, 13843, 8)
(215, 64, 128, 8)
oddychacz_4m_o0_s100 SampleType.FAKE
(15772, 512)
(64, 15672, 8)
(243, 64, 128, 

In [5]:
# Done writing the per-class datasets: release the handle to the output HDF5 file.
h5file.close()


In [6]:
# Re-open the file written by the cells above, read-only this time. The name is
# derived from H5_FILENAME (as in the writing cells) so both stay in sync if
# the constant is changed.
new_h5 = h5py.File(H5_FILENAME + '.h5', 'r')


In [12]:
# Dataset stored under the name 'AUTO' (the per-class export cell names each
# dataset after sample_type.name, so this is the SampleType.AUTO data).
ds = new_h5['AUTO']