# Example

This example only loads the datasets defined in `_defaults.py`.

It is advisable to read through `_defaults.py` to see how the helpers and defaults used here are defined.

In [1]:
# Enable the IPython autoreload extension; mode 2 re-imports all modules
# before each cell is executed, so edits to local modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
from _defaults import init_notebook
from tabulate import tabulate

In [3]:
# Run the shared notebook setup provided by `_defaults.py`
# (see that module for what exactly it configures).
init_notebook()

## Data loading examples

Getting the current data hash key.

In [4]:
from _defaults import get_data_hash_key

In [5]:
# Fetch the hash key of the current data configuration.
# NOTE(review): the same key shows up in the cache file name
# ('data/datasets-<hash>.pkl') in the outputs further down — confirm in `_defaults.py`.
hash_data = get_data_hash_key()

In [6]:
# Show the hash key obtained from `get_data_hash_key()`.
print(hash_data)

098bae95


### Get data chunk sizes

In [7]:
from _defaults import datasets_labels, load_datasets_chunk_lengths

In [8]:
# Load the chunk lengths of all datasets. The result is zipped with
# `datasets_labels` later on, i.e. one entry per dataset label is expected.
# NOTE(review): judging by the log output this reads the `*-meta.pkl` files only — confirm.
dataset_lengths = load_datasets_chunk_lengths()

[[1;32mINFO[0m] [37m[slki.data.import][data_import.py:537][0m  7 meta data files found.
[[1;32mINFO[0m] [37m[slki.data.import][data_import.py:594][0m  Loading meta data from '/home/lab/slki/Dataset/preprecessed/v4/fernverkehr-points-kionix-sh-z-dt_1-meta.pkl'...
[[1;32mINFO[0m] [37m[slki.data.import][data_import.py:594][0m  Loading meta data from '/home/lab/slki/Dataset/preprecessed/v4/fernverkehr-points-kionix-sh-z-dt_2-meta.pkl'...
[[1;32mINFO[0m] [37m[slki.data.import][data_import.py:594][0m  Loading meta data from '/home/lab/slki/Dataset/preprecessed/v4/fernverkehr-points-kionix-sh-z-dt_3-meta.pkl'...
[[1;32mINFO[0m] [37m[slki.data.import][data_import.py:594][0m  Loading meta data from '/home/lab/slki/Dataset/preprecessed/v4/fernverkehr-points-kionix-sh-z-dt_4-meta.pkl'...
[[1;32mINFO[0m] [37m[slki.data.import][data_import.py:594][0m  Loading meta data from '/home/lab/slki/Dataset/preprecessed/v4/fernverkehr-points-kionix-sh-z-dt_5-meta.pkl'...
[[1;32mINFO

In [9]:
# Pair every dataset label with its chunk lengths. `strict=True` makes `zip`
# raise if the two sequences differ in length instead of silently truncating.
label_length_rows = zip(datasets_labels, dataset_lengths, strict=True)
table_headers = ["Dataset", "Chunk lengths"]

# Render the pairs as a plain-text table.
print(tabulate(label_length_rows, headers=table_headers))

Dataset       Chunk lengths
------------  -----------------------------------
Fernverkehr   [843, 817, 764, 725, 801, 866, 756]
Güterzüge     [908, 679]
Regioverkehr  [727, 725, 741, 729, 858, 468]


### Load unbalanced dataset

In [10]:
from _defaults import load_datasets, print_db_length_table

In [11]:
# Load all datasets via the helper from `_defaults.py`.
# NOTE(review): the message printed by this cell indicates that a previously
# cached pickle is reused when one exists — confirm in `_defaults.py`.
datasets = load_datasets()

Load previous cached dataset from 'data/datasets-098bae95.pkl' file.


In [12]:
# Print a table with the number of entries per dataset (before balancing).
print_db_length_table(datasets)

Dataset         Length
------------  --------
Fernverkehr       5572
Güterzüge         1587
Regioverkehr      4248


Balance an unbalanced dataset

In [13]:
from _defaults import ballance_data, print_db_length_table

In [14]:
# Balance the datasets. Note that `datasets` is rebound to the result here, so
# the unbalanced version loaded before is no longer reachable under this name.
# The second return value is unpacked into `dataset`, `metadata` and `targets`.
datasets, (dataset, metadata, targets) = ballance_data(datasets)

In [15]:
# Print the per-dataset lengths again to check the result of the balancing step.
print_db_length_table(datasets)

Dataset         Length
------------  --------
Fernverkehr       1587
Güterzüge         1587
Regioverkehr      1587


### Load balanced dataset

In [16]:
from _defaults import load_ballance_datasets, print_db_length_table

In [17]:
# Load the datasets already balanced, in a single call. The return value has the
# same structure as that of `ballance_data`: the datasets plus a
# `(dataset, metadata, targets)` triple.
datasets, (dataset, metadata, targets) = load_ballance_datasets()

Load previous cached dataset from 'data/datasets-098bae95.pkl' file.


In [18]:
# Print the per-dataset lengths of the balanced datasets.
print_db_length_table(datasets)

Dataset         Length
------------  --------
Fernverkehr       1587
Güterzüge         1587
Regioverkehr      1587


### Load balanced dataset partially

In [19]:
from _defaults import load_ballance_datasets, print_db_length_table

In [20]:
# Load only a part of the balanced datasets.
# NOTE(review): `max_length=200` appears to cap every dataset at 200 entries
# (see the length table printed afterwards) — confirm in `_defaults.py`.
datasets, (dataset, metadata, targets) = load_ballance_datasets(max_length=200)

Load previous cached dataset from 'data/datasets-098bae95.pkl' file.


In [21]:
# Print the per-dataset lengths of the partially loaded datasets.
print_db_length_table(datasets)

Dataset         Length
------------  --------
Fernverkehr        200
Güterzüge          200
Regioverkehr       200


In [22]:
# Print the same length table for the targets returned alongside the datasets.
print_db_length_table(targets)

Dataset         Length
------------  --------
Fernverkehr        200
Güterzüge          200
Regioverkehr       200


## Constants and Variables

In [23]:
from _defaults import (
    CACHING_ENABLED,
    SEED,
    dataset_file_directory,
    datasets_filenames,
    datasets_labels,
    normalize,
    raw_data,
    resample_size,
)

In [24]:
# Show the random seed and the caching flag, one value per line.
print(SEED, CACHING_ENABLED, sep="\n")

42
True


In [25]:
# Show the configured dataset file directory (value comes from `_defaults.py`).
print(dataset_file_directory)

/home/lab/slki/Dataset/preprecessed/v4


In [26]:
# Show the raw-data flag, the resample size and the normalization setting,
# one value per line.
print(raw_data, resample_size, normalize, sep="\n")

False
1000
mone_one_zero_fix


In [27]:
# Show the dataset file name patterns followed by the dataset labels,
# one list per line.
print(datasets_filenames, datasets_labels, sep="\n")

['fernverkehr-points-kionix-sh-z-dt%(chunk?).pkl', 'gueterzuege-points-kionix-sh-z-dt%(chunk?).pkl', 'regioverkehr-points-kionix-sh-z-dt%(chunk?).pkl']
['Fernverkehr', 'Güterzüge', 'Regioverkehr']


## Using the data (directly)

In [28]:
# Walk over the samples together with their target and metadata entry and
# inspect the first triple only (the loop is left after one iteration).
# `strict=False` is kept from the original cell: `zip` stops at the shortest input.
for data, target, meta in zip(dataset, targets, metadata, strict=False):
    # With `raw_data` enabled only the first `meta["sample_length"]` values are
    # used (presumably the rest is padding — TODO confirm); otherwise the
    # sample is used as is.
    signal = data[: meta["sample_length"]] if raw_data else data
    # Report the shape of the signal that is actually used. The original cell
    # printed `data.shape` and left `signal` unused.
    print(signal.shape)
    print(target)
    print(meta.keys())
    break

(1000,)
2
dict_keys(['sample_length', 'start_time', 'end_time', 'sample_rate_in_hz', 'origin'])
