# local_normalize

This script is designed to normalize *seismo-like* datasets: every channel of every record is divided by its own absolute maximum.

*DATA* is an array of shape: `(record_number, sample, channel)`. <br>

In [42]:
import h5py as h5
import numpy as np

In [1]:
# Modifying sys.path to be able to load project packages
import sys
sys.path.append('../')

In [2]:
# Load project packages
from utils.h5_tools import write_batch

In [43]:
# Init parameters

# Source file (opened read-only further down) and destination file
# (handed to write_batch for every processed batch).
path = "data/2014_2019_global_norm.h5"
save_path = "data/2014_2019_local_norm.h5"

# Key stacks: the sequence of .h5 keys to follow, starting at the file root,
# to reach each dataset. An empty label/id stack disables that dataset.
data_key_stack = ["X"]
label_key_stack = ["Y"]
id_key_stack = ["ID"]

# Number of records read, normalized and written per iteration.
batch_size = 20000

`data_key_stack` and `label_key_stack` are stacks of *.h5* file keys leading to the *DATA* and *LABELS* arrays (`id_key_stack` likewise leads to the *ID* array). For example, a file might have a structure like:

---
```
Noise
-- STATS
-- PICKS
---- X
---- Y
P-Waves
-- STATS
-- PICKS
---- X
---- Y
```
---
... then `data_key_stack` might be `['Noise', 'PICKS', 'X']` or `['P-Waves', 'PICKS', 'X']`.

In [44]:
# Read data
# Open the source file read-only; the handle stays open because the datasets
# below are read from it batch by batch later on.
f1 = h5.File(path, 'r')


def _follow_keys(root, key_stack):
    """Descend from *root* through every key of *key_stack*, in order."""
    node = root
    for key in key_stack:
        node = node[key]
    return node


# The data set is mandatory; labels and IDs are optional and stay None when
# their key stack is empty.
X = _follow_keys(f1, data_key_stack)
Y = _follow_keys(f1, label_key_stack) if label_key_stack else None
ID = _follow_keys(f1, id_key_stack) if id_key_stack else None

# Dimensions of the data set: (record_number, sample, channel)
f1_length, f1_samples, f1_channels = X.shape[0], X.shape[1], X.shape[2]

In [45]:
# Convert data
# Process the data set in batches of at most `batch_size` records so the whole
# file never has to fit in memory. Ceiling division yields the number of
# batches, including a shorter trailing one when f1_length is not a multiple
# of batch_size.
batch_count = (f1_length + batch_size - 1) // batch_size

for b in range(batch_count):

    start_pos = batch_size * b
    # Clamp to the end of the data set. Deriving the end position this way
    # (instead of special-casing the last batch with `f1_length % batch_size`)
    # keeps the final batch full-sized when f1_length is an exact multiple of
    # batch_size; the remainder is 0 in that case and the batch would be empty.
    end_pos = min(start_pos + batch_size, f1_length)

    # Slicing reads this batch into memory: (records, samples, channels)
    c_X = X[start_pos:end_pos]

    # Absolute maximum of every channel of every record, reduced over the
    # sample axis; keepdims keeps shape (records, 1, channels) so it
    # broadcasts against c_X.
    loc_max = np.max(np.abs(c_X), axis=1, keepdims=True)
    # An all-zero channel has a maximum of 0; divide it by 1 instead so it
    # stays all-zero rather than turning into NaN.
    loc_max[loc_max == 0] = 1
    c_X /= loc_max

    write_batch(save_path, data_key_stack[-1], c_X)

    # Labels and IDs are optional (None when their key stack is empty) and are
    # copied through unchanged, aligned with the data batch.
    if Y is not None:
        c_Y = Y[start_pos:end_pos]
        write_batch(save_path, label_key_stack[-1], c_Y)

    if ID is not None:
        c_ID = ID[start_pos:end_pos]
        write_batch(save_path, id_key_stack[-1], c_ID, string=True)

In [46]:
# Close files
# Release the read-only handle on the source file; the X / Y / ID objects
# obtained from f1 must not be used after this point.
f1.close()