# Cube conversion

This notebook creates an optimized version of each `SEG-Y` cube.
The exact format (`HDF5`, `QHDF5`, `QSGY`) depends on `FORMAT` and `QUANTIZE` parameters.

Pseudocode of this notebook looks like:

```python
for each cube:
    mkdir
    infer geometry
    if SHOW, log to std.out
    
    convert segy to a desired QUANTIZED FORMAT
```

* `paths` controls which cubes are converted
* `RECREATE` determines whether already converted volumes should be re-converted
* `CONVERT` controls whether the cubes should be converted
* `FORMAT` and `QUANTIZE` determine the exact format to convert to
* `SHOW` controls whether results are shown in the notebook itself
* `DRY_RUN` can be used to check which operations will happen, without actually executing them

In [None]:
import os
import sys
import warnings
warnings.filterwarnings("ignore")

from glob import glob
import matplotlib.pyplot as plt

sys.path.insert(0, '..')
sys.path.insert(0, '../..')
from seismiqb import Geometry
from batchflow import Notifier

In [None]:
# Global parameters
# Banner line printed (twice) ahead of each processed cube when SHOW is enabled
SEPARATOR = 60 * '▆'

# Flow switches
RECREATE = False    # when False, cubes whose converted file already exists are skipped
SHOW = True         # print geometry summaries and draw plots inside the notebook
DRY_RUN = False     # when True, only report what would be converted and do nothing else

# Conversion options. FORMAT should be one of {'qsgy', 'hdf5', 'qhdf5'}
CONVERT = True      # when disabled, stats are still collected but no conversion is run
FORMAT = 'qsgy'
POSTFIX = False     # forwarded as `postfix` to both `make_output_path` and `convert`
QUANTIZE = True
# Extra keyword arguments forwarded verbatim to `geometry.convert`
CONVERSION_KWARGS = {
    # 'chunk_size_divisor' : 3,     # keep smaller chunks in `hdf5`
}

# Glob pattern for the cube directory names to process
CUBE = '*_*'

In [None]:
# Collect every SEG-Y file (`*.s*y` matches e.g. `.sgy` / `.segy`) inside the
# directories selected by the CUBE pattern, in a stable sorted order.
paths = sorted(glob(f'/data/seismic_data/seismic_interpretation/{CUBE}/*.s*y'))

# Echo the discovered files; a plain loop avoids building a throwaway list of `None`
for path in paths:
    print(path)

In [None]:
%%time
# Process every discovered cube: resolve the output path, optionally report
# statistics, and convert to the requested format.
# Edit the slice bounds to process only a subset of the cubes.
for path_cube in Notifier('n')(paths[:]):
    # Skip entries that are no longer present on disk
    if not os.path.exists(path_cube):
        continue

    # Make an instance with no actual init: only needed to resolve the output path
    geometry = Geometry.new(path=path_cube, init=False)
    path_converted = geometry.make_output_path(format=FORMAT, postfix=POSTFIX, quantize=QUANTIZE)

    # Already converted cubes are left alone unless re-creation is requested
    if os.path.exists(path_converted) and not RECREATE:
        print(f'{path_converted} already exists, skipping\n')
        continue

    # Dry run: report the planned conversion without touching any data
    if DRY_RUN:
        print(f'Will convert ::: {path_cube}\nto           ::: {path_converted}\n')
        continue

    if SHOW:
        print(SEPARATOR)
        print(SEPARATOR)
        print('Working with', path_cube)

    # Re-open geometry, collect stats
    geometry = Geometry.new(path=path_cube,
                            index_headers=Geometry.INDEX_HEADERS_POSTSTACK,
                            additional_headers=Geometry.ADDITIONAL_HEADERS_POSTSTACK_FULL,
                            reload_headers=True,
                            collect_stats=True, recollect_stats=True)
    if SHOW:
        # Textual
        geometry.print()
        print()
        geometry.print_textual()

        # Graphs
        geometry.show('snr')
        plt.show()

    # Conversion. Truthiness check (rather than `is False`) so that any falsy
    # value of CONVERT, e.g. `0` or `None`, disables the conversion as well.
    if not CONVERT:
        continue
    geometry_converted = geometry.convert(format=FORMAT, postfix=POSTFIX, quantize=QUANTIZE, **CONVERSION_KWARGS)
    if SHOW:
        geometry_converted.print()
    print('\n'*3)