In [1]:
import segyio as sgy
import os
import numpy as np
import h5py
from sklearn.utils import shuffle


def sample_from_sgy(monitor_file, baseline, n_shots=3, total_shots=8):
    """Build one normalized multi-shot sample from a SEG-Y file.

    ``n_shots`` shot gathers, evenly spread over ``total_shots`` shots, are
    read with ``get_shot`` and stacked along a new last axis. When
    ``baseline`` is given, the baseline gather with the same shot index is
    subtracted from each monitor gather before stacking.

    Args:
        monitor_file: Path of the SEG-Y file, opened with ``sgy.open``.
        baseline: Object passed to ``get_shot`` as the baseline source, or
            ``None`` to use the monitor gathers as they are.
        n_shots: Number of gathers to extract (one per output channel).
        total_shots: Number of shots the selection is spread over; the file
            is assumed to contain at least this many shots.

    Returns:
        Array whose axis 2 indexes the selected shots. Unless the stacked
        data is constant, it is shifted to zero mean and scaled to unit
        standard deviation (statistics taken over the whole sample).

    Raises:
        ValueError: If ``n_shots`` is not between 1 and ``total_shots``.
    """
    if n_shots < 1 or n_shots > total_shots:
        raise ValueError('n_shots must be between 1 and total_shots')

    # Evenly spaced shot indices, centred inside the acquisition. Computing
    # them as first + k * spacing always yields exactly n_shots indices;
    # np.arange(start, stop, step) could come up short (e.g. 4 of 8 shots).
    spacing = int(total_shots / n_shots)
    first = int(total_shots / n_shots / 2)
    shot_ids = [first + spacing * k for k in range(int(n_shots))]

    channels = []
    with sgy.open(monitor_file, strict=False, ignore_geometry=True) as data:
        for shot_id in shot_ids:
            gather = get_shot(data, shot_id)
            if baseline is not None:
                gather = gather - get_shot(baseline, shot_id)
            channels.append(gather)
    sample = np.stack(channels, axis=2)

    # Normalize sample (skipped for constant data to avoid dividing by zero)
    if sample.std() != 0.0:
        sample = sample - sample.mean()
        sample = sample / sample.std()
    return sample

In [2]:
def get_shot(f, i, traces_per_shot=128):
    """Collect the traces of shot ``i`` into a 2-D array, one trace per column.

    Args:
        f: Object exposing an indexable ``trace`` attribute whose items are
            1-D arrays (``sample_from_sgy`` passes an open segyio file).
        i: Zero-based shot index. The shot's traces are read from the
            consecutive positions starting at ``i * traces_per_shot``.
        traces_per_shot: Number of traces belonging to one shot; truncated
            to an integer.

    Returns:
        Array of shape ``(len(f.trace[0]), traces_per_shot)``.
    """
    n_traces = int(traces_per_shot)
    n_samples = f.trace[0].shape[0]
    first_trace = i * n_traces

    # The empty float64 seed column keeps the result dtype and the
    # zero-trace result identical to building the array up from np.zeros.
    columns = [np.zeros((n_samples, 0))]
    for j in range(n_traces):
        # Copy each trace so the collected columns stay valid even if the
        # reader hands back a reused buffer.
        columns.append(np.expand_dims(np.array(f.trace[first_trace + j]), axis=1))

    # Join once instead of re-copying the growing array for every trace.
    return np.hstack(columns)

In [3]:
def target_from_model(monitor_file, baseline):
    """Load a 128x128 float32 model from a raw binary file as a target.

    Args:
        monitor_file: Path of the binary file holding 128 * 128 float32
            values.
        baseline: Array subtracted from the loaded model, or ``None`` to
            return the model unchanged.

    Returns:
        The (optionally baseline-subtracted) model with a trailing axis of
        length 1 appended.
    """
    model = np.fromfile(monitor_file, dtype='float32').reshape((128, 128))
    target = model if baseline is None else model - baseline
    return np.expand_dims(target, axis=2)


def get_features(base_dir, monitor_files, baseline, n_shots=3, total_shots=8):
    """Load every seismic file as a sample and stack them into one array.

    Args:
        base_dir: Directory that the file names are joined onto.
        monitor_files: Iterable of seismic file names.
        baseline: Forwarded unchanged to ``sample_from_sgy``.
        n_shots: Forwarded to ``sample_from_sgy``.
        total_shots: Forwarded to ``sample_from_sgy``.

    Returns:
        Array with one sample per entry of ``monitor_files`` along axis 0.
    """
    def _load(index, name):
        # Progress line is overwritten in place thanks to the carriage return.
        print('Processing sample '+str(index)+'..', end='\r')
        path = os.path.join(base_dir, name)
        return sample_from_sgy(path, baseline, n_shots=n_shots, total_shots=total_shots)

    loaded = [_load(index, name) for index, name in enumerate(monitor_files)]
    return np.stack(loaded, axis=0)


def get_targets(base_dir, monitor_files, baseline):
    """Load every model file as a target and stack them into one array.

    Args:
        base_dir: Directory that the file names are joined onto.
        monitor_files: Iterable of model file names.
        baseline: Forwarded unchanged to ``target_from_model``.

    Returns:
        Array with one target per entry of ``monitor_files`` along axis 0.
    """
    model_paths = (os.path.join(base_dir, name) for name in monitor_files)
    loaded = [target_from_model(path, baseline) for path in model_paths]
    return np.stack(loaded, axis=0)

In [4]:
def write_data_to_h5(filename, base_dir_seismic, monitor_files_seismic, base_dir_model,
                     monitor_files_model, baseline_seismic=None, baseline_model=None,
                     feature_shape=(751, 128, 3), target_shape=(128, 128, 1),
                     n_shots=3, total_shots=8):
    """Write paired seismic samples and model targets into an HDF5 file.

    The file is created (overwriting any existing one) with two float32
    datasets, ``'features'`` and ``'targets'``, whose first axis is
    resizable. Entry ``i`` of each dataset comes from the ``i``-th seismic
    file and the ``i``-th model file respectively.

    Args:
        filename: Path of the HDF5 file to create.
        base_dir_seismic: Directory containing the seismic files.
        monitor_files_seismic: Seismic file names, one per sample.
        base_dir_model: Directory containing the model files.
        monitor_files_model: Model file names, one per sample, in the same
            order as ``monitor_files_seismic``.
        baseline_seismic: Forwarded to ``sample_from_sgy``.
        baseline_model: Forwarded to ``target_from_model``.
        feature_shape: Shape of a single feature sample; must match what
            ``sample_from_sgy`` returns for the given ``n_shots``.
        target_shape: Shape of a single target; must match what
            ``target_from_model`` returns.
        n_shots: Forwarded to ``sample_from_sgy``.
        total_shots: Forwarded to ``sample_from_sgy``.

    Raises:
        ValueError: If the two file lists differ in length.
    """
    n_samples = len(monitor_files_seismic)
    if len(monitor_files_model) != n_samples:
        raise ValueError('Need the same amount of models and seismic files')

    # Accept lists as well as tuples for the per-sample shapes.
    feature_shape = tuple(feature_shape)
    target_shape = tuple(target_shape)

    with h5py.File(filename, 'w') as f:
        # maxshape=None on axis 0 leaves room to append samples later.
        features = f.create_dataset('features', (n_samples,) + feature_shape,
                                    maxshape=(None,) + feature_shape, dtype=np.float32)
        targets = f.create_dataset('targets', (n_samples,) + target_shape,
                                   maxshape=(None,) + target_shape, dtype=np.float32)

        file_pairs = zip(monitor_files_seismic, monitor_files_model)
        for i, (seismic_file, model_file) in enumerate(file_pairs):
            print('Writing sample '+str(i)+'..', end='\r')
            # os.path.join matches get_features/get_targets and copes with
            # base directories that are empty or already end in a separator.
            feature = sample_from_sgy(os.path.join(base_dir_seismic, seismic_file),
                                      baseline_seismic, n_shots=n_shots, total_shots=total_shots)
            target = target_from_model(os.path.join(base_dir_model, model_file), baseline_model)
            features[i] = feature
            targets[i] = target




In [5]:
def generator_from_data_file(filename, batch_size=32):
    """Yield ``(features, targets)`` batches from an HDF5 file, forever.

    Batches are consecutive slices of the ``'features'`` and ``'targets'``
    datasets. Once the next full batch would run past the last sample the
    generator wraps around to the start, so a trailing partial batch is
    skipped (unless the whole dataset is smaller than one batch). The file
    stays open until the generator is closed or garbage collected.

    Args:
        filename: Path of the HDF5 file to read.
        batch_size: Number of samples per batch.

    Yields:
        Tuple ``(features_batch, targets_batch)``.

    Raises:
        ValueError: On first iteration, if ``batch_size`` is not positive
            or the ``'features'`` dataset holds no samples.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')
    with h5py.File(filename, 'r') as h5f:
        features = h5f['features']
        targets = h5f['targets']
        size = features.shape[0]
        if size == 0:
            raise ValueError('No samples found in ' + str(filename))
        start_index = 0
        while True:
            stop_index = start_index + batch_size
            yield features[start_index:stop_index], targets[start_index:stop_index]
            start_index = stop_index
            # Reset generator when it reaches the end. Strictly greater, so
            # a final batch that ends exactly on the last sample is served.
            if start_index + batch_size > size:
                start_index = 0