# Playing with PoroTomo SEG-Y Data

These data are available from https://gdr.openei.org/submissions/980.

In [1]:
import numpy as np
import segyio
import re

A single SEG-Y file is enough for this exploration:

In [2]:
# Relative path to one PoroTomo SEG-Y file (resolved against the kernel's working directory).
segy_file = '../../data/PoroTomo/SEG-Y/PoroTomo_iDAS16043_160326001145.sgy'

In [3]:
# Open the file read-only.
# NOTE(review): strict=False presumably lets segyio open the file without
# inferring an inline/crossline geometry — confirm against the segyio docs.
# The handle is not closed in any cell shown here; call f.close() when finished.
f = segyio.open(segy_file, mode='r', strict=False)

Header:

In [4]:
# f.text[0] is the file's first text header as bytes; decode it to str so it
# can be parsed with regular expressions below.
hdr = f.text[0].decode('ascii')
hdr

'C01 Client: Uni. Wisconsin                                                      C02 Field: PoroTomo                                                             C03 Fibre: Trenched Surface Fibre                                               C04 Data collected by Silixa iDAS, Distributed Fibre Optic Sensor               C05 iDAS S/N: iDAS16043                                                         C06 SEGY Format: Rev.1, IEEE 32bit float, big endian                            C07 Field Recording Filename: DAS__160326001145.tdms                            C08 Continuous acquisition data converted to SEGY                               C09 UTC Timestamp of first sample: 26-Mar-2016 00:11:45.133759819               C10                                                                             C11                                                                             C12                                                                             C13 Receiver positions are in true E, N

Parse the header into a dictionary keyed by the "C" line labels (`C01`, `C02`, …):

In [5]:
def header2dict(hdr):
    """Parse a SEG-Y textual header string into a dict keyed by card label.

    Parameters
    ----------
    hdr : str
        Decoded textual header, e.g. ``f.text[0].decode('ascii')``.

    Returns
    -------
    dict
        Maps each label (``'C01'``, ``'C02'``, ...) to the whitespace-stripped
        text that follows it. Blank cards map to ``''``.

    Notes
    -----
    When the input is a whole number of 80-character cards and every card
    starts with a ``Cnn`` label, it is sliced by card. This keeps text such as
    ``'ABC123'`` inside a card from being mistaken for the label ``'C12'``.
    Any other layout falls back to splitting on every ``Cnn`` occurrence.
    """
    card_width = 80
    label = re.compile(r'C\d{2}')

    cards = [hdr[i:i + card_width] for i in range(0, len(hdr), card_width)]
    if hdr and len(hdr) % card_width == 0 and all(label.match(c) for c in cards):
        return {card[:3]: card[3:].strip() for card in cards}

    # Fallback: the capture group keeps the labels in the split result, which
    # then alternates label, text, label, text, ... after the leading chunk.
    parts = [s.strip() for s in re.split(r'(C\d{2})', hdr)[1:]]
    return dict(zip(parts[::2], parts[1::2]))

In [6]:
# Parse straight from the file's text header instead of from `hdr` itself.
# `hdr` is rebound from str to dict here, so header2dict(hdr) would be handed
# a dict if this cell were run a second time.
hdr = header2dict(f.text[0].decode('ascii'))
hdr

{'C01': 'Client: Uni. Wisconsin',
 'C02': 'Field: PoroTomo',
 'C03': 'Fibre: Trenched Surface Fibre',
 'C04': 'Data collected by Silixa iDAS, Distributed Fibre Optic Sensor',
 'C05': 'iDAS S/N: iDAS16043',
 'C06': 'SEGY Format: Rev.1, IEEE 32bit float, big endian',
 'C07': 'Field Recording Filename: DAS__160326001145.tdms',
 'C08': 'Continuous acquisition data converted to SEGY',
 'C09': 'UTC Timestamp of first sample: 26-Mar-2016 00:11:45.133759819',
 'C10': '',
 'C11': '',
 'C12': '',
 'C13': 'Receiver positions are in true E, N, Elevation (m)',
 'C14': 'Number of Traces: 8721',
 'C15': 'Samples Per Trace: 30000',
 'C16': 'Sampling Interval (us): 1000',
 'C17': 'Record Length (sec): 30',
 'C18': 'Measurement Units: Depths = Metres, Coordinates = Metres',
 'C19': '',
 'C20': 'Trace amplitude is proportional to fibre strain-rate',
 'C21': 'For comparison with conventional geophones it is recommended to',
 'C22': 'time-integrate this data',
 'C23': '',
 'C24': 'Trace Header Byte Positio

Try accessing some of the header fields:

In [7]:
hdr['C09']

'UTC Timestamp of first sample: 26-Mar-2016 00:11:45.133759819'

In [8]:
hdr['C14']

'Number of Traces: 8721'

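_Important_ header fields (client, field, instrument serial number, number of traces, samples per trace, and sampling interval):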

In [9]:
# Subset of the parsed header: who/where/instrument plus the data dimensions.
dict((label, hdr[label]) for label in ('C01', 'C02', 'C05', 'C14', 'C15', 'C16'))

{'C01': 'Client: Uni. Wisconsin',
 'C02': 'Field: PoroTomo',
 'C05': 'iDAS S/N: iDAS16043',
 'C14': 'Number of Traces: 8721',
 'C15': 'Samples Per Trace: 30000',
 'C16': 'Sampling Interval (us): 1000'}

Now, back to reading the actual DAS data...

In [10]:
# Trace count reported by segyio; the recorded value (8721) matches header line C14.
f.tracecount

8721

In [11]:
f.samples

array([0.0000e+00, 1.0000e+00, 2.0000e+00, ..., 2.9997e+04, 2.9998e+04,
       2.9999e+04])

In [12]:
len(f.samples)

30000

In [13]:
f.trace

Trace(traces = 8721, samples = 30000)

In [14]:
# NOTE(review): the recorded output is the scalar 30000 (samples per trace),
# not a (traces, samples) tuple.
f.trace.shape

30000

Convert the UTC timestamp of the first sample (header line `C09`) to a Python `datetime`:

In [15]:
hdr['C09']

'UTC Timestamp of first sample: 26-Mar-2016 00:11:45.133759819'

In [16]:
# Pull the timestamp text out of header line C09 and append 'Z' so that the
# parsed datetime carries a UTC tzinfo instead of being naive.
# re.match returns None if C09 does not have this form, in which case .group raises.
dt_str = re.match('UTC Timestamp of first sample: (.+)$', hdr['C09']).group(1) + 'Z'
dt_str

'26-Mar-2016 00:11:45.133759819Z'

In [17]:
# Import the submodule explicitly: a bare `import dateutil` does not guarantee
# that `dateutil.parser` is loaded, so the parse call could fail on a fresh kernel.
import dateutil.parser

In [18]:
# The header timestamp has nanosecond digits (…45.133759819); the resulting
# datetime keeps only microseconds (133759), as the recorded output shows.
dt = dateutil.parser.parse(dt_str)

In [19]:
dt

datetime.datetime(2016, 3, 26, 0, 11, 45, 133759, tzinfo=tzutc())

In [20]:
dt.tzinfo

tzutc()

In [21]:
dt.timestamp()

1458951105.133759

In [22]:
from datetime import datetime

In [23]:
# Current UTC time as an ISO 8601 string with microsecond precision; the
# recorded output has no UTC offset suffix, i.e. the datetime is naive.
datetime.utcnow().isoformat(timespec='microseconds')

'2019-06-13T15:29:46.926235'

In [24]:
np.arange(f.tracecount, dtype=np.uint32)

array([   0,    1,    2, ..., 8718, 8719, 8720], dtype=uint32)

In [25]:
# Load the per-channel coordinate table into a structured float32 array with
# fields (channel, x, y, z): skip the first two lines of the CSV and keep only
# its first four columns.
# NOTE(review): the path below is absolute and specific to one user's machine,
# so this cell will not run elsewhere — point it at a shared data directory.
cvrc = np.genfromtxt('/Users/ajelenak/Downloads/Horizontal_DAS_DTS_UTM_LatLon_Coordinates.csv',
                     dtype=np.dtype('float32'),
                     skip_header=2,
                     delimiter=',',
                     usecols=(0, 1, 2, 3),
                     names=('channel', 'x', 'y', 'z'))

In [26]:
cvrc

array([( -20., 0., 0., 0.), ( -19., 0., 0., 0.), ( -18., 0., 0., 0.), ...,
       (8698., 0., 0., 0.), (8699., 0., 0., 0.), (8700., 0., 0., 0.)],
      dtype=[('channel', '<f4'), ('x', '<f4'), ('y', '<f4'), ('z', '<f4')])

In [27]:
cvrc.shape

(8721,)

In [28]:
cvrc[1967]

(1947., 328397.38, 4408088.5, 1241.899)

In [29]:
# Channel numbers as integers; the values shown (-20 … 8700) fit in int16.
cvrc['channel'].astype(np.dtype('int16'))

array([ -20,  -19,  -18, ..., 8698, 8699, 8700], dtype=int16)

In [30]:
hdr['C16']

'Sampling Interval (us): 1000'

In [31]:
# Raw string so that \( \) and \d reach the regex engine as written, rather
# than being invalid string escapes that Python warns about.
# The captured sampling interval is returned as a string of digits.
re.match(r'Sampling Interval \(us\): (\d+)$', hdr['C16']).group(1)

'1000'

In [32]:
# Read every trace into one in-memory array (shape (8721, 30000) for this file).
# NOTE(review): roughly 1 GB if stored as 32-bit floats, as header line C06
# suggests — confirm the dtype before running on a small machine.
x = segyio.tools.collect(f.trace[:])

In [33]:
x.shape

(8721, 30000)

In [34]:
# Scratch check: slicing with [::-1] reverses a tuple.
(1, 2)[::-1]

(2, 1)

In [35]:
# (samples per trace, number of traces) — the reverse order of x.shape.
(f.trace.shape, f.tracecount)

(30000, 8721)

In [36]:
# The literal is the dt.timestamp() value displayed earlier in this notebook.
# np.ceil rounds it up to the next whole second before it is formatted as a
# naive UTC ISO 8601 string.
datetime.utcfromtimestamp(np.ceil(1458951105.133759)).isoformat(timespec='seconds')

'2016-03-26T00:11:46'

In [37]:
dt

datetime.datetime(2016, 3, 26, 0, 11, 45, 133759, tzinfo=tzutc())

In [38]:
# Index tuple of the channels whose x coordinate is exactly 0, i.e. the
# entries that carry no real position in the coordinate table.
zero_elems = np.nonzero(cvrc['x'] == 0)
zero_elems

(array([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,   10,
          11,   12,   13,   14,   15,   16,   17,   18,   19,   20,   21,
          22,   23,   24,   25,   26,   27,   28,   29,   30,   31,   32,
          33,   34,   35,   36,   37,   38,   39,   40,   41,   42,   43,
          44,   45,   46,   47,   48,   49, 8671, 8672, 8673, 8674, 8675,
        8676, 8677, 8678, 8679, 8680, 8681, 8682, 8683, 8684, 8685, 8686,
        8687, 8688, 8689, 8690, 8691, 8692, 8693, 8694, 8695, 8696, 8697,
        8698, 8699, 8700, 8701, 8702, 8703, 8704, 8705, 8706, 8707, 8708,
        8709, 8710, 8711, 8712, 8713, 8714, 8715, 8716, 8717, 8718, 8719,
        8720]),)

In [39]:
# Replace the placeholder zeros in x with NaN so that nanmin/nanmax ignore them.
# This modifies cvrc in place, and only the x field: y and z of the same rows
# still hold 0, as the recorded cvrc[8670:] output shows.
cvrc['x'][zero_elems] = np.nan

In [40]:
cvrc['x'][:51]

array([      nan,       nan,       nan,       nan,       nan,       nan,
             nan,       nan,       nan,       nan,       nan,       nan,
             nan,       nan,       nan,       nan,       nan,       nan,
             nan,       nan,       nan,       nan,       nan,       nan,
             nan,       nan,       nan,       nan,       nan,       nan,
             nan,       nan,       nan,       nan,       nan,       nan,
             nan,       nan,       nan,       nan,       nan,       nan,
             nan,       nan,       nan,       nan,       nan,       nan,
             nan,       nan, 327809.78], dtype=float32)

In [41]:
cvrc[8670:]

array([(8650., 329135.4, 4408562.5, 1261.511),
       (8651.,      nan,       0. ,    0.   ),
       (8652.,      nan,       0. ,    0.   ),
       (8653.,      nan,       0. ,    0.   ),
       (8654.,      nan,       0. ,    0.   ),
       (8655.,      nan,       0. ,    0.   ),
       (8656.,      nan,       0. ,    0.   ),
       (8657.,      nan,       0. ,    0.   ),
       (8658.,      nan,       0. ,    0.   ),
       (8659.,      nan,       0. ,    0.   ),
       (8660.,      nan,       0. ,    0.   ),
       (8661.,      nan,       0. ,    0.   ),
       (8662.,      nan,       0. ,    0.   ),
       (8663.,      nan,       0. ,    0.   ),
       (8664.,      nan,       0. ,    0.   ),
       (8665.,      nan,       0. ,    0.   ),
       (8666.,      nan,       0. ,    0.   ),
       (8667.,      nan,       0. ,    0.   ),
       (8668.,      nan,       0. ,    0.   ),
       (8669.,      nan,       0. ,    0.   ),
       (8670.,      nan,       0. ,    0.   ),
       (8671.

In [42]:
np.nanmax(cvrc['x'])

329135.4

In [43]:
np.nanmin(cvrc['x'])

327805.47