In [None]:
cd ..

In [None]:
import os

In [None]:
import numpy as np

In [None]:
import pandas as pd

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

In [None]:
from echofilter.rawloader import *

In [None]:
# Root directory that holds the datasets read by this notebook.
# Set the DSFORCE_ROOT_DATA_DIR environment variable to point at another
# location (e.g. '/media/scott/scratch/Datasets/dsforce') instead of editing
# this cell; '/data/dsforce' is used when the variable is not set.
ROOT_DATA_DIR = os.environ.get('DSFORCE_ROOT_DATA_DIR', '/data/dsforce')

In [None]:
def load_transect_data(transect_pth, dataset='surveyExports', root_data_dir=ROOT_DATA_DIR):
    """
    Load one transect's Sv export together with its top and bottom lines.

    Three files sharing the stem ``<root_data_dir>/<dataset>/<transect_pth>``
    are read: ``_Sv_raw.csv`` (via ``transect_loader``), ``_bottom.evl`` and
    ``_turbulence.evl`` (both via ``evl_loader``).

    Parameters
    ----------
    transect_pth : str
        Transect path relative to the dataset directory, without any of the
        file suffixes listed above.
    dataset : str, optional
        Name of the dataset directory inside `root_data_dir`.
    root_data_dir : str, optional
        Directory containing the datasets.

    Returns
    -------
    timestamps, depths, signals
        The three values returned by ``transect_loader``, unchanged.
    d_top
        Depths from the ``_turbulence.evl`` line, interpolated with
        ``np.interp`` at `timestamps`.
    d_bot
        Depths from the ``_bottom.evl`` line, interpolated with ``np.interp``
        at `timestamps`.
    """
    def exported_file(suffix):
        # All three exports share the transect path as their filename stem.
        return os.path.join(root_data_dir, dataset, transect_pth + suffix)

    timestamps, depths, signals = transect_loader(exported_file('_Sv_raw.csv'))
    bottom_times, bottom_depths = evl_loader(exported_file('_bottom.evl'))
    top_times, top_depths = evl_loader(exported_file('_turbulence.evl'))

    # The line files carry their own time samples; resample both lines onto the
    # Sv timestamps so every returned series shares the same time axis.
    d_top = np.interp(timestamps, top_times, top_depths)
    d_bot = np.interp(timestamps, bottom_times, bottom_depths)
    return timestamps, depths, signals, d_top, d_bot

In [None]:
def load_transect_data2(survey, transect_name, dataset='surveyExports', root_data_dir=ROOT_DATA_DIR):
    """
    Load a transect identified by survey and transect name.

    The transect path is built as ``Survey<survey>/Survey<survey>_<transect_name>``
    and passed on to ``load_transect_data``.

    Parameters
    ----------
    survey
        Survey identifier, formatted into the path with ``str.format``.
    transect_name : str
        Name of the transect within the survey.
    dataset : str, optional
        Name of the dataset directory inside `root_data_dir`.
    root_data_dir : str, optional
        Directory containing the datasets.

    Returns
    -------
    tuple
        Whatever ``load_transect_data`` returns for the constructed path.
    """
    survey_dir = 'Survey{}'.format(survey)
    transect_pth = os.path.join(survey_dir, '{}_{}'.format(survey_dir, transect_name))
    return load_transect_data(transect_pth, dataset=dataset, root_data_dir=root_data_dir)

In [None]:
def plot_transect_data(transect_pth, dataset='surveyExports', root_data_dir=ROOT_DATA_DIR):
    """
    Plot a transect's signal with its top and bottom lines overlaid.

    The data is loaded with ``load_transect_data``. Depths are negated before
    plotting, so larger depths appear lower in the figure.

    Parameters
    ----------
    transect_pth : str
        Transect path relative to the dataset directory.
    dataset : str, optional
        Name of the dataset directory inside `root_data_dir`.
    root_data_dir : str, optional
        Directory containing the datasets.
    """
    timestamps, depths, signals, d_top, d_bot = load_transect_data(
        transect_pth, dataset, root_data_dir
    )

    fig, ax = plt.subplots(figsize=(12, 12))
    # signals is indexed (timestamp, depth); transpose so time runs along x.
    ax.pcolormesh(timestamps, -depths, signals.T)
    ax.plot(timestamps, -d_bot, 'b')  # bottom line in blue
    ax.plot(timestamps, -d_top, 'c')  # top line in cyan
    plt.show()

In [None]:
def plot_transect_data2(survey, transect_name, dataset='surveyExports', root_data_dir=ROOT_DATA_DIR):
    """
    Plot a transect identified by survey and transect name.

    Builds the transect path ``Survey<survey>/Survey<survey>_<transect_name>``
    (the same path ``load_transect_data2`` constructs) and delegates to
    ``plot_transect_data`` so the plotting code exists in one place only.

    Parameters
    ----------
    survey
        Survey identifier, formatted into the path with ``str.format``.
    transect_name : str
        Name of the transect within the survey.
    dataset : str, optional
        Name of the dataset directory inside `root_data_dir`.
    root_data_dir : str, optional
        Directory containing the datasets.
    """
    transect_pth = os.path.join(
        'Survey{}'.format(survey),
        'Survey{}_{}'.format(survey, transect_name),
    )
    plot_transect_data(transect_pth, dataset, root_data_dir)

In [None]:
# Plot an example transect, addressed by its path relative to the dataset directory.
plot_transect_data('Survey17/Survey17_GR1_N0A_E')

In [None]:
# Plot a transect addressed by survey number and transect name;
# plot_transect_data2 expands these to 'Survey17/Survey17_GR1_N0A_E'.
survey = 17
transect_name = 'GR1_N0A_E'
plot_transect_data2(survey, transect_name)

In [None]:
def get_partition_data(
        partition, dataset='surveyExports', partitioning_version='firstpass', root_data_dir=ROOT_DATA_DIR,
    ):
    """
    Load the table describing one partition of a dataset.

    Column names are taken from the first row of ``header.txt`` (with
    surrounding whitespace stripped from each entry) and applied to
    ``<partition>.txt``, which is read as a header-less CSV. Both files live in
    ``<root_data_dir>/<dataset>/sets/<partitioning_version>``.

    Parameters
    ----------
    partition : str
        Name of the partition; ``<partition>.txt`` is the file that is read.
    dataset : str, optional
        Name of the dataset directory inside `root_data_dir`.
    partitioning_version : str, optional
        Sub-directory of ``sets`` that holds the partition files.
    root_data_dir : str, optional
        Directory containing the datasets.

    Returns
    -------
    pandas.DataFrame
        The partition table with columns named after the header entries.

    Raises
    ------
    ValueError
        If ``header.txt`` contains no rows.
    """
    # Imported here because the notebook has no `import csv` of its own; the
    # name was previously only available if a star-import happened to leak it.
    import csv

    dirname = os.path.join(root_data_dir, dataset, 'sets', partitioning_version)
    fname_partition = os.path.join(dirname, partition + '.txt')
    fname_header = os.path.join(dirname, 'header' + '.txt')

    # Only the first row of the header file is used.
    with open(fname_header, 'r', newline='') as hf:
        first_row = next(csv.reader(hf), None)
    if first_row is None:
        raise ValueError('Header file {} is empty'.format(fname_header))
    header = [entry.strip() for entry in first_row]

    df = pd.read_csv(fname_partition, header=None, names=header)
    return df

In [None]:
# Display the 'train' partition table (default dataset and partitioning version).
get_partition_data('train')

In [None]:
def get_partition_list(
        partition,
        dataset='surveyExports',
        full_path=False,
        partitioning_version='firstpass',
        root_data_dir=ROOT_DATA_DIR,
    ):
    """
    List the transect paths belonging to a partition.

    Each entry of the partition table's ``Filename`` column is turned into
    ``<prefix>/<stem>``, where ``<prefix>`` is the text before the first
    underscore of the filename and ``<stem>`` is the filename with
    ``_Sv_raw.csv`` removed.

    Parameters
    ----------
    partition : str
        Name of the partition, passed to ``get_partition_data``.
    dataset : str, optional
        Name of the dataset directory inside `root_data_dir`.
    full_path : bool, optional
        If true, prefix every path with ``<root_data_dir>/<dataset>``.
        Default is ``False`` (paths relative to the dataset directory).
    partitioning_version : str, optional
        Sub-directory of ``sets`` that holds the partition files.
    root_data_dir : str, optional
        Directory containing the datasets.

    Returns
    -------
    list of str
        One transect path per row of the partition table.
    """
    df = get_partition_data(
        partition,
        dataset=dataset,
        partitioning_version=partitioning_version,
        root_data_dir=root_data_dir,
    )
    fnames = []
    for raw_name in df['Filename']:
        # Strip once, before deriving both parts: previously only the file
        # part was stripped, so a padded entry produced a directory name with
        # leading whitespace.
        name = raw_name.strip()
        fnames.append(os.path.join(name.split('_')[0], name.replace('_Sv_raw.csv', '')))
    if full_path:
        fnames = [os.path.join(root_data_dir, dataset, f) for f in fnames]
    return fnames

In [None]:
# Transect paths of the 'train' partition, relative to the dataset directory.
get_partition_list('train')

In [None]:
# The same partition with every path prefixed by <root_data_dir>/<dataset>.
get_partition_list('train', full_path=True)

In [None]:
# Tabulate the minimum and maximum bottom-line depth of every transect in the
# 'train' partition. Rows whose maximum depth exceeds 62 are flagged with '*'.
print('{:<40s}{:>6s} {:>6s}'.format('Filename', 'Min D', 'Max D'))

for fname in sorted(get_partition_list('train', full_path=True)):
    fname = fname + '_bottom.evl'
    try:
        depths = evl_loader(fname)[1]
    except Exception:
        # Best effort: skip transects whose bottom line cannot be loaded.
        # `Exception` rather than a bare `except` so that KeyboardInterrupt
        # still stops the loop.
        continue
    if len(depths) == 0:
        # Nothing to summarise; min()/max() would raise on an empty sequence.
        continue
    shallowest, deepest = min(depths), max(depths)
    print(
        '{:<40s}{:6.1f} {:6.1f}  {}'
        .format(os.path.split(fname)[1], shallowest, deepest, '*' if deepest > 62 else '')
    )

In [None]:
# Tabulate the minimum and maximum bottom-line depth of every transect in the
# 'validate' partition. Rows whose maximum depth exceeds 62 are flagged with '*'.
print('{:<40s}{:>6s} {:>6s}'.format('Filename', 'Min D', 'Max D'))

for fname in sorted(get_partition_list('validate', full_path=True)):
    fname = fname + '_bottom.evl'
    try:
        depths = evl_loader(fname)[1]
    except Exception:
        # Best effort: skip transects whose bottom line cannot be loaded.
        # `Exception` rather than a bare `except` so that KeyboardInterrupt
        # still stops the loop.
        continue
    if len(depths) == 0:
        # Nothing to summarise; min()/max() would raise on an empty sequence.
        continue
    shallowest, deepest = min(depths), max(depths)
    print(
        '{:<40s}{:6.1f} {:6.1f}  {}'
        .format(os.path.split(fname)[1], shallowest, deepest, '*' if deepest > 62 else '')
    )

In [None]:
# Tabulate the minimum and maximum bottom-line depth of every transect in the
# 'test' partition. Rows whose maximum depth exceeds 62 are flagged with '*'.
print('{:<40s}{:>6s} {:>6s}'.format('Filename', 'Min D', 'Max D'))

for fname in sorted(get_partition_list('test', full_path=True)):
    fname = fname + '_bottom.evl'
    try:
        depths = evl_loader(fname)[1]
    except Exception:
        # Best effort: skip transects whose bottom line cannot be loaded.
        # `Exception` rather than a bare `except` so that KeyboardInterrupt
        # still stops the loop.
        continue
    if len(depths) == 0:
        # Nothing to summarise; min()/max() would raise on an empty sequence.
        continue
    shallowest, deepest = min(depths), max(depths)
    print(
        '{:<40s}{:6.1f} {:6.1f}  {}'
        .format(os.path.split(fname)[1], shallowest, deepest, '*' if deepest > 62 else '')
    )

In [None]:
# Tabulate the minimum and maximum bottom-line depth of every transect in the
# 'leaveout' partition. Rows whose maximum depth exceeds 62 are flagged with '*'.
print('{:<40s}{:>6s} {:>6s}'.format('Filename', 'Min D', 'Max D'))

for fname in sorted(get_partition_list('leaveout', full_path=True)):
    fname = fname + '_bottom.evl'
    try:
        depths = evl_loader(fname)[1]
    except Exception:
        # Best effort: skip transects whose bottom line cannot be loaded.
        # `Exception` rather than a bare `except` so that KeyboardInterrupt
        # still stops the loop.
        continue
    if len(depths) == 0:
        # Nothing to summarise; min()/max() would raise on an empty sequence.
        continue
    shallowest, deepest = min(depths), max(depths)
    print(
        '{:<40s}{:6.1f} {:6.1f}  {}'
        .format(os.path.split(fname)[1], shallowest, deepest, '*' if deepest > 62 else '')
    )

In [None]:
# One weird-looking transect (from Survey17).
# NOTE(review): what makes it unusual is not recorded here — TODO describe.
plot_transect_data('Survey17/Survey17_GR4_S3A_E')

In [None]:
# Plot transect GR1_S3W_F of Survey17.
plot_transect_data('Survey17/Survey17_GR1_S3W_F')

In [None]:
# Plot transect GR2_S1A_survey3 of Survey03.
plot_transect_data('Survey03/Survey03_GR2_S1A_survey3')

In [None]:
# NOTE(review): bare tuple left as a scratch note; it only echoes the three
# numbers. 62 matches the max-depth flag threshold used in the partition
# tables; the meaning of 40 and 96 is not stated — TODO document or remove.
40, 62, 96

In [None]:
# NOTE(review): repeats the Survey17_GR1_S3W_F plot from an earlier cell.
plot_transect_data('Survey17/Survey17_GR1_S3W_F')

In [None]:
# Transect used by the inspection and sharding cells that follow.
transect_pth = 'Survey17/Survey17_GR1_S3W_F'

In [None]:
# Load the selected transect into notebook-level variables for inspection.
# d_top and d_bot are the line depths interpolated onto `timestamps`.
timestamps, depths, signals, d_top, d_bot = load_transect_data(
    transect_pth, dataset='surveyExports', root_data_dir=ROOT_DATA_DIR
)

In [None]:
# Inspect the timestamps returned by load_transect_data.
timestamps

In [None]:
# Inspect the depths returned by load_transect_data.
depths

In [None]:
# Inspect the signal values returned by load_transect_data.
signals

In [None]:
# Inspect the top (turbulence) line depths, interpolated onto the timestamps.
d_top

In [None]:
# Inspect the bottom line depths, interpolated onto the timestamps.
d_bot

In [None]:
# Zoom in on the first 128 timestamps and the first 2000 depth samples.
# Depths are negated so larger depths are drawn lower in the figure.
plt.figure(figsize=(12, 12))
plt.pcolormesh(timestamps[:128], -depths[:2000], signals[:128, :2000].T)
plt.plot(timestamps[:128], -d_bot[:128], 'b')  # bottom line in blue
plt.plot(timestamps[:128], -d_top[:128], 'c')  # top line in cyan
plt.show()

In [None]:
# Downsample the same window 10x along depth: the 2000 depth samples are
# grouped into 200 non-overlapping runs of 10 and each run is averaged.
# mean(-1) already yields shape (128, 200), so no further reshape is needed.
dwn_sig = signals[:128, :2000].reshape(128, 200, 10).mean(-1)

plt.figure(figsize=(12, 12))
# Each averaged run is drawn at the depth of its first sample (every 10th depth).
plt.pcolormesh(timestamps[:128], -depths[:2000:10], dwn_sig.T)
plt.plot(timestamps[:128], -d_bot[:128], 'b')  # bottom line in blue
plt.plot(timestamps[:128], -d_top[:128], 'c')  # top line in cyan
plt.show()

In [None]:
def shard_transect(transect_pth, dataset='surveyExports', max_depth=100, shard_len=128, root_data_dir=ROOT_DATA_DIR):
    """
    Split a transect along the time axis and write it to disk as shards.

    The transect is loaded with ``load_transect_data``, depth samples greater
    than `max_depth` are dropped, and the data is cut into consecutive pieces
    of `shard_len` timestamps (the last piece may be shorter). Output goes to
    ``<root_data_dir>/<dataset>_sharded/<transect_pth>``:

    - ``shard_size.txt`` holds ``<number of timestamps>,<shard_len>``;
    - sub-directory ``<i>`` holds ``depths.npy``, ``timestamps.npy``,
      ``Sv.npy``, ``top.npy`` and ``bottom.npy`` for the i-th shard. The same
      depth vector is written to every shard.

    Parameters
    ----------
    transect_pth : str
        Transect path relative to the dataset directory.
    dataset : str, optional
        Name of the dataset directory inside `root_data_dir`.
    max_depth : optional
        Depth samples with ``depths <= max_depth`` are kept. Default is 100.
    shard_len : int, optional
        Number of timestamps per shard. Default is 128.
    root_data_dir : str, optional
        Directory containing the datasets.

    Raises
    ------
    ValueError
        If `shard_len` is smaller than 1.
    """
    if shard_len < 1:
        raise ValueError('shard_len must be at least 1, got {}'.format(shard_len))

    root_shard_dir = os.path.join(root_data_dir, dataset + '_sharded')
    timestamps, depths, signals, d_top, d_bot = load_transect_data(transect_pth, dataset, root_data_dir)

    # Honour the parameters: these were previously hard-coded to 100 and 128,
    # so a non-default shard_len was recorded in shard_size.txt while the data
    # was still cut every 128 timestamps.
    depth_mask = depths <= max_depth
    shard_depths = depths[depth_mask]
    indices = range(shard_len, signals.shape[0], shard_len)

    dirname = os.path.join(root_shard_dir, transect_pth)
    os.makedirs(dirname, exist_ok=True)
    with open(os.path.join(dirname, 'shard_size.txt'), 'w') as hf:
        print('{},{}'.format(len(timestamps), shard_len), file=hf)

    # Signal and line depths are stored in single precision; timestamps keep
    # their original dtype.
    shards = zip(
        np.split(timestamps, indices),
        np.split(np.single(signals[:, depth_mask]), indices),
        np.split(np.single(d_top), indices),
        np.split(np.single(d_bot), indices),
    )
    for i, (ts_i, sig_i, top_i, bot_i) in enumerate(shards):
        shard_dir = os.path.join(dirname, str(i))
        os.makedirs(shard_dir, exist_ok=True)
        for obj, fname in (
                (shard_depths, 'depths'), (ts_i, 'timestamps'),
                (sig_i, 'Sv'), (top_i, 'top'), (bot_i, 'bottom')):
            # NOTE(review): ndarray.dump writes a pickle, not the .npy format,
            # despite the extension; readers must pass allow_pickle=True.
            # Kept so the on-disk format is unchanged.
            obj.dump(os.path.join(shard_dir, fname + '.npy'))

In [None]:
def load_transect_from_shards(
        transect_pth, i1=0, i2=None, dataset='surveyExports', root_data_dir=ROOT_DATA_DIR
    ):
    """
    Load a range of timestamps of a transect from its on-disk shards.

    Reads ``shard_size.txt`` in
    ``<root_data_dir>/<dataset>_sharded/<transect_pth>`` to find the total
    number of timestamps and the shard length, then loads only the shard
    directories that overlap the half-open index range ``[i1, i2)``.

    Parameters
    ----------
    transect_pth : str
        Transect path relative to the dataset directory.
    i1 : int, optional
        Index of the first timestamp to return. Default is 0. Values outside
        ``[0, n_timestamps]`` are clipped to that range.
    i2 : int, optional
        Index one past the last timestamp to return. Default is ``None``,
        meaning up to the end of the transect. Values outside
        ``[i1, n_timestamps]`` are clipped to that range.
    dataset : str, optional
        Name of the dataset directory inside `root_data_dir`; the shards are
        read from ``<dataset>_sharded``.
    root_data_dir : str, optional
        Directory containing the datasets.

    Returns
    -------
    timestamps, depths, signals, d_top, d_bot
        ``depths`` is read from the first loaded shard; the other four are the
        concatenated shard contents of ``timestamps.npy``, ``Sv.npy``,
        ``top.npy`` and ``bottom.npy``, cut to ``[i1, i2)``.
    """
    root_shard_dir = os.path.join(root_data_dir, dataset + '_sharded')
    dirname = os.path.join(root_shard_dir, transect_pth)
    with open(os.path.join(dirname, 'shard_size.txt'), 'r') as f:
        n_timestamps, shard_len = f.readline().strip().split(',')
    n_timestamps = int(n_timestamps)
    shard_len = int(shard_len)

    if i2 is None:
        i2 = n_timestamps
    # Clip the request to the transect. A negative i1 used to pick shard 0 but
    # then act as a negative slice start, returning the wrong samples.
    i1 = min(max(i1, 0), n_timestamps)
    i2 = min(max(i2, i1), n_timestamps)

    # Shard j holds indices [j * shard_len, (j + 1) * shard_len). The last
    # requested index is i2 - 1, so a request ending exactly on a shard
    # boundary no longer reads the following shard.
    last_shard = max(0, (n_timestamps - 1) // shard_len)
    j1 = min(i1 // shard_len, last_shard)
    j2 = min(max(j1, (i2 - 1) // shard_len), last_shard)
    offset = j1 * shard_len  # index of the first sample in the first loaded shard

    # NOTE(review): allow_pickle=True is needed because the shards are pickles;
    # only load shard directories from a trusted source.
    depths = np.load(os.path.join(dirname, str(j1), 'depths.npy'), allow_pickle=True)

    def load_shard(fname):
        # Concatenate the named array over the selected shards, then trim to
        # the requested range.
        return np.concatenate([
            np.load(os.path.join(dirname, str(j), fname + '.npy'), allow_pickle=True)
            for j in range(j1, j2 + 1)
        ])[(i1 - offset):(i2 - offset)]

    timestamps = load_shard('timestamps')
    signals = load_shard('Sv')
    d_top = load_shard('top')
    d_bot = load_shard('bottom')

    return timestamps, depths, signals, d_top, d_bot

In [None]:
# Write the selected transect to disk as shards (default max_depth and shard_len).
shard_transect(transect_pth)

In [None]:
# Sanity check: reload the whole transect from its shards and print the shape
# of each returned array (timestamps, depths, signals, d_top, d_bot).
o = load_transect_from_shards(transect_pth)
for io in o:
    print(io.shape)

In [None]:
# Sanity check: reload only timestamp indices 200 to 500, a range that spans
# several shards at the default shard length of 128, and print the shapes.
o = load_transect_from_shards(transect_pth, 200, 500)
for io in o:
    print(io.shape)

In [None]:
# Reload timestamp indices 100 to 800 from the shards and plot them.
# NOTE: this rebinds timestamps, depths, signals, d_top and d_bot, replacing
# the full transect loaded from the CSV export in an earlier cell.
timestamps, depths, signals, d_top, d_bot = load_transect_from_shards(transect_pth, 100, 800)

plt.figure(figsize=(12, 12))
plt.pcolormesh(timestamps, -depths, signals.T)
plt.plot(timestamps, -d_bot, 'b')  # bottom line in blue
plt.plot(timestamps, -d_top, 'c')  # top line in cyan
plt.show()