In [1]:
import os
import timeit
import ipywidgets

# Path of the CSV file to be read.
filename = '../../input/train.csv'
# Path of the text file holding one integer event point per line.
event_points_file= '../../src/event_points.txt'

In [2]:
# Load the event points (one integer per line) and keep them in ascending order.
with open(event_points_file, 'r') as points_file:
    event_points = sorted(int(raw_line.strip()) for raw_line in points_file)

print(event_points)

[5656575, 50085879, 104677357, 138772454, 187641821, 218652631, 245829586, 307838918, 338276288, 375377849, 419368881, 461811624, 495800226, 528777116, 585568145, 621985674, 629145481]


In [3]:
def log_progress(sequence, every=None, size=None, name='Items'):
    """Yield the items of ``sequence`` while updating a notebook progress widget.

    This is a generator: nothing is displayed until iteration starts.

    Parameters
    ----------
    sequence : iterable
        Items to pass through unchanged.
    every : int, optional
        Refresh the widget on the first item and then on every ``every``-th
        item. When omitted and the total is known, it defaults to 1 for totals
        of at most 200 and to ``int(size / 200)`` otherwise. It is mandatory
        when the total is unknown.
    size : int, optional
        Total number of items. When omitted, ``len(sequence)`` is tried; if
        that raises ``TypeError`` the total is treated as unknown.
    name : str
        Label prefix shown next to the counter.

    Yields
    ------
    Each element of ``sequence`` in order.

    Raises
    ------
    AssertionError
        If the total is unknown and ``every`` was not given.
    """
    from ipywidgets import IntProgress, HTML, VBox
    from IPython.display import display

    # Determine whether a total is available for the bar.
    unknown_total = False
    if size is None:
        try:
            size = len(sequence)
        except TypeError:
            unknown_total = True

    if size is None:
        assert every is not None, 'sequence is iterator, set every'
    elif every is None:
        every = 1 if size <= 200 else int(size / 200)     # every 0.5%

    if unknown_total:
        # No total: show a full bar in the "info" style instead of a fraction.
        progress = IntProgress(min=0, max=1, value=1)
        progress.bar_style = 'info'
    else:
        progress = IntProgress(min=0, max=size, value=0)
    label = HTML()
    display(VBox(children=[label, progress]))

    count = 0
    try:
        for count, item in enumerate(sequence, 1):
            refresh = count == 1 or count % every == 0
            if refresh and unknown_total:
                label.value = '{name}: {index} / ?'.format(name=name, index=count)
            elif refresh:
                progress.value = count
                label.value = u'{name}: {index} / {size}'.format(
                    name=name, index=count, size=size
                )
            yield item
    except BaseException:
        # Anything that interrupts iteration turns the bar red, then propagates.
        progress.bar_style = 'danger'
        raise
    else:
        progress.bar_style = 'success'
        progress.value = count
        label.value = "{name}: {index}".format(name=name, index=str(count or '?'))

In [4]:
def export(input_stream, filename_out, linecount, header=None, show_progress=False):
    """Copy the next ``linecount`` lines of ``input_stream`` into a new file.

    Parameters
    ----------
    input_stream : iterator of str
        Source of lines (for example an open text file). It is advanced by
        ``linecount`` lines and left positioned after the last line copied.
    filename_out : str
        Path of the file to create; an existing file is overwritten.
    linecount : int
        Number of lines to copy.
    header : str, optional
        Text written verbatim at the start of the output file (it must carry
        its own trailing newline). Skipped when falsy.
    show_progress : bool
        When true, wrap the loop in ``log_progress`` to display a progress bar.

    Raises
    ------
    StopIteration
        If ``input_stream`` is exhausted before ``linecount`` lines were read.
    """
    with open(filename_out, 'w') as fout:
        if header:
            fout.write(header)
        all_items = range(linecount)
        if show_progress:
            all_items = log_progress(all_items, every=65432, size=linecount)
        for _ in all_items:
            # Read from the stream that was passed in rather than from a
            # module-level file handle, so the function works with any caller.
            fout.write(next(input_stream))


# Split the input CSV into one file per entry of event_points; every output
# file begins with a copy of the input's header line.
with open(filename, 'r') as fin:
    # Consume the header once so it can be handed to export() for each segment.
    header = fin.readline()
    # Starts at 1 rather than 0 — presumably because the header occupies the
    # first line and event_points are 1-based line numbers; TODO confirm.
    previous_event_time = 1
    for idx, event_time in enumerate(event_points):
        # Output path: input path without its extension, followed by a
        # zero-padded two-digit segment index and ".csv".
        filename_out = f'{os.path.splitext(filename)[0]}_{idx:02d}.csv'
        # Number of lines to copy for this segment.
        segment_length = event_time - previous_event_time
        export(fin, filename_out, segment_length, header=header, show_progress=True)
        previous_event_time = event_time

VBox(children=(HTML(value=''), IntProgress(value=0, max=5656574)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=44429304)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=54591478)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=34095097)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=48869367)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=31010810)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=27176955)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=62009332)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=30437370)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=37101561)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=43991032)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=42442743)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=33988602)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=32976890)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=56791029)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=36417529)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=7159807)))

In [15]:
import subprocess

def head_or_tail(is_head, f, n=1, offset=0, **_):
    """Run the ``head`` or ``tail`` shell utility and return its output lines.

    Parameters
    ----------
    is_head : bool
        Truthy selects ``head``; falsy selects ``tail``.
    f : str
        File argument, interpolated verbatim into the shell command. Because
        the command goes through the shell, a glob pattern is expanded there.
    n : int
        Base line count.
    offset : int
        Added to ``n``; the sum is what is passed to ``-n``.
    **_
        Extra keyword arguments are accepted and ignored.

    Returns
    -------
    list of str
        Every line the command wrote to stdout, with surrounding whitespace
        stripped.
    """
    cmd = 'head' if is_head else 'tail'
    command = f'{cmd} -n {n + offset} {f}'
    # NOTE(security): ``f`` is passed to the shell unescaped so that glob
    # patterns keep working; never call this with untrusted input.
    # The context manager closes the pipe and waits for the child on exit,
    # so no file descriptor or zombie process is left behind.
    with subprocess.Popen(command, stdout=subprocess.PIPE, shell=True,
                          universal_newlines=True) as proc:
        return [line.strip() for line in proc.stdout]


def tail(**kwargs):
    """Return the stripped output lines of ``tail`` by delegating to ``head_or_tail``.

    All keyword arguments are forwarded unchanged.
    """
    trailing_lines = head_or_tail(False, **kwargs)
    return trailing_lines


def head(**kwargs):
    """Return the stripped output lines of ``head`` by delegating to ``head_or_tail``.

    All keyword arguments are forwarded unchanged.
    """
    leading_lines = head_or_tail(True, **kwargs)
    return leading_lines

# Pattern matching every path that starts with the input file's stem plus "_".
files = f'{os.path.splitext(filename)[0]}_*'
# Show the last line of each matched file.
res = tail(f=files)
print(*res, sep='\n')

==> ../../input/train_00.csv <==
4,0.00079547982295

==> ../../input/train_01.csv <==
8,0.00069548217471

==> ../../input/train_02.csv <==
6,0.00079548506392

==> ../../input/train_03.csv <==
3,0.0010954868681

==> ../../input/train_04.csv <==
7,0.0005954894541

==> ../../input/train_05.csv <==
5,0.0010954910954

==> ../../input/train_06.csv <==
1,0.00049549253299

==> ../../input/train_07.csv <==
7,0.00069549581485

==> ../../input/train_08.csv <==
4,0.00019549742592

==> ../../input/train_09.csv <==
0,0.00049549938876

==> ../../input/train_10.csv <==
9,0.00019550171692

==> ../../input/train_11.csv <==
5,9.5503963166e-05

==> ../../input/train_12.csv <==
1,9.5505761692e-05

==> ../../input/train_13.csv <==
5,0.00089550750727

==> ../../input/train_14.csv <==
6,0.00059551051299

==> ../../input/train_15.csv <==
10,0.0005955124393

==> ../../input/train_16.csv <==
5,9.7597955148


In [17]:
# Show the first two lines of each matched file.
res = head(f=files, n=2)
print(*res, sep='\n')

==> ../../input/train_00.csv <==
acoustic_data,time_to_failure
12,1.4690999832

==> ../../input/train_01.csv <==
acoustic_data,time_to_failure
4,11.540799987

==> ../../input/train_02.csv <==
acoustic_data,time_to_failure
1,14.18059999

==> ../../input/train_03.csv <==
acoustic_data,time_to_failure
4,8.8566999914

==> ../../input/train_04.csv <==
acoustic_data,time_to_failure
-4,12.693999994

==> ../../input/train_05.csv <==
acoustic_data,time_to_failure
2,8.0554999956

==> ../../input/train_06.csv <==
acoustic_data,time_to_failure
4,7.058999997

==> ../../input/train_07.csv <==
acoustic_data,time_to_failure
2,16.1074

==> ../../input/train_08.csv <==
acoustic_data,time_to_failure
2,7.9056000019

==> ../../input/train_09.csv <==
acoustic_data,time_to_failure
3,9.6371000039

==> ../../input/train_10.csv <==
acoustic_data,time_to_failure
0,11.426400006

==> ../../input/train_11.csv <==
acoustic_data,time_to_failure
-3,11.024200008

==> ../../input/train_12.csv <==
acoustic_data,time_to_f