In [2]:
# Mount Google Drive at /content/drive. Later cells copy their zip archives
# to /content/drive/MyDrive/g2net/ underneath this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [1]:
# Upload the Kaggle API token (kaggle.json) from the local machine; the next
# cell copies it into ~/.kaggle/.
from google.colab import files

# files.upload() returns a dict mapping each file name to its raw bytes. Bind
# the result instead of leaving the call as the cell's last expression,
# otherwise the notebook echoes the contents of kaggle.json (the API key)
# into the saved cell output.
uploaded = files.upload()

In [None]:
!pip install kaggle==1.5.12
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 /root/.kaggle/kaggle.json

In [None]:
!kaggle competitions download -c g2net-gravitational-wave-detection -p /content/
!unzip -qq /content/g2net-gravitational-wave-detection.zip -d /content/g2net/

In [None]:
!pip install --upgrade git+git://github.com/Sunnesoft/g2net-challenge.git

In [None]:
import os
from tqdm import tqdm
from gwnet import GwTimeseries, GwSpectrogram

# Pipeline locations on the Colab VM and the sampling rate handed to
# GwTimeseries.load(). TRAIN_CQT_PATH is not used in this cell; the
# spectrogram cell further down writes to it.
TRAIN_PATH = '/content/g2net/train/'
TRAIN_FILTERED_PATH = '/content/filtered/train/'
TRAIN_CQT_PATH = '/content/cqt/train/'
SAMPLE_RATE = 2048  # presumably Hz — confirm against the gwnet loader

# Progress bookkeeping: number of files written by this run, the threshold
# the counter must exceed before the next progress message, and the amount
# that threshold advances after each message.
createdf_count = 0
createdf_count_view = 0
createdf_count_step = 1000

# Walk the raw training tree and mirror its directory layout under
# TRAIN_FILTERED_PATH, writing one processed .npy file per input file.
# The walk's file list is named `fnames` so that it does not overwrite the
# `google.colab.files` module imported by the upload cell.
for root, dirs, fnames in tqdm(os.walk(TRAIN_PATH)):
    # Path of the current directory relative to TRAIN_PATH ('' for the top).
    rel_path = root.replace(TRAIN_PATH, '')
    out_path = os.path.join(TRAIN_FILTERED_PATH, rel_path)
    os.makedirs(out_path, exist_ok=True)

    for fname in fnames:
        in_fn = os.path.join(root, fname)
        # Output keeps the part of the name before the first dot.
        out_fn = os.path.join(out_path, fname.split('.')[0] + '.npy')

        # Skip files that already have an output, so an interrupted run can
        # be resumed by re-running the cell.
        if os.path.exists(out_fn):
            continue

        tss = GwTimeseries.load(in_fn, SAMPLE_RATE)

        # For every series in the file: estimate its PSD, apply a Tukey
        # window, whiten with that PSD, then filter with frange=(50, 250).
        # NOTE(review): the return values of apply_window/whiten/filter are
        # discarded and `tss` is what gets saved, so these gwnet methods
        # appear to modify `ts` in place — confirm.
        for ts in tss:
            f, Pxx = ts.psd(fftlength=ts.duration, nperseg=2048, overlap=0.75, window=('tukey', 0.5))
            ts.apply_window(window=('tukey', 0.1))
            ts.whiten(psd_val=(f, Pxx))
            ts.filter(frange=(50, 250),
                      psd_val=(f, Pxx),
                      outlier_threshold=3.0)

        GwTimeseries.save(out_fn, tss)
        createdf_count += 1

    # Checked once per directory, so the reported count is not necessarily a
    # multiple of createdf_count_step.
    if createdf_count > createdf_count_view:
        print(f'{createdf_count} files processed.')
        createdf_count_view += createdf_count_step

In [None]:
!zip -rq /content/filtered_train.zip /content/filtered/train/
!cp /content/filtered_train.zip /content/drive/MyDrive/g2net/filtered_train.zip

In [None]:
# Progress bookkeeping, reset for this stage: number of images written by
# this run, the threshold the counter must exceed before the next progress
# message, and the amount that threshold advances after each message.
createdf_count = 0
createdf_count_view = 0
createdf_count_step = 1000

# Walk the filtered tree and mirror its directory layout under
# TRAIN_CQT_PATH, writing one .png per filtered input file.
# The walk's file list is named `fnames` so that it does not overwrite the
# `google.colab.files` module imported by the upload cell.
for root, dirs, fnames in tqdm(os.walk(TRAIN_FILTERED_PATH)):
    # Path of the current directory relative to TRAIN_FILTERED_PATH.
    rel_path = root.replace(TRAIN_FILTERED_PATH, '')
    out_path = os.path.join(TRAIN_CQT_PATH, rel_path)
    os.makedirs(out_path, exist_ok=True)

    for fname in fnames:
        in_fn = os.path.join(root, fname)
        # Output keeps the part of the name before the first dot.
        out_fn = os.path.join(out_path, fname.split('.')[0] + '.png')

        # Skip files that already have an output, so an interrupted run can
        # be resumed by re-running the cell.
        if os.path.exists(out_fn):
            continue

        tss = GwTimeseries.load(in_fn, SAMPLE_RATE)

        # One normalized spectrogram per series in the file; all of them are
        # passed together to a single GwSpectrogram.save() call.
        sps = []
        for ts in tss:
            sp = GwSpectrogram(ts)
            # Output grid: time from 0 to ts.duration with 1e-2 as the third
            # value, frequency (50, 250, 5).
            # NOTE(review): the third tuple entries look like step sizes —
            # confirm against the gwnet API.
            sp.cqt(out_time_range=(0, ts.duration, 1e-2),
                    out_freq_range=(50, 250, 5),
                    qrange=(1, 64),
                    qmismatch=0.05)
            sp.normalize()
            sps.append(sp)

        GwSpectrogram.save(out_fn, sps, size=(512, 512))
        createdf_count += 1

    # Checked once per directory, so the reported count is not necessarily a
    # multiple of createdf_count_step.
    if createdf_count > createdf_count_view:
        print(f'{createdf_count} files processed.')
        createdf_count_view += createdf_count_step

In [None]:
!zip -rq /content/cqt_train.zip /content/cqt/train/
!cp /content/cqt_train.zip /content/drive/MyDrive/g2net/cqt_train.zip