In [1]:
!pip install -q nnAudio -qq
!pip install -q -U efficientnet -qq

import os
import gc
import pickle
import numpy as np
import pandas as pd
import time
import random
from random import shuffle
import math
from matplotlib import pyplot as plt
from matplotlib.pyplot import figure
from matplotlib.gridspec import GridSpec
import seaborn as sns

import tensorflow as tf

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.preprocessing import sequence
from keras.utils import Sequence

from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Bidirectional
from keras.layers import BatchNormalization
from keras.layers import InputLayer
from keras.layers import Conv2D
from keras.layers import GlobalAveragePooling2D

from keras.layers.convolutional import Conv1D, Conv2D
from keras.layers.convolutional import MaxPooling1D

https://www.kaggle.com/mrigendraagrawal/tf-g2net-eda-and-starter

In [2]:
training = pd.read_csv('/kaggle/input/g2net-gravitational-wave-detection/training_labels.csv')
sample_submission = pd.read_csv('/kaggle/input/g2net-gravitational-wave-detection/sample_submission.csv')

In [3]:
def idx2path(idx: str, is_train: bool = True) -> str:
    if is_train:
        parent = '/kaggle/input/g2net-gravitational-wave-detection/train/'
    else:
        parent = '/kaggle/input/g2net-gravitational-wave-detection/test/'
    return os.path.join(parent, idx[0], idx[1], idx[2], idx + '.npy')

In [4]:
import torch
from nnAudio.Spectrogram import CQT1992v2

Q_TRANSFORM = CQT1992v2(sr = 2048, fmin = 20, fmax = 1024, hop_length = 64)

def transform(idx: str, is_train: bool = True) -> list:
    spectrograms = []
    waves = np.load(idx2path(idx, is_train))
    for i in range(3):
        wave = waves[i]
        wave = wave / np.max(wave)
        wave = torch.from_numpy(wave).float()
        spectrogram = Q_TRANSFORM(wave)
        spectrogram = np.array(spectrogram)
        spectrogram = np.squeeze(spectrogram)
        spectrogram = np.swapaxes(spectrogram,0,1)
        spectrograms.append(spectrogram)
    return spectrograms

CQT kernels created, time used = 0.0348 seconds




In [5]:
class Dataset(Sequence):
    def __init__(self, x: str, y = None, batch_size = 256, shuffle = True):
        '''x is array of id and y is array of the associated class.'''
        self.x = x
        self.y = y
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.train = True if y is not None else False
        
    def __len__(self):
        return math.ceil(len(self.x)/self.batch_size)
    
    def __getitem__(self, idx):
        batch_x = self.x[idx * self.batch_size : (idx + 1) * self.batch_size]
        if self.train:
            batch_y = self.y[idx * self.batch_size : (idx + 1) * self.batch_size]
        list_x = np.array([transform(_x, self.train) for _x in batch_x])
        batch_X = np.stack(list_x)
        
        # Input for CNN
        batch_X = np.transpose(batch_X,(0,2,3,1))
        
        if self.train:
            return batch_X, batch_y
        else:
            return batch_X
    
    def on_epoch_end(self):
        '''Method called at the end of every epoch.'''
        if self.shuffle and self.train:
            x_y = list(zip(self.x, self.y))
            shuffle(x_y)
            self.x, self.y = list(zip(*x_y))

In [6]:
x = training['id'].values
y = training['target'].values
x_test = sample_submission['id'].values

In [7]:
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.1, random_state=42, stratify=y)

In [8]:
train_dataset = Dataset(x_train, y_train)
valid_dataset = Dataset(x_val, y_val)
test_dataset = Dataset(x_test)

In [9]:
train_dataset[0][0].shape

(256, 65, 69, 3)

In [10]:
import efficientnet.keras as efn

# EfficientNetB0
# weights='imagenet'

In [11]:
model=Sequential()

model.add(InputLayer(input_shape=(65,69,3)))
model.add(efn.EfficientNetB0(include_top=False, input_shape=(), weights='imagenet'))
model.add(GlobalAveragePooling2D())
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', 'AUC'])
print(model.summary())

Downloading data from https://github.com/Callidior/keras-applications/releases/download/efficientnet/efficientnet-b0_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnet-b0 (Functional) (None, None, None, 1280)  4049564   
_________________________________________________________________
global_average_pooling2d (Gl (None, 1280)              0         
_________________________________________________________________
dense (Dense)                (None, 32)                40992     
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
Total params: 4,090,589
Trainable params: 4,048,573
Non-trainable params: 42,016
_________________________________________________________________
None


In [12]:
chkpt = tf.keras.callbacks.ModelCheckpoint("efficientnet_weights.h5", save_best_only=True, save_weights_only=True,)

start_time = time.time()
train_history = model.fit(train_dataset,
                          use_multiprocessing=True, 
                          workers=4, 
                          epochs=3, 
                          validation_data=valid_dataset,
                          callbacks=[chkpt],)
end_time = time.time()

print('Model training took {} seconds'.format(end_time - start_time))

Epoch 1/3
Epoch 2/3
Epoch 3/3
Model training took 9851.65667772293 seconds


In [13]:
model.load_weights('efficientnet_weights.h5')

predictions = model.predict(test_dataset, use_multiprocessing=True, workers=4, verbose=1)
predictions = predictions.reshape(-1)
submission = pd.DataFrame({'id':sample_submission['id'], 'target':predictions})

submission.to_csv('efficientnet_submission.csv', index = False)

