In [None]:
!pip install -U efficientnet
!pip install -q nnAudio

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import plotly.express as px
import tensorflow as tf
import keras
import keras.layers as L
import math
from keras.utils import Sequence
from keras.preprocessing import image
import efficientnet.keras as efn
from nnAudio.Spectrogram import CQT1992v2
import torch
from random import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

In [None]:
# Competition metadata: the labelled training ids and the submission template.
train_labels = pd.read_csv(
    '../input/g2net-gravitational-wave-detection/training_labels.csv'
)
sample_submission = pd.read_csv(
    '../input/g2net-gravitational-wave-detection/sample_submission.csv'
)
# Preview the first rows of the label table.
train_labels.head(n=5)

In [None]:
# Class balance of the `target` column.
sns.countplot(
    data=train_labels,
    x='target',
    palette='Set2',
)

In [None]:
# Root directory of the competition data; both splits live underneath it.
_G2NET_ROOT = '../input/g2net-gravitational-wave-detection'


def _npy_path(split, idx) -> str:
    """Build the ``.npy`` path of sample ``idx`` inside the ``split`` tree.

    Files are nested three levels deep, one directory per leading character
    of the id: ``<root>/<split>/<c0>/<c1>/<c2>/<idx>.npy``.
    """
    return f'{_G2NET_ROOT}/{split}/{idx[0]}/{idx[1]}/{idx[2]}/{idx}.npy'


def getTrainPathById(idx) -> str:
    """Return the path of the training sample with id ``idx``.

    Args:
        idx: Sample id string; its first three characters select the
            nested sub-directories.

    Returns:
        Relative path to the sample's ``.npy`` file in the ``train`` tree.

    Raises:
        IndexError: If ``idx`` has fewer than three characters.
    """
    return _npy_path('train', idx)


def getTestPathById(idx) -> str:
    """Return the path of the test sample with id ``idx``.

    Args:
        idx: Sample id string; its first three characters select the
            nested sub-directories.

    Returns:
        Relative path to the sample's ``.npy`` file in the ``test`` tree.

    Raises:
        IndexError: If ``idx`` has fewer than three characters.
    """
    return _npy_path('test', idx)

In [None]:
def increase_dimension(idx, is_train, transform=CQT1992v2(sr=2048, fmin=20, fmax=1024, hop_length=64)):
    """Load one sample and turn it into a channel-last CQT spectrogram.

    The recordings stored in the file are concatenated into one long signal,
    divided by its maximum value, passed through ``transform`` and finally
    transposed so that the leading axis becomes the last one (the network
    downstream expects image-like, 3-dimensional input).

    Args:
        idx: Sample id, resolved with ``getTrainPathById`` or
            ``getTestPathById``.
        is_train: Read from the train tree when truthy, else the test tree.
        transform: Callable applied to the float tensor. The default
            ``CQT1992v2`` instance is created once, when the function is
            defined, and reused on every call.

    Returns:
        ``numpy.ndarray`` with axes reordered by ``(1, 2, 0)``.
        # presumably (freq, time, 1) for a (3, N) input file -- TODO confirm
    """
    path = getTrainPathById(idx) if is_train else getTestPathById(idx)
    # Lay the stored rows side by side into a single 1-D signal.
    signal = np.hstack(np.load(path))
    # Scale by the (signed) maximum of the concatenated signal.
    signal = signal / np.max(signal)
    tensor = torch.from_numpy(signal).float()
    spectrogram = np.array(transform(tensor))
    # Move the leading axis to the end -> channel-last layout.
    return np.transpose(spectrogram, (1, 2, 0))

In [None]:
# Visual sanity check: raw rows of one training file, then its spectrogram.
example = np.load('../input/g2net-gravitational-wave-detection/train/0/0/0/00000e74ad.npy')
fig, axes = plt.subplots(3, 1)
# One subplot per row of the file, each in its own colour.
for row, colour in enumerate(('red', 'green', 'blue')):
    axes[row].plot(example[row], color=colour)
fig.suptitle('Target 1', fontsize=16)
plt.show()

# Spectrogram of the first id in the label table.
first_id = train_labels['id'][0]
plt.imshow(increase_dimension(first_id, is_train=True))
plt.show()

In [None]:
class Dataset(Sequence):
    """Keras ``Sequence`` that serves batches of spectrograms built on the fly.

    Each batch is produced by calling ``increase_dimension`` on every id in
    the batch slice. When labels are supplied the dataset is treated as
    training/validation data (files are read from the train tree and
    ``(X, y)`` tuples are returned); without labels it reads from the test
    tree and returns ``X`` only.

    Args:
        idx: Sequence of sample ids.
        y: Optional sequence of labels aligned with ``idx``.
        batch_size: Number of samples per batch.
        shuffle: Reshuffle ids/labels at the end of every epoch (only has an
            effect when labels are present).

    Raises:
        ValueError: If ``y`` is given and its length differs from ``idx``.
    """

    def __init__(self, idx, y=None, batch_size=256, shuffle=True):
        super().__init__()
        # Keep ids and labels as ndarrays so that slicing and reshuffling
        # always yield ndarrays (never tuples) for Keras to consume.
        self.idx = np.asarray(idx)
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.is_train = y is not None
        self.y = None if y is None else np.asarray(y)
        if self.y is not None and len(self.y) != len(self.idx):
            raise ValueError(
                'idx and y must have the same length, got '
                f'{len(self.idx)} and {len(self.y)}'
            )

    def __len__(self):
        """Number of batches per epoch (the last batch may be smaller)."""
        return math.ceil(len(self.idx) / self.batch_size)

    def __getitem__(self, ids):
        """Return batch number ``ids``: ``(X, y)`` with labels, else ``X``."""
        start = ids * self.batch_size
        stop = start + self.batch_size
        batch_X = np.stack(
            [increase_dimension(x, self.is_train) for x in self.idx[start:stop]]
        )
        if self.is_train:
            return batch_X, self.y[start:stop]
        return batch_X

    def on_epoch_end(self):
        """Reshuffle ids and labels together between epochs."""
        if self.shuffle and self.is_train:
            # One shared permutation keeps every id paired with its label and
            # preserves the ndarray type of both containers.
            order = np.random.permutation(len(self.idx))
            self.idx = self.idx[order]
            self.y = self.y[order]

In [None]:
# Plain NumPy views of the id / label columns used to build the datasets.
train_idx = train_labels['id'].to_numpy()
y = train_labels['target'].to_numpy()
test_idx = sample_submission['id'].to_numpy()

In [None]:
# Hold out 5% of the labelled ids for validation, keeping the class ratio.
x_train, x_valid, y_train, y_valid = train_test_split(
    train_idx,
    y,
    test_size=0.05,
    stratify=y,
    random_state=42,
)

In [None]:
# Labelled datasets read from the train tree; the unlabelled one from test.
train_dataset = Dataset(idx=x_train, y=y_train)
valid_dataset = Dataset(idx=x_valid, y=y_valid)
test_dataset = Dataset(idx=test_idx)

In [None]:
# Binary classifier: a small conv stem maps the single-channel spectrogram to
# three channels, followed by an ImageNet-pretrained EfficientNet-B2 backbone
# and a small dense head.
#
# The container is built from the same `keras` namespace as the layers, the
# backbone (`efficientnet.keras`), the optimizer and the metric, so that no
# part of the model mixes `tf.keras` and `keras` objects.
model = keras.Sequential(
    [
        L.InputLayer(input_shape=(69, 193, 1)),
        # 1 -> 3 channels so the pretrained backbone receives 3-channel input.
        L.Conv2D(3, 3, activation='relu', padding='same'),
        # NOTE(review): the empty `input_shape` presumably lets the backbone
        # fall back to a size-agnostic input -- confirm against the
        # efficientnet package.
        efn.EfficientNetB2(include_top=False, input_shape=(), weights='imagenet'),
        L.GlobalAveragePooling2D(),
        # Rate 0 makes this layer a no-op; kept as a tuning knob.
        L.Dropout(0.),
        L.Dense(32, activation='relu'),
        L.Dense(1, activation='sigmoid')
    ]
)

model.summary()
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0009),
              loss='binary_crossentropy', metrics=[keras.metrics.AUC()])

In [None]:
# Train for 3 epochs on the first GPU when one is visible; otherwise fall
# back to a single epoch on CPU.
gpus = tf.config.list_physical_devices('GPU')
print("GPUs available: ", gpus)
if not gpus:
    print("No GPUs found. Training on CPU.")
    model.fit(train_dataset, epochs=1, validation_data=valid_dataset)
else:
    print("Training on device: ", gpus[0])
    with tf.device("/GPU:0"):
        model.fit(train_dataset, epochs=3, validation_data=valid_dataset)

In [None]:
# Score the test set, flatten the predictions to one value per row and write
# them next to the ids from the submission template.
preds = model.predict(test_dataset).reshape(-1)
submission = pd.DataFrame(
    {
        'id': sample_submission['id'],
        'target': preds,
    }
)
submission.to_csv('submission.csv', index=False)
print("Successfully wrote submission.csv")