In [1]:
!pip install -q nnAudio -qq
import torch
from nnAudio.Spectrogram import CQT1992v2

In [2]:
!pip install -U efficientnet

Collecting efficientnet
  Downloading efficientnet-1.1.1-py3-none-any.whl (18 kB)
Collecting keras-applications<=1.0.8,>=1.0.7
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[K     |████████████████████████████████| 50 kB 725 kB/s 
Installing collected packages: keras-applications, efficientnet
Successfully installed efficientnet-1.1.1 keras-applications-1.0.8


In [3]:
import efficientnet.keras as efn

In [4]:
import os
import json
import random
import collections
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
import math
from random import shuffle

import keras
import tensorflow as tf
from keras.models import Sequential
from keras.utils import Sequence
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
from tensorflow.keras.optimizers import Adam


from tensorflow.keras import models, layers
from tensorflow.keras.layers import Dense, Dropout, Activation, Input, BatchNormalization, GlobalAveragePooling2D


from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, roc_curve, auc
from sklearn import model_selection as sk_model_selection

In [5]:
# Read the competition CSVs: the labelled training ids and the sample submission.
train = pd.read_csv('../input/g2net-gravitational-wave-detection/training_labels.csv')
# `test` and `datosValidacion` are two names for the SAME DataFrame object (no copy),
# so a column added through one name is visible through the other.
test = datosValidacion = pd.read_csv('../input/g2net-gravitational-wave-detection/sample_submission.csv')

# Preview the first three rows of each frame.
display(train.head(3), test.head(3))

Unnamed: 0,id,target
0,00000e74ad,1
1,00001f4945,0
2,0000661522,0


Unnamed: 0,id,target
0,00005bced6,0.5
1,0000806717,0.5
2,0000ef4fe1,0.5


In [6]:
def obtenerRutaDeImagenEntrenamiento(IDImagen):
    """Return the path of the training ``.npy`` file for ``IDImagen``.

    The first three elements of the id (its first three characters, for a
    string id) are used, one per level, as nested directory names under the
    ``train`` folder; the file itself is named ``<IDImagen>.npy``.
    """
    nivel1, nivel2, nivel3 = IDImagen[0], IDImagen[1], IDImagen[2]
    plantilla = "../input/g2net-gravitational-wave-detection/train/{}/{}/{}/{}.npy"
    return plantilla.format(nivel1, nivel2, nivel3, IDImagen)

def obtenerRutaDeImagenDePrueba(IDImagen):
    """Return the path of the test ``.npy`` file for ``IDImagen``.

    The first three elements of the id (its first three characters, for a
    string id) are used, one per level, as nested directory names under the
    ``test`` folder; the file itself is named ``<IDImagen>.npy``.
    """
    nivel1, nivel2, nivel3 = IDImagen[0], IDImagen[1], IDImagen[2]
    plantilla = "../input/g2net-gravitational-wave-detection/test/{}/{}/{}/{}.npy"
    return plantilla.format(nivel1, nivel2, nivel3, IDImagen)

# Derive, for every row, the on-disk location of its recording from the id.
train['file_path'] = train['id'].map(obtenerRutaDeImagenEntrenamiento)
test['file_path'] = test['id'].map(obtenerRutaDeImagenDePrueba)

# Preview the first three rows of each frame with the new column.
display(train.head(3), test.head(3))

Unnamed: 0,id,target,file_path
0,00000e74ad,1,../input/g2net-gravitational-wave-detection/tr...
1,00001f4945,0,../input/g2net-gravitational-wave-detection/tr...
2,0000661522,0,../input/g2net-gravitational-wave-detection/tr...


Unnamed: 0,id,target,file_path
0,00005bced6,0.5,../input/g2net-gravitational-wave-detection/te...
1,0000806717,0.5,../input/g2net-gravitational-wave-detection/te...
2,0000ef4fe1,0.5,../input/g2net-gravitational-wave-detection/te...


In [7]:
# Hold out 5% of the labelled rows for validation; the fixed seed keeps the split repeatable.
x_train, x_val = sk_model_selection.train_test_split(train, test_size=0.05, random_state=42)

# Report both split sizes, one per line.
print(len(x_train), len(x_val), sep="\n")

532000
28000


In [8]:
class Dataset(Sequence):
    """Keras ``Sequence`` that turns ``.npy`` recordings into batches of CQT images.

    Parameters
    ----------
    df : table exposing ``id``, ``file_path`` and ``target`` columns.
    esEntrenamiento : when true, ``__getitem__`` yields ``(X, y)``; otherwise only ``X``.
    tamanoLote : number of rows served per batch.
    shuffle : stored on the instance; no method of this class reads it.
    """

    def __init__(self,df,esEntrenamiento=True,tamanoLote=32,shuffle=True):
        # Batch configuration.
        self.esEntrenamiento = esEntrenamiento
        self.tamanoLote = tamanoLote
        self.shuffle = shuffle
        # Column values, kept as plain arrays so batches can be sliced by position.
        self.id = df["id"].values
        self.ruta = df["file_path"].values
        self.y = df["target"].values
        # One constant-Q transform object, shared by every sample of this dataset.
        self.transformadaDeOnda = CQT1992v2(sr=2048, fmin=20, fmax=1024, hop_length=64)

    def __len__(self):
        """Number of batches; the final batch may hold fewer than ``tamanoLote`` rows."""
        totalDeFilas = len(self.id)
        return math.ceil(totalDeFilas / self.tamanoLote)

    def aplicarTransformadaQ(self,pathx,transform):
        """Load the array stored at ``pathx`` and return its transformed image.

        The loaded array is joined with ``np.hstack``, divided by its maximum
        value, converted to a float32 tensor and passed through ``transform``.
        The result is converted to a NumPy array and its axes are reordered
        from ``(0, 1, 2)`` to ``(1, 2, 0)``.
        """
        senal = np.hstack(np.load(pathx))
        senal = senal / np.max(senal)
        tensorDeEntrada = torch.from_numpy(senal).float()
        espectrograma = np.array(transform(tensorDeEntrada))
        # The transpose uses three axes, so the transform output is expected to be 3-D.
        return np.transpose(espectrograma,(1,2,0))

    def __getitem__(self,ids):
        """Return batch number ``ids``: ``(X, y)`` when training, otherwise just ``X``."""
        inicio = ids * self.tamanoLote
        fin = inicio + self.tamanoLote

        if self.y is not None:
            loteY = self.y[inicio:fin]

        imagenes = [
            self.aplicarTransformadaQ(rutaDeArchivo, self.transformadaDeOnda)
            for rutaDeArchivo in self.ruta[inicio:fin]
        ]
        loteX = np.stack(np.array(imagenes))

        if not self.esEntrenamiento:
            return loteX
        return loteX, loteY

In [9]:
# Batch generators over the training and validation splits (default batch settings).
DatasetDeEntrenamiento = Dataset(df=x_train)
DatasetDeValidacion = Dataset(df=x_val)

CQT kernels created, time used = 0.0437 seconds
CQT kernels created, time used = 0.0172 seconds




In [10]:
def crearModelo(tasaAprendizaje=0.00005):
    """Build and compile the binary classifier used on the CQT images.

    Architecture, in order:
      1. an input of shape ``(69, 193, 1)``;
      2. a 3x3 ``Conv2D`` with 3 filters (ReLU, ``same`` padding), which turns
         the single input channel into the 3 channels fed to the backbone;
      3. an EfficientNetB7 backbone (ImageNet weights, no classification top,
         global average pooling);
      4. a single sigmoid unit.

    Parameters
    ----------
    tasaAprendizaje : float, optional
        Learning rate handed to the Adam optimizer. Defaults to ``0.00005``.

    Returns
    -------
    The compiled model (binary cross-entropy loss, ``"acc"`` metric).
    """
    inputs = layers.Input(shape=(69,193,1))
    # NOTE(review): an empty tuple is passed as `input_shape`; presumably the library
    # treats it as "unspecified" - confirm against the efficientnet package.
    capasEfficientnet = efn.EfficientNetB7(include_top=False,input_shape=(),weights='imagenet',pooling='avg')
    modelo = Sequential()

    modelo.add(inputs)
    modelo.add(keras.layers.Conv2D(3,3,activation='relu',padding='same'))
    modelo.add(capasEfficientnet)

    modelo.add(Dense(1, activation="sigmoid"))

    # `learning_rate` is the documented argument name; `lr` is a deprecated alias
    # that newer Keras optimizers reject.
    modelo.compile(optimizer = Adam(learning_rate = tasaAprendizaje),
                loss = "binary_crossentropy",
                metrics = ["acc"])
    return modelo

# Instantiate the network and print its layer-by-layer summary.
modelo = crearModelo()
modelo.summary()

Downloading data from https://github.com/Callidior/keras-applications/releases/download/efficientnet/efficientnet-b7_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 69, 193, 3)        30        
_________________________________________________________________
efficientnet-b7 (Functional) (None, 2560)              64097680  
_________________________________________________________________
dense (Dense)                (None, 1)                 2561      
Total params: 64,100,271
Trainable params: 63,789,551
Non-trainable params: 310,720
_________________________________________________________________


In [11]:
# Small generator over the first 1000 rows (by position) of the training split.
unaPrueba = Dataset(x_train.iloc[:1000])

CQT kernels created, time used = 0.0221 seconds


In [12]:

# Single-epoch run on the `unaPrueba` generator, validated on the validation generator.
train_history = modelo.fit(
    x = unaPrueba,
    validation_data = DatasetDeValidacion,
    epochs = 1,
)



In [13]:
# Inference generator: with esEntrenamiento=False each batch contains inputs only.
DatasetAPredecir = Dataset(df=test, esEntrenamiento=False)

CQT kernels created, time used = 0.0208 seconds


In [14]:
# Run inference over the test generator and flatten the result to a 1-D array.
predicciones = modelo.predict(DatasetAPredecir).reshape(-1)

In [15]:
# Pair every test id with its predicted value, in the original row order.
submission = datosValidacion[['id']].assign(target=predicciones)


In [16]:
# Bare last expression: the notebook renders the submission frame as the cell output.
submission

Unnamed: 0,id,target
0,00005bced6,0.442705
1,0000806717,0.438100
2,0000ef4fe1,0.446000
3,00020de251,0.442880
4,00024887b5,0.448217
...,...,...
225995,ffff4125f1,0.439217
225996,ffff9d32a6,0.447396
225997,ffff9f4c1f,0.448584
225998,ffffa19693,0.446683


In [17]:
# Write the predictions to 'submission.csv' without the index column.
submission.to_csv('submission.csv',index=False)