Many ideas were taken from the following notebooks:<br>
https://www.kaggle.com/andradaolteanu/g2net-searching-the-sky-pytorch-effnet-w-meta<br>
https://www.kaggle.com/yasufuminakama/g2net-efficientnet-b7-baseline-training/output?select=oof_df.csv<br>
https://www.kaggle.com/yasufuminakama/g2net-efficientnet-b7-baseline-inference

In [None]:
!pip install -q nnAudio -qq
!pip install -q efficientnet_pytorch -qq
!pip install -q timm -qq

import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

import os
import gc
from tqdm.auto import tqdm
import pickle
import numpy as np
import pandas as pd
import time
import random
from random import shuffle
import math
from scipy import signal

from matplotlib import pyplot as plt
from matplotlib.pyplot import figure
from matplotlib.gridspec import GridSpec
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

import cProfile, pstats
import re

In [None]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

# lr_scheduler provides methods to adjust the learning rate based on the number of epochs 
# https://pytorch.org/docs/stable/optim.html
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingLR, CosineAnnealingWarmRestarts

# automatic mixed precision training
from torch.cuda.amp import GradScaler #https://pytorch.org/docs/stable/amp.html#gradient-scaling
from torch.cuda.amp import autocast #https://pytorch.org/docs/stable/amp.html#autocasting

# albumentations to define transformation/augmentation for the train and validation datasets
import albumentations as A
from albumentations.pytorch import ToTensorV2

from efficientnet_pytorch import EfficientNet
from nnAudio.Spectrogram import CQT1992v2

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold, train_test_split

import timm

In [None]:
class VAR:
    """Experiment configuration, read everywhere as plain class attributes."""

    # --- model selection -------------------------------------------------
    use_timm = True
    debug = False
    # timm and efficientnet_pytorch use different identifiers for EfficientNet-B7.
    model_name = 'tf_efficientnet_b7_ns' if use_timm else 'efficientnet-b7'

    # --- data / cross-validation -----------------------------------------
    epochs = 3
    down_sample = 10000  # number of rows sampled when debug is on
    stack_images = True
    target_col = 'target'
    n_fold = 5
    trn_folds = [0]  # [0, 1, 2, 3, 4]

    # --- optimisation ----------------------------------------------------
    batch_size = 64
    lr = 1e-4
    min_lr = 1e-6
    weight_decay = 1e-6
    apex = False  # toggles autocast / GradScaler in the training step
    num_workers = 4
    gradient_accumulation_steps = 1
    max_grad_norm = 1000
    print_freq = 250
    seed = 42

    # --- learning-rate schedule ------------------------------------------
    # One of: 'ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts'
    scheduler = 'CosineAnnealingLR'
    factor = 0.2  # ReduceLROnPlateau
    patience = 4  # ReduceLROnPlateau
    eps = 1e-6  # ReduceLROnPlateau
    T_max = 3  # CosineAnnealingLR
    T_0 = 3  # CosineAnnealingWarmRestarts

In [None]:
def set_seed(seed=19):
    '''Seed every random number generator this notebook relies on.

    Seeds Python's `random`, NumPy and PyTorch (CPU and all CUDA devices) and
    switches cuDNN to deterministic mode so repeated runs are comparable.

    Args:
        seed: integer seed applied to all generators.
    '''
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # cover every visible GPU, not only the current one
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may choose different kernels from run to run; turn it off.
    torch.backends.cudnn.benchmark = False
    # NOTE: PYTHONHASHSEED is read at interpreter start-up, so this only
    # influences Python processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)

set_seed()

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print('Device available now:', device)

In [None]:
def asMinutes(s):
    '''Format a duration in seconds as "<minutes>m <seconds>s" (fractions truncated).'''
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)

In [None]:
def timeSince(since, percent):
    '''Report elapsed time and a linear estimate of the time remaining.

    Args:
        since: start timestamp as returned by time.time().
        percent: fraction of the work completed so far; must be non-zero.

    Returns:
        A string of the form "<elapsed> (remain <remaining>)".
    '''
    elapsed = time.time() - since
    # Extrapolate the total duration from the completed fraction.
    projected_total = elapsed / (percent)
    remaining = projected_total - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))

In [None]:
def get_score(y_true, y_pred):
    '''Return the ROC AUC of the predictions against the true labels.'''
    return roc_auc_score(y_true, y_pred)

In [None]:
def get_result(result_df: pd.DataFrame):
    '''Print the score of the 'preds' column against the configured target column.'''
    predicted = result_df['preds'].values
    actual = result_df[VAR.target_col].values
    print(f'Score: {get_score(actual, predicted):<.4f}')

In [None]:
def idx2path(idx: str, is_train: bool = True) -> str:
    '''Build the .npy path for a sample id.

    Files are sharded into nested directories named after the first three
    characters of the id.

    Args:
        idx: sample id; must have at least three characters.
        is_train: choose the train directory (True) or the test directory (False).

    Returns:
        Path of the form <root>/<c0>/<c1>/<c2>/<idx>.npy
    '''
    root = ('/kaggle/input/g2net-gravitational-wave-detection/train/'
            if is_train
            else '/kaggle/input/g2net-gravitational-wave-detection/test/')
    level_1, level_2, level_3 = idx[0], idx[1], idx[2]
    return os.path.join(root, level_1, level_2, level_3, idx + '.npy')

In [None]:
def print_training_info(epoch, step, train_loader, data_time, losses, start, grad_norm):
    '''Print one progress line for the training loop.

    Args:
        epoch: zero-based epoch index (printed one-based).
        step: zero-based batch index within the epoch.
        train_loader: loader whose length is the number of batches.
        data_time: meter exposing .val / .avg for data loading time.
        losses: meter exposing .val / .avg for the loss.
        start: epoch start timestamp from time.time().
        grad_norm: latest gradient norm to report.
    '''
    n_batches = len(train_loader)
    fraction_done = float(step + 1) / n_batches
    template = ('Epoch: [{0}][{1}/{2}] '
                'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                'Elapsed {remain:s} '
                'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                'Grad: {grad_norm:.4f}  ')
    message = template.format(epoch+1, step, n_batches,
                              data_time=data_time,
                              loss=losses,
                              remain=timeSince(start, fraction_done),
                              grad_norm=grad_norm)
    print(message)

In [None]:
def print_validation_info(step, valid_loader, data_time, losses, start):
    '''Print one progress line for the validation loop.

    Args:
        step: zero-based batch index.
        valid_loader: loader whose length is the number of batches.
        data_time: meter exposing .val / .avg for data loading time.
        losses: meter exposing .val / .avg for the loss.
        start: validation start timestamp from time.time().
    '''
    n_batches = len(valid_loader)
    fraction_done = float(step + 1) / n_batches
    template = ('EVAL: [{0}/{1}] '
                'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                'Elapsed {remain:s} '
                'Loss: {loss.val:.4f}({loss.avg:.4f}) ')
    message = template.format(step, n_batches,
                              data_time=data_time,
                              loss=losses,
                              remain=timeSince(start, fraction_done))
    print(message)

In [None]:
class AverageMeter(object):
    """Tracks the latest value and the weighted running mean of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the latest value, the running sum, the count and the mean."""
        self.val, self.avg, self.sum, self.count = 0, 0, 0, 0

    def update(self, val, n=1):
        """Record `val` as the latest value, weighted by `n` in the running mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

In [None]:
# ====================================================
# Transforms or Augmentation using Albumentations
# https://albumentations.ai/docs/examples/migrating_from_torchvision_to_albumentations/
# ====================================================
def get_transforms(*, data: str):
    '''Return the albumentations pipeline for the requested split.

    Both 'train' and 'valid' currently get the same pipeline, a single
    tensor conversion; any other value yields None.
    '''
    if data in ('train', 'valid'):
        return A.Compose([ToTensorV2(),])
    return None

In [None]:
def filterSig(waves, aHP, bHP):
    '''Zero-phase filter each wave with the supplied filter coefficients.

    Args:
        waves: iterable of 1-D signals.
        aHP: denominator coefficients of the filter.
        bHP: numerator coefficients of the filter.

    Returns:
        np.ndarray holding one filtered signal per input wave.
    '''
    filtered = []
    for wave in waves:
        # filtfilt rather than lfilter: lfilter introduces a larger spike around 20hz
        filtered.append(signal.filtfilt(bHP, aHP, wave))
    return np.array(filtered)

In [None]:
class Dataset(Dataset):
    """Map-style dataset that loads a saved wave file and converts it to a CQT image.

    Args:
        df: frame with a 'path' column and, unless `test` is True, a 'target' column.
        transform: optional albumentations pipeline applied to the spectrogram.
        prints: stored but not used by this class.
        stack_images: if True, the waves of a sample are concatenated into one
            signal and transformed once; otherwise the first three waves are
            transformed separately and stacked as channels.
        test: if True, `__getitem__` returns only the image (no label).
    """
    def __init__(self, df, transform = None, prints = False, stack_images = True, test = False):
        self.path = df['path'].values
        # Labels are only read when test is False, so an unlabeled test frame is accepted.
        self.target = df['target'].values if (not test or 'target' in df) else None
        self.wave_transform = CQT1992v2(sr=2048, fmin=20, fmax=1024, hop_length=32, bins_per_octave=8)
        self.transform = transform
        self.test = test
        self.prints = prints
        self.stack_images = stack_images
        #self.bHP, self.aHP = signal.butter(8, (20, 500), btype='bandpass', fs=2048)

    def __len__(self):
        return len(self.path)

    def __transform__(self, waves, wave_transform):
        '''Transforms the np_file into spectrogram.

        Args:
            waves: array of waves loaded from one sample file.
            wave_transform: callable mapping a float tensor to a spectrogram tensor.

        Returns:
            The output of `wave_transform` in stacked mode, otherwise a float
            tensor with one entry per wave along the first dimension.
        '''
        #waves = filterSig(waves, aHP=self.aHP, bHP=self.bHP)

        if self.stack_images:
            # Concatenate all waves into one long signal and scale by its maximum.
            waves = np.hstack(waves)
            waves = waves / np.max(waves)
            waves = torch.from_numpy(waves).float()
            image = wave_transform(waves)
        else:
            image = []
            for i in range(3):
                # Each wave is scaled by its own maximum before the transform.
                wave = waves[i] / np.max(waves[i])
                wave = torch.from_numpy(wave).float()
                channel = wave_transform(wave).squeeze().numpy()
                if self.transform:
                    channel = self.transform(image=channel)['image'].squeeze().numpy()
                image.append(channel)
            # Stack in NumPy first: torch.tensor() on a list of arrays converts
            # element by element and is much slower.
            image = torch.from_numpy(np.stack(image)).float()
        return image

    def __getitem__(self, idx):
        waves = np.load(self.path[idx])
        image = self.__transform__(waves, self.wave_transform)
        # In per-channel mode the transform was already applied inside __transform__.
        if self.stack_images and self.transform:
            image = image.squeeze().numpy()
            image = self.transform(image=image)['image']
        if not self.test:
            y = torch.tensor(self.target[idx], dtype=torch.float)
            return image, y
        else:
            return image

In [None]:
class EffNet(nn.Module):
    """EfficientNet (efficientnet_pytorch) backbone followed by a single-logit head.

    Args:
        var: configuration object providing `stack_images` and `model_name`.
    """
    def __init__(self, var):
        super().__init__()
        self.var = var
        # One input channel when the waves are stacked into a single image, else three.
        self.channel = 1 if var.stack_images else 3
        # Read the model name from the config that was passed in, not from the global VAR.
        self.efficient_net = EfficientNet.from_pretrained(var.model_name, in_channels=self.channel) # output shape 1000 by default
        self.classification = nn.Sequential(nn.Linear(1000, 1))

    def forward(self, x, prints=False):
        """Return one logit per sample; `prints` is accepted but unused."""
        x = self.efficient_net(x)
        out = self.classification(x)
        return out
    

class EffNetTimm(nn.Module):
    """timm backbone whose classifier is replaced by a single-logit linear layer.

    Args:
        var: configuration object providing `stack_images` and `model_name`.
        pretrained: forwarded to timm.create_model.
    """
    def __init__(self, var, pretrained=False):
        super().__init__()
        self.var = var
        # One input channel when the waves are stacked into a single image, else three.
        self.channel = 1 if var.stack_images else 3
        self.model = timm.create_model(
            self.var.model_name,
            pretrained=pretrained,
            in_chans=self.channel,
        )
        # Keep the backbone's feature width and swap in a one-output head.
        self.n_features = self.model.classifier.in_features
        self.model.classifier = nn.Linear(self.n_features, 1)

    def forward(self, x):
        """Return the model output for the batch `x`."""
        return self.model(x)

In [None]:
def train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device):
    '''
    Train the model for one epoch and return the sample-weighted mean loss.

    Function is called VAR.epochs number of times for a given (training folds)-(validation fold) combination.

    Args:
        train_loader: yields (images, labels) batches.
        model: network to optimise; switched to train mode here.
        criterion: loss applied to the flattened logits and the labels.
        optimizer: optimizer holding the model parameters.
        epoch: zero-based epoch index, used for logging only.
        scheduler: accepted for interface compatibility; not stepped here.
        device: device the batches are moved to.

    Returns:
        Average (unscaled) training loss over the epoch.
    '''
    data_time = AverageMeter()
    losses = AverageMeter()

    model.train() # switch to train mode

    start = end = time.time()
    # With enabled=False the scaler's scale/unscale_/update are no-ops and
    # step() simply calls optimizer.step(), so one code path serves both modes.
    scaler = GradScaler(enabled=VAR.apex)
    accum_steps = VAR.gradient_accumulation_steps
    last_step = len(train_loader) - 1
    grad_norm = 0.0 # reported until the first optimizer step has happened

    # iterate through batches for a given fold
    for step, (images, labels) in enumerate(train_loader):

        data_time.update(time.time() - end) # measure batch data loading time
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)

        with autocast(enabled=VAR.apex): # mixed precision only when requested
            y_preds = model(images)
            loss = criterion(y_preds.view(-1), labels)

        losses.update(loss.item(), batch_size) # record the unscaled batch loss

        # average the loss over the accumulated micro-batches
        if accum_steps > 1:
            loss = loss / accum_steps

        # gradients keep accumulating until they are cleared after the optimizer step
        scaler.scale(loss).backward()

        if (step + 1) % accum_steps == 0:
            # Clip on true gradient magnitudes: the gradients must be unscaled
            # first, and unscale_ may only run once per optimizer step.
            scaler.unscale_(optimizer)
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), VAR.max_grad_norm)
            scaler.step(optimizer)
            scaler.update()

            for param in model.parameters():
                param.grad = None

        end = time.time()

        if step % VAR.print_freq == 0 or step == last_step:
            print_training_info(epoch, step, train_loader, data_time, losses, start, grad_norm)

    return losses.avg

In [None]:
def valid_fn(valid_loader, model, criterion, device):
    '''
    Evaluate the model on the validation loader.

    Function is called VAR.epochs number of times for a given (training folds)-(validation fold) combination.

    Args:
        valid_loader: yields (images, labels) batches.
        model: network to evaluate; switched to eval mode here.
        criterion: loss applied to the flattened logits and the labels.
        device: device the batches are moved to.

    Returns:
        (average loss, predictions), where predictions are the sigmoid outputs
        of all batches concatenated in loader order.
    '''
    data_time = AverageMeter()
    losses = AverageMeter()

    model.eval() # switch to evaluation mode
    preds = []
    start = end = time.time()
    last_step = len(valid_loader) - 1

    for step, (images, labels) in enumerate(valid_loader):
        # time spent waiting for this batch, measured from the end of the previous one
        data_time.update(time.time() - end)
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)

        # no gradients are needed for either the forward pass or the loss
        with torch.no_grad():
            y_preds = model(images)
            loss = criterion(y_preds.view(-1), labels)
        losses.update(loss.item(), batch_size) # record loss
        preds.append(y_preds.sigmoid().to('cpu').numpy()) # record probabilities

        end = time.time()

        if step % VAR.print_freq == 0 or step == last_step:
            print_validation_info(step, valid_loader, data_time, losses, start)

    predictions = np.concatenate(preds)
    return losses.avg, predictions

In [None]:
# ====================================================
# Train loop
# ====================================================
def train_loop(model, folds: pd.DataFrame, fold: int) -> pd.DataFrame: #train_loop(train,0)
    '''
    Train and validate the model with `fold` held out as the validation set.

    Function is called once per a given (training folds)-(validation fold) combination: e.g. ([1,2,3,4])-([0]).

    Args:
        model: network to train (updated in place).
        folds: frame with 'path', the target column and a 'fold' column.
        fold: id of the fold used for validation.

    Returns:
        The validation rows with an added 'preds' column holding the
        predictions of the best-scoring epoch.

    Raises:
        ValueError: if VAR.scheduler is not one of the supported names.
        RuntimeError: if no epoch produced a best score (e.g. VAR.epochs == 0).
    '''
    print(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index

    train_folds = folds.loc[trn_idx].reset_index(drop=True)
    valid_folds = folds.loc[val_idx].reset_index(drop=True)
    valid_labels = valid_folds[VAR.target_col].values

    train_dataset = Dataset(train_folds, transform=get_transforms(data='train'),
                            stack_images=VAR.stack_images, test=False)
    # Validation must use the 'valid' pipeline so train-only augmentations never leak in.
    valid_dataset = Dataset(valid_folds, transform=get_transforms(data='valid'),
                            stack_images=VAR.stack_images, test=False)

    train_loader = DataLoader(train_dataset, batch_size=VAR.batch_size, shuffle=True,
                              num_workers=VAR.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset, batch_size=VAR.batch_size * 2, shuffle=False,
                              num_workers=VAR.num_workers, pin_memory=True, drop_last=False)

    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(optimizer):
        if VAR.scheduler=='ReduceLROnPlateau':
            scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=VAR.factor, patience=VAR.patience, verbose=True, eps=VAR.eps)
        elif VAR.scheduler=='CosineAnnealingLR':
            scheduler = CosineAnnealingLR(optimizer, T_max=VAR.T_max, eta_min=VAR.min_lr, last_epoch=-1)
        elif VAR.scheduler=='CosineAnnealingWarmRestarts':
            scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=VAR.T_0, T_mult=1, eta_min=VAR.min_lr, last_epoch=-1)
        else:
            # Fail with a clear message instead of an UnboundLocalError below.
            raise ValueError(f'Unknown scheduler: {VAR.scheduler!r}')
        return scheduler

    # ====================================================
    # optimizer / scheduler / criterion
    # ====================================================
    optimizer = Adam(model.parameters(), lr=VAR.lr, weight_decay=VAR.weight_decay, amsgrad=False)
    scheduler = get_scheduler(optimizer)
    criterion = nn.BCEWithLogitsLoss()

    # ====================================================
    # loop
    # ====================================================
    best_score = 0.
    best_loss = np.inf
    best_score_preds = None # predictions of the best-scoring epoch, kept in memory

    for epoch in range(VAR.epochs):

        start_time = time.time()

        # train
        avg_loss = train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)

        # ReduceLROnPlateau reacts to the validation loss; the cosine schedules step per epoch
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(avg_val_loss)
        else:
            scheduler.step()

        # scoring
        score = get_score(valid_labels, preds)

        elapsed = time.time() - start_time

        print(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        print(f'Epoch {epoch+1} - Score: {score:.4f}')

        if score > best_score:
            best_score = score
            best_score_preds = preds
            print(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(), 'preds': preds},
                       f'{VAR.model_name}_fold{fold}_best_score.pth')

        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            print(f'Epoch {epoch+1} - Save Best Loss: {best_loss:.4f} Model')
            torch.save({'model': model.state_dict(), 'preds': preds},
                        f'{VAR.model_name}_fold{fold}_best_loss.pth')

    # Use the in-memory predictions rather than re-reading the checkpoint: this
    # avoids silently picking up a stale file left over from an earlier run.
    if best_score_preds is None:
        raise RuntimeError(f'No best-score epoch was recorded for fold {fold}; '
                           'check VAR.epochs and the validation scores.')
    valid_folds['preds'] = best_score_preds

    return valid_folds

In [None]:
# ====================================================
# inference
# ====================================================
def inference(model, test, states, device):
    '''Predict probabilities for the test frame, averaged over all checkpoints.

    Args:
        model: network into which every checkpoint's weights are loaded in turn.
        test: frame describing the test samples (passed to Dataset with test=True).
        states: checkpoints, each a dict with a 'model' state dict.
        device: device the batches are moved to.

    Returns:
        Concatenated per-batch sigmoid outputs, averaged across `states`.
    '''
    test_dataset = Dataset(
        test,
        transform=get_transforms(data='valid'),
        stack_images=VAR.stack_images,
        test=True,
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=VAR.batch_size,
        shuffle=False,
        num_workers=VAR.num_workers,
        pin_memory=True,
    )

    batch_probs = []
    progress = tqdm(enumerate(test_loader), total=len(test_loader))
    for _, images in progress:
        images = images.to(device)
        per_state = []
        # Every checkpoint predicts the same batch; the results are averaged below.
        for state in states:
            model.load_state_dict(state['model'])
            model.eval()
            with torch.no_grad():
                logits = model(images)
            per_state.append(logits.sigmoid().to('cpu').numpy())
        batch_probs.append(np.mean(per_state, axis=0))
    return np.concatenate(batch_probs)

In [None]:
# ====================================================
# main
# ====================================================
def main():
    '''Run cross-validated training, write out-of-fold results, then predict the test set.

    Writes 'oof_df.csv' (out-of-fold predictions) and 'submission.csv'
    (test predictions averaged over the trained folds).
    '''

    def build_model():
        # A fresh, identically initialised network for each fold.
        if not VAR.use_timm:
            return EffNet(VAR).to(device)
        return EffNetTimm(VAR, pretrained=True).to(device)

    # ====================================================
    # train
    # ====================================================
    train = pd.read_csv('/kaggle/input/g2net-gravitational-wave-detection/training_labels.csv')
    train['path'] = train['id'].apply(idx2path)

    if VAR.debug:
        VAR.epochs = 1
        train = train.sample(n=VAR.down_sample, random_state=VAR.seed).reset_index(drop=True)

    Fold = StratifiedKFold(n_splits=VAR.n_fold, shuffle=True, random_state=VAR.seed)

    # Fold.split() yields one (train, validation) index pair per fold.
    # The loop tags every sample with the id (0 .. n_fold-1) of the fold it validates in.
    for n, (train_index, val_index) in enumerate(Fold.split(train, train[VAR.target_col])):
        train.loc[val_index, 'fold'] = int(n)
    train['fold'] = train['fold'].astype(int) # train (n_fold=5, trn_folds=[0])

    model = None
    oof_df = pd.DataFrame() # accumulates the out-of-fold predictions
    for fold in range(VAR.n_fold):
        if fold not in VAR.trn_folds:
            continue
        # Start every fold from fresh weights: reusing the previous fold's model
        # would mean it has already been trained on this fold's validation data.
        model = None
        gc.collect()
        torch.cuda.empty_cache()
        model = build_model()

        _oof_df = train_loop(model, train, fold) # use the fold as the validation set
        oof_df = pd.concat([oof_df, _oof_df]) # accumulate results over different val folds
        print(f"========== fold: {fold} result ==========")
        get_result(_oof_df) # get result of the val fold

    # CV result
    print(f"========== CV ==========")
    get_result(oof_df) # get result of all the accumulated val folds
    # save result
    oof_df.to_csv('oof_df.csv', index=False)

    # ====================================================
    # infer
    # ====================================================
    test = pd.read_csv('/kaggle/input/g2net-gravitational-wave-detection/sample_submission.csv')
    test['path'] = test['id'].apply(lambda sample_id: idx2path(sample_id, is_train=False))
    if VAR.debug:
        test = test.sample(n=VAR.down_sample, random_state=VAR.seed).reset_index(drop=True)

    if model is None:
        # No fold was trained in this run; a network is still needed to host the checkpoints.
        model = build_model()

    states = [torch.load(f'{VAR.model_name}_fold{fold}_best_score.pth') for fold in VAR.trn_folds]
    predictions = inference(model, test, states, device)
    test['target'] = predictions
    test[['id', 'target']].to_csv('submission.csv', index=False)

    print(test.head())
    print(test['target'].hist())

In [None]:
# Run the full train + inference pipeline only when executed as a script or
# notebook cell, not when this file is imported as a module.
if __name__ == '__main__':
    main()

<a href="./submission.csv"> Download File </a>