# Import packages

Import all required packages.

In [1]:
import os
import gc
import sys
import cv2
import math
import numpy as np
import pandas as pd
from glob import glob
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold, StratifiedKFold
import librosa
from scipy import signal as sci_signal
import json

import torch
from torch import nn
from torchvision.models import efficientnet

#import tensorflow as tf

import albumentations as albu

import pytorch_lightning as pl
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from pytorch_lightning.callbacks import ModelCheckpoint, TQDMProgressBar

# import score function of BirdCLEF
#sys.path.append('/kaggle/input/birdclef-roc-auc')
#sys.path.append('/kaggle/usr/lib/kaggle_metric_utilities')
#from metric import score

In [2]:
# Import for visualization
import matplotlib as mpl
cmap = mpl.cm.get_cmap('coolwarm')
import matplotlib.pyplot as plt
import librosa.display as lid
import IPython.display as ipd
#import cv2

  cmap = mpl.cm.get_cmap('coolwarm')


# Configuration

Hyper-paramters

In [3]:
class config:
    
    # == global config ==
    SEED = 28082015  # random seed
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' # device to be used
    MIXED_PRECISION = False  # whether to use mixed-16 precision
    OUTPUT_DIR = './output/'  # output folder
    
    # == data config ==
    DATA_ROOT = 'E:/PycharmProjects/birdclef24/data'  # root folder
    PREPROCESSED_DATA_ROOT = '/kaggle/input/birdclef24-spectrograms-via-cupy'
    LOAD_DATA = True  # whether to load data from pre-processed dataset

    
    # == model config ==
    MODEL_TYPE = 'efficientnet_b0'  # model type
    
    # == dataset config ==
    BATCH_SIZE = 256  # batch size of each step
    N_WORKERS = 6  # number of workers
    
    
    # == training config ==
    FOLDS = 7  # n fold
    EPOCHS = 200  # max epochs
    LR = 7e-4  # learning rate
    WEIGHT_DECAY = 9e-6  # weight decay of optimizer
    
    # == other config ==
    VISUALIZE = True  # whether to visualize data and batch
    
    
print('fix seed')
pl.seed_everything(config.SEED, workers=True)

CFG = config

TARGET_WEIGHTS = [30981.265271661872, 22502.432413914863, 18894.14713004499, 14514.244730542465, 10944.348069459196, 9065.01072024503, 9663.669038687454, 12688.557362943708, 19890.17226527665, 25831.37317235381, 33890.367561807274, 44122.94111025334, 59811.25595068309, 79434.07500078829, 107358.80916894016, 135720.8418348218, 149399.8411114814, 128492.95185325432, 91746.23687305572, 72748.76911097553, 66531.53596840335, 62932.30598423903, 56610.26874314136, 49473.14369220607, 43029.18495420936, 36912.67491908133, 31486.93117928144, 26898.072997215502, 23316.638282978325, 20459.73133196152, 18385.68309639014, 17111.405107656312, 16337.80991958771, 15857.759882318944, 15580.902485189716, 15497.59045982052, 15612.2556996736, 15797.88455410361, 15974.218740897895, 16130.395527176632, 16261.310866446129, 16371.892401608216, 16397.019695140876, 16325.463899570548, 16228.641108112768, 16191.809643436269, 16341.207925934068, 16645.711351490587, 17005.493716683693, 17430.29874509864, 17907.24023203076, 18431.55334008694, 19032.471309392287, 19701.355113141435, 20408.236605392685, 20967.20795006453, 21194.427318009974, 21088.521528526755, 19437.91555757985, 13677.902713248171, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 871528441401.8333, 1083221770553.0684, 147034752676.7702, 35556045575.13566, 35153369257.41337, 46086368691.51654, 24689305171.692936, 11343276593.440475, 5396624651.94418, 2449353007.641508, 1132225885.703891, 579547849.1340877, 330219246.7861086, 207613930.3131764, 144580292.27473342, 109933282.92266414, 88706603.092171, 73819777.54163922, 63615988.74519494, 57250262.292053565, 52976073.06761927, 49653169.17819005, 46544975.11484598, 43167606.9599748, 39724375.20499403, 36317177.25886468, 33057511.80930482, 29869089.497658804, 26982386.85583376, 24416235.17215712, 22273651.697369896, 20553426.04804544, 19216240.03357431, 18167694.44812838, 17501855.536957663, 17169938.630597908, 17005382.258644175, 16998475.26752617, 17082890.987979066, 17227982.77516062, 17445823.21630204, 17757404.421785507, 18346092.75160569, 19400573.66632694, 20506722.48296608, 22469648.380506545, 23432031.455169585, 26204163.40545158, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000000000000000.0, 1000000000000000.0, 1000000000000000.0, 1000000000000000.0, 1000000000000000.0, 1000000000000000.0, 1000000000000000.0, 1000000000000000.0, 1000000000000000.0, 1000000000000000.0, 1000000000000000.0, 3673829810926.31, 371405570725.2526, 14219163611.984406, 3001863018.1934915, 1432766589.9326108, 884599805.0283787, 560127980.1033351, 386052567.7087711, 287331851.051439, 222703657.59538063, 181069239.6264349, 154620864.3164144, 138093777.60284117, 126605828.89875436, 117967840.02553518, 111005814.39518328, 105186901.20678852, 100168133.0295481, 95568646.67416307, 91457433.39515457, 88871610.45308323, 88829796.26374224, 91398113.73291488, 96585131.67000748, 104507692.01463065, 115895119.998433, 131939701.08213414, 154492946.00677127, 183147918.17086875, 215151374.22324687, 247158314.6345976, 266792879.42215955, 279115128.29108113, 370541510.87006927, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 877670509694.7871, 1174826943136.8308, 1270605570069.038, 21727315470.5208, 3159456646.5437946, 1090653401.282219, 727967089.8459107, 384399548.9506704, 290787296.9451616, 232703218.45048887, 197467462.7577736, 174310890.8025987, 160536437.73297343, 153567098.77048483, 152120124.9453068, 153115566.6756177, 153955545.42558223, 153734675.21565756, 154798666.36905554, 163346213.58113608, 180013139.3707387, 200324358.8534948, 220754613.1646765, 241290935.478592, 262868932.2066308, 284448910.01847774, 305681084.4142859, 327605088.8575117, 350473296.7263526, 373964594.1196182, 398396925.8173239, 423528355.65716046, 450447055.544388, 478857006.4973163, 508200335.7126168, 537309657.5789208, 566854568.2904652, 594618842.9455439, 619715928.2391286, 641395460.8414665, 663290039.7810476, 689274894.631561, 718208866.3397261, 743951200.8024124, 761776104.2945968, 772911224.3082078, 804001144.8046833, 772448774.7758856, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4613823.568205323, 1999308.9343799097, 904636.2296014762, 433823.6123842511, 207201.39055371704, 107836.09164720173, 57647.915219220784, 40606.52305039815, 47739.86647922776, 51669.35493930698, 56438.19768395407, 60447.45665200092, 65251.4153955275, 71920.88588011517, 78529.58115204438, 83422.30217897324, 87036.98552475807, 90389.72631774022, 93982.39165674087, 97578.0099352472, 101428.21366062944, 104630.69200130588, 105685.04322626138, 103962.58423268417, 99650.31670632094, 94290.49986206587, 89514.90144353417, 85905.45713126978, 82784.9857650212, 79152.28707014346, 74847.81017353121, 70378.81859610273, 65420.04643792357, 59953.75184604176, 54764.28281143022, 50362.51288353384, 46212.571031725325, 41997.52779088816, 37692.05148110484, 33834.73460995647, 31846.09764364542, 31934.145655397457, 31454.81247448105, 30105.4073072481, 26957.830283611693, 27760.04479210889, 29853.374336459365, 19133.428743715107, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7619940.584531054, 3148394.472742347, 1308415.0022178134, 540515.7720745018, 215237.1053603881, 102546.7276372816, 68453.67122640925, 50692.59053608593, 51487.52043139844, 52104.76838400132, 54019.39151917722, 55856.02168787862, 60347.30240270209, 68990.96019017675, 79096.88768563846, 87574.33453690328, 94158.56052476274, 101903.63670531697, 111746.9753834774, 122460.65399236557, 132086.69387474353, 141041.48571028374, 146354.09441287292, 145953.09590059065, 139496.8007888401, 128508.85108217449, 116665.51769667884, 107458.39706309135, 100259.97236694951, 94108.98505029618, 88439.89456238014, 82734.9027659809, 77061.08621371102, 71333.5319243128, 65999.72532130677, 61798.9972058361, 58237.356419617165, 54715.10266341248, 50825.84431702935, 46059.17688689915, 40740.26050401376, 36335.80228304863, 33981.57568605091, 33589.7143390849, 33988.88524112733, 36272.9364507092, 41183.34413717943, 29194.12369278645, 0.0040536134869726, 0.0138824238058072, 135129884.5084534, 12219717.5342461, 0.0090705273332672, 0.0085898851680217, 0.0215368188774867, 0.0336321308942602]
TARGET_WEIGHTS = np.array(TARGET_WEIGHTS)

fix seed


Seed set to 28082015


In [4]:
class ECA(nn.Module):
    def __init__(self, kernel_size=5):
        super().__init__()
        self.kernel_size = kernel_size
        self.supports_masking = True
        self.conv = nn.Conv1d(in_channels=1, out_channels=1, kernel_size=kernel_size, stride=1, padding="same", bias=False)
    def forward(self, inputs):
        b, c, s = inputs.shape
        
        x = torch.mean(inputs, axis = -1)
        x = x.view(b, 1, c)
        x = self.conv(x)
        x = x.squeeze(1)
        x = nn.Sigmoid()(x)
        x = x[:,:,None]
        return inputs * x


class CausalDWConv1D(nn.Module):
    def __init__(self, 
        kernel_size=17,
        dilation_rate=1,
        use_bias=False,
        in_channels = 64,
        out_channels = 32,       
        depthwise_initializer='glorot_uniform',
        **kwargs):
        super().__init__()
        #self.causal_pad = tf.keras.layers.ZeroPadding1D((dilation_rate*(kernel_size-1),0),name=name + '_pad')
        self.dw_conv = nn.Conv1d(
            in_channels, 
            out_channels, 
            kernel_size, 
            stride=1, 
            padding='same', 
            dilation=dilation_rate, 
            groups=out_channels if kernel_size > 3 else 1,
            bias=False, 
            padding_mode='zeros')

        
    def forward(self, inputs):
        x = self.dw_conv(inputs)
        return x


class Conv1DBlock(nn.Module):
    def __init__(self, 
                 kernel_size=17,
                 channels = 32,
                 expand_channels = 64,
                 drop_rate=0.0,
                ):
        super().__init__()
        self.kernel_size = kernel_size
        self.conv = CausalDWConv1D(
                        kernel_size=kernel_size,
                        dilation_rate=1,
                        use_bias=False,
                        in_channels = expand_channels,
                        out_channels = expand_channels
                    )
        self.dnn_expand = nn.Linear(in_features = channels, 
                                    out_features = expand_channels
                                     )
        self.dnn_project = nn.Linear(in_features = expand_channels, 
                             out_features = channels
                                    )
        self.bn = nn.BatchNorm1d(num_features = expand_channels, eps=0.95)
        self.eca = ECA()
        self.dropout = nn.Dropout(drop_rate)
        self.act = nn.SiLU()

    def forward(self, inputs):
        skip = inputs

        x = inputs.permute([0,2,1])
        x = self.dnn_expand(x)
        
        x = x.permute([0,2,1])
        x = self.act(x)
        x = self.conv(x)
        x = self.bn(x)
        x = self.eca(x)
        
        x = x.permute([0,2,1])
        x = self.dnn_project(x)
        x = x.permute([0,2,1])

        return x + skip


class Conv1DModel(nn.Module):
    def __init__(self, 
                 kernel_size=17,
                 channels = 32,
                 expand_channels = 64,
                 drop_rate=0.0,
                 num_blocks_in_stage = 3,
                 input_len = 32_000*5,
                 n_classes = 182
                ):
        super().__init__()
        self.stem_conv = nn.Linear(in_features = 1, 
                                    out_features = channels
                                     )
        self.stem_bn = nn.BatchNorm1d(num_features = channels, eps=0.95)

        self.ConvStage_1 = nn.ModuleList([
            Conv1DBlock(kernel_size=kernel_size, channels = channels,expand_channels = expand_channels, drop_rate=drop_rate)
                                         for _ in range(num_blocks_in_stage)])
        self.PoolStage_1 = nn.AvgPool1d(kernel_size=(4))
        
        self.ConvStage_2 = nn.ModuleList([
            Conv1DBlock(kernel_size=kernel_size, channels = channels,expand_channels = expand_channels, drop_rate=drop_rate)
                                          for _ in range(num_blocks_in_stage)])
        self.PoolStage_2 = nn.AvgPool1d(kernel_size=(4))

        
        self.ConvStage_3 = nn.ModuleList([
            Conv1DBlock(kernel_size=kernel_size, channels = channels,expand_channels = expand_channels, drop_rate=drop_rate)
                                          for _ in range(num_blocks_in_stage)])
        self.PoolStage_3 = nn.AvgPool1d(kernel_size=(4))

        self.pre_out = nn.Linear(in_features = channels, out_features = n_classes*2)
        self.dropout = nn.Dropout(drop_rate)
        self.out_act = nn.SiLU()
        self.out = nn.Linear(in_features = n_classes*2, out_features = n_classes)
        self.sigmoid = nn.Sigmoid()

        
    def forward(self, inputs):
        
        b, s = inputs.shape
        x = inputs.view(b, s, 1)
        x = self.stem_conv(x)
        x = x.permute([0,2,1])
        x = self.stem_bn(x)

        for block in self.ConvStage_1:
            x = block(x)
        x = self.PoolStage_1(x)

        for block in self.ConvStage_2:
            x = block(x)
        x = self.PoolStage_2(x)

        for block in self.ConvStage_3:
            x = block(x)
        x = self.PoolStage_3(x)

        x = x.mean(axis=2)

        x = self.pre_out(x)
        x = self.dropout(x)
        x = self.out_act(x)
        
        logits = self.out(x)
        probs = self.sigmoid(logits)

        return {
                "clipwise_logits_long": logits,
                "clipwise_pred_long": probs,
            }


        

In [5]:

class FeedFoward(nn.Module):
    """ a simple linear layer followed by a non-linearity """

    def __init__(self, n_embd, dropout):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_embd, 4 * n_embd),
            nn.ReLU(),
            nn.Linear(4 * n_embd, n_embd),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        return self.net(x)
        

class Block(nn.Module):
    """ Transformer block: communication followed by computation """

    def __init__(self, n_embd, n_head, dropout):
        # n_embd: embedding dimension, n_head: the number of heads we'd like
        super().__init__()
        head_size = n_embd // n_head
        self.sa = nn.MultiheadAttention(n_embd, n_head)
        self.ffwd = FeedFoward(n_embd, dropout)
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)

    def forward(self, x, q = None):
        if q is not None:
            X = (q, x, x)
        else:
            X = (x, x, x)
        y = self.sa(*X)
        y = y[0]
        
        x = self.ln1(x + y)
        y = self.ffwd(x)
        x = self.ln2(x + y)
        return x


In [6]:
class ConvTransBlock(nn.Module):
    def __init__(self, block_kernels = [5, 3], n_head = 4, channels=16, expand_channels=32, drop_rate = 0.1, att_drop_rate = 0.25, n_features=25):
        # n_embd: embedding dimension, n_head: the number of heads we'd like
        super().__init__()
        self.conv = nn.Sequential(*[
            Conv1DBlock(kernel_size=k, channels = channels,expand_channels = expand_channels, drop_rate=drop_rate)
            for k in block_kernels
        ])

        self.block = Block(n_embd = channels, n_head=n_head, dropout = att_drop_rate)

    def forward(self, x):
        x = self.conv(x)
        x = x.permute([0,2,1])
        x = self.block(x)
        x = x.permute([0,2,1])
        return x

In [7]:
class FeatureExctractor(nn.Module):
    """ Transformer block: communication followed by computation """

    def __init__(self, kernel_size = 7, channels=16, expand_channels=32, drop_rate = 0.1, n_features=25):
        # n_embd: embedding dimension, n_head: the number of heads we'd like
        super().__init__()
        self.Scales60 = nn.ModuleList([nn.Conv1d(in_channels = 1,
                                                out_channels = channels,
                                                kernel_size = 1,
                                                stride=1, 
                                                padding='same') for _ in range(9)])
        
        self.ScalesFlat = nn.ModuleList([nn.Conv1d(in_channels = 1,
                                                out_channels = channels//2,
                                                kernel_size = 1,
                                                stride=1, 
                                                padding='same') for _ in range(16)])
        
        self.ConvExt60 = nn.ModuleList([
            Conv1DBlock(kernel_size=kernel_size, channels = channels,expand_channels = expand_channels, drop_rate=drop_rate)
                                          for _ in range(9)])
        self.ConvExtFlat = nn.ModuleList([
            Conv1DBlock(kernel_size=kernel_size, channels = channels//2,expand_channels = expand_channels//2, drop_rate=drop_rate)
                                          for _ in range(16)])

    def forward(self, x):
        x = x.view(-1, 556)
        
        state_t = x[:, 0:60] - 273
        state_q0001 = x[:, 60:120] *1_000
        state_q0002 = x[:, 120:180] *1_000
        state_q0003 = x[:, 180:240] *1_000
        state_u = x[:, 240:300] / 100
        state_v = x[:, 300:360] / 100
    
        state_ps = x[:, 360:361]/ 100_000 - 1
        pbuf_SOLIN = x[:, 361:362] / 1000
        pbuf_LHFLX = x[:, 362:363] / 1000
        pbuf_SHFLX = x[:, 363:364] / 1000
        pbuf_TAUX = x[:, 364:365] / 1
        pbuf_TAUY = x[:, 365:366] / 1
        pbuf_COSZRS = x[:, 366:367] / 1
        cam_in_ALDIF = x[:, 367:368] / 1
        cam_in_ALDIR = x[:, 368:369] / 1
        cam_in_ASDIF = x[:, 369:370] / 1
        cam_in_ASDIR = x[:, 370:371] / 1
        cam_in_LWUP = x[:, 371:372] / 1000
        cam_in_ICEFRAC = x[:, 372:373] / 1
        cam_in_LANDFRAC = x[:, 373:374] /1
        cam_in_OCNFRAC = x[:, 374:375]  /1
        cam_in_SNOWHLAND = x[:, 375:376] / 1
    
        pbuf_ozone = x[:, 376:436] * 100_000
        pbuf_CH4 = x[:, 436:496] * 100_000
        pbuf_N2O = x[:, 496:556] * 100_000
            
        inputs_60 = [
                state_t,
                state_q0001,
                state_q0002,
                state_q0003, 
                state_u,
                state_v,
    
                pbuf_ozone,
                pbuf_CH4,
                pbuf_N2O
        ]

        inputs_flat = [            
                torch.repeat_interleave(state_ps, 60, dim=-1),
                torch.repeat_interleave(pbuf_SOLIN, 60, dim=-1),
                torch.repeat_interleave(pbuf_LHFLX, 60, dim=-1),
                torch.repeat_interleave(pbuf_SHFLX, 60, dim=-1),
                torch.repeat_interleave(pbuf_TAUX, 60, dim=-1),
               torch.repeat_interleave(pbuf_TAUY, 60, dim=-1),
                torch.repeat_interleave(pbuf_COSZRS, 60, dim=-1),
                torch.repeat_interleave(cam_in_ALDIF, 60, dim=-1),
                torch.repeat_interleave(cam_in_ALDIR, 60, dim=-1),
               torch.repeat_interleave(cam_in_ASDIF, 60, dim=-1),
                torch.repeat_interleave(cam_in_ASDIR, 60, dim=-1),
                torch.repeat_interleave(cam_in_LWUP, 60, dim=-1),
                torch.repeat_interleave(cam_in_ICEFRAC, 60, dim=-1),
                torch.repeat_interleave(cam_in_LANDFRAC, 60, dim=-1),
                torch.repeat_interleave(cam_in_OCNFRAC, 60, dim=-1),
                torch.repeat_interleave(cam_in_SNOWHLAND, 60, dim=-1),
        ]
        
        output = []
        for i, conv in enumerate(self.ConvExt60):
            t = inputs_60[i]
            t = t.view(-1, 1, 60)
            t = self.Scales60[i](t)
            output.append(conv(t))
            
        for i, conv in enumerate(self.ConvExtFlat):
            t = inputs_flat[i]
            t = t.view(-1, 1, 60)
            t = self.ScalesFlat[i](t)
            output.append(conv(t))


        return torch.cat(output, 1)#.permute([0,2,1])


In [8]:
class FeatureExctractor(nn.Module):
    """ Transformer block: communication followed by computation """

    def __init__(self, kernel_size = 7, channels=16, expand_channels=32, drop_rate = 0.1, n_features=25):
        # n_embd: embedding dimension, n_head: the number of heads we'd like
        super().__init__()
        self.Scales60 = nn.ModuleList([nn.Conv1d(in_channels = 1,
                                                out_channels = channels,
                                                kernel_size = 1,
                                                stride=1, 
                                                padding='same') for _ in range(9)])
        
        self.ScalesFlat = nn.ModuleList([nn.Conv1d(in_channels = 1,
                                                out_channels = channels//2,
                                                kernel_size = 1,
                                                stride=1, 
                                                padding='same') for _ in range(16)])
        
        self.ConvExt60 = nn.ModuleList([
            Conv1DBlock(kernel_size=kernel_size, channels = channels,expand_channels = expand_channels, drop_rate=drop_rate)
                                          for _ in range(9)])
        self.ConvExtFlat = nn.ModuleList([
            Conv1DBlock(kernel_size=kernel_size, channels = channels//2,expand_channels = expand_channels//2, drop_rate=drop_rate)
                                          for _ in range(16)])
        
        self.emb  = nn.Embedding(385, int(channels//2 * 16 + channels*9) , max_norm=True)

    def forward(self, x):
        x = x.view(-1, 557)
        
        state_t = x[:, 0:60] - 273
        state_q0001 = x[:, 60:120] *1_000
        state_q0002 = x[:, 120:180] *1_000
        state_q0003 = x[:, 180:240] *1_000
        state_u = x[:, 240:300] / 100
        state_v = x[:, 300:360] / 100
    
        state_ps = x[:, 360:361]/ 100_000 - 1
        pbuf_SOLIN = x[:, 361:362] / 1000
        pbuf_LHFLX = x[:, 362:363] / 1000
        pbuf_SHFLX = x[:, 363:364] / 1000
        pbuf_TAUX = x[:, 364:365] / 1
        pbuf_TAUY = x[:, 365:366] / 1
        pbuf_COSZRS = x[:, 366:367] / 1
        cam_in_ALDIF = x[:, 367:368] / 1
        cam_in_ALDIR = x[:, 368:369] / 1
        cam_in_ASDIF = x[:, 369:370] / 1
        cam_in_ASDIR = x[:, 370:371] / 1
        cam_in_LWUP = x[:, 371:372] / 1000
        cam_in_ICEFRAC = x[:, 372:373] / 1
        cam_in_LANDFRAC = x[:, 373:374] /1
        cam_in_OCNFRAC = x[:, 374:375]  /1
        cam_in_SNOWHLAND = x[:, 375:376] / 1
    
        pbuf_ozone = x[:, 376:436] * 100_000
        pbuf_CH4 = x[:, 436:496] * 100_000
        pbuf_N2O = x[:, 496:556] * 100_000
            
        inputs_60 = [
                state_t,
                state_q0001,
                state_q0002,
                state_q0003, 
                state_u,
                state_v,
    
                pbuf_ozone,
                pbuf_CH4,
                pbuf_N2O
        ]

        inputs_flat = [            
                torch.repeat_interleave(state_ps, 60, dim=-1),
                torch.repeat_interleave(pbuf_SOLIN, 60, dim=-1),
                torch.repeat_interleave(pbuf_LHFLX, 60, dim=-1),
                torch.repeat_interleave(pbuf_SHFLX, 60, dim=-1),
                torch.repeat_interleave(pbuf_TAUX, 60, dim=-1),
               torch.repeat_interleave(pbuf_TAUY, 60, dim=-1),
                torch.repeat_interleave(pbuf_COSZRS, 60, dim=-1),
                torch.repeat_interleave(cam_in_ALDIF, 60, dim=-1),
                torch.repeat_interleave(cam_in_ALDIR, 60, dim=-1),
               torch.repeat_interleave(cam_in_ASDIF, 60, dim=-1),
                torch.repeat_interleave(cam_in_ASDIR, 60, dim=-1),
                torch.repeat_interleave(cam_in_LWUP, 60, dim=-1),
                torch.repeat_interleave(cam_in_ICEFRAC, 60, dim=-1),
                torch.repeat_interleave(cam_in_LANDFRAC, 60, dim=-1),
                torch.repeat_interleave(cam_in_OCNFRAC, 60, dim=-1),
                torch.repeat_interleave(cam_in_SNOWHLAND, 60, dim=-1),
        ]

        pos = x[:, 556:557]
        pos = torch.repeat_interleave(pos, 60, dim=1)       
        
        output = []
        for i, conv in enumerate(self.ConvExt60):
            t = inputs_60[i]
            t = t.view(-1, 1, 60)
            t = self.Scales60[i](t)
            output.append(conv(t))
            
        for i, conv in enumerate(self.ConvExtFlat):
            t = inputs_flat[i]
            t = t.view(-1, 1, 60)
            t = self.ScalesFlat[i](t)
            output.append(conv(t))

        x = torch.cat(output, 1)
        x_pos = self.emb(pos.long()).squeeze()

        
        return x + x_pos.permute([0,2,1])


In [9]:
FeatureExctractor()(torch.ones([8,557])).shape

torch.Size([8, 272, 60])

In [10]:
class LEADHead(nn.Module):
    """ Transformer block: communication followed by computation """

    def __init__(self, n_embd):
        # n_embd: embedding dimension, n_head: the number of heads we'd like
        super().__init__()

        self.act = nn.SELU()
        self.conv_seq = nn.Conv1d(in_channels = n_embd, out_channels = 6,
                                                kernel_size = 1,
                                                stride=1, 
                                                padding='same')
        
        self.conv_flat = nn.Conv1d(in_channels = n_embd, out_channels = 8,
                                                kernel_size = 1,
                                                stride=1, 
                                                padding='same')
        
    def forward(self, x):

        #x = x.permute([0,2,1])
        
        p_seq = self.conv_seq(x)
        p_seq = nn.Flatten()(p_seq)
    
        p_flat = self.conv_flat(x)
        p_flat = torch.mean(p_flat, axis = -1)
        
        return torch.cat([p_seq, p_flat], axis= -1)

In [11]:
LEADHead(32)(torch.ones([8,32,60]))

tensor([[-1.2465, -1.2465, -1.2465,  ..., -1.0379, -0.0395,  0.4833],
        [-1.2465, -1.2465, -1.2465,  ..., -1.0379, -0.0395,  0.4833],
        [-1.2465, -1.2465, -1.2465,  ..., -1.0379, -0.0395,  0.4833],
        ...,
        [-1.2465, -1.2465, -1.2465,  ..., -1.0379, -0.0395,  0.4833],
        [-1.2465, -1.2465, -1.2465,  ..., -1.0379, -0.0395,  0.4833],
        [-1.2465, -1.2465, -1.2465,  ..., -1.0379, -0.0395,  0.4833]],
       grad_fn=<CatBackward0>)

In [12]:
# batch_size = 16
# block_size = 256
# max_iters = 5000
# learning_rate = 3e-4
# eval_iters = 100
# n_embd = 384
# n_head = 8
# n_layer = 12
# dropout = 0.2

nn_config = dict(
    n_embd = 256,
    n_head = 4,
    fe_channels = 32, 
    encoder_layers = 3, 
    fe_drop_rate = 0.1,
    att_drop_rate = 0.2,
    n_features = 25,
    bottleneck_k_size = 3,
    block_kernels = [5, 3]
)

    
class LEADModelAtt(nn.Module):
    def __init__(self, n_embd = 64, n_head = 4, encoder_layers = 3, fe_channels=16, fe_drop_rate=0.1, 
                 att_drop_rate=0.2, n_features = 25, bottleneck_k_size = 3, block_kernels = [5, 3]):
        super().__init__()
        self.fe = FeatureExctractor(kernel_size = 7, channels=fe_channels, expand_channels=fe_channels*2, drop_rate = fe_drop_rate, n_features=n_features)
        self.linearStem = nn.Linear(fe_channels*9 + fe_channels//2 * 16, n_embd)
        self.bottleneck = Conv1DBlock(kernel_size=bottleneck_k_size, channels = n_embd, expand_channels = n_embd*2, drop_rate=fe_drop_rate)

        self.blocks = nn.Sequential(*[ConvTransBlock(block_kernels = block_kernels, 
                                                     channels = n_embd, 
                                                     expand_channels = n_embd*2, 
                                                     n_head=n_head, 
                                                     drop_rate = fe_drop_rate, 
                                                     att_drop_rate = att_drop_rate) for _ in range(encoder_layers)])
        
        self.head  = LEADHead(n_embd = n_embd)
        self.apply(self._init_weights)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)



    def forward(self, inputs, targets=None):
        #B, T = inputs.shape

        xf = self.fe(inputs)
        xf = xf.permute([0,2,1])
        xf = self.linearStem(xf)
        xf = xf.permute([0,2,1])
        xf = self.bottleneck(xf)
        x = xf#.permute([0,2,1])
        
        x = self.blocks(x)

        out = self.head(x)
        return out

In [13]:
fe_channels = 32
fe_channels*9 + fe_channels//2 * 16

544

In [14]:
LEADModelAtt(**nn_config)(torch.ones([8, 557])).shape

torch.Size([8, 368])

In [15]:
import torchvision

class FocalLossBCE(torch.nn.Module):
    def __init__(
            self,
            alpha: float = 0.25,
            gamma: float = 2,
            reduction: str = "mean",
            bce_weight: float = 1.0,
            focal_weight: float = 1.0,
    ):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction
        self.bce = torch.nn.BCEWithLogitsLoss(reduction=reduction) #, pos_weight=sample_weights_420)
        self.bce_weight = bce_weight
        self.focal_weight = focal_weight

    def forward(self, logits, targets):
        focall_loss = torchvision.ops.focal_loss.sigmoid_focal_loss(
            inputs=logits,
            targets=targets,
            alpha=self.alpha,
            gamma=self.gamma,
            reduction=self.reduction,
        )
        bce_loss = self.bce(logits, targets)
        return self.bce_weight * bce_loss + self.focal_weight * focall_loss


criterion = FocalLossBCE(focal_weight=5, alpha = 0.3)

# DATASET

In [16]:
df = pd.read_parquet("train_data_sample.parquet").sample(200000)
df['pos'] = (df.index % 384).to_list()

df = df.drop('sample_id', axis=1).reset_index(drop=True)

In [17]:
y = df.iloc[:, 556:924].to_numpy() * TARGET_WEIGHTS

mean_y = y.mean(0)
std_y = df.iloc[:, 556:924].std().to_numpy()
std_y = np.clip(std_y, 1e-10, 1e3)

In [18]:
std_y = torch.maximum(torch.sqrt(torch.mean(torch.pow(torch.tensor(y), 2), 0)), torch.tensor(1e-10)).numpy()

In [19]:
class LEAD_Dataset(torch.utils.data.Dataset):
    def __init__(self, df, augmentation=False, mode='train'):
        if mode == 'train':
            self.df = df.reset_index(drop=True)
        elif mode == 'valid':
            self.df = df.reset_index(drop=True)
        else:
            self.df = df.reset_index(drop=True)
        self.mode = mode
        self.augmentation = augmentation
    
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        
        x = self.df.iloc[idx, :556].to_numpy() 
        y = self.df.iloc[idx, 556:924].to_numpy() * TARGET_WEIGHTS
        
        pos = self.df.iloc[idx, 924:].to_numpy() 
        
        y = (y - mean_y) / std_y
        
        return torch.cat([torch.tensor(x), torch.tensor(pos)], -1).float(), torch.tensor(y)

 

In [20]:
LEAD_Dataset(df).__getitem__(3)[0].view(1, -1)

tensor([[ 2.2394e+02,  2.3503e+02,  2.3989e+02,  2.4910e+02,  2.5971e+02,
          2.6623e+02,  2.6409e+02,  2.5529e+02,  2.4416e+02,  2.3618e+02,
          2.2987e+02,  2.2462e+02,  2.2019e+02,  2.1663e+02,  2.1381e+02,
          2.1161e+02,  2.0894e+02,  2.0685e+02,  2.0652e+02,  2.0863e+02,
          2.1057e+02,  2.1351e+02,  2.1750e+02,  2.2186e+02,  2.2602e+02,
          2.2998e+02,  2.3367e+02,  2.3709e+02,  2.4037e+02,  2.4352e+02,
          2.4655e+02,  2.4945e+02,  2.5247e+02,  2.5539e+02,  2.5818e+02,
          2.6080e+02,  2.6344e+02,  2.6589e+02,  2.6838e+02,  2.7051e+02,
          2.7258e+02,  2.7437e+02,  2.7624e+02,  2.7754e+02,  2.7865e+02,
          2.7968e+02,  2.8091e+02,  2.8205e+02,  2.8295e+02,  2.8380e+02,
          2.8344e+02,  2.8443e+02,  2.8518e+02,  2.8568e+02,  2.8602e+02,
          2.8613e+02,  2.8620e+02,  2.8607e+02,  2.8516e+02,  2.8601e+02,
          1.1173e-06,  1.0842e-06,  1.0669e-06,  1.0953e-06,  1.0951e-06,
          1.0896e-06,  1.0900e-06,  1.

In [21]:
LEADModelAtt(**nn_config)(torch.cat([LEAD_Dataset(df).__getitem__(3)[0].view(1, -1), LEAD_Dataset(df).__getitem__(5)[0].view(1, -1)]))

tensor([[ 2.0727e-01,  2.5569e-01,  2.6243e-01,  2.1243e-01,  2.0033e-01,
          3.7126e-01,  4.8705e-01,  2.7588e-01,  3.1144e-01,  3.6225e-01,
          2.1717e-01,  3.3029e-01,  2.8145e-01,  3.4692e-01,  2.8682e-01,
          3.5745e-01,  4.0530e-01,  2.7902e-01,  2.6630e-01,  2.8650e-01,
          3.1841e-01,  3.4007e-01,  2.0181e-01,  3.4871e-01,  2.4535e-01,
          1.7324e-01,  2.4345e-01,  2.0873e-01,  3.9702e-01,  2.8750e-01,
          2.4002e-01,  2.3984e-01,  2.6250e-01,  2.6552e-01,  5.0149e-01,
          4.9482e-01,  3.7351e-01,  4.0266e-01,  5.2865e-01,  7.2146e-01,
          5.6031e-01,  5.7967e-01,  3.4545e-01,  3.7563e-01,  3.4779e-01,
          2.6583e-01,  2.9614e-01,  2.4823e-01,  2.7815e-01,  2.9194e-01,
          2.0267e-01,  4.1963e-01,  1.7632e-01,  2.2962e-01,  1.5188e-01,
          1.7362e-01,  1.5981e-01,  1.9565e-01,  1.2137e-01,  4.6855e-02,
          1.0724e+00,  1.0687e+00,  1.0417e+00,  1.0586e+00,  1.1666e+00,
          1.2827e+00,  1.2685e+00,  1.

In [22]:
a = LEAD_Dataset(df).__getitem__(63)[1]
b = LEAD_Dataset(df).__getitem__(9)[1]

In [23]:
a.shape

torch.Size([368])

In [24]:
nn.MSELoss()(torch.tensor(np.expand_dims(a, 0)), torch.tensor(np.expand_dims(b, 0)))

tensor(2.1720, dtype=torch.float64)

In [25]:
def r2_score(y_pred:torch.Tensor, y_true:torch.Tensor) -> float:
    """
    Calculate the R^2 (coefficient of determination) regression score.
    
    Parameters
    ----------
    y_pred : torch.Tensor
        The predicted values.
    y_true : torch.Tensor
        The true values.

    Returns
    -------
    float
        The R^2 score, a float value.
    """
    #y_true = y_true * torch.tensor(TARGET_WEIGHTS)
    #y_pred = y_pred * torch.tensor(TARGET_WEIGHTS)

    
    ss_res = torch.sum((y_true - y_pred) ** 2)
    ss_tot = torch.sum((y_true - torch.mean(y_true)) ** 2)
    
    r2 = 1 - ss_res / ss_tot
    
    return r2.item()

In [26]:

import torch
#from torcheval.metrics import R2Score 
from torchmetrics.regression import R2Score
metric = R2Score()



class LEADModel(pl.LightningModule):
    
    def __init__(self):
        super().__init__()
        
        # == backbone ==
        self.backbone = LEADModelAtt(**nn_config).to(config.DEVICE)
        

        self.loss_fn = nn.MSELoss()
        self.metric = r2_score
        #self.metric = R2Score()
        
        # == record ==
        self.validation_step_outputs = []
        
    def forward(self, images):
        return self.backbone(images)
    
    def configure_optimizers(self):
        
        # == define optimizer ==
        model_optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, self.parameters()),
            lr=config.LR,
            weight_decay=config.WEIGHT_DECAY
        )
        
        # == define learning rate scheduler ==
        lr_scheduler = CosineAnnealingWarmRestarts(
            model_optimizer,
            T_0=config.EPOCHS,
            T_mult=1,
            eta_min=1e-7,
            last_epoch=-1
        )
        
        return {
            'optimizer': model_optimizer,
            'lr_scheduler': {
                'scheduler': lr_scheduler,
                'interval': 'epoch',
                'monitor': 'val_loss',
                'frequency': 1
            }
        }
    
    def training_step(self, batch, batch_idx):
        
        # == obtain input and target ==
        image, target = batch
        image = image.to(self.device).float()
        target = target.to(self.device).float()
        
        # == pred ==
        y_pred = self(image)
        
        # == compute loss ==
        train_loss = self.loss_fn(y_pred, target)
        
        # == record ==
        self.log('train_loss', train_loss, True)
        
        return train_loss
    
    def validation_step(self, batch, batch_idx):
        
        # == obtain input and target ==
        image, target = batch
        image = image.to(self.device).float()
        target = target.to(self.device).float()
        
        # == pred ==
        with torch.no_grad():
            y_pred = self(image)
            
        self.validation_step_outputs.append({"logits": y_pred, "targets": target})
        
    def train_dataloader(self):
        return self._train_dataloader

    def validation_dataloader(self):
        return self._validation_dataloader
    
    def on_epoch_start(self):
        print('\n')

    def on_load_checkpoint(self, checkpoint: dict) -> None:
        state_dict = checkpoint["state_dict"]
        model_state_dict = self.state_dict()
        is_changed = False
        for k in state_dict:
            if k in model_state_dict:
                if state_dict[k].shape != model_state_dict[k].shape:
                    print(f"Skip loading parameter: {k}, "
                                f"required shape: {model_state_dict[k].shape}, "
                                f"loaded shape: {state_dict[k].shape}")
                    state_dict[k] = model_state_dict[k]
                    is_changed = True
            else:
                print(f"Dropping parameter {k}")
                is_changed = True

        if is_changed:
            checkpoint.pop("optimizer_states", None)
    
    def on_validation_epoch_end(self):
        
        # = merge batch data =
        outputs = self.validation_step_outputs
        
        #output_val = nn.Sigmoid()(torch.cat([x['logits'] for x in outputs], dim=0)).cpu().detach()
        #output_val = torch.cat([x['logits'] for x in outputs], dim=0).cpu().detach()
        #target_val = torch.cat([x['targets'] for x in outputs], dim=0).cpu().detach()
        output_val = torch.cat([x['logits'] for x in outputs], dim=0)#.cpu().detach()
        target_val = torch.cat([x['targets'] for x in outputs], dim=0)#.cpu().detach()
        
        
        # = compute validation loss =
        val_loss = self.loss_fn(output_val, target_val)
        # == record ==
        print(f"val_loss: {val_loss}")
        self.log('val_loss', val_loss, True)
        
        val_loss = val_loss.cpu().detach()

    
        #output_val = nn.Sigmoid()(output_val).cpu().detach()
        output_val = output_val.view(-1,368).cpu().detach()
        target_val = target_val.view(-1,368).cpu().detach()

  
        y = (output_val * std_y) + mean_y
        
        y_pred = target_val
        y_pred[:, std_y < 1e-9] = 0
        y_pred = (y_pred * std_y) + mean_y

        # r2=0
        # for i in range(368):
        #     r2_i = self.metric(y_pred[:, i], y[:, i])
        #     r2 += r2_i
        # val_score  = r2/ 368
        # #val_score = self.metric(y_pred, y)

        val_score = self.metric(y_pred, y)
        
        # r2=0
        # delim = 0
        # for i in range(368):
        #     r2_i = self.metric(y_pred[:, i], y[:, i])
        #     if r2_i > 1e-6:
        #         r2 += r2_i
        #         delim += 1
        # val_score  = r2/ delim
        
        
        # self.metric.update(target_val, output_val)
        # val_score = self.metric.compute()
        
        # target to one-hot
        #target_val = torch.nn.functional.one_hot(target_val, len(label_list))
        
        # = val with ROC AUC =
        # gt_df = pd.DataFrame(target_val.numpy().astype(np.float32), columns=label_list)
        # pred_df = pd.DataFrame(output_val.numpy().astype(np.float32), columns=label_list)
        
        # gt_df['id'] = [f'id_{i}' for i in range(len(gt_df))]
        # pred_df['id'] = [f'id_{i}' for i in range(len(pred_df))]
        
        # val_score = score(gt_df.drop(cols_drop_on_val, axis=1), pred_df.drop(cols_drop_on_val, axis=1), row_id_column_name='id')
        
        print(f"val_R2: {val_score}")
        
        self.log("val_R2", val_score, True)
        
        # clear validation outputs
        self.validation_step_outputs = list()
        
        return {'val_loss': val_loss, 'val_R2': val_score}

In [27]:
USE_CHECKPOINT = False
#CHK_PATH = './pretrain_checkpoints/eca_nfnet_l0_fold_0_0.97126.ckpt'


def run_training(fold_id, total_df):
    print('================================================================')
    print(f"==== Running training for fold {fold_id} ====")
    
    # == create dataset and dataloader ==
    train_df = total_df[total_df['fold'] != fold_id].drop('fold', axis=1).copy()
    valid_df = total_df[total_df['fold'] == fold_id].drop('fold', axis=1).copy()
    
    print(f'Train Samples: {len(train_df)}')
    print(f'Valid Samples: {len(valid_df)}')
    
  
    train_ds = LEAD_Dataset(train_df)
    val_ds =  LEAD_Dataset(valid_df)
    #val_ds = WaveAllFileDataset(df=valid_df, name_col="filepath", **val_dataset_config)
    
    
    train_dl = torch.utils.data.DataLoader(
        train_ds,
        batch_size=config.BATCH_SIZE,
        shuffle=True,
        #num_workers=config.N_WORKERS,
        pin_memory=True,
        #persistent_workers=True
    )
    
    val_dl = torch.utils.data.DataLoader(
        val_ds,
        batch_size=config.BATCH_SIZE * 2,
        shuffle=False,
        #num_workers=config.N_WORKERS,
        pin_memory=True,
        #persistent_workers=True
    )
    
    # == init model ==
    if USE_CHECKPOINT:
        model = LEADModel.load_from_checkpoint(CHK_PATH, strict=False)
    else:
        model = LEADModel()
    # == init callback ==
    checkpoint_callback = ModelCheckpoint(monitor='val_loss',
                                          dirpath=config.OUTPUT_DIR,
                                          save_top_k=1,
                                          save_last=True,
                                          save_weights_only=True,
                                          filename=f"fold_{fold_id}",
                                          mode='min')

    callbacks_to_use = [checkpoint_callback, TQDMProgressBar(refresh_rate=1)]

    print(f'trainer')
    # == init trainer ==
    trainer = pl.Trainer(
        max_epochs=config.EPOCHS,
        val_check_interval=1.,
        num_sanity_val_steps=0,
        callbacks=callbacks_to_use,
        enable_model_summary=False,
        accelerator="gpu" if torch.cuda.is_available() else 'auto',
        deterministic=True,
        precision='16-mixed' if config.MIXED_PRECISION else 32,
    )
    
    # == Training ==
    trainer.fit(model, train_dataloaders=train_dl, val_dataloaders=val_dl)
    
    # == Prediction ==
    best_model_path = checkpoint_callback.best_model_path
    weights = torch.load(best_model_path)['state_dict']
    model.load_state_dict(weights)
    
    
    return trainer

In [28]:
#train_df = train_df[train_df.target<30].reset_index(drop=True)

kf = KFold(n_splits=config.FOLDS, shuffle=True, random_state=config.SEED)
df['fold'] = 0
for fold, (train_idx, val_idx) in enumerate(kf.split(df)):
    df.loc[val_idx, 'fold'] = fold
    

In [29]:
#config.EPOCHS = 10
#config.LR = 1e-5

In [30]:


import logging

def disable_logging_during_tests():
    # Store the current log level to restore it later
    original_log_level = logging.getLogger().getEffectiveLevel()

    # Set the log level to a higher level, e.g., WARNING or CRITICAL
    logging.disable(logging.ERROR)

    # Run your tests here

    # Restore the original log level after the tests
    logging.disable(original_log_level)

# Call this function before running your tests
disable_logging_during_tests()



In [None]:
selected_folds = [0,4,5]
    
# training
torch.set_float32_matmul_precision('high')



for f in range(config.FOLDS):
    
    if f not in selected_folds:
        continue
    
    # get validation index
    #val_idx = list(train_df[train_df['fold'] == f].index)
    
    # main loop of f-fold
    trainer = run_training(f, df)
    

    
    # only training one fold
    #break


# for idx, val_score in enumerate(fold_val_score_list):
#     print(f'Fold {idx} Val Score: {val_score:.5f}')

# oof_gt_df = oof_df[['samplename'] + label_list].copy()
# oof_pred_df = oof_df[['samplename'] + pred_cols].copy()
# oof_pred_df.columns = ['samplename'] + label_list
# oof_score = score(oof_gt_df, oof_pred_df, 'samplename')
# print(f'OOF Score: {oof_score:.5f}')

#oof_df.to_csv(f"{config.OUTPUT_DIR}/oof_pred.csv", index=False)

==== Running training for fold 0 ====
Train Samples: 171428
Valid Samples: 28572
trainer


E:\PycharmProjects\birdclef24\venv\Lib\site-packages\pytorch_lightning\trainer\connectors\logger_connector\logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
E:\PycharmProjects\birdclef24\venv\Lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:653: Checkpoint directory E:\PycharmProjects\LEAP\output exists and is not empty.
E:\PycharmProjects\birdclef24\venv\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `D

Training: |                                                                                                   …

Validation: |                                                                                                 …

val_loss: 0.5748037695884705
val_R2: -0.8353511652778718


Validation: |                                                                                                 …

val_loss: 0.44946566224098206
val_R2: -0.2161903721419367


Validation: |                                                                                                 …

val_loss: 0.4143925607204437
val_R2: 0.07491585129627798


E:\PycharmProjects\birdclef24\venv\Lib\site-packages\pytorch_lightning\trainer\call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...


==== Running training for fold 4 ====
Train Samples: 171429
Valid Samples: 28571
trainer


E:\PycharmProjects\birdclef24\venv\Lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:653: Checkpoint directory E:\PycharmProjects\LEAP\output exists and is not empty.
E:\PycharmProjects\birdclef24\venv\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
E:\PycharmProjects\birdclef24\venv\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Training: |                                                                                                   …

In [None]:
@torch.no_grad()
def estimate_loss():
    out = {}
    model.eval()
    for split in ['train', 'val']:
        losses = torch.zeros(eval_iters)
        for k in range(eval_iters):
            X, Y = get_batch(tokenized, split)
            logits, loss = model(X, Y)
            losses[k] = loss.item()
        out[split] = losses.mean()
    model.train()
    return out

In [None]:
model = LEADModelAtt(**nn_config).to(config.DEVICE)

In [None]:
train_dl = torch.utils.data.DataLoader(
        LEAD_Dataset(df.drop('fold', axis=1)),
        batch_size=config.BATCH_SIZE,
        shuffle=True,
        num_workers=0,
        pin_memory=True,
        #persistent_workers=True
    )

In [None]:
max_iters = 2000
eval_iters = 1000
learning_rate = 3e-4
# create a PyTorch optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

for step in range(max_iters):
    #print(iter)
    # if iter % eval_iters == 0:
    #     losses = estimate_loss()
    #     print(f"step: {iter}, train loss: {losses['train']:.3f}, val loss: {losses['val']:.3f}")

    # sample a batch of data
    #xb, yb = LEAD_Dataset(df.drop('fold', axis=1)).__getitem__(iter)

    xb, yb = next(iter(train_dl))

    # evaluate the loss
    logits = model.forward(xb.to('cuda').float(), yb.to('cuda').float())

    loss = nn.MSELoss()(logits, yb.to('cuda').float())
    
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()
    print(loss.item())





In [None]:
        y = (yb * std_y) + mean_y
        
        y_pred = logits.cpu().detach()
        y_pred[:, std_y < 1e-9] = 0
        y_pred = (y_pred * std_y) + mean_y

        # r2=0
        # for i in range(368):
        #     r2_i = self.metric(y_pred[:, i], y[:, i])
        #     r2 += r2_i
        # val_score  = r2/ 368
        r2_score(y_pred.view(-1, 368, 1), y.view(-1, 368, 1))

In [None]:
(y_pred*torch.tensor(TARGET_WEIGHTS)).shape

In [None]:
y

In [None]:
y_pred.shape

In [None]:
y.shape

In [None]:
#config.EPOCHS = 25  # max epochs
#config.LR = 3e-4  # learning rate

In [None]:
%reload_ext tensorboard
%tensorboard --logdir ./lightning_logs/version_0/

