In [1]:
%load_ext autoreload
%load_ext lab_black
%autoreload 2

In [2]:
import torch

In [3]:
import os
import sys
import ast
import glob
import json
import torch
import torchmetrics
import pandas as pd
import numpy as np
from tqdm import tqdm

from omegaconf import OmegaConf
from hydra.utils import instantiate

from pytorch_lightning import Trainer
from pytorch_lightning.utilities.seed import seed_everything

sys.path.insert(0, "../")

from src.datasets import WaveformDatasetRating, WaveformDatasetRatingV2
from src.models.timmsed import TimmSEDGPU, SelectiveTimmSEDGPU, SelectivePartTimmSEDGPU

from src.models.module import TrainModule, SelectiveTrainModule


os.environ["CUDA_VISIBLE_DEVICES"] = "3"



In [4]:
# all_path = glob.glob("../datasets/train_np/*/*/*.npy")
train = pd.read_csv("../datasets/2022/train_folds.csv")
train["new_target"] = (
    train["primary_label"]
    + " "
    + train["secondary_labels"].map(lambda x: " ".join(ast.literal_eval(x)))
)
train["len_new_target"] = train["new_target"].map(lambda x: len(x.split()))

In [5]:
train["file_path"] = "../datasets/2022/train_audio/" + train["filename"]

In [6]:
scored_birds = "../datasets/2022/scored_birds.json"
with open(scored_birds, "r") as f:
    scored_birds_list = json.load(f)

train.loc[~train["primary_label"].isin(scored_birds_list), "kfold"] = 6
cfg = "../src/configs/timmsed/timmSED_exp_008.yaml"
cfg = OmegaConf.load(cfg)
cfg.model.target_columns = train.primary_label.unique().tolist()
cfg.model.num_classes = len(cfg.model.target_columns)

In [7]:
label_dict = dict()
labels = train.primary_label.unique().tolist()
for i, label in enumerate(labels):
    label_dict[label] = i

In [8]:
train["scored"] = False

In [9]:
for i, row in tqdm(train.iterrows(), total=len(train)):
    for label in row["new_target"].split():
        if label in scored_birds_list:
            train.loc[i, "scored"] = True
#         if label_dict[label] in scored_birds_list:
#             train.loc[i, "scored"] = True

100%|██████████| 14852/14852 [00:01<00:00, 11044.74it/s]


In [10]:
fold = 0
trn_df = train[train.kfold != fold].reset_index(drop=True)
val_df = train[train.kfold == fold].reset_index(drop=True)
mixup = trn_df.loc[trn_df["scored"] == True].reset_index(drop=True)

In [12]:
cfg.model.train_ds.batch_size = 24

In [13]:
train_dataset = WaveformDatasetRatingV2(trn_df, cfg.model, mode="train")
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=cfg.model.train_ds.batch_size,
    num_workers=cfg.model.valid_ds.num_workers,
    pin_memory=True,
    shuffle=True,
)

mixup_dataset = WaveformDatasetRatingV2(mixup, cfg.model, mode="train")
mixup_dataloader = torch.utils.data.DataLoader(
    mixup_dataset,
    batch_size=cfg.model.train_ds.batch_size,
    num_workers=cfg.model.valid_ds.num_workers,
    pin_memory=True,
    shuffle=True,
)


valid_dataset = WaveformDatasetRatingV2(val_df, cfg.model, mode="valid")
valid_dataloader = torch.utils.data.DataLoader(
    valid_dataset,
    batch_size=cfg.model.valid_ds.batch_size,
    num_workers=cfg.model.valid_ds.num_workers,
    pin_memory=True,
    shuffle=False,
)

In [14]:
sed_model = SelectivePartTimmSEDGPU(cfg.model)
train_module = SelectiveTrainModule(sed_model, cfg.model, train_dataset, mixup_dataset)

In [15]:
train_inputs = next(iter(train_dataloader))
mixup_inputs = next(iter(mixup_dataloader))

In [17]:
sed_model.train()
None

SelectivePartTimmSEDGPU(
  (logmel_extractor): Sequential(
    (0): MelSpectrogram(
      (spectrogram): Spectrogram()
      (mel_scale): MelScale()
    )
    (1): AmplitudeToDB()
  )
  (spec_augmenter): SpecAugmentation(
    (time_dropper): DropStripes()
    (freq_dropper): DropStripes()
  )
  (bn0): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (backbone): NormFreeNet(
    (stem): Sequential(
      (conv1): ScaledStdConv2d(1, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (act2): SiLU(inplace=True)
      (conv2): ScaledStdConv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (act3): SiLU(inplace=True)
      (conv3): ScaledStdConv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (act4): SiLU(inplace=True)
      (conv4): ScaledStdConv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    )
    (stages): Sequential(
      (0): Sequential(
        (0): NormFreeBlock(
          (downsample):

In [18]:
sed_model(
    train_inputs["image"],
    train_inputs["targets"],
    train_inputs["weight"],
)

torch.Size([12, 2304, 24, 10])


{'logit': tensor([[ 0.2940,  1.0152,  0.5102,  ...,  0.2386, -0.5693, -0.1450],
         [-0.2282,  0.7965,  0.4875,  ...,  0.9514, -0.5581, -0.2262],
         [-0.5497,  1.6442,  0.6380,  ...,  0.8771, -0.8651,  0.2105],
         ...,
         [-0.6476,  0.8156,  0.4823,  ...,  0.7495, -0.7614, -0.4226],
         [ 0.2023,  0.7698,  0.1261,  ...,  0.9613, -0.5548, -0.0635],
         [-0.1067,  1.3152,  0.2878,  ...,  0.8540, -0.7358, -0.4024]],
        grad_fn=<SumBackward1>),
 'framewise_logit': tensor([[[-0.7086,  0.6598,  0.0481,  ..., -0.7614,  1.1161,  0.7727],
          [-0.7086,  0.6598,  0.0481,  ..., -0.7614,  1.1161,  0.7727],
          [-0.7086,  0.6598,  0.0481,  ..., -0.7614,  1.1161,  0.7727],
          ...,
          [ 0.7355,  0.4605,  0.9530,  ..., -0.0941, -0.6875,  0.6736],
          [ 0.7355,  0.4605,  0.9530,  ..., -0.0941, -0.6875,  0.6736],
          [ 0.7355,  0.4605,  0.9530,  ..., -0.0941, -0.6875,  0.6736]],
 
         [[-0.0224,  0.5817,  0.3786,  ...,  0.7

sed_model(valid_inputs["image"], valid_inputs["targets"], valid_inputs["weight"])