Implementation based on https://www.kaggle.com/code/awsaf49/birdclef23-effnet-fsr-cutmixup-train

# Import libraries

In [1]:
import os
import random
from glob import glob
from pathlib import Path
import yaml
from tqdm import tqdm
import IPython.display as ipd

import numpy as np
import pandas as pd

import matplotlib as mpl
# `matplotlib.cm.get_cmap` is deprecated in recent Matplotlib releases; prefer the
# `matplotlib.colormaps` registry and only fall back to the legacy call on old versions.
cmap = mpl.colormaps['coolwarm'] if hasattr(mpl, 'colormaps') else mpl.cm.get_cmap('coolwarm')
import matplotlib.pyplot as plt

import librosa
import wandb
from sklearn import metrics

import torch
from torch import nn
from torch.nn import functional as F
from torch.cuda.amp import autocast, GradScaler

from timm.scheduler import CosineLRScheduler

In [2]:
from config import CFG
from pytorch_model import BirdCLEF23Net
from pytorch_wav2logmel import Wav2Logmel
import pytorch_modeler as modeler
import pytorch_preprocessing as prep
import common as com

In [3]:
# Apply the configured seed through the project's helper before anything stochastic runs
modeler.set_seed(CFG.seed)

# Device selection: first CUDA device when available, CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

print(device)
print('Debug :', CFG.debug)

cuda:0
Debug : False


# Wandb

In [4]:
# Authenticate with Weights & Biases.
# First try the Kaggle secret named "WANDB"; if that path fails for any reason
# (e.g. `kaggle_secrets` is not importable outside Kaggle), fall back to a key file.
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    api_key = user_secrets.get_secret("WANDB")
    # Login to wandb with the API key
    wandb.login(key=api_key)
    print('kaggle notebook mode')
except Exception:
    # Catch `Exception` rather than using a bare `except` so that
    # KeyboardInterrupt / SystemExit are not silently turned into "local mode".
    key_path = '/kaggle/input/wandb_key.txt'
    p = Path(key_path)
    # Strip surrounding whitespace: a trailing newline in the key file is not part of the key.
    api_key = p.read_text().strip()
    wandb.login(key=api_key)
    print('local mode')

[34m[1mwandb[0m: Currently logged in as: [33mhirokin1999[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


local mode


In [5]:
from datetime import datetime
import pytz

# Set the time zone to Japan Standard Time
jst = pytz.timezone('Asia/Tokyo')

# Get the current time, expressed in JST
now = datetime.now(jst)

# Format the current time as a string (used below as part of the wandb run name)
now_str = now.strftime('%Y-%m-%d %H:%M:%S')

print(now_str)

2023-05-13 21:55:02


In [6]:
import yaml
from tqdm import tqdm

def wandb_init(fold):
    """Start a Weights & Biases run for one cross-validation fold.

    The public (non-dunder) attributes of `CFG` plus the fold index are written
    to `./config fold-{fold}.yaml`, read back, and passed to `wandb.init` as the
    run config.

    Args:
        fold: fold index; converted with `int()` for the config and used in the
            YAML file name and the run name.

    Returns:
        The run object returned by `wandb.init`.
    """
    config = {k: v for k, v in dict(vars(CFG)).items() if '__' not in k}
    config.update({"fold": int(fold)})

    config_path = f'./config fold-{fold}.yaml'
    # Context managers guarantee the file is flushed and closed before it is
    # re-opened for reading, instead of relying on garbage collection.
    with open(config_path, 'w') as config_file:
        yaml.dump(config, config_file)
    # Round-trip through YAML so the config handed to wandb matches the file on disk.
    # NOTE: FullLoader is only acceptable here because the file was just written by this function.
    with open(config_path, 'r') as config_file:
        config = yaml.load(config_file, Loader=yaml.FullLoader)

    run = wandb.init(project="birdclef-2023-public",
                     name=f"fold-{fold}|dim-{CFG.img_size[1]}x{CFG.img_size[0]}|model-{CFG.model_name}|{now_str}",
                     config=config,
                     group=CFG.comment,
                     save_code=True, )
    return run


def log_wandb(valid_df, scores=None):
    """Log the misclassified validation rows, with their audio, to Weights & Biases.

    Args:
        valid_df: validation frame with at least the `miss`, `pred` and `target`
            columns plus everything listed in `CFG.tab_cols`. `row.filename` is
            read after the frame is reduced to those columns, so `filename` is
            presumably part of `CFG.tab_cols` (verify against the config).
        scores: metrics dict logged under the 'best' key. When omitted, the
            notebook-level `scores` variable is used, which is what existing
            `log_wandb(valid_df)` calls depended on implicitly.
    """
    if scores is None:
        # Backward compatibility with callers that rely on the global `scores`.
        scores = globals().get('scores', {})

    # `.copy()` detaches the filtered rows from `valid_df`, so the column
    # assignments below no longer raise SettingWithCopyWarning.
    save_df = valid_df.query("miss==True").copy()
    save_df['pred_name'] = save_df.pred.map(CFG.label2name)
    save_df['target_name'] = save_df.target.map(CFG.label2name)
    if CFG.debug:
        # Cap the number of uploaded rows in debug mode
        save_df = save_df.iloc[:CFG.batch_size * CFG.valid_bs]
    noimg_cols = [*CFG.tab_cols, 'target', 'pred', 'target_name', 'pred_name']
    save_df = save_df.loc[:, noimg_cols]

    data = []
    for idx, row in tqdm(save_df.iterrows(), total=len(save_df), desc='wandb ', position=0, leave=True):
        # Audio is loaded from the original training audio directory at its native sample rate
        filepath = '/kaggle/input/birdclef-2023/train_audio/' + row.filename
        audio, sr = librosa.load(filepath, sr=None)
        data.append([*row.tolist(), wandb.Audio(audio, caption=row.filename, sample_rate=sr)])
    wandb_table = wandb.Table(data=data, columns=[*noimg_cols, 'audio'])
    wandb.log({'best': scores,
               'table': wandb_table,
               })

# Data Frame

In [7]:
# Load the training metadata and derive the wav path and integer target for every row
df = pd.read_csv(f'{CFG.BASE_PATH}/train_metadata.csv')
# regex=False: '.ogg' must be matched literally. With regex matching (the default on
# pandas < 2.0) the leading '.' is a wildcard that matches any character.
filename = df.filename.str.replace('.ogg', '.wav', regex=False)
df['filepath'] = CFG.BASE_PATH + '/train_audio_wav/' + filename
df['target'] = df.primary_label.map(CFG.name2label)
df.head(2)

  


Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,license,rating,url,filename,filepath,target
0,abethr1,[],['song'],4.3906,38.2788,Turdus tephronotus,African Bare-eyed Thrush,Rolf A. de By,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/128013,abethr1/XC128013.ogg,/kaggle/input/birdclef-2023/train_audio_wav/ab...,0
1,abethr1,[],['call'],-2.9524,38.2921,Turdus tephronotus,African Bare-eyed Thrush,James Bradley,Creative Commons Attribution-NonCommercial-Sha...,3.5,https://www.xeno-canto.org/363501,abethr1/XC363501.ogg,/kaggle/input/birdclef-2023/train_audio_wav/ab...,0


In [8]:
# Import required packages
from sklearn.model_selection import StratifiedKFold

# Initialize the StratifiedKFold object and shuffle the data.
# The number of splits comes from CFG.num_fold so that it always agrees with the
# training loop, which iterates over range(CFG.num_fold).
skf = StratifiedKFold(n_splits=CFG.num_fold, shuffle=True, random_state=CFG.seed)

# Reset the index of the dataframe so the positional indices from skf.split match the labels used by .loc
df = df.reset_index(drop=True)

# Create a new column in the dataframe to store the fold number for each row
df["fold"] = -1

# Iterate over the folds and assign the corresponding fold number to each row in the dataframe
# (each row is marked with the fold in which it serves as validation data)
for fold, (train_idx, val_idx) in enumerate(skf.split(df, df['primary_label'])):
    df.loc[val_idx, 'fold'] = fold



In [9]:
# Out-of-fold relabel table produced by an earlier experiment; it is handed to
# prep.BirdDataset(..., oof_df=oof_df) when building the training dataset below.
# NOTE(review): absolute path to a local working directory — must exist before this cell runs.
oof_df = pd.read_csv('/kaggle/working/WSL/exp20_exp10_wsl_relabel_sigmoid/oof_relabel.csv')

# Training

In [10]:
import gc
def gc_collect():
    """Run Python garbage collection, then ask PyTorch to empty its CUDA cache."""
    # Collect first so that tensors which just became unreachable are released
    # before the CUDA cache is emptied.
    gc.collect()
    torch.cuda.empty_cache()

In [11]:
def drop_weight(weight):
    """Remove classifier-head and attention-block entries from a state dict.

    Every key containing 'model.classifier' or 'att_block' is deleted from
    `weight` in place; the same (mutated) mapping is returned.
    """
    unwanted_markers = ('model.classifier', 'att_block')
    # Snapshot the matching keys first so the mapping is not modified while being iterated
    doomed = [name for name in list(weight.keys())
              if any(marker in name for marker in unwanted_markers)]
    for name in doomed:
        weight.pop(name, None)
    return weight

In [None]:
# Cross-validation training loop.
# For every selected fold: build datasets/loaders, train with AMP, keep the checkpoint
# with the best validation padded cmAP, then run OOF inference with that checkpoint.
# The oof_* lists collect one entry per trained fold and are consumed by the
# "Performance" section further down.
oof_pred = []; oof_true = []; oof_val = []; oof_ids = []; oof_folds = []

num_classes = CFG.num_classes
df = df.copy()
for fold in range(CFG.num_fold):
    scaler = torch.cuda.amp.GradScaler()
    # Check if the fold is selected
    if fold not in CFG.selected_folds:
        continue

    # Initialize Weights and Biases
    if CFG.wandb:
        run = wandb_init(fold)

    # Compute batch size and number of samples to drop
    infer_bs = CFG.valid_bs
    drop_remainder = CFG.drop_remainder

    # Split dataset with cv filter
    if CFG.cv_filter:
        # NOTE(review): `df` is re-filtered on every fold; presumably filter_data is idempotent — confirm.
        df = com.filter_data(df, thr=5)
        train_df = df.query("fold!=@fold | ~cv").reset_index(drop=True)
        valid_df = df.query("fold==@fold & cv").reset_index(drop=True)
    else:
        train_df = df.query("fold!=@fold").reset_index(drop=True)
        valid_df = df.query("fold==@fold").reset_index(drop=True)

    # Upsample train data
    train_df = com.upsample_data(train_df, thr=CFG.upsample_thr)
#     train_df = downsample_data(train_df, thr=500)

    # Get file paths and labels
    train_paths = train_df.filepath.values; train_labels = train_df.target.values
    valid_paths = valid_df.filepath.values; valid_labels = valid_df.target.values

    # Shuffle the file paths and labels
    index = np.arange(len(train_paths))
    np.random.shuffle(index)
    train_paths  = train_paths[index]
    train_labels = train_labels[index]

    # wav
    train_ftype = list(map(lambda x: '.wav' in x, train_paths))
    valid_ftype = list(map(lambda x: '.wav' in x, valid_paths))

    # Compute the number of training and validation samples
    num_train = len(train_paths); num_valid = len(valid_paths)

    # Log the number of training and validation samples if Weights and Biases is being used
    if CFG.wandb:
        wandb.log({'num_train':num_train,
                   'num_valid':num_valid})

    # Build the training and validation datasets
    # For debugging
    if CFG.debug:
        # Debug mode trains on a single batch worth of rows
        min_samples = CFG.batch_size
        train_ds = prep.BirdDataset(train_df.iloc[:min_samples], oof_df=oof_df, is_train=True)
        valid_ds = prep.BirdDataset(valid_df, is_train=False)
    else:
        train_ds = prep.BirdDataset(train_df, oof_df=oof_df, is_train=True)
        valid_ds = prep.BirdDataset(valid_df, is_train=False)
    # dataloader
    train_dataloader, val_dataloader = modeler.make_dataloder(train_ds, valid_ds)

    # Waveform -> log-mel front end, applied on the device before the model
    wav_to_logmel = Wav2Logmel()
    # Clear the session and build the model
    model = BirdCLEF23Net(num_classes=CFG.num_classes)
    # Load birdclef pretrained weights
    if CFG.pretrain == True:
        weight=torch.load(f'{CFG.pretrained_model_path}/fold-{fold}.pth')
        #weight=drop_weight(weight)
        print('load pre-trained model : ', f'{CFG.pretrained_model_path}/fold-{fold}.pth')
        # strict=False: missing/unexpected keys are reported by the print instead of raising
        print(model.load_state_dict(weight, strict=False))
    model.to(device)
    wav_to_logmel.to(device)

    print('#' * 25)
    print('#### Training')
    print('#### Fold: %i | Image Size: (%i, %i) | Model: %s | Batch Size: %i | Scheduler: %s' %
        (fold + 1, *CFG.img_size, CFG.model_name, CFG.batch_size, CFG.scheduler))
    print('#### Num Train: {:,} | Num Valid: {:,}'.format(len(train_paths), len(valid_paths)))

    optimizer = com.get_optimizer(model)
    # TODO com.get_scheduler
    scheduler = CosineLRScheduler(optimizer, t_initial=CFG.epochs, lr_min=CFG.lr_min,
                                  warmup_t=CFG.warmup_t, warmup_lr_init=CFG.warmup_lr_init, warmup_prefix=True)
    criterion = com.get_criterion()

    best_score = -1
    best_epoch = -1

    for epoch in range(2 if CFG.debug else CFG.epochs):
        # Training
        model.train()
        epoch_loss = 0
        for batch_idx, (inputs, sample_info) in enumerate(tqdm(train_dataloader)):
            inputs, targets = inputs.to(device), sample_info['onehot_target'].to(device)
            #print(targets)
            #targets = F.one_hot(targets, num_classes=CFG.num_classes).float()
            optimizer.zero_grad()
            logmel = wav_to_logmel(inputs)
            with torch.cuda.amp.autocast():
                # When targets are passed, the model returns its outputs together with
                # the targets the loss must be computed against.
                outputs, mix_targets = model(logmel, targets)
                loss = modeler.loss_fn(outputs, mix_targets)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            # This check runs after backward/step on purpose: GradScaler.step skips the
            # optimizer update when gradients are non-finite and update() adjusts the scale.
            # Here the bad value is only kept out of the running loss and the wandb log.
            if np.isinf(loss.item()) or np.isnan(loss.item()):
                print(f'Bad loss, skipping the batch {batch_idx}')
                del loss, outputs, mix_targets
                gc_collect()
                continue
            epoch_loss += loss.item()
            # wandb logger (Train loss)
            if CFG.wandb:
                run.log({'loss': loss.item()})
        # Epoch-based schedule: set the learning rate for the next epoch
        scheduler.step(epoch+1)

        # Validation
        model.eval()
        val_loss = 0
        val_preds = []
        val_true = []
        with torch.no_grad():
            for inputs, sample_info in tqdm(val_dataloader):
                inputs, targets = inputs.to(device), sample_info['target'].to(device)
                targets = F.one_hot(targets, num_classes=CFG.num_classes).float()
                # Only the first target row is kept and predictions are averaged over dim 0,
                # presumably because one batch holds all segments of a single recording — TODO confirm
                targets = targets[0].unsqueeze(0)
                logmel = wav_to_logmel(inputs)
                outputs = model(logmel)

                # for loss
                outputs["logit"] = outputs["logit"].mean(dim=0, keepdim=True)
                outputs["framewise_logit"] = outputs["framewise_logit"].mean(dim=0, keepdim=True)

                loss = modeler.loss_fn(outputs, targets)
                outputs = outputs["clipwise_output"]
                outputs = outputs.mean(dim=0, keepdim=True)
                #outputs = torch.softmax(outputs, dim=1)
                val_loss += loss.item()
                val_preds.append(outputs.detach().cpu().numpy())
                val_true.append(targets.detach().cpu().numpy())

        val_preds = np.vstack(val_preds)
        val_true = np.vstack(val_true)
        # Metrics
        val_score = com.padded_cmap(val_true, val_preds)
        # Checkpoint
        if val_score > best_score:
            best_score = val_score
            best_epoch = epoch + 1
            torch.save(model.state_dict(), f'fold-{fold}.pth')
            if CFG.wandb:
                # A new artifact version is uploaded every time the score improves
                art = wandb.Artifact("birdclef-2023", type="model")
                art.add_file(f'fold-{fold}.pth')
                run.log_artifact(art)

        print(f'Epoch: {epoch + 1} | Train Loss: {epoch_loss / len(train_dataloader)} | '
            f'Val Loss: {val_loss / len(val_dataloader)} | Val Padded_cmAP : {val_score}')

        # wandb logger
        lr = scheduler.get_epoch_values(epoch)[0]
        if CFG.wandb:
            # Normalise by the number of batches, exactly like the printed summary above.
            # The previous `train_dataset` / `val_dataset` names are not defined in this
            # notebook (the datasets are `train_ds` / `valid_ds`) and raised NameError.
            run.log({'train_loss': epoch_loss / len(train_dataloader),
                    'lr': lr,
                    'epoch': epoch+1,
                    'valid_loss': val_loss / len(val_dataloader),
                    'valid_padded_cmAP': val_score,})


    # Load best checkpoint
    print('# Loading best model')
    model.load_state_dict(torch.load(f'fold-{fold}.pth'), strict=False)

    # Predict on the validation data for oof result
    print('# Infering OOF')
    model.eval()
    oof_pred_ = []
    with torch.no_grad():
        for inputs, sample_info in tqdm(val_dataloader):
            inputs, targets = inputs.to(device), sample_info['target'].to(device)
            targets = targets[0].unsqueeze(0)
            logmel = wav_to_logmel(inputs)
            outputs = model(logmel)
            outputs = outputs["clipwise_output"]
            # One averaged prediction row per validation batch
            outputs = outputs.mean(dim=0, keepdim=True)
            oof_pred_.append(outputs.detach().cpu().numpy())

    oof_pred_ = np.concatenate(oof_pred_, axis=0)
    # oof_pred : 5
    oof_pred.append(oof_pred_)

    # Get ids and targets
    oof_true.append(valid_labels)
    oof_folds.append(np.ones_like(oof_true[-1], dtype='int8') * fold)
    oof_ids.append(valid_paths)

    # Save valid data prediction
    y_true = np.array(oof_true[-1])
    y_pred = np.argmax(oof_pred[-1], axis=-1)

    valid_df['pred'] = y_pred
    valid_df['miss'] = y_true != y_pred
    valid_df[CFG.class_names] = oof_pred[-1].tolist()
    # Log the metrics
    scores = {}
    cmAP = com.padded_cmap(com.one_hot_encode(y_true), oof_pred[-1])
    oof_val.append(best_score)
    print('\n>>> FOLD %i Padded_cmAP = %.3f' % (fold+1, cmAP))
    scores.update({'epoch': best_epoch,
                   'cmAP': cmAP,})
    # wandb logger
    if CFG.wandb:
        run.log(scores)
    # Show training plot
    # if CFG.training_plot:
    #     plot_history(history)
    # Log metrics, media to wandb
    if CFG.wandb:
        print('# WandB')
        # log_wandb reads the notebook-level `scores` dict built just above
        log_wandb(valid_df)
        wandb.run.finish()
        #display(ipd.IFrame(run.url, width=1080, height=720))
        #display(ipd.IFrame(run.url, width=1080, height=720))

#########################
#### Training
#### Fold: 1 | Image Size: (224, 313) | Model: tf_efficientnet_b0_ns | Batch Size: 128 | Scheduler: cos
#### Num Train: 19,627 | Num Valid: 3,381


100%|██████████| 154/154 [03:38<00:00,  1.42s/it]
100%|██████████| 3381/3381 [02:10<00:00, 25.99it/s]


Epoch: 1 | Train Loss: 0.20482072761500036 | Val Loss: 0.00709227334814853 | Val Padded_cmAP : 0.47898179652153794


100%|██████████| 154/154 [03:34<00:00,  1.40s/it]
100%|██████████| 3381/3381 [02:11<00:00, 25.79it/s]


Epoch: 2 | Train Loss: 0.007682143671244576 | Val Loss: 0.007576444043996321 | Val Padded_cmAP : 0.4795735243990707


100%|██████████| 154/154 [03:33<00:00,  1.39s/it]
100%|██████████| 3381/3381 [02:10<00:00, 25.83it/s]


Epoch: 3 | Train Loss: 0.006228952689773657 | Val Loss: 0.006414445008034715 | Val Padded_cmAP : 0.48205305415492794


100%|██████████| 154/154 [03:36<00:00,  1.40s/it]
100%|██████████| 3381/3381 [02:15<00:00, 24.96it/s]


Epoch: 4 | Train Loss: 0.005368861572381544 | Val Loss: 0.005898355355685154 | Val Padded_cmAP : 0.48324587366185606


100%|██████████| 154/154 [03:25<00:00,  1.33s/it]
100%|██████████| 3381/3381 [02:15<00:00, 24.86it/s]


Epoch: 5 | Train Loss: 0.004917382776688833 | Val Loss: 0.005654983855486445 | Val Padded_cmAP : 0.48641689604076166


100%|██████████| 154/154 [03:36<00:00,  1.41s/it]
100%|██████████| 3381/3381 [02:12<00:00, 25.52it/s]


Epoch: 6 | Train Loss: 0.004613292258377973 | Val Loss: 0.005065056068684462 | Val Padded_cmAP : 0.4942240680922568


100%|██████████| 154/154 [03:34<00:00,  1.39s/it]
100%|██████████| 3381/3381 [02:15<00:00, 24.88it/s]


Epoch: 7 | Train Loss: 0.004390784581615167 | Val Loss: 0.005084549806818159 | Val Padded_cmAP : 0.5071970338461147


100%|██████████| 154/154 [03:28<00:00,  1.36s/it]
100%|██████████| 3381/3381 [02:11<00:00, 25.77it/s]


Epoch: 8 | Train Loss: 0.004252159098683336 | Val Loss: 0.0048466651691553555 | Val Padded_cmAP : 0.5242494587880803


100%|██████████| 154/154 [03:31<00:00,  1.37s/it]
100%|██████████| 3381/3381 [02:12<00:00, 25.59it/s]


Epoch: 9 | Train Loss: 0.004092655217202453 | Val Loss: 0.0044853306441794815 | Val Padded_cmAP : 0.5444131419448279


100%|██████████| 154/154 [03:28<00:00,  1.35s/it]
100%|██████████| 3381/3381 [02:12<00:00, 25.45it/s]


Epoch: 10 | Train Loss: 0.003924789830933434 | Val Loss: 0.004265571059963848 | Val Padded_cmAP : 0.5716731751697041


100%|██████████| 154/154 [03:31<00:00,  1.38s/it]
100%|██████████| 3381/3381 [02:12<00:00, 25.43it/s]


Epoch: 11 | Train Loss: 0.0037608359858708144 | Val Loss: 0.0038926981734481707 | Val Padded_cmAP : 0.5945026072610843


100%|██████████| 154/154 [03:31<00:00,  1.37s/it]
100%|██████████| 3381/3381 [02:14<00:00, 25.11it/s]


Epoch: 12 | Train Loss: 0.0036117802827631113 | Val Loss: 0.0037840748223046585 | Val Padded_cmAP : 0.6271441501170557


100%|██████████| 154/154 [03:27<00:00,  1.35s/it]
100%|██████████| 3381/3381 [02:11<00:00, 25.63it/s]


Epoch: 13 | Train Loss: 0.0034504002504437774 | Val Loss: 0.003559564810601815 | Val Padded_cmAP : 0.6503882747184639


100%|██████████| 154/154 [03:33<00:00,  1.39s/it]
100%|██████████| 3381/3381 [02:12<00:00, 25.57it/s]


Epoch: 14 | Train Loss: 0.0032947192334436943 | Val Loss: 0.0033788350510881877 | Val Padded_cmAP : 0.6752791229062022


100%|██████████| 154/154 [03:29<00:00,  1.36s/it]
100%|██████████| 3381/3381 [02:15<00:00, 25.00it/s]


Epoch: 15 | Train Loss: 0.0031526231028550823 | Val Loss: 0.0032826007527046266 | Val Padded_cmAP : 0.6947842412226909


100%|██████████| 154/154 [03:28<00:00,  1.36s/it]
100%|██████████| 3381/3381 [02:13<00:00, 25.30it/s]


Epoch: 16 | Train Loss: 0.003045216314615561 | Val Loss: 0.002976332666645952 | Val Padded_cmAP : 0.7149465701021346


100%|██████████| 154/154 [03:28<00:00,  1.36s/it]
100%|██████████| 3381/3381 [02:12<00:00, 25.50it/s]


Epoch: 17 | Train Loss: 0.0029220162488689475 | Val Loss: 0.002931981924436727 | Val Padded_cmAP : 0.7301953649214299


100%|██████████| 154/154 [03:38<00:00,  1.42s/it]
100%|██████████| 3381/3381 [02:14<00:00, 25.23it/s]


Epoch: 18 | Train Loss: 0.0028047289536032196 | Val Loss: 0.0027994832013883138 | Val Padded_cmAP : 0.7408959480900511


100%|██████████| 154/154 [03:32<00:00,  1.38s/it]
100%|██████████| 3381/3381 [02:14<00:00, 25.11it/s]


Epoch: 19 | Train Loss: 0.0027251668779372977 | Val Loss: 0.0026816362637048854 | Val Padded_cmAP : 0.7556994291034954


100%|██████████| 154/154 [03:32<00:00,  1.38s/it]
100%|██████████| 3381/3381 [02:18<00:00, 24.48it/s]


Epoch: 20 | Train Loss: 0.002605874366550283 | Val Loss: 0.0026009177837474575 | Val Padded_cmAP : 0.7691033815316051


100%|██████████| 154/154 [03:53<00:00,  1.51s/it]
100%|██████████| 3381/3381 [02:17<00:00, 24.57it/s]


Epoch: 21 | Train Loss: 0.0025291074983557903 | Val Loss: 0.002456205199946683 | Val Padded_cmAP : 0.7766515168577732


100%|██████████| 154/154 [03:42<00:00,  1.45s/it]
100%|██████████| 3381/3381 [02:19<00:00, 24.21it/s]


Epoch: 22 | Train Loss: 0.0024299657504473415 | Val Loss: 0.0024432983112191585 | Val Padded_cmAP : 0.7846923422814385


100%|██████████| 154/154 [03:52<00:00,  1.51s/it]
100%|██████████| 3381/3381 [02:21<00:00, 23.94it/s]


Epoch: 23 | Train Loss: 0.002370949623676912 | Val Loss: 0.002425404033127289 | Val Padded_cmAP : 0.7899665955890927


100%|██████████| 154/154 [03:43<00:00,  1.45s/it]
100%|██████████| 3381/3381 [02:24<00:00, 23.45it/s]


Epoch: 24 | Train Loss: 0.0023049568922210535 | Val Loss: 0.0023497575331567233 | Val Padded_cmAP : 0.7948252559751373


100%|██████████| 154/154 [03:48<00:00,  1.49s/it]
100%|██████████| 3381/3381 [02:19<00:00, 24.23it/s]


Epoch: 25 | Train Loss: 0.002256341871212829 | Val Loss: 0.0022739530059930056 | Val Padded_cmAP : 0.8043681414400825


100%|██████████| 154/154 [03:46<00:00,  1.47s/it]
100%|██████████| 3381/3381 [02:20<00:00, 24.04it/s]


Epoch: 26 | Train Loss: 0.002181165970335775 | Val Loss: 0.002245945340603743 | Val Padded_cmAP : 0.8072244999030425


100%|██████████| 154/154 [03:42<00:00,  1.45s/it]
100%|██████████| 3381/3381 [02:22<00:00, 23.80it/s]


Epoch: 27 | Train Loss: 0.002127913798223697 | Val Loss: 0.002161794227036006 | Val Padded_cmAP : 0.8114304556097894


100%|██████████| 154/154 [03:43<00:00,  1.45s/it]
100%|██████████| 3381/3381 [02:16<00:00, 24.75it/s]


Epoch: 28 | Train Loss: 0.0020857448054646902 | Val Loss: 0.002168644317194338 | Val Padded_cmAP : 0.8162885455891112


100%|██████████| 154/154 [03:45<00:00,  1.47s/it]
100%|██████████| 3381/3381 [02:19<00:00, 24.28it/s]


Epoch: 29 | Train Loss: 0.0020333638122946316 | Val Loss: 0.002120619838535164 | Val Padded_cmAP : 0.817208803833972


100%|██████████| 154/154 [03:48<00:00,  1.48s/it]
100%|██████████| 3381/3381 [02:19<00:00, 24.25it/s]


Epoch: 30 | Train Loss: 0.0019969964012890667 | Val Loss: 0.002145460947286099 | Val Padded_cmAP : 0.8214279783136544
# Loading best model
# Infering OOF


100%|██████████| 3381/3381 [02:18<00:00, 24.36it/s]
  self[col] = igetitem(value, i)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value



>>> FOLD 1 Padded_cmAP = 0.821
# WandB


wandb : 100%|██████████| 1082/1082 [01:07<00:00, 16.04it/s]


VBox(children=(Label(value='3107.323 MB of 3107.323 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.…

0,1
cmAP,▁
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇████
loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,▁▂▄▅▇██████▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▁
num_train,▁
num_valid,▁
train_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
valid_loss,▇█▇▆▆▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
valid_padded_cmAP,▁▁▁▁▁▁▂▂▂▃▃▄▅▅▅▆▆▆▇▇▇▇▇▇██████

0,1
cmAP,0.82143
epoch,30.0
loss,0.00193
lr,0.0001
num_train,19627.0
num_valid,3381.0
train_loss,0.002
valid_loss,0.00215
valid_padded_cmAP,0.82143


#########################
#### Training
#### Fold: 2 | Image Size: (224, 313) | Model: tf_efficientnet_b0_ns | Batch Size: 128 | Scheduler: cos
#### Num Train: 19,629 | Num Valid: 3,382


100%|██████████| 154/154 [06:30<00:00,  2.54s/it]
100%|██████████| 3382/3382 [03:27<00:00, 16.28it/s]


Epoch: 1 | Train Loss: 0.20018198270374885 | Val Loss: 0.007151501147915834 | Val Padded_cmAP : 0.4794772994799738


  1%|          | 1/154 [00:09<25:11,  9.88s/it]

# Performance

In [None]:
def get_id(row):
    """Store in row['filename'] whatever follows the fifth '/' of row['filepath'].

    The path is split on '/' at most five times and the last piece is kept, so a
    path with fewer separators yields its final component. `row` is modified in
    place and returned.
    """
    *_, tail = row['filepath'].split('/', 5)
    row['filename'] = tail
    return row

In [None]:
from sklearn.metrics import average_precision_score

# OOF Data: flatten the per-fold lists gathered by the training loop into single arrays
y_pred, y_true, ids, folds = [
    np.concatenate(per_fold)
    for per_fold in (oof_pred, oof_true, oof_ids, oof_folds)
]

# Overall cmAP across all trained folds (integer labels are one-hot encoded for the metric)
y_true_onehot = com.one_hot_encode(y_true)
cmAP = com.padded_cmap(y_true_onehot, y_pred)

# Overall AUC in PR curve
# y_true_one_hot = torch.nn.functional.one_hot(torch.tensor(y_true))
# y_pred_tensor = torch.tensor(y_pred)
#auc = average_precision_score(y_true_one_hot.numpy(), y_pred_tensor.numpy(), average='macro')

print('>>> Overall cmAP: ', cmAP)
#print('>>> Overall AUC(PR): ', auc)

## save oof

In [None]:
# # Save OOF data to disk
# columns = ['filepath', 'fold', 'true', 'pred', *CFG.class_names]
# df_oof = pd.DataFrame(np.concatenate([ids[:,None], folds, y_true,
#                                       np.argmax(y_pred,axis=1)[:,None], y_pred], axis=1), columns=columns)
# df_oof['class_name'] = df_oof.true.map(CFG.label2name)
# df_oof['miss'] = df_oof.true!=df_oof.pred
# tqdm.pandas(desc='id ')
# df_oof = df_oof.progress_apply(get_id,axis=1)
# df_oof.to_csv('oof.csv',index=False)
# display(df_oof.head(2))

# Error Analysis

In [None]:
# print('Miss Total:')
# display(df_oof.query("miss==True").shape[0])

# print()
# print('Miss Distribution Top10:')
# display(df_oof.query("miss==True").class_name.value_counts()[:10])