In [1]:
import sys
from collections import Counter

import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt

import librosa
import librosa.display
import IPython.display as display

# Put the repository's `src` directory on the import path so the two
# project-local modules imported below (`const`, `metrics`) can be found.
sys.path.append('../src')
import const
from metrics import row_wise_micro_averaged_f1_score, micro_f1_similarity

# Let pandas render up to 120 rows of a frame before truncating the display.
pd.options.display.max_rows=120

In [2]:
def fill_dropped(dropped_array, drop_idx):
    """Re-insert all-zero rows at the positions listed in ``drop_idx``.

    Args:
        dropped_array: 2-D array holding only the rows that were kept.
        drop_idx: row positions (in the full-size result) that were removed.

    Returns:
        A float array with ``len(dropped_array) + len(drop_idx)`` rows. Rows at
        ``drop_idx`` are zeros; the other rows carry ``dropped_array`` in its
        original order.
    """
    total_rows = len(dropped_array) + len(drop_idx)
    n_cols = dropped_array.shape[1]

    # Positions of the full-size result that were not dropped.
    kept_rows = np.delete(np.arange(total_rows), drop_idx)

    restored = np.zeros((total_rows, n_cols))
    restored[kept_rows, :] = dropped_array
    return restored

In [3]:
def extract_label(preds, th=0.5):
    """Turn a score matrix into one space-separated label string per row.

    Every column whose score is ``>= th`` contributes the name
    ``const.INV_BIRD_CODE[column]``. Rows where no column reaches the threshold
    get the name stored at index ``preds.shape[1]`` instead (presumably the
    "nocall" entry of ``const.INV_BIRD_CODE`` -- confirm in ``const``).

    Args:
        preds: 2-D array of scores, one row per sample.
        th: inclusive score threshold.

    Returns:
        List of label strings, one per row of ``preds``.
    """
    hits = preds >= th
    # Extra trailing column: True only for rows without a single hit.
    no_hit = (hits.sum(1) == 0).reshape(-1, 1)
    hits = np.concatenate([hits, no_hit], axis=1)

    return [
        " ".join(
            const.INV_BIRD_CODE[label] for label in np.flatnonzero(row).tolist()
        )
        for row in hits
    ]

In [4]:
def plot_oof(idx, th=0.27):
    """Plot one row of the raw and post-processed scores, one panel each.

    Reads the notebook-level globals ``oof`` and ``post_oof`` (2-D score
    arrays); both must be assigned before this is called.

    The five columns with the highest *raw* score are annotated with their
    ``const.INV_BIRD_CODE`` name on both panels, so the same classes can be
    compared before and after post-processing.

    Args:
        idx: row position to plot.
        th: height of the dashed horizontal threshold line.
    """
    raw_row = oof[idx, :]
    post_row = post_oof[idx, :]
    # Ascending column order, so labels are drawn left to right.
    top5_bc = np.sort(np.argsort(raw_row)[-5:])

    plt.figure(figsize=(16, 6))
    for panel, row in enumerate((raw_row, post_row), start=1):
        plt.subplot(2, 1, panel)
        plt.plot(row)
        # Span the threshold line over the actual number of columns instead of
        # a fixed class count.
        plt.plot([0, len(row)], [th, th], "--", c="orange", alpha=0.3)

        # Nudge each label slightly below its point (5% of the row maximum).
        offset = row.max() * 0.05
        for bc in top5_bc:
            plt.text(int(bc), row[bc] - offset, s=const.INV_BIRD_CODE[int(bc)])
        plt.ylim([0, 1.0])
    
    
def plot_oof_mean(df):
    """Plot column-wise mean scores over the rows of ``df``, raw vs post-processed.

    Reads the notebook-level globals ``oof`` and ``post_oof`` (2-D score
    arrays); both must be assigned before this is called. The index values of
    ``df`` are used as row positions into those arrays.

    Each panel shows the mean score per column, a dashed line at the 0.99
    quantile of those means, and the ``const.INV_BIRD_CODE`` names of that
    panel's own five highest columns.

    Args:
        df: frame whose index selects the rows to average.
    """
    idx = df.index.values
    panel_means = (oof[idx, :].mean(0), post_oof[idx, :].mean(0))

    plt.figure(figsize=(16, 5))
    for panel, mean_ in enumerate(panel_means, start=1):
        quantile = np.quantile(mean_, 0.99)

        plt.subplot(2, 1, panel)
        plt.plot(mean_)
        # Span the quantile line over the actual number of columns instead of
        # a fixed class count.
        plt.plot([0, len(mean_)], [quantile, quantile], "--", c="orange", alpha=0.3)

        # Nudge each label slightly below its point (5% of the panel maximum).
        offset = mean_.max() * 0.05
        for bc in np.sort(np.argsort(mean_)[-5:]):
            plt.text(int(bc), mean_[bc] - offset, s=const.INV_BIRD_CODE[int(bc)])

    plt.show()

In [5]:
def post_process_v2_plus(preds, df, c=2, q=0.99, w=1.5, min_th=0.1, max_th=0.2):
    """Boost scores of classes that dominate a whole recording.

    For every ``audio_id`` group in ``df`` the column-wise mean of ``preds``
    is normalised by its maximum. Classes are visited from the highest
    normalised mean downwards for as long as that mean is at least ``c`` times
    the ``q``-quantile of the normalised means. For each visited class, a
    row's score is multiplied by ``w`` when either

    * the class is the row's current top score and that score is ``>= min_th``, or
    * the row's score for the class is ``>= max_th``.

    ``min_th`` is honoured here; the earlier implementation compared against
    a literal ``0.1``, so results differ from it only for ``min_th != 0.1``.

    Args:
        preds: 2-D score array; ``df``'s index values are its row positions.
        df: frame with an ``audio_id`` column.
        c: multiplier applied to the quantile to obtain the dominance cutoff.
        q: quantile of the normalised class means.
        w: boost factor.
        min_th: minimum top score for the "row's top class" rule.
        max_th: minimum score for the unconditional rule.

    Returns:
        A boosted copy of ``preds``; the input is not modified.
    """
    post_preds = preds.copy()

    for _, audio_df in df.groupby("audio_id"):
        idxs = audio_df.index.values

        audio_preds_mean = np.mean(preds[idxs, :], axis=0)
        peak = audio_preds_mean.max()
        if peak == 0:
            # Dividing by zero would only produce NaNs, which never pass the
            # cutoff below; skip the recording without emitting warnings.
            continue
        audio_preds_mean_ = audio_preds_mean / peak
        cutoff = np.quantile(audio_preds_mean_, q) * c

        for bc in np.argsort(audio_preds_mean_)[::-1]:
            # Classes are in descending order, so the first one under the
            # cutoff ends the scan for this recording.
            if not audio_preds_mean_[bc] >= cutoff:
                break
            for idx in idxs:
                row = post_preds[idx, :]
                confident_top = np.argmax(row) == bc and np.max(row) >= min_th
                if confident_top or row[bc] >= max_th:
                    post_preds[idx, bc] *= w

    return post_preds


def shift(df, pred_labels, freq):
    """Shift label strings by ``freq`` rows inside each recording.

    Rows are grouped by ``df["audio_id"]``. Within a group, the label at
    position ``j`` of the result is the label that was at position
    ``j - freq``; positions with no source row inside the same group receive
    the literal string ``"None"``.

    The shift is done per element rather than through ``np.roll`` on a
    fixed-width string array, so the ``"None"`` filler is never truncated when
    all labels are short, and groups whose rows are not contiguous are handled
    correctly.

    Args:
        df: frame with an ``audio_id`` column; its index values are used as
            positions into ``pred_labels``.
        pred_labels: sequence of label strings, one per row of ``df``.
        freq: number of rows to shift by (positive = towards later rows).

    Returns:
        List of ``len(df)`` labels. Rows that belong to no group keep ``None``.
    """
    shift_labels = [None] * len(df)

    for _, audio_df in df.groupby("audio_id"):
        idxs = audio_df.index.values
        group_size = len(idxs)

        for pos, idx in enumerate(idxs):
            src = pos - freq
            if 0 <= src < group_size:
                shift_labels[idx] = pred_labels[idxs[src]]
            else:
                shift_labels[idx] = "None"

    return shift_labels


def intersect(labels_list1, labels_list2):
    """Row-wise intersection of two lists of space-separated label strings.

    For each position, the labels present in both strings are joined with
    spaces in sorted order. When the two strings share no label the result for
    that position is ``"nocall"``.

    Args:
        labels_list1: list of label strings; its length drives the iteration.
        labels_list2: list of label strings, at least as long as the first.

    Returns:
        List with one label string per entry of ``labels_list1``.
    """
    merged = []
    for position, left in enumerate(labels_list1):
        right = labels_list2[position]
        common = sorted(set(left.split(" ")) & set(right.split(" ")))
        merged.append(" ".join(common) if common else "nocall")
    return merged


def add_label(labels_list1, labels_list2):
    """Row-wise union of two lists of space-separated label strings.

    ``"nocall"`` acts as the empty set: if one side is exactly ``"nocall"``
    the other side is returned unchanged. Otherwise the distinct labels of
    both sides are joined with spaces in sorted order, so the output is
    deterministic (a plain ``set`` would order the labels differently from run
    to run, which breaks exact string comparisons between predictions).

    Args:
        labels_list1: list of label strings.
        labels_list2: list of label strings, paired with the first by position.

    Returns:
        List of merged label strings, as long as the shorter input.
    """
    added_labels_list = []
    for labels1, labels2 in zip(labels_list1, labels_list2):
        if labels1 == "nocall":
            # Also covers the case where both sides are "nocall".
            added_labels_list.append(labels2)
        elif labels2 == "nocall":
            added_labels_list.append(labels1)
        else:
            union = set(labels1.split(" ")) | set(labels2.split(" "))
            added_labels_list.append(" ".join(sorted(union)))

    return added_labels_list


def post_process_v10(preds, pred_labels, df, window_size=1, c=2.0, min_th=0.1, max_th=0.5):
    """Add a row's dominant class when a nearby row of the same recording predicts it.

    Step 1: each row gets a "first label": the name of its top-scoring class
    if that score is ``>= max_th``, or if it is both ``>= min_th`` and at
    least ``c`` times the second-highest score; otherwise ``"nocall"``.

    Step 2: for every distance ``1..window_size`` and both directions,
    ``pred_labels`` is shifted within each recording (``shift``), intersected
    with the first labels (``intersect``) and the result is merged into the
    running output (``add_label``).

    Args:
        preds: 2-D score array; ``df``'s index values are its row positions.
        pred_labels: list of label strings, one per row.
        df: frame with an ``audio_id`` column.
        window_size: largest neighbour distance considered.
        c: required ratio between the top and second score.
        min_th: minimum top score for the ratio rule.
        max_th: top score that qualifies on its own.

    Returns:
        New list of label strings; ``pred_labels`` is not modified.
    """
    first_labels = []
    for idx in df.index:
        scores = preds[idx, :]
        runner_up = np.sort(scores)[-2]

        top_bc = np.argmax(scores)
        top_score = scores[top_bc]
        clearly_ahead = top_score >= (runner_up * c) and top_score >= min_th
        if clearly_ahead or top_score >= max_th:
            first_labels.append(const.INV_BIRD_CODE[top_bc])
        else:
            first_labels.append("nocall")

    final_labels_list = pred_labels.copy()
    for distance in range(1, window_size + 1):
        for freq in (distance, -distance):
            # Neighbours are always taken from the original predictions, not
            # from the labels accumulated so far.
            neighbour_labels = shift(df, pred_labels, freq)
            confirmed = intersect(first_labels, neighbour_labels)
            final_labels_list = add_label(final_labels_list, confirmed)

    return final_labels_list


def post_process_v14(post_labels, avg_labels, df):
    """Fall back to the multi-label prediction in recordings with many of them.

    Within each ``audio_id`` group, rows whose ``avg_labels`` entry contains
    two or more labels are collected. If a group has at least 10 such rows,
    each of them whose ``post_labels`` entry is a single label that is either
    ``"nocall"`` or one of that row's ``avg_labels`` is replaced by the
    ``avg_labels`` entry.

    Args:
        post_labels: list of label strings (the predictions to refine).
        avg_labels: list of label strings used as the fallback.
        df: frame with an ``audio_id`` column; its index values are positions
            into both lists.

    Returns:
        New list of label strings; ``post_labels`` is not modified.
    """
    final_labels = post_labels.copy()

    for _, audio_df in df.groupby("audio_id"):
        multi_label_idxs = [
            idx
            for idx in audio_df.index.values
            if len(avg_labels[idx].split(" ")) >= 2
        ]
        if len(multi_label_idxs) < 10:
            continue

        for ml_idx in multi_label_idxs:
            tokens = post_labels[ml_idx].split(" ")
            if len(tokens) != 1:
                continue
            single = tokens[0]
            if single == "nocall" or single in avg_labels[ml_idx].split(" "):
                final_labels[ml_idx] = avg_labels[ml_idx]

    return final_labels


def post_process_v12(preds, df, window_size=5, c=2, q=0.99, w=1.5, th=0.28):
    """Boost scores of classes that dominate a sliding window around each row.

    For every row of every ``audio_id`` group, the column-wise mean of
    ``preds`` is taken over a window of up to ``window_size`` rows on each
    side, clipped to the group's first and last index. Classes are visited
    from the highest window mean downwards; a class's score in the row is
    multiplied by ``w`` when either

    * its window mean is at least ``max(0.1, c * q-quantile of the window
      means)`` and it is the row's current top class, or
    * the row's score for it is ``>= th``.

    The scan for a row stops at the first class meeting neither condition.

    Args:
        preds: 2-D score array; ``df``'s index values are its row positions.
        df: frame with an ``audio_id`` column.
        window_size: number of neighbouring rows used on each side.
        c: multiplier applied to the quantile.
        q: quantile of the window means.
        w: boost factor.
        th: score that qualifies for a boost on its own.

    Returns:
        A boosted copy of ``preds``; the input is not modified.
    """
    post_preds = preds.copy()

    for _, audio_df in df.groupby("audio_id"):
        idxs = audio_df.index.values
        first_idx = idxs[0]
        stop = idxs[-1] + 1

        for i, idx in enumerate(idxs):
            start = first_idx if i < window_size else idx - window_size
            end = min(idx + window_size + 1, stop)

            marginal_preds_mean = np.mean(preds[start: end, :], axis=0)
            floor = max(0.1, (np.quantile(marginal_preds_mean, q) * c))

            for bc in np.argsort(marginal_preds_mean)[::-1]:
                row = post_preds[idx, :]
                locally_dominant = marginal_preds_mean[bc] >= floor and np.argmax(row) == bc
                if not (locally_dominant or row[bc] >= th):
                    break
                post_preds[idx, bc] *= w

    return post_preds


def post_process_v15(preds, max_sample=30, c=2.0, q=0.99, p=0.3):
    """Overwrite outlying scores of classes listed with few samples.

    For each column whose class has ``const.BIRD_SAMPLE_NUM[name] <= max_sample``
    (``name`` being ``const.INV_BIRD_CODE[column]``), every score that is at
    least ``c`` times the column's ``q``-quantile is replaced by ``p``. Other
    columns and scores are left as they are.

    Args:
        preds: 2-D score array.
        max_sample: largest sample count for which a class is processed.
        c: multiplier applied to the column quantile.
        q: quantile taken over each processed column.
        p: value written to the selected scores.

    Returns:
        A modified copy of ``preds``; the input is not modified.
    """
    post_preds = preds.copy()

    for bc in range(preds.shape[1]):
        if const.BIRD_SAMPLE_NUM[const.INV_BIRD_CODE[bc]] > max_sample:
            continue

        prob = preds[:, bc]
        cutoff = np.quantile(prob, q) * c
        post_preds[prob >= cutoff, bc] = p

    return post_preds


def voting(oof_labels_list, vote_lim):
    """Keep, per row, the labels predicted by at least ``vote_lim`` candidates.

    Each candidate contributes the space-separated labels of its string for
    the row; ``"nocall"`` and empty tokens are not counted. Labels reaching
    ``vote_lim`` votes are joined with spaces in order of first appearance;
    if none qualifies the row becomes ``"nocall"``.

    Args:
        oof_labels_list: list of candidates, each a list of label strings of
            equal length.
        vote_lim: minimum number of votes a label needs.

    Returns:
        List of label strings, one per row.
    """
    final_oof_labels = []

    for idx in range(len(oof_labels_list[0])):
        tally = Counter(
            bird
            for labels in oof_labels_list
            for bird in labels[idx].split(" ")
            if bird != "nocall" and bird != ""
        )
        winners = [bird for bird, votes in tally.items() if votes >= vote_lim]
        final_oof_labels.append(" ".join(winners) if winners else "nocall")

    return final_oof_labels

In [6]:
# Recordings removed from the validation frame below.
excluded_audio_ids = [7019, 7954, 14473]

soundscape_labels = pd.read_csv('../data/input/train_soundscape_labels.csv')
is_excluded = soundscape_labels["audio_id"].isin(excluded_audio_ids).to_numpy()

# Row positions (in the unfiltered file) that are kept / removed.
use_idx = soundscape_labels.index[~is_excluded]
drop_idx = soundscape_labels.index[is_excluded]

# Validation frame: kept rows only, re-indexed from 0.
valid_df = soundscape_labels.iloc[use_idx].reset_index(drop=True)

val_y_labels = valid_df["birds"].tolist()

In [7]:
def load_filled_oof(path):
    """Load one saved OOF array and insert zero rows at ``drop_idx`` (see ``fill_dropped``)."""
    return fill_dropped(np.load(path), drop_idx)


oof163 = load_filled_oof("../logs/exp_163_20210529161018_0.648/oof.npy")
oof164 = load_filled_oof("../logs/exp_164_20210529204320_0.654/oof.npy")
oof165 = load_filled_oof("../logs/exp_165_20210530010704_0.654/oof.npy")
oof166 = load_filled_oof("../logs/exp_166_20210530055519_0.659/oof.npy")
oof167 = load_filled_oof("../logs/exp_167_20210529114938_0.638/oof.npy")
oof168 = load_filled_oof("../logs/exp_168_20210529115009_0.645/oof.npy")
oof169 = load_filled_oof("../logs/exp_169_20210529115052_0.658/oof.npy")
oof170 = load_filled_oof("../logs/exp_170_20210529115126_0.660/oof.npy")
oof171 = load_filled_oof("../logs/exp_171_20210530010436_0.634/oof.npy")
oof172 = load_filled_oof("../logs/exp_172_20210530010440_0.644/oof.npy")
oof173 = load_filled_oof("../logs/exp_173_20210530010446_0.637/oof.npy")
oof174 = load_filled_oof("../logs/exp_174_20210530010451_0.647/oof.npy")
oof175 = load_filled_oof("../logs/exp_175_20210530105944_0.666/oof.npy")
oof177 = load_filled_oof("../logs/exp_177_20210530174908_0.642/oof.npy")
oof178 = load_filled_oof("../logs/exp_178_20210530130533_0.647/oof.npy")
oof179 = load_filled_oof("../logs/exp_179_20210530130536_0.654/oof.npy")
oof180 = load_filled_oof("../logs/exp_180_20210530130541_0.659/oof.npy")
oof181 = load_filled_oof("../logs/exp_181_20210530130547_0.651/oof.npy")
oof182 = load_filled_oof("../logs/exp_182_20210531074102_0.668/oof.npy")
oof184 = load_filled_oof("../logs/exp_184_20210531170101_0.664/oof.npy")
oof185 = load_filled_oof("../logs/exp_185_20210531221305_0.633/oof.npy")
oof186 = load_filled_oof("../logs/exp_186_20210601030001_0.656/oof.npy")


# nocall classification (loaded as saved, without re-inserting dropped rows)
# oof59 = np.load("../logs/exp_059_20210505100513_0.666/oof.npy")
# oof70 = np.load("../logs/exp_070_20210508193023_0.882/oof.npy")
# oof79 = np.load("../logs/exp_079_20210511132329_0.874/oof.npy")
# oof83 = np.load("../logs/exp_083_20210512081958_0.827/oof.npy")
oof99 = np.load("../logs/exp_099_20210515202257_0.859/oof.npy")

In [75]:
# Members of the main ensemble (oof172 / oof173 are currently left out).
oof_list = [
    oof163, oof164, oof165, oof166, oof167,
    oof168, oof169, oof170, oof171,
    # oof172,
    # oof173,
    oof174,
]

# Members of the second ensemble.
oof_list2 = [oof184, oof185, oof186]

# One array per ensemble with a new leading "model" axis.
concat_oof = np.stack(oof_list)
concat_oof2 = np.stack(oof_list2)

## voting候補1
- Main modelsのSimple AVG

In [76]:
# Candidate 1: average the main ensemble over its model axis, keep only the
# rows in `use_idx`, and label every class scoring at least 0.27.
oof1 = np.mean(concat_oof, axis=0)[use_idx, :]
oof_labels1 = extract_label(oof1, th=0.27)
# Last expression: score against the ground-truth labels (shown as cell output).
row_wise_micro_averaged_f1_score(val_y_labels, oof_labels1)

0.7239869281045727

## voting候補2
- Main modelsのPostProcessV2,V10, V15

In [77]:
# Candidate 2: chain V15 -> V2+ on the averaged scores, threshold at 0.61,
# then let V10 add labels confirmed by neighbouring rows.
post_oof2 = post_process_v15(oof1, max_sample=30, c=2.0, q=0.99, p=0.3)
post_oof2_ = post_process_v2_plus(post_oof2, valid_df, c=2.5, q=0.99, w=2.0, min_th=0.10, max_th=0.20)
oof_labels2 = extract_label(post_oof2_, th=0.61)
# V10 receives the thresholded labels and returns the extended list.
oof_labels2= post_process_v10(post_oof2_, oof_labels2, valid_df, window_size=5, c=1.2, min_th=0.15, max_th=0.4)
row_wise_micro_averaged_f1_score(val_y_labels, oof_labels2)

0.7661111111111084

## voting候補3
- efficientnetv2のSimple AVG

In [78]:
# Candidate 3: plain average of the second ensemble on the kept rows,
# thresholded at 0.29.
oof3 = np.mean(concat_oof2, axis=0)[use_idx, :]
oof_labels3 = extract_label(oof3, th=0.29)
row_wise_micro_averaged_f1_score(val_y_labels, oof_labels3)

0.6985294117647035

## voting候補4
- Main modelsのPostProcessV12

In [79]:
# Candidate 4: sliding-window boost (V12, one neighbour on each side) applied
# to the averaged main-ensemble scores, thresholded at 0.56.
post_oof4 = post_process_v12(oof1, valid_df, window_size=1, c=2, q=0.99, w=1.5, th=0.28)
oof_labels4 = extract_label(post_oof4, th=0.56)
row_wise_micro_averaged_f1_score(val_y_labels, oof_labels4)

0.7033006535947688

## voting候補5
- Main modelsのPostProcessV15

In [80]:
# Candidate 5: the V15 output computed for candidate 2 (`post_oof2`),
# thresholded at 0.27 without any further post-processing.
oof_labels5 = extract_label(post_oof2, th=0.27)
row_wise_micro_averaged_f1_score(val_y_labels, oof_labels5)

0.7182679738562067

## voting候補6
- Main modelsのPostProcessV15 -> V2

In [81]:
# Candidate 6: the V15 -> V2+ scores computed for candidate 2 (`post_oof2_`),
# thresholded at 0.61; no V10 step is applied here.
oof_labels6 = extract_label(post_oof2_, th=0.61)
row_wise_micro_averaged_f1_score(val_y_labels, oof_labels6)

0.7195588235294093

## voting候補7
- V15 -> v12

In [82]:
# Candidate 7: V15 followed by the sliding-window boost V12, thresholded at 0.40.
post_oof7 = post_process_v15(oof1, max_sample=30, c=1.2, q=0.99, p=0.5)
# `post_oof7` is rebound to the V12 output; the V15-only array is not kept.
post_oof7 = post_process_v12(post_oof7, valid_df, window_size=1, c=1.5, q=0.99, w=1.5, th=0.20)
oof_labels7 = extract_label(post_oof7, th=0.40)
row_wise_micro_averaged_f1_score(val_y_labels, oof_labels7)

0.7100653594771217

## voting候補8
- Optuna

In [109]:
# Candidate 8: same V15 -> V2+ -> threshold -> V10 chain as candidate 2, with
# the numeric parameters below (the section heading attributes them to Optuna).
post_oof8 = post_process_v15(
    oof1,
    max_sample=30,
    c=3.2790671924149883,
    q=0.9529061959822951,
    p=0.06608443257219929,
)
post_oof8_ = post_process_v2_plus(
    post_oof8,
    valid_df,
    c=8.935189493308814,
    q=0.977840897296335,
    w=7.115016451742793,
    min_th=0.8108624438465178,
    max_th=0.01930552097040787,
)
oof_labels8 = extract_label(post_oof8_, th=0.37)
# V10 receives the thresholded labels and returns the extended list.
oof_labels8= post_process_v10(
    post_oof8_,
    oof_labels8,
    valid_df,
    window_size=4,
    c=2.780462776847464,
    min_th=0.40649648362678825,
    max_th=0.11942033308662126,
)
row_wise_micro_averaged_f1_score(val_y_labels, oof_labels8)

0.7806862745098008

## voting

In [110]:
# All eight candidate label lists, in candidate order (position 1 is
# candidate 2, which the custom-voting cell treats specially).
voting_list = [
    oof_labels1,
    oof_labels2,
    oof_labels3,
    oof_labels4,
    oof_labels5,
    oof_labels6,
    oof_labels7,
    oof_labels8,
]

# Keep a label for a row when at least 3 of the 8 candidates predict it.
voting_labels = voting(voting_list, vote_lim=3)

## Custom voting

In [111]:
# Start from candidate 2 and overrule it on a row only when all the other
# candidates produced exactly the same label string and that string differs.
custom_voting_labels = oof_labels2.copy()
sub_labels = voting_list[:1] + voting_list[2:]

for idx in range(len(valid_df)):
    others = [labels[idx] for labels in sub_labels]
    unanimous = len(set(others)) == 1
    if unanimous and custom_voting_labels[idx] != others[0]:
        custom_voting_labels[idx] = others[0]

In [112]:
# Column name -> label list, in the order the columns are added to `valid_df`.
prediction_columns = {
    "main_avg": oof_labels1,
    "main_v2_10_15": oof_labels2,
    "ev2_avg": oof_labels3,
    "main_v12": oof_labels4,
    "main_v15": oof_labels5,
    "main_v2_10": oof_labels6,
    "main_v15_12": oof_labels7,
    "main_optuna": oof_labels8,
    "voting": voting_labels,
    "custom_voting": custom_voting_labels,
}

for column_name, column_labels in prediction_columns.items():
    valid_df[column_name] = column_labels

In [113]:
def calc_f1(colname, df=valid_df):
    """Score every row of ``df[colname]`` against ``df["birds"]``.

    Rows are processed in frame order, so the result lines up with ``df``
    whatever its index looks like (the earlier version wrote to
    ``f1_array[index_label]`` and therefore required a 0..n-1 index).

    Args:
        colname: name of the column holding predicted label strings.
        df: frame with a ``birds`` column and the ``colname`` column. The
            default is the ``valid_df`` object that existed when this function
            was defined.

    Returns:
        1-D float array with one ``micro_f1_similarity`` value per row.
    """
    return np.array(
        [
            micro_f1_similarity(true_label, oof_label)
            for true_label, oof_label in zip(df["birds"], df[colname])
        ],
        dtype=float,
    )

# Add a "<column>_f1" score column for every prediction column, in this order.
scored_columns = (
    "main_avg",
    "main_v2_10_15",
    "ev2_avg",
    "main_v12",
    "main_v15",
    "main_v2_10",
    "main_v15_12",
    "main_optuna",
    "voting",
    "custom_voting",
)

for scored_column in scored_columns:
    valid_df[scored_column + "_f1"] = calc_f1(scored_column)

In [114]:
# Mean F1 of every column whose name contains "voting_f1".
voting_f1_columns = [name for name in valid_df.columns if "voting_f1" in name]
for col in voting_f1_columns:
    print(f"{col}: {valid_df[col].mean():.4f}")

voting_f1: 0.7308
custom_voting_f1: 0.7666


In [115]:
# Classes whose entry in const.BIRD_SAMPLE_NUM is at most 30. Defined here
# because this cell displays the list before any other cell assigns it, so a
# fresh Restart & Run All would otherwise stop with a NameError.
low_bird_sample = [b for b, n in const.BIRD_SAMPLE_NUM.items() if n <= 30]
low_bird_sample

['crfpar',
 'flrtan1',
 'grhcha1',
 'heptan',
 'hofwoo1',
 'runwre1',
 'stvhum2',
 'wegspa1',
 'whcpar']

In [116]:
# Copy candidate 2 and add every low-sample class that candidate 8 predicts
# for the same row.
low_bird_sample = [b for b, n in const.BIRD_SAMPLE_NUM.items() if n <= 30]
low_bird_set = set(low_bird_sample)

oof_labels2_ = oof_labels2.copy()

for idx, (l1, l2) in enumerate(zip(oof_labels2, oof_labels8)):
    l1_ = l1.split(" ")
    # A row that starts with "nocall" contributes no labels of its own.
    merged = [] if l1_[0] == "nocall" else list(l1_)

    # Track the merged labels as they grow, so a second rare bird is appended
    # rather than overwriting the first, and no label is added twice.
    changed = False
    for lb in l2.split(" "):
        if lb in low_bird_set and lb not in merged:
            merged.append(lb)
            changed = True

    if changed:
        oof_labels2_[idx] = " ".join(merged)

In [118]:
# Inspect the candidate-8 labels of rows 240 through 359.
oof_labels8[240: 360]

['rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'nocall',
 'rucwar',
 'rucwar',
 'rucwar',
 'grekis rucwar',
 'rucwar',
 'rucwar',
 'grekis',
 'rucwar',
 'rucwar',
 'grekis',
 'grekis',
 'rucwar',
 'rucwar',
 'rucwar',
 'nocall',
 'clcrob',
 'nocall',
 'rucwar',
 'whiwre1',
 'whiwre1',
 'nocall',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar spvear1',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'melbla1',
 'melbla1',
 'rucwar',
 'rucwar',
 'nocall',
 'nocall',
 'nocall',
 'nocall',
 'nocall',
 'nocall',
 'nocall',
 'nocall',
 'nocall',
 'nocall',
 'nocall',
 'nocall',
 'nocall',
 'nocall',
 'nocall',
 'nocall',
 'nocall',
 'nocall',
 'nocall',
 'nocall',
 'rucwar',
 'nocall',
 'nocall',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 'rucwar',
 '

In [22]:
# Columns shown in the per-recording tables below: row metadata, ground
# truth, then each displayed prediction column followed by its F1 column.
# Commented-out pairs exist in `valid_df` but are hidden from the tables.
usecols = [
    "row_id", "seconds", "birds",

    "main_avg", "main_avg_f1",
    "main_v2_10_15", "main_v2_10_15_f1",
#     "ev2_avg", "ev2_avg_f1",
#     "main_v12", "main_v12_f1",
#     "main_v15", "main_v15_f1",
#     "main_v2_10", "main_v2_10_f1",
#     "main_v15_12", "main_v15_12_f1",
    "main_optuna", "main_optuna_f1",

    "voting", "voting_f1",
    "custom_voting", "custom_voting_f1"
]

# The F1 columns among `usecols`; these receive the background gradient.
subset_cols = [col for col in usecols if "f1" in col ]

In [23]:
# Mean F1 per recording for candidates 2 and 8. Several columns are selected
# from a GroupBy with a list; a bare tuple of names is deprecated and is
# rejected by newer pandas versions.
valid_df.groupby("audio_id")[["main_v2_10_15_f1", "main_optuna_f1"]].mean()

  valid_df.groupby("audio_id")["main_v2_10_15_f1", "main_optuna_f1"].mean()


Unnamed: 0_level_0,main_v2_10_15_f1,main_optuna_f1
audio_id,Unnamed: 1_level_1,Unnamed: 2_level_1
2782,0.61,0.719444
7843,0.966667,0.966667
10534,0.7,0.713889
11254,0.9,0.916667
18003,0.794444,0.819444
20152,0.975,0.958333
21767,0.397222,0.472222
26709,0.516667,0.558333
26746,0.719444,0.677778
28933,0.519444,0.551389


## audio_id: 2782

In [119]:
# Rows of recording 2782, restricted to `usecols`, with a background gradient
# on the F1 columns.
idx = valid_df[valid_df["audio_id"] == 2782].index.values
df = valid_df.loc[idx]
df[usecols].style.background_gradient(cmap='Reds_r', low=1.0, high=0.0, subset=subset_cols)

Unnamed: 0,row_id,seconds,birds,main_avg,main_avg_f1,main_v2_10_15,main_v2_10_15_f1,main_optuna,main_optuna_f1,voting,voting_f1,custom_voting,custom_voting_f1
960,2782_SSW_5,5,bkcchi grycat reevir1,grycat,0.5,grycat,0.5,grycat,0.5,grycat,0.5,grycat,0.5
961,2782_SSW_10,10,eawpew grycat reevir1,grycat,0.5,grycat,0.5,grycat,0.5,grycat,0.5,grycat,0.5
962,2782_SSW_15,15,bkcchi eawpew grycat reevir1,nocall,0.0,grycat,0.4,grycat,0.4,nocall,0.0,grycat,0.4
963,2782_SSW_20,20,eawpew grycat reevir1,grycat,0.5,grycat,0.5,grycat,0.5,grycat,0.5,grycat,0.5
964,2782_SSW_25,25,eawpew grycat reevir1,grycat,0.5,grycat,0.5,eawpew,0.5,grycat,0.5,grycat,0.5
965,2782_SSW_30,30,bkcchi reevir1,reevir1,0.666667,reevir1,0.666667,reevir1,0.666667,reevir1,0.666667,reevir1,0.666667
966,2782_SSW_35,35,eawpew grycat reevir1,grycat,0.5,grycat,0.5,grycat,0.5,grycat,0.5,grycat,0.5
967,2782_SSW_40,40,bkcchi eawpew grycat reevir1,grycat,0.4,grycat,0.4,grycat,0.4,grycat,0.4,grycat,0.4
968,2782_SSW_45,45,eawpew reevir1,eawpew,0.666667,eawpew,0.666667,eawpew reevir1,1.0,eawpew,0.666667,eawpew,0.666667
969,2782_SSW_50,50,bkcchi eawpew reevir1,nocall,0.0,reevir1,0.5,eawpew reevir1,0.8,nocall,0.0,reevir1,0.5


In [120]:
# plot_oof(973)

In [121]:
# plot_oof_mean(df)

## audio_id: 7843

In [122]:
# Rows of recording 7843, restricted to `usecols`, with a background gradient
# on the F1 columns.
idx = valid_df[valid_df["audio_id"] == 7843].index.values
df = valid_df.loc[idx]
df[usecols].style.background_gradient(cmap='Reds_r', low=1.0, high=0.0, subset=subset_cols)

Unnamed: 0,row_id,seconds,birds,main_avg,main_avg_f1,main_v2_10_15,main_v2_10_15_f1,main_optuna,main_optuna_f1,voting,voting_f1,custom_voting,custom_voting_f1
1080,7843_SSW_5,5,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1081,7843_SSW_10,10,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1082,7843_SSW_15,15,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1083,7843_SSW_20,20,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1084,7843_SSW_25,25,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1085,7843_SSW_30,30,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1086,7843_SSW_35,35,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1087,7843_SSW_40,40,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1088,7843_SSW_45,45,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1089,7843_SSW_50,50,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0


In [123]:
# plot_oof(1185)

In [124]:
# plot_oof_mean(df)

## audio_id: 10534

In [125]:
# Rows of recording 10534, restricted to `usecols`, with a background gradient
# on the F1 columns.
idx = valid_df[valid_df["audio_id"] == 10534].index.values
df = valid_df.loc[idx]
df[usecols].style.background_gradient(cmap='Reds_r', low=1.0, high=0.0, subset=subset_cols)

Unnamed: 0,row_id,seconds,birds,main_avg,main_avg_f1,main_v2_10_15,main_v2_10_15_f1,main_optuna,main_optuna_f1,voting,voting_f1,custom_voting,custom_voting_f1
1200,10534_SSW_5,5,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1201,10534_SSW_10,10,swaspa,swaspa,1.0,swaspa,1.0,swaspa,1.0,swaspa,1.0,swaspa,1.0
1202,10534_SSW_15,15,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1203,10534_SSW_20,20,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1204,10534_SSW_25,25,swaspa,swaspa,1.0,swaspa,1.0,swaspa,1.0,swaspa,1.0,swaspa,1.0
1205,10534_SSW_30,30,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1206,10534_SSW_35,35,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1207,10534_SSW_40,40,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1208,10534_SSW_45,45,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1209,10534_SSW_50,50,blujay,nocall,0.0,nocall,0.0,blujay,1.0,nocall,0.0,nocall,0.0


In [126]:
# plot_oof(1288)

In [127]:
# plot_oof_mean(df)

## audio_id: 11254

In [128]:
# Rows of recording 11254, restricted to `usecols`, with a background gradient
# on the F1 columns.
idx = valid_df[valid_df["audio_id"] == 11254].index.values
df = valid_df.loc[idx]
df[usecols].style.background_gradient(cmap='Reds_r', low=1.0, high=0.0, subset=subset_cols)

Unnamed: 0,row_id,seconds,birds,main_avg,main_avg_f1,main_v2_10_15,main_v2_10_15_f1,main_optuna,main_optuna_f1,voting,voting_f1,custom_voting,custom_voting_f1
0,11254_COR_5,5,rubwre1,wbwwre1,0.0,wbwwre1,0.0,wbwwre1,0.0,wbwwre1,0.0,wbwwre1,0.0
1,11254_COR_10,10,nocall,wbwwre1,0.0,wbwwre1,0.0,wbwwre1,0.0,wbwwre1,0.0,wbwwre1,0.0
2,11254_COR_15,15,rubwre1,wbwwre1,0.0,wbwwre1,0.0,wbwwre1,0.0,wbwwre1,0.0,wbwwre1,0.0
3,11254_COR_20,20,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
4,11254_COR_25,25,rubwre1,rubwre1,1.0,rubwre1,1.0,rubwre1,1.0,rubwre1,1.0,rubwre1,1.0
5,11254_COR_30,30,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
6,11254_COR_35,35,nocall,nocall,1.0,wbwwre1,0.0,wbwwre1,0.0,wbwwre1,0.0,wbwwre1,0.0
7,11254_COR_40,40,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
8,11254_COR_45,45,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
9,11254_COR_50,50,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0


In [129]:
# plot_oof(26)

In [130]:
# plot_oof_mean(df)

## audio_id: 18003

In [131]:
# Rows of recording 18003, restricted to `usecols`, with a background gradient
# on the F1 columns.
idx = valid_df[valid_df["audio_id"] == 18003].index.values
df = valid_df.loc[idx]
df[usecols].style.background_gradient(cmap='Reds_r', low=1.0, high=0.0, subset=subset_cols)

Unnamed: 0,row_id,seconds,birds,main_avg,main_avg_f1,main_v2_10_15,main_v2_10_15_f1,main_optuna,main_optuna_f1,voting,voting_f1,custom_voting,custom_voting_f1
120,18003_COR_5,5,rucwar,rucwar,1.0,rucwar,1.0,rucwar,1.0,rucwar,1.0,rucwar,1.0
121,18003_COR_10,10,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
122,18003_COR_15,15,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
123,18003_COR_20,20,rucwar,rucwar,1.0,rucwar,1.0,rucwar,1.0,rucwar,1.0,rucwar,1.0
124,18003_COR_25,25,rucwar,rucwar,1.0,rucwar,1.0,rucwar,1.0,rucwar,1.0,rucwar,1.0
125,18003_COR_30,30,rucwar,nocall,0.0,rucwar,1.0,rucwar,1.0,rucwar,1.0,rucwar,1.0
126,18003_COR_35,35,rucwar,rucwar,1.0,rucwar,1.0,rucwar,1.0,rucwar,1.0,rucwar,1.0
127,18003_COR_40,40,rucwar,rucwar,1.0,rucwar,1.0,rucwar,1.0,rucwar,1.0,rucwar,1.0
128,18003_COR_45,45,rucwar,nocall,0.0,rucwar,1.0,rucwar,1.0,rucwar,1.0,rucwar,1.0
129,18003_COR_50,50,rucwar,bucmot2,0.0,nocall,0.0,rucwar,1.0,bucmot2,0.0,nocall,0.0


In [132]:
# plot_oof(237)

In [133]:
# plot_oof_mean(df)

## audio_id: 20152

In [134]:
# Rows of recording 20152, restricted to `usecols`, with a background gradient
# on the F1 columns.
idx = valid_df[valid_df["audio_id"] == 20152].index.values
df = valid_df.loc[idx]
df[usecols].style.background_gradient(cmap='Reds_r', low=1.0, high=0.0, subset=subset_cols)

Unnamed: 0,row_id,seconds,birds,main_avg,main_avg_f1,main_v2_10_15,main_v2_10_15_f1,main_optuna,main_optuna_f1,voting,voting_f1,custom_voting,custom_voting_f1
1320,20152_SSW_5,5,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1321,20152_SSW_10,10,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1322,20152_SSW_15,15,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1323,20152_SSW_20,20,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1324,20152_SSW_25,25,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1325,20152_SSW_30,30,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1326,20152_SSW_35,35,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1327,20152_SSW_40,40,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1328,20152_SSW_45,45,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1329,20152_SSW_50,50,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0


In [135]:
# plot_oof(1427)

In [136]:
# plot_oof_mean(df)

## audio_id: 21767

In [137]:
# Rows of recording 21767, restricted to `usecols`, with a background gradient
# on the F1 columns.
idx = valid_df[valid_df["audio_id"] == 21767].index.values
df = valid_df.loc[idx]
df[usecols].style.background_gradient(cmap='Reds_r', low=1.0, high=0.0, subset=subset_cols)

Unnamed: 0,row_id,seconds,birds,main_avg,main_avg_f1,main_v2_10_15,main_v2_10_15_f1,main_optuna,main_optuna_f1,voting,voting_f1,custom_voting,custom_voting_f1
240,21767_COR_5,5,grekis rucwar,rucwar,0.666667,rucwar,0.666667,rucwar,0.666667,rucwar,0.666667,rucwar,0.666667
241,21767_COR_10,10,grekis rucwar,rucwar,0.666667,rucwar,0.666667,rucwar,0.666667,rucwar,0.666667,rucwar,0.666667
242,21767_COR_15,15,rucwar,rucwar,1.0,rucwar,1.0,rucwar,1.0,rucwar,1.0,rucwar,1.0
243,21767_COR_20,20,rucwar,rucwar,1.0,rucwar,1.0,rucwar,1.0,rucwar,1.0,rucwar,1.0
244,21767_COR_25,25,rucwar,nocall,0.0,rucwar,1.0,rucwar,1.0,nocall,0.0,rucwar,1.0
245,21767_COR_30,30,grekis rucwar,rucwar,0.666667,rucwar,0.666667,rucwar,0.666667,rucwar,0.666667,rucwar,0.666667
246,21767_COR_35,35,rucwar,nocall,0.0,grekis,0.0,rucwar,1.0,nocall,0.0,grekis,0.0
247,21767_COR_40,40,grekis rucwar,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0
248,21767_COR_45,45,rucwar,nocall,0.0,rucwar,1.0,rucwar,1.0,nocall,0.0,rucwar,1.0
249,21767_COR_50,50,nocall,rucwar,0.0,rucwar,0.0,rucwar,0.0,rucwar,0.0,rucwar,0.0


In [43]:
# plot_oof(350)

In [44]:
# plot_oof_mean(df)

## audio_id: 26709

In [45]:
# Rows of recording 26709, restricted to `usecols`, with a background gradient
# on the F1 columns.
idx = valid_df[valid_df["audio_id"] == 26709].index.values
df = valid_df.loc[idx]
df[usecols].style.background_gradient(cmap='Reds_r', low=1.0, high=0.0, subset=subset_cols)

Unnamed: 0,row_id,seconds,birds,main_avg,main_avg_f1,main_v2_10_15,main_v2_10_15_f1,main_optuna,main_optuna_f1,voting,voting_f1,custom_voting,custom_voting_f1
1440,26709_SSW_5,5,amegfi,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0
1441,26709_SSW_10,10,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1442,26709_SSW_15,15,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1443,26709_SSW_20,20,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1444,26709_SSW_25,25,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1445,26709_SSW_30,30,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1446,26709_SSW_35,35,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1447,26709_SSW_40,40,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1448,26709_SSW_45,45,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1449,26709_SSW_50,50,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0


In [46]:
# plot_oof(1492)

In [47]:
# plot_oof_mean(df)

## audio_id: 26746

In [48]:
# Rows of recording 26746, restricted to `usecols`, with a background gradient
# on the F1 columns.
idx = valid_df[valid_df["audio_id"] == 26746].index.values
df = valid_df.loc[idx]
df[usecols].style.background_gradient(cmap='Reds_r', low=1.0, high=0.0, subset=subset_cols)

Unnamed: 0,row_id,seconds,birds,main_avg,main_avg_f1,main_v2_10_15,main_v2_10_15_f1,main_optuna,main_optuna_f1,voting,voting_f1,custom_voting,custom_voting_f1
360,26746_COR_5,5,bobfly1,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0
361,26746_COR_10,10,bobfly1,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0
362,26746_COR_15,15,bobfly1,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0
363,26746_COR_20,20,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
364,26746_COR_25,25,nocall,nocall,1.0,bobfly1,0.0,bobfly1,0.0,bobfly1,0.0,bobfly1,0.0
365,26746_COR_30,30,bobfly1,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0
366,26746_COR_35,35,bobfly1,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0
367,26746_COR_40,40,bobfly1 rucwar,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0
368,26746_COR_45,45,rucwar,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0
369,26746_COR_50,50,rucwar,rucwar,1.0,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0


In [49]:
# plot_oof(386)

In [50]:
# plot_oof_mean(df)

## audio_id: 28933

In [51]:
# Rows of recording 28933, restricted to `usecols`, with a background gradient
# on the F1 columns.
idx = valid_df[valid_df["audio_id"] == 28933].index.values
df = valid_df.loc[idx]
df[usecols].style.background_gradient(cmap='Reds_r', low=1.0, high=0.0, subset=subset_cols)

Unnamed: 0,row_id,seconds,birds,main_avg,main_avg_f1,main_v2_10_15,main_v2_10_15_f1,main_optuna,main_optuna_f1,voting,voting_f1,custom_voting,custom_voting_f1
1560,28933_SSW_5,5,sonspa,sonspa,1.0,nocall,0.0,sonspa,1.0,sonspa,1.0,nocall,0.0
1561,28933_SSW_10,10,rewbla,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0
1562,28933_SSW_15,15,sonspa,nocall,0.0,nocall,0.0,sonspa,1.0,nocall,0.0,nocall,0.0
1563,28933_SSW_20,20,sonspa,nocall,0.0,nocall,0.0,sonspa,1.0,nocall,0.0,nocall,0.0
1564,28933_SSW_25,25,sonspa,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0
1565,28933_SSW_30,30,sonspa,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0
1566,28933_SSW_35,35,sonspa,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0
1567,28933_SSW_40,40,sonspa,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0
1568,28933_SSW_45,45,sonspa,nocall,0.0,sonspa,1.0,sonspa,1.0,nocall,0.0,sonspa,1.0
1569,28933_SSW_50,50,sonspa,sonspa,1.0,sonspa,1.0,sonspa,1.0,sonspa,1.0,sonspa,1.0


In [52]:
# plot_oof(1563)

In [53]:
# plot_oof_mean(df)

## audio_id: 31928

In [54]:
# Collect the index labels of every valid_df row whose audio_id is 31928;
# both `idx` and `df` stay in the notebook namespace for the cells below.
idx = valid_df.index[(valid_df["audio_id"] == 31928).to_numpy()].values
df = valid_df.loc[idx]
# Show the `usecols` columns, shading only `subset_cols` with the reversed
# Reds colormap (low/high are forwarded to Styler.background_gradient as-is).
df[usecols].style.background_gradient(
    subset=subset_cols,
    cmap='Reds_r',
    low=1.0,
    high=0.0,
)

Unnamed: 0,row_id,seconds,birds,main_avg,main_avg_f1,main_v2_10_15,main_v2_10_15_f1,main_optuna,main_optuna_f1,voting,voting_f1,custom_voting,custom_voting_f1
480,31928_COR_5,5,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
481,31928_COR_10,10,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
482,31928_COR_15,15,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
483,31928_COR_20,20,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
484,31928_COR_25,25,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
485,31928_COR_30,30,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
486,31928_COR_35,35,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
487,31928_COR_40,40,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
488,31928_COR_45,45,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
489,31928_COR_50,50,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0


In [55]:
# plot_oof_mean(df)

## audio_id: 42907

In [56]:
# Collect the index labels of every valid_df row whose audio_id is 42907;
# both `idx` and `df` stay in the notebook namespace for the cells below.
idx = valid_df.index[(valid_df["audio_id"] == 42907).to_numpy()].values
df = valid_df.loc[idx]
# Show the `usecols` columns, shading only `subset_cols` with the reversed
# Reds colormap (low/high are forwarded to Styler.background_gradient as-is).
df[usecols].style.background_gradient(
    subset=subset_cols,
    cmap='Reds_r',
    low=1.0,
    high=0.0,
)

Unnamed: 0,row_id,seconds,birds,main_avg,main_avg_f1,main_v2_10_15,main_v2_10_15_f1,main_optuna,main_optuna_f1,voting,voting_f1,custom_voting,custom_voting_f1
1680,42907_SSW_5,5,reevir1,reevir1,1.0,reevir1,1.0,reevir1,1.0,reevir1,1.0,reevir1,1.0
1681,42907_SSW_10,10,reevir1,nocall,0.0,reevir1,1.0,reevir1,1.0,nocall,0.0,reevir1,1.0
1682,42907_SSW_15,15,reevir1,nocall,0.0,reevir1,1.0,reevir1,1.0,nocall,0.0,reevir1,1.0
1683,42907_SSW_20,20,reevir1,nocall,0.0,reevir1,1.0,reevir1,1.0,nocall,0.0,reevir1,1.0
1684,42907_SSW_25,25,reevir1,nocall,0.0,reevir1,1.0,reevir1,1.0,nocall,0.0,reevir1,1.0
1685,42907_SSW_30,30,reevir1,reevir1,1.0,reevir1,1.0,reevir1,1.0,reevir1,1.0,reevir1,1.0
1686,42907_SSW_35,35,reevir1,nocall,0.0,reevir1,1.0,reevir1,1.0,nocall,0.0,reevir1,1.0
1687,42907_SSW_40,40,reevir1,nocall,0.0,reevir1,1.0,reevir1,1.0,nocall,0.0,reevir1,1.0
1688,42907_SSW_45,45,reevir1,reevir1,1.0,reevir1,1.0,reevir1,1.0,reevir1,1.0,reevir1,1.0
1689,42907_SSW_50,50,reevir1,reevir1,1.0,reevir1,1.0,reevir1,1.0,reevir1,1.0,reevir1,1.0


In [57]:
# plot_oof_mean(df)

## audio_id: 44957

In [58]:
# Collect the index labels of every valid_df row whose audio_id is 44957;
# both `idx` and `df` stay in the notebook namespace for the cells below.
idx = valid_df.index[(valid_df["audio_id"] == 44957).to_numpy()].values
df = valid_df.loc[idx]
# Show the `usecols` columns, shading only `subset_cols` with the reversed
# Reds colormap (low/high are forwarded to Styler.background_gradient as-is).
df[usecols].style.background_gradient(
    subset=subset_cols,
    cmap='Reds_r',
    low=1.0,
    high=0.0,
)

Unnamed: 0,row_id,seconds,birds,main_avg,main_avg_f1,main_v2_10_15,main_v2_10_15_f1,main_optuna,main_optuna_f1,voting,voting_f1,custom_voting,custom_voting_f1
600,44957_COR_5,5,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
601,44957_COR_10,10,bobfly1,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0
602,44957_COR_15,15,nocall,bobfly1,0.0,bobfly1,0.0,bobfly1,0.0,bobfly1,0.0,bobfly1,0.0
603,44957_COR_20,20,bobfly1,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0
604,44957_COR_25,25,bobfly1,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0
605,44957_COR_30,30,bobfly1,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0
606,44957_COR_35,35,bobfly1,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0
607,44957_COR_40,40,bobfly1,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0
608,44957_COR_45,45,bobfly1,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0
609,44957_COR_50,50,bobfly1,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0,bobfly1,1.0


In [59]:
# plot_oof_mean(df)

## audio_id: 50878

In [60]:
# Collect the index labels of every valid_df row whose audio_id is 50878;
# both `idx` and `df` stay in the notebook namespace for the cells below.
idx = valid_df.index[(valid_df["audio_id"] == 50878).to_numpy()].values
df = valid_df.loc[idx]
# Show the `usecols` columns, shading only `subset_cols` with the reversed
# Reds colormap (low/high are forwarded to Styler.background_gradient as-is).
df[usecols].style.background_gradient(
    subset=subset_cols,
    cmap='Reds_r',
    low=1.0,
    high=0.0,
)

Unnamed: 0,row_id,seconds,birds,main_avg,main_avg_f1,main_v2_10_15,main_v2_10_15_f1,main_optuna,main_optuna_f1,voting,voting_f1,custom_voting,custom_voting_f1
720,50878_COR_5,5,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
721,50878_COR_10,10,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
722,50878_COR_15,15,nocall,nocall,1.0,nocall,1.0,nocall,1.0,grhcha1,0.0,nocall,1.0
723,50878_COR_20,20,nocall,grhcha1,0.0,nocall,1.0,nocall,1.0,grhcha1,0.0,nocall,1.0
724,50878_COR_25,25,nocall,grhcha1,0.0,nocall,1.0,nocall,1.0,grhcha1,0.0,nocall,1.0
725,50878_COR_30,30,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
726,50878_COR_35,35,nocall,grhcha1,0.0,nocall,1.0,nocall,1.0,grhcha1,0.0,nocall,1.0
727,50878_COR_40,40,nocall,grhcha1,0.0,nocall,1.0,nocall,1.0,grhcha1,0.0,nocall,1.0
728,50878_COR_45,45,nocall,grhcha1,0.0,nocall,1.0,nocall,1.0,grhcha1,0.0,nocall,1.0
729,50878_COR_50,50,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0


In [61]:
# plot_oof_mean(df)

## audio_id: 51010

In [62]:
# Collect the index labels of every valid_df row whose audio_id is 51010;
# both `idx` and `df` stay in the notebook namespace for the cells below.
idx = valid_df.index[(valid_df["audio_id"] == 51010).to_numpy()].values
df = valid_df.loc[idx]
# Show the `usecols` columns, shading only `subset_cols` with the reversed
# Reds colormap (low/high are forwarded to Styler.background_gradient as-is).
df[usecols].style.background_gradient(
    subset=subset_cols,
    cmap='Reds_r',
    low=1.0,
    high=0.0,
)

Unnamed: 0,row_id,seconds,birds,main_avg,main_avg_f1,main_v2_10_15,main_v2_10_15_f1,main_optuna,main_optuna_f1,voting,voting_f1,custom_voting,custom_voting_f1
1800,51010_SSW_5,5,grycat norfli,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0
1801,51010_SSW_10,10,grycat norfli,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0
1802,51010_SSW_15,15,woothr,amerob,0.0,nocall,0.0,nocall,0.0,amerob,0.0,nocall,0.0
1803,51010_SSW_20,20,norfli,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0
1804,51010_SSW_25,25,norfli,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0
1805,51010_SSW_30,30,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1806,51010_SSW_35,35,rewbla,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0
1807,51010_SSW_40,40,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1808,51010_SSW_45,45,norfli,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0
1809,51010_SSW_50,50,norfli,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0


In [63]:
# plot_oof_mean(df)

## audio_id: 54955

In [64]:
# Collect the index labels of every valid_df row whose audio_id is 54955;
# both `idx` and `df` stay in the notebook namespace for the cells below.
idx = valid_df.index[(valid_df["audio_id"] == 54955).to_numpy()].values
df = valid_df.loc[idx]
# Show the `usecols` columns, shading only `subset_cols` with the reversed
# Reds colormap (low/high are forwarded to Styler.background_gradient as-is).
df[usecols].style.background_gradient(
    subset=subset_cols,
    cmap='Reds_r',
    low=1.0,
    high=0.0,
)

Unnamed: 0,row_id,seconds,birds,main_avg,main_avg_f1,main_v2_10_15,main_v2_10_15_f1,main_optuna,main_optuna_f1,voting,voting_f1,custom_voting,custom_voting_f1
1920,54955_SSW_5,5,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1921,54955_SSW_10,10,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1922,54955_SSW_15,15,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1923,54955_SSW_20,20,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1924,54955_SSW_25,25,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1925,54955_SSW_30,30,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1926,54955_SSW_35,35,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1927,54955_SSW_40,40,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1928,54955_SSW_45,45,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
1929,54955_SSW_50,50,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0


In [65]:
# plot_oof_mean(df)

## audio_id: 57610

In [66]:
# Collect the index labels of every valid_df row whose audio_id is 57610;
# both `idx` and `df` stay in the notebook namespace for the cells below.
idx = valid_df.index[(valid_df["audio_id"] == 57610).to_numpy()].values
df = valid_df.loc[idx]
# Show the `usecols` columns, shading only `subset_cols` with the reversed
# Reds colormap (low/high are forwarded to Styler.background_gradient as-is).
df[usecols].style.background_gradient(
    subset=subset_cols,
    cmap='Reds_r',
    low=1.0,
    high=0.0,
)

Unnamed: 0,row_id,seconds,birds,main_avg,main_avg_f1,main_v2_10_15,main_v2_10_15_f1,main_optuna,main_optuna_f1,voting,voting_f1,custom_voting,custom_voting_f1
840,57610_COR_5,5,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
841,57610_COR_10,10,nocall,orbspa1,0.0,nocall,1.0,orbspa1,0.0,orbspa1,0.0,nocall,1.0
842,57610_COR_15,15,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
843,57610_COR_20,20,rubwre1,rubwre1,1.0,rubwre1,1.0,rubwre1,1.0,rubwre1,1.0,rubwre1,1.0
844,57610_COR_25,25,rubwre1,rubwre1,1.0,rubwre1,1.0,rubwre1,1.0,rubwre1,1.0,rubwre1,1.0
845,57610_COR_30,30,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
846,57610_COR_35,35,nocall,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0,nocall,1.0
847,57610_COR_40,40,rubwre1,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0,nocall,0.0
848,57610_COR_45,45,rubwre1,rubwre1,1.0,rubwre1,1.0,rubwre1,1.0,rubwre1,1.0,rubwre1,1.0
849,57610_COR_50,50,nocall,orbspa1,0.0,nocall,1.0,nocall,1.0,orbspa1,0.0,nocall,1.0


In [67]:
# plot_oof(861)

In [68]:
# plot_oof_mean(df)