In [1]:
import sys

import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt

sys.path.append('../src')
import const
from metrics import row_wise_micro_averaged_f1_score

In [2]:
# Input locations: the soundscape label table and the prediction array saved
# under the exp_002 log directory (presumably out-of-fold scores - confirm).
labels_csv = '../data/input/train_soundscape_labels.csv'
oof_npy = "../logs/exp_002_20210408160320_0.700/oof.npy"

train_df = pd.read_csv(labels_csv)
oof = np.load(oof_npy)

In [3]:
# Score at or above which a class counts as detected for a row.
THRESHOLD = 0.7

# Boolean detection mask: one row per prediction row, one column per class.
events = oof >= THRESHOLD

# Extra trailing column that is True only for rows without any detection.
# const.INV_BIRD_CODE is expected to map this last column index to "nocall"
# (the rendered oof_birds values in this notebook show that label).
nocall_col = events.sum(axis=1, keepdims=True) == 0
events = np.concatenate([events, nocall_col], axis=1)

# Build exactly ONE label string per row.
# Flattening np.argwhere(events) yields one entry per detection, so a row with
# two or more classes over the threshold would make the list longer than the
# frame and break (or misalign) the later column assignment. Joining the codes
# of a row with a space keeps the list row-aligned. The codes are sorted
# because the multi-label values visible in `birds` look alphabetical
# (e.g. "hofwoo1 rucwar runwre1") - TODO confirm against the label spec.
oof_labels = [
    " ".join(
        sorted(
            const.INV_BIRD_CODE[class_idx]
            for class_idx in np.flatnonzero(row).tolist()
        )
    )
    for row in events
]

In [4]:
# Attach the decoded prediction labels as a new column, one entry per row.
train_df = train_df.assign(oof_birds=oof_labels)

In [5]:
# 0/1 indicator: the ground-truth label of the row is exactly "nocall".
train_df["nocall_birds"] = train_df["birds"].eq("nocall").astype("int64")

# 0/1 indicator: the decoded prediction of the row is exactly "nocall".
train_df["nocall_oof_birds"] = train_df["oof_birds"].eq("nocall").astype("int64")

In [6]:
# Row-level exact match between the label string and the decoded prediction string.
is_exact_match = train_df["birds"].eq(train_df["oof_birds"])
train_df["acc"] = is_exact_match
train_df.head()

Unnamed: 0,row_id,site,audio_id,seconds,birds,oof_birds,nocall_birds,nocall_oof_birds,acc
0,7019_COR_5,COR,7019,5,nocall,nocall,1,1,True
1,7019_COR_10,COR,7019,10,nocall,nocall,1,1,True
2,7019_COR_15,COR,7019,15,nocall,nocall,1,1,True
3,7019_COR_20,COR,7019,20,nocall,nocall,1,1,True
4,7019_COR_25,COR,7019,25,nocall,nocall,1,1,True


In [7]:
# Signed disagreement on "nocall":
#   +1 -> only the label is nocall,
#   -1 -> only the prediction is nocall,
#    0 -> label and prediction agree on nocall vs. not-nocall.
label_is_nocall = train_df["birds"].eq("nocall").astype("int64")
pred_is_nocall = train_df["oof_birds"].eq("nocall").astype("int64")
train_df["diff"] = label_is_nocall - pred_is_nocall

In [8]:
# Count rows per diff value: -1 means the label has a bird but the prediction
# is nocall, +1 means the label is nocall but the prediction is not.
train_df["diff"].value_counts()

 0    1719
-1     679
 1       2
Name: diff, dtype: int64

つまり、`diff == -1`(正解ラベルでは何かしらの鳥の鳴き声が聞こえているのに `nocall` と予測している)パターンが 679 件と多い。逆に `diff == 1`(正解が `nocall` なのに鳥を予測している)パターンは 2 件しかない。

In [9]:
# Rows where the decoded prediction string differs from the label string.
train_df[~train_df["acc"]]

Unnamed: 0,row_id,site,audio_id,seconds,birds,oof_birds,nocall_birds,nocall_oof_birds,acc,diff
240,11254_COR_5,COR,11254,5,rubwre1,nocall,0,1,False,-1
242,11254_COR_15,COR,11254,15,rubwre1,nocall,0,1,False,-1
267,11254_COR_140,COR,11254,140,obnthr1,nocall,0,1,False,-1
268,11254_COR_145,COR,11254,145,obnthr1,nocall,0,1,False,-1
269,11254_COR_150,COR,11254,150,obnthr1,nocall,0,1,False,-1
...,...,...,...,...,...,...,...,...,...,...
2390,54955_SSW_555,SSW,54955,555,chswar,nocall,0,1,False,-1
2391,54955_SSW_560,SSW,54955,560,grycat,nocall,0,1,False,-1
2394,54955_SSW_575,SSW,54955,575,chswar,nocall,0,1,False,-1
2396,54955_SSW_585,SSW,54955,585,grycat,nocall,0,1,False,-1


In [10]:
# For every recording (in order of first appearance): print the label
# distribution, then the nocall-disagreement breakdown restricted to the rows
# whose prediction does not match the label.
for audio_id, clip_rows in train_df.groupby("audio_id", sort=False):
    print(f"audio_id: {audio_id}")
    print("-" * 50)
    print(clip_rows["birds"].value_counts())
    print("\n")
    print(clip_rows.loc[~clip_rows["acc"], "diff"].value_counts())
    print("\n" * 2)

audio_id: 7019
--------------------------------------------------
nocall    120
Name: birds, dtype: int64


Series([], Name: diff, dtype: int64)



audio_id: 7954
--------------------------------------------------
nocall    120
Name: birds, dtype: int64


Series([], Name: diff, dtype: int64)



audio_id: 11254
--------------------------------------------------
nocall            105
obnthr1             6
rubwre1             5
brnjay              3
brnjay sthwoo1      1
Name: birds, dtype: int64


-1    12
Name: diff, dtype: int64



audio_id: 18003
--------------------------------------------------
rucwar                    84
nocall                    24
hofwoo1                    3
rucwar runwre1             3
hofwoo1 rucwar             2
grekis rucwar              1
runwre1                    1
hofwoo1 rucwar runwre1     1
rtlhum rucwar              1
Name: birds, dtype: int64


-1    72
 0     6
Name: diff, dtype: int64



audio_id: 21767
--------------------------------------------

In [11]:
def f(row_idx):
    """Return the top-scoring class of one row of the global ``oof`` array.

    Args:
        row_idx: Row position in ``oof`` to inspect.

    Returns:
        A ``(code, score)`` tuple: the ``const.INV_BIRD_CODE`` entry for the
        column holding the largest value in that row, and that largest value.
    """
    row_scores = oof[row_idx, :]
    best_col = row_scores.argmax()
    best_score = row_scores.max()
    return const.INV_BIRD_CODE[best_col], best_score

In [15]:
# Top-scoring class and its score for row 2396 (labelled grycat, predicted nocall).
f(2396)

('sltred', 0.0197440255433321)

In [13]:
# Show the annotated frame; the rendered output elides the middle rows.
train_df

Unnamed: 0,row_id,site,audio_id,seconds,birds,oof_birds,nocall_birds,nocall_oof_birds,acc,diff
0,7019_COR_5,COR,7019,5,nocall,nocall,1,1,True,0
1,7019_COR_10,COR,7019,10,nocall,nocall,1,1,True,0
2,7019_COR_15,COR,7019,15,nocall,nocall,1,1,True,0
3,7019_COR_20,COR,7019,20,nocall,nocall,1,1,True,0
4,7019_COR_25,COR,7019,25,nocall,nocall,1,1,True,0
...,...,...,...,...,...,...,...,...,...,...
2395,54955_SSW_580,SSW,54955,580,nocall,nocall,1,1,True,0
2396,54955_SSW_585,SSW,54955,585,grycat,nocall,0,1,False,-1
2397,54955_SSW_590,SSW,54955,590,grycat,nocall,0,1,False,-1
2398,54955_SSW_595,SSW,54955,595,nocall,nocall,1,1,True,0


In [18]:
# Class indices of row 2396 ordered from the highest to the lowest score.
np.argsort(oof[2396, :])[::-1]

array([309,  58, 183, 112,  74, 332,  81,  23, 133, 105, 329, 366, 229,
       250, 213, 101,   4, 174, 134,  22, 369, 212,   8, 142,  32, 243,
       170, 394, 194, 236,  80,  48, 286, 264, 120,  36, 121, 285, 207,
       350, 349, 218, 360, 318, 132,   9,  29, 168, 158, 173, 296, 225,
       179, 160, 210,  82, 188, 228,  69, 270, 333, 281,  27, 359,  94,
        12, 197, 320, 153, 209, 165, 123, 206, 113,  31, 330, 180,  60,
       151, 219, 295, 190, 122,  49, 347, 368, 265,  96, 310, 157, 247,
        99, 214, 344, 171, 193, 381, 107, 284, 198, 273, 131, 392, 127,
        83, 222, 263,  40,  68, 208, 259, 356, 357, 287,  24, 378, 146,
        11, 306, 248, 182, 191,   5, 353, 372, 371, 315, 326, 367, 215,
       340, 275,  38, 110, 126, 274,  34,  50, 363, 220, 195, 313,  45,
       196, 267, 164, 184, 253,   7, 118, 124,  19, 346,  55, 129, 375,
       336, 298,  61,  51,   1, 331, 115,   0,  89,  35, 221, 114, 282,
        47,  18,  56, 382, 108, 223, 376,  37, 276,  97,  14,  7

In [21]:
# Map class index 309 (the top-ranked index for row 2396) back to its bird code.
const.INV_BIRD_CODE[309]

'sltred'