In [None]:
###############################################
'''
                 PIPE LINE 3
          모델 불러와서 평가하기
                                              '''
###############################################

'''
PipeLine 으로 따로따로 작업하려면

class MelSpecComputer
def mono_to_color
class BirdCLEFDataset
불러올것


'''

In [None]:
import numpy as np
import librosa as lb
import librosa.display as lbd
import soundfile as sf
from  soundfile import SoundFile
import pandas as pd
from  IPython.display import Audio
from pathlib import Path

import torch
from torch import nn, optim
from  torch.utils.data import Dataset, DataLoader
import torchvision.models as models

from matplotlib import pyplot as plt

import os, random, gc
import re, time, json
from  ast import literal_eval


from IPython.display import Audio
from sklearn.metrics import label_ranking_average_precision_score

from tqdm.notebook import tqdm
import joblib

import pretrainedmodels

from sklearn.model_selection import StratifiedKFold


In [None]:
# `is_available` has to be *called*: the bare function object is always truthy,
# which would report 'cuda' even on a CPU-only machine.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

In [None]:
##################
## Seed Setting ##
##################

def make_seed(seed = 499):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Integer seed applied to Python's ``random``, NumPy's global RNG
            and PyTorch (CPU and all CUDA devices).
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one.
    torch.cuda.manual_seed_all(seed)
    # Deterministic kernels alone are not enough: cuDNN autotuning may still
    # select different algorithms between runs, so it is switched off too.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Only influences interpreters started after this point; the hash seed of
    # the already-running process is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)

make_seed()

In [None]:
#################
## Path manage ##
#################

# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
Root_Path = '/Users/ansgh/PycharmProjects/kaggle_notebook/input/birdclef-2021'

# Competition inputs located directly under the dataset root.
Train_short_root = Path(Root_Path + '/train_short_audio')
Train_metadata_root = Path(Root_Path + '/train_metadata.csv')

# Folder for generated audio images (presumably spectrograms); it is created
# up front, including missing parents, and an existing folder is tolerated.
Train_audio_image_save_root = Path(Root_Path + '/audio_images')
Train_audio_image_save_root.mkdir(parents=True, exist_ok=True)

# Sanity check: show what the dataset root contains.
file_list = os.listdir(Root_Path)
print(file_list)

In [None]:
# Problem-wide constants.
NUM_CLASSES = 397   # size of the classifier's output layer (see load_net)
SR = 32_000         # sample rate handed to the dataset / mel computer
DURATION = 5        # clip length; submission rows advance in steps of 5
THRESH = 0.25       # default probability cut-off used by get_thresh_preds

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("DEVICE:", DEVICE)

# Defaults for a real submission run.
TEST_AUDIO_ROOT = Path("../input/birdclef-2021/test_soundscapes")
SAMPLE_SUB_PATH = "../input/birdclef-2021/sample_submission.csv"
TARGET_PATH = None

# Per the competition rules the test soundscapes are only reachable while a
# submission is being committed. When none are found (practice runs), fall
# back to the labelled train soundscapes so the predictions can be scored.
if not any(TEST_AUDIO_ROOT.glob("*.ogg")):
    TARGET_PATH = Path("../input/birdclef-2021/train_soundscape_labels.csv")
    SAMPLE_SUB_PATH = None
    TEST_AUDIO_ROOT = Path("../input/birdclef-2021/train_soundscapes")

In [None]:
#######################################
##                                   ##  
## 음성처리 위한 Melspectrogram 계산 ## 
##                                   ##
#######################################

class MelSpecComputer:
    """Callable that turns a waveform into a dB-scaled mel spectrogram.

    Args:
        sr: Sample rate forwarded to librosa.
        n_mels: Number of mel bands.
        fmin: Lowest frequency of the mel filter bank.
        fmax: Highest frequency of the mel filter bank.
        **kwargs: Extra keyword arguments forwarded to
            ``librosa.feature.melspectrogram``. ``n_fft`` defaults to
            ``sr // 10`` and ``hop_length`` to ``sr // 40`` when not supplied.
    """

    def __init__(self, sr, n_mels, fmin, fmax, **kwargs):
        self.sr = sr
        self.n_mels = n_mels
        self.fmin = fmin
        self.fmax = fmax
        # FFT window: a tenth of `sr` samples unless the caller overrides it.
        kwargs['n_fft'] = kwargs.get('n_fft', self.sr // 10)
        # Hop length is the stride between successive frames (not the overlap);
        # the default is a quarter of the default window.
        kwargs['hop_length'] = kwargs.get('hop_length', self.sr // (10 * 4))
        self.kwargs = kwargs

    def __call__(self, audio):
        """Return the float32 dB-scaled mel spectrogram of ``audio``."""
        # `y` must be passed by keyword: librosa >= 0.10 made the audio
        # argument keyword-only, and older releases accept the keyword as well.
        melspec = lb.feature.melspectrogram(
            y=audio, sr=self.sr, n_mels=self.n_mels, fmin=self.fmin, fmax=self.fmax, **self.kwargs
        )
        melspec = lb.power_to_db(melspec).astype(np.float32)

        return melspec

In [None]:
#########################################
##          Mel Image normalize        ##   
## 이미지 노말라이징 안하면 너무 밝음  ##
#########################################

def mono_to_color(X, eps=1e-6, mean=None, std=None):
    """Standardise an array and rescale it to the 0-255 ``uint8`` range.

    Args:
        X: NumPy array (e.g. a mel spectrogram).
        eps: Small constant that guards the division by ``std`` and decides
            whether the standardised array is effectively constant.
        mean: Value (scalar or broadcastable array) subtracted from ``X``.
            ``None`` means "use ``X.mean()``"; an explicit ``0`` is honoured.
        std: Value (scalar or broadcastable array) ``X`` is divided by.
            ``None`` means "use ``X.std()``"; an explicit ``0`` is honoured.

    Returns:
        ``uint8`` array shaped like ``X``. It is all zeros when the
        standardised values span no more than ``eps``.
    """
    # `is None` rather than `or`: zero is a legitimate statistic, and arrays
    # have no unambiguous truth value.
    if mean is None:
        mean = X.mean()
    if std is None:
        std = X.std()
    X = (X - mean) / (std + eps)

    _min, _max = X.min(), X.max()

    if (_max - _min) > eps:
        # Min-max scale to [0, 255]; the cast truncates toward zero.
        V = 255 * (X - _min) / (_max - _min)
        V = V.astype(np.uint8)
    else:
        V = np.zeros_like(X, dtype=np.uint8)

    return V

In [None]:
######################################
##          Crop Padding            ##   
## 규격에 맞게 사이즈 trimming 하기 ##
######################################

def crop_or_pad(y, length, is_train=True, start=None):
    """Return ``y`` cut or zero-padded to exactly ``length`` samples.

    Args:
        y: 1-D sequence of samples.
        length: Target number of samples.
        is_train: When cropping without an explicit ``start``, pick a random
            window if True, otherwise take the window starting at index 0.
        start: Optional start index of the crop. ``0`` is honoured as given.

    Returns:
        ``y`` itself when it already has ``length`` samples, a zero-padded
        copy when it is shorter, or a slice of ``length`` samples when longer.
    """
    n_samples = len(y)

    # Too short: append zeros at the end. np.zeros defaults to float64, so
    # the padded result is upcast accordingly.
    if n_samples < length:
        y = np.concatenate([y, np.zeros(length - n_samples)])

    # Too long: keep one window of `length` samples.
    elif n_samples > length:
        if start is None:
            # `+ 1` makes the last valid start index reachable as well.
            start = np.random.randint(n_samples - length + 1) if is_train else 0

        y = y[start : start + length]

    return y

In [None]:
###################################
##        Dataset 만들기          ##
##    numpy 3층으로 image 쌓고    ##
##     label은  smoothing 해서    ##
##         image, label 반환      ##
####################################

class BirdClefDataset(Dataset):
    """Dataset yielding one ``(image, smoothed_label)`` pair per metadata row.

    Args:
        audio_image_store: Mapping from ``filename`` to a stack of images that
            is indexable along its first axis (the original author's notes
            describe shape ``(x, 128, 281)`` — TODO confirm with the producer).
        meta: DataFrame with at least ``filename`` and ``label_id`` columns.
        sr: Sample rate; only used to derive ``audio_length``.
        is_train: Stored on the instance; not consulted by the methods below.
        num_classes: Length of the label vector.
        duration: Clip duration; only used to derive ``audio_length``.
    """

    def __init__(self, audio_image_store, meta, sr=SR, is_train=True, num_classes=NUM_CLASSES, duration=DURATION):

        self.audio_image_store = audio_image_store  # filename -> stacked image array
        self.meta = meta.copy().reset_index(drop=True)  # private copy with a clean 0..n-1 index
        self.sr = sr
        self.is_train = is_train
        self.num_classes = num_classes
        self.duration = duration
        self.audio_length = self.duration * self.sr

    def normalize(self, image):
        """Scale a 0-255 image to [0, 1] float32 and repeat it on 3 channels."""
        image = image.astype('float32', copy=False) / 255.0
        image = np.stack([image, image, image])
        return image

    def __len__(self):
        return len(self.meta)

    def find_mid_img(self, image):
        """Return the sub-image whose pixel sum is the median of the stack.

        The sub-images are ranked by their total sum and the one at position
        ``len(image) // 2`` of that ranking is returned.
        """
        ranked = sorted(range(len(image)), key=lambda i: image[i].sum())
        return image[ranked[len(ranked) // 2]]

    def __getitem__(self, idx):
        row = self.meta.iloc[idx]  # one metadata record
        image = self.audio_image_store[row.filename]  # all images stored for this file

        # Option 1 (active): use one randomly chosen sub-image of the file.
        # Per the original author's note each file is stored as several
        # fixed-length segments, so a single random segment may not represent
        # the whole recording well — a known weakness of this dataset class.
        image = image[np.random.choice(len(image))]

        # Option 2 (disabled): treat noise as roughly constant, so segments
        # containing signal have larger sums, and pick the median (or maximum)
        # segment instead of a random one:
        ##image = self.find_mid_img(image)

        image = self.normalize(image)

        # Label smoothing: 0.995 for the true class, 0.0025 for every other.
        t = np.zeros(self.num_classes, dtype=np.float32) + 0.0025
        t[row.label_id] = 0.995

        return image, t

In [None]:
# One row per soundscape file. The file stem is expected to split on "_" into
# exactly three parts, which become the id / site / date columns.
data = pd.DataFrame(
    data=[
        (ogg.stem, *ogg.stem.split("_"), ogg)
        for ogg in Path(TEST_AUDIO_ROOT).glob("*.ogg")
    ],
    columns=["filename", "id", "site", "date", "filepath"],
)
print(data.shape)
data.head()

In [None]:
## Load the training metadata (the source of the label vocabulary)
df_train = pd.read_csv("../input/birdclef-2021/train_metadata.csv")

# primary label -> integer id, with ids assigned in sorted label order
LABEL_IDS = {
    species: index
    for index, species in enumerate(sorted(df_train["primary_label"].unique()))
}
# integer id -> primary label (inverse of LABEL_IDS)
INV_LABEL_IDS = dict(zip(LABEL_IDS.values(), LABEL_IDS.keys()))

In [None]:
# NOTE(review): `BirdCLEFDataset` is not defined in the visible part of this
# notebook (the class defined above is `BirdClefDataset`, with a different
# signature). The header cell says it must be brought in from another
# pipeline — confirm it is in scope before running this cell.
test_data = BirdCLEFDataset(data=data)
len(test_data), test_data[0].shape, test_data[1].shape
# Expected output, per the original author's notes:
# 20       -> there are 20 files in train_soundscapes
# 120      -> 10 minutes per train soundscape / 5 seconds per clip
# 3        -> channels stacked in normalize
# 128, 201 -> image size

In [None]:
def load_net(checkpoint_path, num_classes=NUM_CLASSES):
    """Build a ResNet-152 classifier and load its weights from a checkpoint.

    Args:
        checkpoint_path: Path of a file readable by ``torch.load`` that holds
            a state dict; keys may carry a leading ``"model."`` prefix.
        num_classes: Output size of the replaced final fully connected layer.

    Returns:
        The network, moved to ``DEVICE`` and switched to eval mode.
    """
    # No pretrained weights are requested: everything comes from the
    # checkpoint. Calling without arguments avoids the deprecated
    # `pretrained=` keyword and behaves the same on old and new torchvision.
    net = models.resnet152()
    net.fc = nn.Linear(net.fc.in_features, num_classes)

    # Load on CPU first so the checkpoint does not need the GPU it was saved
    # on. NOTE(security): torch.load unpickles data — only load trusted files.
    state = torch.load(checkpoint_path, map_location=torch.device("cpu"))

    # Strip the wrapper prefix only where it leads the key; a blanket
    # str.replace would also mangle keys that merely contain "model.".
    prefix = "model."
    for key in list(state.keys()):
        if key.startswith(prefix):
            state[key[len(prefix):]] = state.pop(key)

    net.load_state_dict(state)
    net = net.to(DEVICE)
    net = net.eval()
    return net

In [None]:
# Checkpoints to ensemble; every entry becomes one network in `nets`.
checkpoint_paths = [
    Path("../input/bird-clef/resnet152_sr32000_d7_v1_v1/birdclef_resnet152_fold0_epoch_00_f1_val_01435_20210513145723.pth"),
]

# load_net receives each path as a POSIX-style string.
nets = [load_net(ckpt.as_posix()) for ckpt in checkpoint_paths]

In [None]:
## Threshold 보다 높은 값만 pred라는 list에 저장

@torch.no_grad()
def get_thresh_preds(out, thresh=None):
    """Return, per row, the class indices whose score exceeds a threshold.

    Args:
        out: 2-D tensor of scores, one row per clip and one column per class.
        thresh: Cut-off; ``None`` falls back to the module-level ``THRESH``.
            An explicit ``0`` is honoured.

    Returns:
        A list with one entry per row: the indices of the classes scoring
        strictly above ``thresh``, ordered from highest to lowest score.
    """
    if thresh is None:
        thresh = THRESH
    ranked = (-out).argsort(1)          # class indices, best score first
    n_hits = (out > thresh).sum(1)      # how many classes pass, per row
    return [row[:k].cpu().numpy().tolist() for row, k in zip(ranked, n_hits)]

In [None]:
## pred라는 list를 받아서 그 안에 새이름이 있으면 keep, 없으면 'nocall'로 변경
def get_bird_names(preds):
    """Turn per-clip lists of class ids into submission strings.

    Each non-empty list becomes the space-separated labels looked up in
    ``INV_LABEL_IDS``; an empty list becomes ``"nocall"``.
    """
    return [
        " ".join(INV_LABEL_IDS[bird_id] for bird_id in clip_ids) if clip_ids else "nocall"
        for clip_ids in preds
    ]

In [None]:
##predict 실행해서 label 확률표 반환

def predict(nets, test_data, names=True):
    """Run the ensemble over every item of ``test_data``.

    Args:
        nets: Models whose sigmoid outputs are averaged.
        test_data: Indexable collection with a length; each item is a NumPy
            array that is fed to the models as one batch.
        names: If True, each averaged output is thresholded and converted to
            label strings; if False, the averaged probability tensor is kept.

    Returns:
        A list with one entry per item of ``test_data``.
    """
    results = []
    with torch.no_grad():
        for idx in tqdm(list(range(len(test_data)))):
            xb = torch.from_numpy(test_data[idx]).to(DEVICE)

            # Mean of the per-model sigmoid probabilities.
            pred = sum(torch.sigmoid(net(xb)) for net in nets) / len(nets)

            results.append(get_bird_names(get_thresh_preds(pred)) if names else pred)
    return results

In [None]:
# Keep the averaged probabilities (names=False) so that different thresholds
# can be applied afterwards without re-running the models.
pred_probas = predict(nets, test_data, names=False)
print(len(pred_probas))

In [None]:
## Convert each probability tensor into a list of bird-name strings
preds = [get_bird_names(get_thresh_preds(pred, thresh=THRESH)) for pred in pred_probas]

In [None]:
######################################
##    submission 만들기 위한 작업   ##
######################################


def preds_as_df(data, preds):
    """Assemble per-file predictions into a submission-style DataFrame.

    Args:
        data: DataFrame with ``id`` and ``site`` columns, one row per file.
        preds: One list of prediction strings per row of ``data``; the i-th
            string (1-based) gets the row id ``"<id>_<site>_<5*i>"``.

    Returns:
        DataFrame with ``row_id`` and ``birds`` columns. When
        ``SAMPLE_SUB_PATH`` is set, the result is left-joined onto the sample
        submission's ``row_id`` column and missing birds become ``"nocall"``.
    """
    row_ids, birds = [], []
    for row, file_preds in zip(data.itertuples(False), preds):
        row_ids.extend(f"{row.id}_{row.site}_{5*i}" for i in range(1, len(file_preds) + 1))
        birds.extend(file_preds)

    sub = pd.DataFrame({"row_id": row_ids, "birds": birds})
    if not SAMPLE_SUB_PATH:
        return sub

    # Align with the official row ids; rows without a prediction are "nocall".
    sample_sub = pd.read_csv(SAMPLE_SUB_PATH, usecols=["row_id"])
    merged = sample_sub.merge(sub, on="row_id", how="left")
    return merged.assign(birds=merged["birds"].fillna("nocall"))

In [None]:
# Build the submission table from the per-file predictions and preview it.
sub = preds_as_df(data, preds)
print(sub.shape)
sub

In [None]:
# Write the submission file into the working directory, without the index column.
sub.to_csv("submission.csv", index=False)

In [None]:
######################################
##         성능 확인하기            ##
######################################


def get_metrics(s_true, s_pred):
    """Compute set-based precision, recall and F1 for one row.

    Args:
        s_true: Whitespace-separated ground-truth labels.
        s_pred: Whitespace-separated predicted labels.

    Returns:
        Dict with ``f1``, ``prec``, ``rec`` and the counts ``n_true``,
        ``n_pred`` and ``n`` (size of the intersection). Precision or recall
        is 0 when its denominator is 0, i.e. when the string holds no labels.
    """
    true_labels = set(s_true.split())
    pred_labels = set(s_pred.split())
    n = len(true_labels & pred_labels)
    n_true, n_pred = len(true_labels), len(pred_labels)

    # Guard the divisions: an empty label string would otherwise raise.
    prec = n / n_pred if n_pred else 0.0
    rec = n / n_true if n_true else 0.0
    f1 = 2 * prec * rec / (prec + rec) if prec + rec else 0.0

    return {"f1": f1, "prec": prec, "rec": rec, "n_true": n_true, "n_pred": n_pred, "n": n}

In [None]:
# Scoring is only possible on the practice run, where ground-truth labels exist.
if TARGET_PATH:
    sub_target = pd.read_csv(TARGET_PATH)
    # Both frames carry a "birds" column, so the merge suffixes them:
    # birds_x comes from the label file, birds_y from our predictions.
    sub_target = sub_target.merge(sub, how="left", on="row_id")

    # Sanity check: number of labelled rows vs. number of rows with a prediction.
    print(sub_target["birds_x"].notnull().sum(), sub_target["birds_y"].notnull().sum())
    assert sub_target["birds_x"].notnull().all()
    assert sub_target["birds_y"].notnull().all()

    df_metrics = pd.DataFrame([get_metrics(s_true, s_pred) for s_true, s_pred in zip(sub_target.birds_x, sub_target.birds_y)])

    print(df_metrics.mean())


In [None]:
# Rows where the model predicted at least one bird. `sub_target` only exists
# when TARGET_PATH is set, so show nothing on a real submission run instead
# of raising NameError.
sub_target[sub_target.birds_y != "nocall"] if TARGET_PATH else None

In [None]:
# Rows whose ground truth contains at least one bird. `sub_target` only exists
# when TARGET_PATH is set, so show nothing on a real submission run instead
# of raising NameError.
sub_target[sub_target.birds_x != "nocall"] if TARGET_PATH else None