In [None]:
# Code modified from Attila Ambrus's notebook on Kaggle for Basic Submission
# https://www.kaggle.com/code/ambrusattila/basic-submission-without-scoring-error/notebook

In [None]:
import os
import json 
import librosa
import numpy as np
import pandas as pd

import librosa as lb
import librosa.display as lbd
import soundfile as sf
from  soundfile import SoundFile

from  IPython.display import Audio
from pathlib import Path

from matplotlib import pyplot as plt

# TF
from tensorflow import keras
import tensorflow as tf

from sklearn.preprocessing import LabelEncoder



In [None]:
# Writable output location on Kaggle; mel-spectrogram dumps go in a sub-folder of it.
work_dir = "/kaggle/working"
mels_dir = f"{work_dir}/mels"

In [None]:
# Read-only inputs: competition data and the dataset holding the trained checkpoints.
input_dir = "/kaggle/input/birdclef-2022"
model_dir = Path("/kaggle/input/BirdClef-Model")

# Earlier single-model setup, kept for reference:
#   model_name = Path("Local-ResNet50V2_model.h5")
#   classes_np = Path("classes.npy")

# Checkpoint file names, resolved against `model_dir` when a model is loaded.
model_name_resnet = Path("ResNet50V2_model_multilabel_sigmoid_v2.h5")
model_name_resnet_mixup = Path("ResNet50V2_model_v4_mixup.h5")
model_name_Xception = Path("Xception_sigmoid_v1_train.h5")
model_name_Xception_mixup = Path("Xception_model_v2_mixup_later.h5")
model_name_EfficientNetB5 = Path("EfficientNetB5_v1.h5")

# Array of class labels, loaded into the label encoder.
classes_np = Path("classes_only_birds.npy")


In [None]:
# Device / distribution-strategy selection.
# Adapted from https://www.kaggle.com/code/itsuki9180/birdcall-using-tpu-train/notebook
#
# Tries to attach to a TPU first; on any failure it falls back to the default
# strategy (CPU / single GPU) so that `strategy` is always defined afterwards.
DEVICE = "TPU"  # "TPU" or "GPU"
tpu = None

if DEVICE == "TPU":
    print("connecting to TPU...")
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', tpu.master())
    except ValueError:
        # Raised by the resolver when no TPU is attached to this session.
        print("Could not connect to TPU")
        tpu = None

    if tpu:
        try:
            print("initializing  TPU ...")
            tf.config.experimental_connect_to_cluster(tpu)
            tf.tpu.experimental.initialize_tpu_system(tpu)
            strategy = tf.distribute.experimental.TPUStrategy(tpu)
            print("TPU initialized")
        except Exception as err:
            # The original `except _:` referenced an undefined name, which turned
            # every initialization failure into a NameError. Catch the failure and
            # fall back, otherwise `strategy` would stay undefined below.
            print("failed to initialize TPU")
            print(repr(err))
            DEVICE = "GPU"
    else:
        DEVICE = "GPU"

if DEVICE != "TPU":
    print("Using default strategy for CPU and single GPU")
    strategy = tf.distribute.get_strategy()

if DEVICE == "GPU":
    print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))


AUTO     = tf.data.experimental.AUTOTUNE
REPLICAS = strategy.num_replicas_in_sync
print(f'REPLICAS: {REPLICAS}')

In [None]:
class MelSpecComputer:
    """Callable that converts a waveform into a dB-scaled mel spectrogram.

    Args:
        sr: Sample rate handed to librosa.
        n_mels: Number of mel bands.
        fmin: Lowest frequency of the mel filter bank.
        fmax: Highest frequency of the mel filter bank.
        **kwargs: Extra keyword arguments forwarded to
            ``librosa.feature.melspectrogram``. ``n_fft`` defaults to
            ``sr // 10`` and ``hop_length`` to ``sr // 40`` when not supplied.
    """

    def __init__(self, sr, n_mels, fmin, fmax, **kwargs):
        self.sr, self.n_mels = sr, n_mels
        self.fmin, self.fmax = fmin, fmax

        # Fill in the STFT defaults only for keys the caller did not provide.
        kwargs.setdefault("n_fft", self.sr // 10)
        kwargs.setdefault("hop_length", self.sr // (10 * 4))
        self.kwargs = kwargs

    def __call__(self, y):
        """Return the float32 dB mel spectrogram of waveform ``y``."""
        power_spec = lb.feature.melspectrogram(
            y=y,
            sr=self.sr,
            n_mels=self.n_mels,
            fmin=self.fmin,
            fmax=self.fmax,
            **self.kwargs,
        )
        return lb.power_to_db(power_spec).astype(np.float32)

In [None]:
def mono_to_color(X, eps=1e-6, mean=None, std=None):
    """Standardize an array and rescale it to the uint8 range [0, 255].

    Despite the name, the result has the same shape as ``X`` (no channel axis
    is added here).

    Args:
        X: Numeric numpy array (e.g. a mel spectrogram).
        eps: Small constant added to ``std`` to avoid division by zero; also
            the minimum value range below which the output is all zeros.
        mean: Value(s) subtracted from ``X``. ``None`` means ``X.mean()``.
            An explicit ``0.0`` or a broadcastable array is honoured.
        std: Value(s) ``X`` is divided by. ``None`` means ``X.std()``.
            An explicit ``0.0`` or a broadcastable array is honoured.

    Returns:
        ``np.uint8`` array shaped like ``X``; all zeros when the standardized
        data is (nearly) constant.
    """
    # `mean or X.mean()` would discard an explicit 0.0 and raise on arrays
    # ("truth value of an array is ambiguous"), so test against None instead.
    if mean is None:
        mean = X.mean()
    if std is None:
        std = X.std()
    X = (X - mean) / (std + eps)

    _min, _max = X.min(), X.max()

    if (_max - _min) > eps:
        # Min-max scale to [0, 255]; truncation to uint8 happens in astype.
        V = 255 * (X - _min) / (_max - _min)
        V = V.astype(np.uint8)
    else:
        # Flat input: nothing to scale.
        V = np.zeros_like(X, dtype=np.uint8)

    return V
 
def crop_or_pad(y, length, is_train=True, start=None):
    """Force a 1-D signal to exactly ``length`` samples.

    Shorter signals are right-padded with zeros; longer ones are cropped.
    Signals that already have the requested length are returned unchanged.

    Args:
        y: 1-D sequence of samples.
        length: Target number of samples.
        is_train: When cropping without an explicit ``start``, pick a random
            offset if True, otherwise start at 0.
        start: Explicit crop offset. ``0`` is a valid offset and is respected
            in both modes.

    Returns:
        Array (or slice of ``y``) with ``length`` samples.
    """
    n_samples = len(y)

    if n_samples < length:
        # Zero-pad at the end. The original code followed this with a
        # repeat/tile step that could never trigger (the signal already had
        # `length` samples by then), so zero padding is the actual behavior.
        y = np.asarray(y)
        # Keep the input dtype so float32 audio is not silently upcast to float64.
        padding = np.zeros(length - n_samples, dtype=y.dtype)
        y = np.concatenate([y, padding])

    elif n_samples > length:
        # `start or ...` treated an explicit start=0 as "not given".
        if start is None:
            start = np.random.randint(n_samples - length) if is_train else 0
        y = y[start:start + length]

    return y

In [None]:
# Mel-spectrogram inputs
SR = 32000        # sample rate used as the default `sr` of AudioToImage
DURATION = 7      # seconds of audio per window (window length = DURATION * SR samples)
SEED = 666        # not referenced in the cells visible here

# A bird is reported as present when its averaged score exceeds this value.
PRED_THRESHOLD = 1e-2

In [None]:
class AudioToImage:
    """Turn an audio file into a stack of uint8 mel-spectrogram images.

    The recording is cut into fixed-length windows; each window goes through
    ``MelSpecComputer`` and ``mono_to_color``.

    Args:
        sr: Target sample rate.
        n_mels: Number of mel bands.
        fmin: Lowest mel frequency.
        fmax: Highest mel frequency; defaults to ``sr // 2``.
        duration: Window length in seconds.
        step: Offset in samples between the starts of consecutive windows;
            defaults to the window length (no overlap).
        res_type: Resampling method passed to ``librosa.resample``.
        resample: Whether to resample files whose rate differs from ``sr``.
    """

    def __init__(self, sr=SR, n_mels=128, fmin=0, fmax=None, duration=DURATION, step=None, res_type="kaiser_fast", resample=True):

        self.sr = sr
        self.n_mels = n_mels
        self.fmin = fmin
        self.fmax = fmax or self.sr//2

        self.duration = duration
        self.audio_length = self.duration*self.sr
        self.step = step or self.audio_length

        self.res_type = res_type
        self.resample = resample

        self.mel_spec_computer = MelSpecComputer(sr=self.sr, n_mels=self.n_mels, fmin=self.fmin,
                                                 fmax=self.fmax)

    def audio_to_image(self, audio):
        """Convert one window of samples into a uint8 spectrogram image."""
        melspec = self.mel_spec_computer(audio)
        image = mono_to_color(melspec)
        return image

    def __call__(self, filepath, save=True):
        """Convert the file at ``filepath`` into a stack of window images.

        Args:
            filepath: Path of an audio file readable by ``soundfile``.
            save: If True, write the stacked images to
                ``mels_dir + "/" + filepath + ".npy"`` and return None;
                otherwise return the stacked array.

        Returns:
            ``np.ndarray`` with one image per window when ``save`` is False,
            else None.
        """
        audio, orig_sr = sf.read(filepath, dtype="float32")

        # soundfile returns (frames, channels) for multi-channel files; the
        # windowing below assumes a 1-D signal, so mix down to mono first.
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        if self.resample and orig_sr != self.sr:
            # Keyword arguments: recent librosa releases no longer accept the
            # sample rates positionally.
            audio = lb.resample(audio, orig_sr=orig_sr, target_sr=self.sr, res_type=self.res_type)

        # Window start offsets; max(1, ...) guarantees one window even for
        # recordings shorter than a full window.
        last_start_bound = max(1, len(audio) - self.audio_length + 1)
        audios = [audio[begin:begin + self.audio_length]
                  for begin in range(0, last_start_bound, self.step)]
        # Only the last window can be short (when the whole file is shorter
        # than one window); pad it to full length.
        audios[-1] = crop_or_pad(audios[-1], length=self.audio_length)

        images = np.stack([self.audio_to_image(chunk) for chunk in audios])

        if save:
            path = mels_dir+f"/{filepath}.npy"
            path = Path(path)
            print(path)
            path.parent.mkdir(exist_ok=True, parents=True)
            np.save(str(path), images)
        else:
            return images

In [None]:
def normalize(image):
    """Scale a uint8 image to [0, 1] and shape it as a 3-channel batch of one.

    The single-channel image is copied into three identical channels placed on
    a new leading axis, then a batch axis is prepended, giving shape
    ``(1, 3, *image.shape)`` in float32.
    """
    scaled = image.astype("float32", copy=False) / 255.0
    three_channel = np.stack([scaled] * 3)
    return three_channel[np.newaxis, ...]

In [None]:
# Label encoder whose classes are restored from the saved class array, so
# model output indices can be mapped back to label names.
le = LabelEncoder()
le.classes_ = np.load(model_dir.joinpath(classes_np))

def predict(image, this_model):
    """Score one spectrogram image with ``this_model``.

    Args:
        image: Single-channel image as produced by the mel converter.
        this_model: Object with a Keras-style ``predict`` method.

    Returns:
        Dict mapping every class label of ``le`` to its predicted score,
        inserted in ascending score order.
    """
    scores = this_model.predict(normalize(image))[0]

    # Class indices ordered from lowest to highest score.
    ranked = sorted(range(len(scores)), key=scores.__getitem__)
    labels = le.inverse_transform(ranked)

    return {label: scores[idx] for label, idx in zip(labels, ranked)}

In [None]:
# MelSpec converter: consecutive windows start 0.666 * DURATION seconds apart,
# so neighbouring windows overlap.
converter = AudioToImage(step=int(DURATION*0.666*SR))

# TF models used for the ensemble (the two *_mixup checkpoints).
model_ResNet50V2_Mixup = keras.models.load_model(model_dir.joinpath(model_name_resnet_mixup))
model_Xception_Mixup = keras.models.load_model(model_dir.joinpath(model_name_Xception_mixup))


In [None]:
# Build the submission rows: one row per (soundscape, scored bird, 5-second segment).

# Length in seconds of one scored segment; row ids end with the segment's end time.
SEGMENT_SECONDS = 5

pred = {
    'row_id': [],
    'target': [],
}

test_path = input_dir + "/test_soundscapes/"

# Keep only .ogg recordings and strip just the extension, so stray files are
# skipped and ids that contain dots stay intact.
files = sorted(
    Path(name).stem
    for name in os.listdir(test_path)
    if name.endswith('.ogg')
)

birds_path = input_dir + "/scored_birds.json"
with open(birds_path) as bf:
    birds = json.load(bf)

# Iterate over all test soundscapes.
for f in files:
    p = test_path + f + '.ogg'

    # Duration in seconds, read from the file header via soundfile.
    d = sf.info(p).duration

    # Stack of mel-spectrogram windows for this recording.
    mels = converter(p, save=False)

    pcs = round(d / SEGMENT_SECONDS)

    for i in range(pcs):
        # NOTE(review): window i is used as a stand-in for segment i although
        # the converter's window length/step differ from SEGMENT_SECONDS —
        # confirm this alignment is intended. The index is clamped so a
        # recording yielding fewer windows than segments cannot raise IndexError.
        mel = mels[min(i, len(mels) - 1)]

        # Perform inference with both models of the ensemble.
        seg_pred_resnet_mixup = predict(mel, model_ResNet50V2_Mixup)
        seg_pred_Xcept_mixup = predict(mel, model_Xception_Mixup)

        segment_end = (i + 1) * SEGMENT_SECONDS

        for b in birds:
            # Ensemble score: plain average of the two models.
            seg_pred = (seg_pred_resnet_mixup[b] + seg_pred_Xcept_mixup[b]) / 2

            pred['row_id'].append(f + '_' + b + '_' + str(segment_end))
            # bool() keeps plain Python booleans in the output column.
            pred['target'].append(bool(seg_pred > PRED_THRESHOLD))

In [None]:
# Assemble the submission table with a fixed column order.
cols = ['row_id', 'target']
df_sub = pd.DataFrame(data=pred, columns=cols)

In [None]:
# Write the submission file into the working directory, without the index column.
df_sub.to_csv(f"{work_dir}/submission.csv", index=False)