In [1]:
!pip install ../input/dataset-private/SoundFile-0.10.3.post1-py2.py3-none-any.whl

Processing /kaggle/input/dataset-private/SoundFile-0.10.3.post1-py2.py3-none-any.whl
SoundFile is already installed with the same version as the provided wheel. Use --force-reinstall to force an installation of the wheel.


In [2]:
!pip install ../input/pythonspeechfeatures/python_speech_features-0.6.tar.gz

Processing /kaggle/input/pythonspeechfeatures/python_speech_features-0.6.tar.gz
  Preparing metadata (setup.py) ... [?25l- done
[?25hBuilding wheels for collected packages: python-speech-features
  Building wheel for python-speech-features (setup.py) ... [?25l- \ done
[?25h  Created wheel for python-speech-features: filename=python_speech_features-0.6-py3-none-any.whl size=5888 sha256=8b5dbd7696dc310b7aeaf90d50fe7f30fcba77409c08de35ead5b8daf3002a05
  Stored in directory: /root/.cache/pip/wheels/1a/a6/4e/e3b90655f091036cc887446ace17cb6cd43760a4ca9e30fda4
Successfully built python-speech-features
Installing collected packages: python-speech-features
Successfully installed python-speech-features-0.6


In [3]:
import os
import json
import pickle

from tqdm import tqdm

import numpy as np
import pandas as pd

from python_speech_features import fbank
import soundfile

import torch

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import model_from_config

# GET FILES

In [4]:
# Full path of every file found anywhere below the test soundscape directory.
all_files = [
    os.path.join(folder, name)
    for folder, _, names in os.walk('../input/birdclef-2022/test_soundscapes')
    for name in names
]

# GET ANSWERS

In [5]:
def get_audio(filename):
    """Read ``filename`` with ``soundfile.read`` and return ``(samples, sample_rate)``."""
    samples, sample_rate = soundfile.read(filename)
    return samples, sample_rate


def get_mels(waveform, sr):
    """Compute background-subtracted log filterbank features for 48 two-second windows.

    Windows start at seconds 0, 1, 2 and 3 of every 5-second block from 0 to 55
    and each spans ``2 * sr`` samples of ``waveform``.

    Parameters
    ----------
    waveform : array-like
        Audio samples, sliced along the first axis.
    sr : int
        Sample rate in Hz; used both for slicing and as ``fbank``'s ``samplerate``.

    Returns
    -------
    numpy.ndarray
        Stack of one float32 feature matrix per window. Each matrix has one row
        per filterbank channel and exactly ``targetSize`` (682) columns.
        NOTE(review): assumes ``fbank`` returns (frames, nfilt) so rows are the
        41 filters - confirm against the library docs.
    """
    # Analysis settings handed to python_speech_features.fbank.
    winlen = 1. / 43.0664  # specRes_Hz from model
    winstep = 2.9 / 1000.  # tempRes_ms from model
    nfft = 1024
    preemph = 0.5
    low = 0
    high = 11025
    nfilt = 41
    targetSize = 682

    # Start second of each window: offsets 0..3 inside every 5-second block.
    time_stamps = [block_start + offset
                   for block_start in range(0, 60, 5)
                   for offset in range(4)]

    features_all = []

    for ts in time_stamps:

        wav = waveform[ts*sr:(ts+2)*sr]

        # Only the filterbank energies are used; the second return value is discarded.
        M, _ = fbank(wav, samplerate=sr,
                     nfilt=nfilt, nfft=nfft,
                     lowfreq=low, highfreq=high,
                     preemph=preemph,
                     winlen=winlen, winstep=winstep,
                     winfunc=np.hanning)

        # Log energies, transposed so that time runs along axis 1.
        logM = np.swapaxes(np.log(M), 0, 1)

        # Per-row mean over (at most) the first targetSize frames is the background.
        n_frames = min(logM.shape[1], targetSize)
        background = np.float64(logM[:, :n_frames]).mean(axis=1)

        feature = np.float32(np.float64(logM) - background[:, np.newaxis])

        # Force exactly targetSize columns: zero-pad on the right or drop the tail.
        missing = targetSize - feature.shape[1]
        if missing > 0:
            padding = np.zeros((feature.shape[0], missing), dtype='float32')
            feature = np.concatenate((feature, padding), axis=1)
        elif missing < 0:
            feature = feature[:, :targetSize]

        features_all.append(feature)

    return np.array(features_all)




class ModelTF:
    """Keras feature extractor rebuilt from the ``cmi_mbam01`` config and weight files."""

    def __init__(self):
        """Rebuild the model from JSON, load its weights and expose intermediate outputs.

        ``self.feature_model`` maps the original model input to the outputs of
        every layer except the last four.
        """
        model_path = '../input/dataset-private'

        # Read the architecture inside a context manager so the handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(model_path + '/cmi_mbam01.json', 'r') as config_file:
            config = json.load(config_file)

        model = model_from_config(config)
        model.load_weights(model_path + '/cmi_mbam01.h5')

        feature_layers = [layer.output for layer in model.layers[:-4]]
        self.feature_model = tf.keras.Model(inputs=[model.input], outputs=feature_layers)

    def extract_features(self, mels):
        """Run ``mels`` through the feature model and return the last output as numpy.

        Parameters
        ----------
        mels : numpy.ndarray
            3-D array; only the first 40 entries along axis 1 are used.

        Returns
        -------
        numpy.ndarray
            ``.numpy()`` of the final element of the feature model's outputs.
        """
        scale = 33.15998
        # Keep the first 40 rows of axis 1 and rescale by the fixed constant.
        X = mels[:, :40, :] / scale
        # Append a trailing singleton axis before calling the model.
        batch_X = X.reshape(X.shape[0], X.shape[1], X.shape[2], 1)

        features = self.feature_model(batch_X)[-1].numpy()

        return features
    

    
class pytorch_model:
    """Wrapper around the TorchScript classifier stored in ``model_pytorch_all.pt``."""

    def __init__(self, device):
        """Load the scripted model onto ``device`` and remember that device.

        Parameters
        ----------
        device : str or torch.device
            Target device for the model and for the input tensors.
        """
        filename = '../input/pretrained-pytorch/model_pytorch_all.pt'
        # map_location remaps the stored tensors while loading, so an archive
        # saved from a GPU can still be opened when running on CPU.
        self.model = torch.jit.load(filename, map_location=device).to(device)
        self.device = device

    def prediction(self, features):
        """Return the index of the highest score along dim 1 for each row of ``features``.

        Parameters
        ----------
        features : array-like
            Model input; converted to a float32 tensor on ``self.device``.

        Returns
        -------
        numpy.ndarray
            Predicted indices, one per input row.
        """
        with torch.no_grad():
            x = torch.tensor(features, dtype=torch.float32, device=self.device)
            scores = self.model(x)
            pred = torch.argmax(scores, dim=1)

        # .cpu() is a no-op for tensors already on the CPU, so no branch is needed.
        return pred.cpu().numpy()
        

In [6]:
def get_dico(path="../input/pretrained-pytorch/dico_birds.json"):
    """Load the bird dictionary from a JSON file.

    Parameters
    ----------
    path : str, optional
        Location of the JSON file. Defaults to the dictionary shipped with the
        pretrained PyTorch model, so existing ``get_dico()`` calls are unchanged.

    Returns
    -------
    object
        Whatever ``json.load`` decodes from the file.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(path, "r", encoding="utf-8") as fp:
        return json.load(fp)


# Loaded once at module level; get_target indexes it with the third '_'-separated field of a row id.
dic_birds = get_dico()

In [7]:
def get_ans_dict(all_files, dic_birds):
    """Predict a class index for every analysis window of every soundscape file.

    Parameters
    ----------
    all_files : iterable of str
        Paths of the audio files to process.
    dic_birds : dict
        Not used inside this function; kept so existing calls keep working.

    Returns
    -------
    dict
        Maps ``'<file name without .ogg>_<end second>'`` (end = 5, 10, ..., 60)
        to the list of four predictions belonging to that 5-second block.

    Raises
    ------
    RuntimeError
        If CUDA is not available (message ``'STOP NOW'``).
    """
    # Abort unless the session has a GPU. Raising a bare string is a TypeError
    # in Python 3, so a real exception type carries the message instead.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    if device == 'cpu':
        raise RuntimeError('STOP NOW')
    print(device)
    # The TorchScript model is nevertheless placed on the CPU.
    device = 'cpu'

    model_tf = ModelTF()
    model_pytorch = pytorch_model(device=device)

    big_dict = {}

    for f in tqdm(all_files):

        key = f.split('/')[-1].split('.ogg')[0]

        audio, sr = get_audio(f)
        mels = get_mels(audio, sr)
        features = model_tf.extract_features(mels)
        predictions = model_pytorch.prediction(features)

        # 48 window predictions -> 12 groups of 4, one group per 5-second block.
        for i in range(0, 48, 4):
            end = 5 * (i // 4 + 1)
            big_dict[key + '_' + str(end)] = [predictions[i], predictions[i+1], predictions[i+2], predictions[i+3]]

    return big_dict
    
# Run the full pipeline over the collected files; big_dict is keyed by '<file name without .ogg>_<end second>'.
big_dict = get_ans_dict(all_files, dic_birds)

cuda:0


2022-04-11 17:30:38.185659: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-11 17:30:38.186800: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-11 17:30:38.187485: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-11 17:30:38.188274: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

In [8]:
# Sample submission; its 'row_id' column is what the following cells parse into keys and targets.
df = pd.read_csv('../input/birdclef-2022/sample_submission.csv')

In [9]:
def get_key(s):
    """Re-join fields 0, 1 and 3 of an underscore-separated id (e.g. 'a_b_c_d' -> 'a_b_d')."""
    parts = s.split('_')
    return '_'.join((parts[0], parts[1], parts[3]))

# Lookup key for every submission row, derived from its row_id.
df['key'] = df['row_id'].map(get_key)

def get_target(s):
    """Return the ``dic_birds`` entry stored under the third '_'-separated field of ``s``."""
    return dic_birds[s.split('_')[2]]


# dic_birds value for every submission row, derived from its row_id.
df['bird'] = df['row_id'].map(get_target)

In [10]:
# Parallel lists: the big_dict lookup key and the bird value of every submission row.
keys = df['key'].tolist()
birds = df['bird'].tolist()

# A three-row submission marks the public test session: register the three
# placeholder keys by reusing the first available prediction list.
if len(df) == 3:
    print('TEST SESSION')
    first_entry = list(big_dict.values())[0]
    for placeholder_key in ('soundscape_1000170626_5',
                            'soundscape_1000170626_10',
                            'soundscape_1000170626_15'):
        big_dict[placeholder_key] = first_entry

# A row is positive when its bird value is among the predictions stored for its key.
predictions = [bird in big_dict[key] for key, bird in zip(keys, birds)]
    

TEST SESSION


In [11]:
# Attach the per-row result, drop the helper columns and write the submission file.
df['target'] = predictions
df = df.drop(columns=['bird', 'key'])
df.to_csv("submission.csv", index=False)