In [1]:
!pip install soundfile
!pip install python_speech_features

Collecting python_speech_features
  Downloading python_speech_features-0.6.tar.gz (5.6 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: python-speech-features
  Building wheel for python-speech-features (setup.py) ... [?25ldone
[?25h  Created wheel for python-speech-features: filename=python_speech_features-0.6-py3-none-any.whl size=5888 sha256=ebe61ff408202b3f1c903fe5b26988643a8a8c1607708a39b126926d203d4ba7
  Stored in directory: /root/.cache/pip/wheels/b0/0e/94/28cd6afa3cd5998a63eef99fe31777acd7d758f59cf24839eb
Successfully built python-speech-features
Installing collected packages: python-speech-features
Successfully installed python-speech-features-0.6


# IMPORT PACKAGES

In [2]:
from python_speech_features import fbank
import soundfile

import matplotlib.pyplot as plt
from IPython.display import Audio, display
import IPython

from tqdm import tqdm
import pandas as pd
import numpy as np

import json
import pickle

# USEFUL FUNCTIONS

In [3]:
def make_fbank(wav, fs=22050, targetSize=682):
    """Build a background-normalised log mel filterbank image for one clip.

    Parameters
    ----------
    wav : array-like
        Audio samples handed straight to ``python_speech_features.fbank``.
    fs : int, optional
        Sampling rate of ``wav`` in Hz (default 22050).
    targetSize : int, optional
        Number of time frames in the returned array (default 682). Shorter
        inputs are right-padded with zeros, longer ones are truncated.

    Returns
    -------
    numpy.ndarray
        ``float32`` array with one row per mel filter (41) and ``targetSize``
        columns (time frames).
    """
    winlen = 1. / 43.0664  # specRes_Hz from model
    winstep = 2.9 / 1000.  # tempRes_ms from model
    nfft = 1024
    preemph = 0.5

    low = 0
    high = 11025

    nfilt = 41

    # fbank returns (filterbank energies, per-frame total energy); only the
    # filterbank energies are used here.
    M, _ = fbank(wav, samplerate=fs,
                 nfilt=nfilt, nfft=nfft,
                 lowfreq=low, highfreq=high,
                 preemph=preemph,
                 winlen=winlen, winstep=winstep,
                 winfunc=np.hanning)

    # Log energies, transposed so rows are filters and columns are frames.
    logM = np.swapaxes(np.log(M), 0, 1)

    # Per-filter mean over the frames that will be kept; subtracted as a
    # background estimate. Done in float64, stored as float32.
    cut = min(logM.shape[1], targetSize)
    background = np.float64(logM[:, :cut]).mean(axis=1)
    features = np.float32(np.float64(logM) - background[:, np.newaxis])

    missing = targetSize - features.shape[1]
    if missing > 0:
        # Too short: pad the time axis with zeros (i.e. "background level").
        padding = np.zeros((features.shape[0], missing), dtype='float32')
        features = np.concatenate((features, padding), axis=1)
    else:
        # Long enough: keep the first targetSize frames.
        features = features[:, :targetSize]

    return features

# KEEP SCORED BIRDS

In [4]:
# Load the training metadata and keep only the rows whose primary label is
# one of the scored species.
metadata = pd.read_csv('../input/birdclef-2022/train_metadata.csv')
# Context manager so the JSON file handle is closed as soon as it is parsed.
with open('../input/birdclef-2022/scored_birds.json', 'rb') as scored_file:
    scored = json.load(scored_file)
metadata = metadata[metadata.primary_label.isin(scored)].reset_index(drop=True)

# Call-type annotations that are dropped from the `type` column below.
deleted = ['ANTHROPOGENIC:THROPOGENIC:: RAIN', 'AUTUMN', 'COURTSHIP SONG?', 'LONG SERIES OF CALLS', 'ODD CALLS', 'RAIN', 'SONG?', 'THINK IT IS A CALL', 'TRAFFIC', 'VARIOUS CALLS IN FLIGHT', 'ZREE', 'TWO SONG TYPES', 'TWO TYPES OF CALLS', 'IN FLIGHT']
# Intended replacements and annotations still to be reviewed ("a voir" = "to
# look at"); neither is applied in this cell.
remplacement = {'BEGGING CALL (?)': 'BEGGING CALL', 'SONGFLIGHT': 'SONG IN FLIGHT'}
a_voir = ['MALE', 'FEMALE', 'JUVENILE', 'LIFE STAGE UNCERTAIN', 'VESPER SINGING']
# TODO: if 'NOCTURNAL FLIGHT CALL' is present, remove 'FLIGHT CALL'.


def _parse_call_types(raw, excluded=frozenset(deleted)):
    """Turn the stringified list in `type` into a cleaned list of upper-case tags."""
    # Upper-case, drop the first and last character (the surrounding
    # brackets), remove quotes, then split on commas.
    tags = str(raw).upper()[1:-1].replace("'", "").split(',')
    return [tag.strip() for tag in tags if tag.strip() not in excluded]


# Explicit column access instead of attribute-style assignment.
metadata['type'] = metadata['type'].map(_parse_call_types)

In [6]:
# Number of recordings left after keeping only the scored species.
print(len(metadata))

1266


# CREATE LOG-MEL-SPECTROGRAM

In [7]:
# Species lists referenced only by the disabled truncation experiment below.
newProb = ['akikik', 'bubsan', 'chemun', 'coopet', 'crehon', 'ercfra', 'hawpet1', 'layalb', 'lessca', 'mauala', 'puaioh', 'shtsan']

newProbSmall = ['akekee', 'akepa1', 'aniani', 'bkwpet', 'blknod', 'brnboo', 'buffle', 'bulpet', 'canvas', 'chbsan', 'golphe', 'hudgod', 'incter1', 'kauama', 'madpet', 'palila', 'pomjae', 'sooshe', 'sopsku1', 'wantat1']

metadata_used = metadata.copy()
filenames = metadata_used.filename.tolist()
primary_label = metadata_used.primary_label.tolist()

AUDIO_DIR = '../input/birdclef-2022/train_audio/'
SEGMENT_SECONDS = 2
# Cap on already-collected segments per label before further files of that
# label are skipped. 1e25 effectively disables the cap.
MAX_SEGMENTS_PER_LABEL = 1e25

mels = []        # one spectrogram per segment
mels_fname = []  # '<filename>_<segment end in seconds>' per segment
mels_label = []  # primary label per segment

# Running per-label tally, so the cap check does not rescan mels_label.
segments_per_label = {}

for fic, lab in tqdm(zip(filenames, primary_label), total=len(filenames)):

    # Do not collect the same label too many times.
    if segments_per_label.get(lab, 0) >= MAX_SEGMENTS_PER_LABEL:
        continue

    waveform, sr = soundfile.read(AUDIO_DIR + fic)

    # Disabled experiment: truncate recordings depending on the species.
    #if lab not in ['maupar', 'crehon', 'ercfra', 'hawhaw', 'hawpet1', 'puaioh', 'hawgoo'] + newProb:
    #    waveform = waveform[:sr*15+1]
    #elif lab in newProbSmall:
    #    waveform = waveform[:sr*60+1]

    duration_s = len(waveform) // sr

    # Non-overlapping windows starting at 0, 2, 4, ... seconds, emitted while
    # start + SEGMENT_SECONDS < duration_s.
    # NOTE(review): because the bound is strict, a window ending exactly on
    # the last whole second is not emitted (a 4 s file yields one segment).
    # Kept as-is so segment names stay aligned with the hand-labelled list of
    # empty segments -- confirm whether this is intended.
    for deb in range(0, duration_s - SEGMENT_SECONDS, SEGMENT_SECONDS):
        end = deb + SEGMENT_SECONDS

        mels.append(make_fbank(waveform[deb * sr:end * sr], fs=sr))
        mels_fname.append(fic + '_' + str(end))
        mels_label.append(lab)
        segments_per_label[lab] = segments_per_label.get(lab, 0) + 1

100%|██████████| 1266/1266 [38:02<00:00,  1.80s/it] 


In [8]:
# Tally the number of extracted segments per label: one row per segment with
# a constant `count` column, then count the rows in each label group.
test = pd.DataFrame({'label': mels_label}).assign(count=1)
test.groupby('label').count()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
akiapo,368
aniani,207
apapan,1248
barpet,496
crehon,58
elepai,331
ercfra,30
hawama,351
hawcre,1211
hawgoo,103


# ADD EMPTY INDICATION

In [9]:
# Hand-picked segments (named '<filename>_<segment end in seconds>') to be
# relabelled as empty, grouped by species. Some names appear in more than one
# list; duplicates are removed when the lists are merged below.
videsDivers = ['maupar/XC123887.ogg_240', 'houfin/XC163052.ogg_10', 'maupar/XC123887.ogg_242', 'maupar/XC123887.ogg_196', 'crehon/XC122342.ogg_18', 'barpet/XC189893.ogg_14', 'houfin/XC140991.ogg_4', 'barpet/XC305567.ogg_2', 'houfin/XC141479.ogg_8', 'houfin/XC139819.ogg_14', 'elepai/XC252700.ogg_6', 'maupar/XC123887.ogg_110', 'hawgoo/XC252701.ogg_4', 'hawama/XC27323.ogg_6', 'houfin/XC163052.ogg_4', 'maupar/XC123887.ogg_220', 'yefcan/XC196124.ogg_12', 'omao/XC385071.ogg_14', 'maupar/XC123887.ogg_236', 'aniani/XC174955.ogg_8', 'omao/XC306419.ogg_2', 'aniani/XC317139.ogg_12', 'barpet/XC637020.ogg_4', 'yefcan/XC196124.ogg_8', 'maupar/XC123887.ogg_204', 'apapan/XC174948.ogg_8', 'hawgoo/XC314509.ogg_8', 'maupar/XC123887.ogg_98']

videsHawpet1 = ['hawpet1/XC328230.ogg_2', 'hawpet1/XC328230.ogg_8', 'hawpet1/XC328234.ogg_2', 'hawpet1/XC328234.ogg_4', 'hawpet1/XC328234.ogg_10', 'hawpet1/XC328234.ogg_12', 'hawpet1/XC328234.ogg_14', 'hawpet1/XC328234.ogg_18', 'hawpet1/XC328234.ogg_16', 'hawpet1/XC328234.ogg_32', 'hawpet1/XC328234.ogg_36', 'hawpet1/XC328234.ogg_38', 'hawpet1/XC328234.ogg_40', 'hawpet1/XC328234.ogg_42', 'hawpet1/XC328234.ogg_44', 'hawpet1/XC328234.ogg_46', 'hawpet1/XC328234.ogg_48', 'hawpet1/XC75584.ogg_2', 'hawpet1/XC75584.ogg_4', 'hawpet1/XC75584.ogg_6', 'hawpet1/XC75584.ogg_8', 'hawpet1/XC75584.ogg_10', 'hawpet1/XC75584.ogg_12', 'hawpet1/XC75584.ogg_14', 'hawpet1/XC75584.ogg_16', 'hawpet1/XC75584.ogg_18', 'hawpet1/XC75584.ogg_30', 'hawpet1/XC75584.ogg_34', 'hawpet1/XC75584.ogg_36', 'hawpet1/XC75584.ogg_38', 'hawpet1/XC75584.ogg_40', 'hawpet1/XC75584.ogg_42', 'hawpet1/XC75584.ogg_44', 'hawpet1/XC75584.ogg_46', 'hawpet1/XC75584.ogg_48', 'hawpet1/XC75584.ogg_50', 'hawpet1/XC75584.ogg_54', 'hawpet1/XC75584.ogg_56', 'hawpet1/XC75584.ogg_58', 'hawpet1/XC75584.ogg_60', 'hawpet1/XC75584.ogg_62', 'hawpet1/XC75584.ogg_64', 'hawpet1/XC75584.ogg_66', 'hawpet1/XC75584.ogg_68', 'hawpet1/XC75584.ogg_70', 'hawpet1/XC75584.ogg_72', 'hawpet1/XC75584.ogg_74', 'hawpet1/XC75584.ogg_76', 'hawpet1/XC75584.ogg_78', 'hawpet1/XC75584.ogg_80', 'hawpet1/XC75584.ogg_82', 'hawpet1/XC75584.ogg_84', 'hawpet1/XC75584.ogg_96', 'hawpet1/XC75584.ogg_98', 'hawpet1/XC75584.ogg_100']

videsBarpet = ['barpet/XC189893.ogg_8', 'barpet/XC189893.ogg_10', 'barpet/XC189893.ogg_12', 'barpet/XC189893.ogg_14', 'barpet/XC189894.ogg_4', 'barpet/XC295060.ogg_2', 'barpet/XC304615.ogg_6', 'barpet/XC305567.ogg_2', 'barpet/XC305567.ogg_8', 'barpet/XC305567.ogg_10', 'barpet/XC305567.ogg_12', 'barpet/XC473562.ogg_2', 'barpet/XC473562.ogg_14', 'barpet/XC601885.ogg_2', 'barpet/XC637020.ogg_2', 'barpet/XC637020.ogg_4', 'barpet/XC637020.ogg_8', 'barpet/XC637020.ogg_10', 'barpet/XC637020.ogg_12']

videsAniani = ['aniani/XC317139.ogg_12', 'aniani/XC317139.ogg_14', 'aniani/XC317139.ogg_4', 'aniani/XC317139.ogg_6', 'aniani/XC317139.ogg_8', 'aniani/XC314503.ogg_14', 'aniani/XC314503.ogg_12', 'aniani/XC314503.ogg_10', 'aniani/XC314503.ogg_8', 'aniani/XC314503.ogg_6', 'aniani/XC314503.ogg_2', 'aniani/XC256222.ogg_12', 'aniani/XC256222.ogg_10', 'aniani/XC256222.ogg_8', 'aniani/XC256222.ogg_6', 'aniani/XC256222.ogg_2', 'aniani/XC210205.ogg_10', 'aniani/XC210205.ogg_8', 'aniani/XC210205.ogg_2', 'aniani/XC210204.ogg_10', 'aniani/XC210203.ogg_2', 'aniani/XC210202.ogg_2', 'aniani/XC174955.ogg_12', 'aniani/XC174955.ogg_10', 'aniani/XC174955.ogg_8', 'aniani/XC174955.ogg_6', 'aniani/XC174952.ogg_8']

videsPuaioh = ['puaioh/XC144893.ogg_4', 'puaioh/XC144892.ogg_26', 'puaioh/XC144892.ogg_20', 'puaioh/XC144892.ogg_18', 'puaioh/XC144892.ogg_14', 'puaioh/XC144892.ogg_12', 'puaioh/XC144892.ogg_8', 'puaioh/XC144892.ogg_6', 'puaioh/XC144892.ogg_4']

videsIiwi = ['iiwi/XC219960.ogg_14', 'iiwi/XC27335.ogg_4', 'iiwi/XC27307.ogg_2', 'iiwi/XC256225.ogg_4', 'iiwi/XC27326.ogg_6', 'iiwi/XC27335.ogg_6', 'iiwi/XC256225.ogg_10', 'iiwi/XC27307.ogg_8', 'iiwi/XC27365.ogg_10', 'iiwi/XC27314.ogg_8', 'iiwi/XC27307.ogg_12', 'iiwi/XC219960.ogg_10', 'iiwi/XC256225.ogg_6', 'iiwi/XC27314.ogg_6', 'iiwi/XC210207.ogg_14']

videsAkiapo = ['akiapo/XC27376.ogg_8', 'akiapo/XC27374.ogg_12', 'akiapo/XC648120.ogg_2', 'akiapo/XC124705.ogg_10', 'akiapo/XC385387.ogg_4', 'akiapo/XC27374.ogg_8']

videsOmao = ['omao/XC252711.ogg_14', 'omao/XC359187.ogg_2', 'omao/XC252707.ogg_6', 'omao/XC252704.ogg_14', 'omao/XC27353.ogg_12', 'omao/XC317146.ogg_2', 'omao/XC252711.ogg_12', 'omao/XC252709.ogg_4', 'omao/XC385071.ogg_6', 'omao/XC27353.ogg_4', 'omao/XC306419.ogg_10', 'omao/XC191210.ogg_14', 'omao/XC27375.ogg_10', 'omao/XC27353.ogg_8', 'omao/XC252705.ogg_6', 'omao/XC317146.ogg_10', 'omao/XC252711.ogg_4', 'omao/XC27341.ogg_10', 'omao/XC385071.ogg_4', 'omao/XC252705.ogg_12', 'omao/XC252711.ogg_8']

# Merge without mutating any of the per-species lists (the earlier
# `vides = videsDivers` followed by `+=` extended videsDivers in place).
# Sorting gives a stable order across runs.
vides = sorted(set().union(
    videsDivers, videsHawpet1, videsBarpet, videsAniani,
    videsPuaioh, videsIiwi, videsAkiapo, videsOmao,
))
print(len(vides))

175


In [10]:
# Relabel every segment whose name is in `vides` as 'zVIDE'; all other
# segments keep their original label. A set makes each membership test
# constant-time instead of scanning the list once per segment.
vides_lookup = set(vides)

mels_label_with_vide = [
    'zVIDE' if fn in vides_lookup else lab
    for fn, lab in zip(mels_fname, mels_label)
]
        


# CREATE PICKLES

In [11]:
# Write the spectrograms, their segment names and the final labels to
# '<name>.pickle' as a single tuple.
name = 'ALL_DATA'
payload = (mels, mels_fname, mels_label_with_vide)

with open(name + '.pickle', 'wb') as handle:
    pickle.dump(payload, handle, protocol=pickle.HIGHEST_PROTOCOL)
