In [None]:
! pip install kaggle

In [None]:
! conda install -c conda-forge librosa --yes

In [None]:
import sagemaker
import boto3
from sagemaker import get_execution_role


import pandas as pd
import librosa
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from sklearn.utils import shuffle
from collections import Counter
from tqdm import tqdm
import os

In [None]:
# Create the SageMaker session and fetch the execution role of this notebook.
sagemaker_session = sagemaker.Session()
# Keep the region in a variable: as a bare expression in the middle of the
# cell its value was neither displayed nor stored.
region = sagemaker_session.boto_region_name
role = get_execution_role()

In [None]:
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
!kaggle competitions download -c birdclef-2021 -f train_metadata.csv
!unzip train_metadata.csv.zip
!rm train_metadata.csv.zip

In [None]:
# Build the list of recordings to use: keep well-rated recordings of
# sufficiently frequent classes, then shuffle them reproducibly.
meta_data_path = 'train_metadata.csv'
MIN_RATING = 4          # keep recordings whose 'rating' is at least this
MIN_CLASS_COUNT = 200   # keep classes with at least this many kept recordings
SHUFFLE_SEED = 42       # fixed seed so the file order (and any [:N] slice) is reproducible

meta_data = pd.read_csv(meta_data_path)
# reset_index gives the same 0..n-1 index the former left-merge produced.
meta_data = meta_data[meta_data['rating'] >= MIN_RATING].reset_index(drop=True)

# Per-class counts, most frequent first (columns 0 = label, 1 = count).
counter_classes = pd.DataFrame(Counter(meta_data['primary_label']).most_common())

# Attach each row's class frequency directly instead of merging a helper
# frame and then deleting/renaming its columns.
meta_data['count_class'] = meta_data['primary_label'].map(
    meta_data['primary_label'].value_counts()
)

meta_data = meta_data[meta_data['count_class'] >= MIN_CLASS_COUNT]
meta_data = shuffle(meta_data, random_state=SHUFFLE_SEED)

# The frame is already restricted to frequent classes, so no re-filtering here.
audio_name = meta_data['filename'].values
audio_labels = meta_data['primary_label'].values

# Map each class label to an integer id (ids follow the sorted unique labels).
cls_id_map = {name: idx for idx, name in enumerate(np.unique(audio_labels))}

In [None]:
# Audio / spectrogram settings shared by the helpers below.
SR = 32_000                # sample rate handed to librosa (load + melspectrogram)
SIGNAL_LENGTH = 5          # chunk length; a chunk is SIGNAL_LENGTH * SR samples
SPEC_SHAPE = (48, 128)     # [0] is used as n_mels, [1] drives the hop length
FMIN = 500                 # passed as fmin to the mel filter bank
FMAX = 12_500              # passed as fmax to the mel filter bank
# Hop size derived from the chunk length and SPEC_SHAPE[1]
# (name keeps the original spelling because other cells reference it).
HOP_LENGHT = int((SR * SIGNAL_LENGTH) / (SPEC_SHAPE[1] - 1))
# Not referenced by the code visible in this notebook; open_audio uses the
# same value as its own default.
DURATION = 15

def audio_process(wave):
    """Turn a waveform into a min-max normalised mel spectrogram in dB.

    Args:
        wave: audio samples passed to librosa as ``y``.

    Returns:
        The dB-scaled mel spectrogram (``SPEC_SHAPE[0]`` mel bands), shifted so
        its minimum is 0 and divided by its maximum. When every value is
        identical the division is 0/0 and the result contains NaN.
    """
    mel_power = librosa.feature.melspectrogram(
        y=wave,
        sr=SR,
        n_fft=1024,
        hop_length=HOP_LENGHT,
        n_mels=SPEC_SHAPE[0],
        fmin=FMIN,
        fmax=FMAX,
    )
    # dB relative to the loudest bin of this spectrogram.
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    shifted = mel_db - mel_db.min()
    return shifted / shifted.max()

def open_audio(path, duration=15):
    """Load an audio file and convert it to a list of chunk spectrograms.

    Args:
        path: location of the audio file to load.
        duration: passed to ``librosa.load`` to limit how much audio is read.

    Returns:
        A list with one ``audio_process`` result per full, non-overlapping
        chunk of ``SIGNAL_LENGTH * SR`` samples. A trailing chunk that is too
        short is dropped, and chunks whose spectrogram contains NaN are skipped.
    """
    chunk_len = int(SIGNAL_LENGTH * SR)
    samples, _ = librosa.load(path, sr=SR, duration=duration)

    spectrograms = []
    for start in range(0, len(samples), chunk_len):
        chunk = samples[start:start + chunk_len]
        if len(chunk) < chunk_len:
            # Only the final slice can be short; nothing useful follows it.
            break
        spec = audio_process(chunk)
        if not np.isnan(spec).any():
            spectrograms.append(spec)
    return spectrograms

In [None]:
!mkdir audio_files

dir_path = 'audio_files/'
data_path = 'data/'

for audio_file, target in tqdm(zip(audio_name[:10], audio_labels[:10])):
    !kaggle competitions download -c birdclef-2021 -p audio_files/ -f {'train_short_audio/' + target+'/'+audio_file} 

    audio_path = dir_path + str(audio_file)
    temp = open_audio(audio_path)

    if not os.path.exists(data_path + target):
        os.makedirs(data_path + target)

    for idx, x in enumerate(temp):
        img = Image.fromarray(x * 255.0).convert("L")
        img_save_path = data_path + target + '/' + audio_file[:-4] + f'_{idx}' '.png'
        img.save(img_save_path)
        s3.Bucket(bucket).upload_file(img_save_path,img_save_path)
    !rm {dir_path + audio_file}
    !rm {img_save_path}