In [10]:
!pip install soundfile
!pip install librosa
!pip install audiomentations

Collecting audiomentations
  Using cached audiomentations-0.15.0-py3-none-any.whl (24 kB)
Installing collected packages: audiomentations
Successfully installed audiomentations-0.15.0


In [18]:
import os, random
import csv
import matplotlib.pyplot as plt
import soundfile as sf
import pandas as pd
import librosa
import librosa.display
import numpy as np
import tensorflow as tf
import math
import shutil

from audiomentations import Compose, AddGaussianNoise, AddGaussianSNR, FrequencyMask
from datetime import datetime
from matplotlib import image
from PIL import Image
from sklearn.model_selection import train_test_split

In [27]:
# --- Paths -------------------------------------------------------------------
# The project root is taken to be the parent of the current working directory.
PROJECT_PATH = os.path.abspath(os.path.join(os.getcwd(), ".."))
# Raw audio dataset (read by the conversion functions defined below).
ORIGINAL_DATASET_DIRECTORY = os.path.join(PROJECT_PATH, 'dataset', 'rfcx-species-audio-detection')
# Root of the generated spectrogram dataset and its train / val / test sub-folders.
DATASET_DIRECTORY = os.path.join(PROJECT_PATH, 'dataset', 'spectrogram-species-audio-detection')
DATASET_TRAIN_DIRECTORY = os.path.join(DATASET_DIRECTORY, 'train')
DATASET_VAL_DIRECTORY = os.path.join(DATASET_DIRECTORY, 'val')
DATASET_TEST_DIRECTORY = os.path.join(DATASET_DIRECTORY, 'test')
# Not used by the code visible in this notebook section.
WEIGHT_FILE_NAME = "EfficientNet_Weights/EfficientNetBN_tl_best_weights.h5"
# Size (in pixels) every saved spectrogram picture is resized to.
IMAGE_HEIGHT = 500
IMAGE_WIDTH = 500

# Create a 25th class that corresponds to no bird
USE_EMPTY_CLASS = True
len_classes = 25 if USE_EMPTY_CLASS else 24
# Training hyper-parameters; none of them is read by the code visible here.
# NOTE(review): `epch` is presumably the number of epochs - confirm against the training cell.
epch = 600
KERNEL_REGULARIZERS = 0.0005
ref_lr = 0.03
ref_batch_size = 16
dropout = 0.2
batch_size = 2
momentumTest = 0.95
# Class folder names: "0" .. str(len_classes - 1).
destination_classes = [str(i) for i in range(len_classes)]
### PARAMS spectrogramm_conversion ###
# Related to IMAGE_WIDTH and IMAGE_HEIGHT
# Progress is printed every PERCENT_PRINT percent while building the test set.
PERCENT_PRINT = 10
# DURATION_CUT -> recordings are cut into pieces of x seconds / 0 = no cutting
# NOTE(review): the cutting loops below advance by DURATION_CUT on every iteration,
# so a value of 0 would never terminate as written.
DURATION_CUT = 2
# Only referenced from commented-out code below.
RANDOM_CUT = True
# A ratio of 5 saves 1 out of 5 recordings of the 25th class
# Prevents the 25th class from being overloaded with data (knowing that one recording contains at least 2 extracts)
RATIO_EMPTY_CLASS = 40
# Not used by the code visible in this notebook section.
PRED_EMPTY_IGNORE_EXTRACT = 0.6
# minimum duration of record
MINIMAL_DURATION = 0.25
# Minimum time (same unit as t_min / t_max) compared against when labelling an extract.
MINIMAL_ANIMAL_PRESENCE = 0.25
# Not used by the code visible in this notebook section.
FREQ_MODIFIER = 0
# Passed as `test_size` to train_test_split when building the train / val sets.
validation_split = 0.3
# Enables the extra augmented pictures; one is attempted when the per-recording
# augmentation counter is a multiple of RATIO_DATA_AUG.
USE_DATA_AUGMENTATION = False
RATIO_DATA_AUG = 2


def compute_class_images_count(base_folder: str, class_name: str):
    """Return how many directory entries exist in ``base_folder/class_name``.

    Every entry returned by ``os.listdir`` is counted, whatever its type.
    """
    class_folder = f'{base_folder}/{class_name}'
    return len(os.listdir(class_folder))


def compute_all_classes_images_count(base_folder: str):
    """Return the number of entries in ``base_folder`` summed over every class folder.

    The class folders visited are the names listed in ``destination_classes``.
    """
    total = 0
    for class_name in destination_classes:
        total += compute_class_images_count(base_folder, class_name)
    return total


def compute_train_images_count():
    """Return the number of images in the training directory, all classes combined."""
    train_total = compute_all_classes_images_count(DATASET_TRAIN_DIRECTORY)
    return train_total


def compute_val_images_count():
    """Return the number of images in the validation directory, all classes combined."""
    val_total = compute_all_classes_images_count(DATASET_VAL_DIRECTORY)
    return val_total


def compute_total_images_count():
    """Return the number of images in the validation and training directories together."""
    val_total = compute_val_images_count()
    train_total = compute_train_images_count()
    return val_total + train_total


def compute_class_weight():
    """Return, for each class, its image count divided by the smallest class count.

    Counts are the sum of the training and validation folders of the class.
    Keys of the returned dict are the integer class ids, values are floats
    (the least populated class maps to 1.0).

    Raises:
        ZeroDivisionError: if the least populated class has no image at all.

    NOTE(review): the ratio grows with the class size, so if the result is fed
    to a loss as balancing weights the larger classes get the larger weight -
    confirm that this is what the caller expects.
    """
    counts = {}
    for class_name in destination_classes:
        train_count = compute_class_images_count(DATASET_TRAIN_DIRECTORY, class_name)
        val_count = compute_class_images_count(DATASET_VAL_DIRECTORY, class_name)
        counts[int(class_name)] = train_count + val_count

    # Size of the class that holds the least data.
    smallest = min(counts.values())

    return {class_id: count / smallest for class_id, count in counts.items()}


## Utils Functions

In [14]:
def save_spectrogramm(d, s, picture_path):
    """Draw the spectrogram of ``d`` with pyplot and save it as an RGB picture.

    Args:
        d: audio samples, handed to ``plt.specgram``.
        s: sampling frequency, passed as ``Fs``.
        picture_path: destination file; it is written once by ``savefig`` and
            then overwritten with the RGB version resized to
            ``IMAGE_WIDTH`` x ``IMAGE_HEIGHT``.
    """
    plt.specgram(d, Fs=s)
    plt.axis('off')
    plt.savefig(picture_path, bbox_inches='tight', pad_inches=0)
    plt.close()

    rendered = Image.open(picture_path)
    target_size = (IMAGE_WIDTH, IMAGE_HEIGHT)
    resized = rendered.convert('RGB').resize(target_size)
    resized.save(picture_path)


def save_mel_spectrogramm(d, s, picture_path):
    """Render the dB-scaled STFT magnitude of ``d`` and save it as an RGB picture.

    Despite its name, no mel filter bank is applied: the picture shows the
    magnitude of ``librosa.stft`` converted with ``amplitude_to_db``.

    Args:
        d: audio samples (converted with ``np.array`` before the STFT).
        s: sampling rate forwarded to ``librosa.display.specshow``.
        picture_path: destination file; written by ``savefig`` then overwritten
            with the RGB version resized to ``IMAGE_WIDTH`` x ``IMAGE_HEIGHT``.
    """
    spec = np.abs(librosa.stft(np.array(d), hop_length=512))
    spec = librosa.amplitude_to_db(spec, ref=np.max)
    try:
        librosa.display.specshow(spec, sr=s, cmap='magma')
        plt.axis('off')
        plt.savefig(picture_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Release the figure on every call, as save_spectrogramm does. Without
        # this each extract is drawn on top of the previous ones and the
        # figure keeps growing for the whole dataset conversion.
        plt.close()

    picture = Image.open(picture_path)
    picture.convert('RGB').resize((IMAGE_WIDTH, IMAGE_HEIGHT)).save(picture_path)


def save_random_brig(d, s, picture_patch):
    """Save the dB-scaled STFT picture of ``d`` with a random brightness shift.

    The picture is rendered like ``save_mel_spectrogramm`` does, converted to
    RGB and resized to ``IMAGE_WIDTH`` x ``IMAGE_HEIGHT`` (so augmented pictures
    have the same format as the other pictures of the dataset), then passed
    through ``tf.image.random_brightness`` with ``max_delta=0.2``.

    Args:
        d: audio samples (converted with ``np.array`` before the STFT).
        s: sampling rate forwarded to ``librosa.display.specshow``.
        picture_patch: destination file path.
    """
    spec = np.abs(librosa.stft(np.array(d), hop_length=512))
    spec = librosa.amplitude_to_db(spec, ref=np.max)
    try:
        librosa.display.specshow(spec, sr=s, cmap='magma')
        plt.axis('off')
        plt.savefig(picture_patch, bbox_inches='tight', pad_inches=0)
    finally:
        # Release the figure on every call; otherwise figures and their
        # artists accumulate over the whole conversion.
        plt.close()

    # Same post-processing as the sibling writers: RGB, fixed size.
    img = Image.open(picture_patch).convert('RGB').resize((IMAGE_WIDTH, IMAGE_HEIGHT))
    img_array = np.asarray(img)
    brightened = tf.image.random_brightness(img_array, 0.2)
    final_image = tf.keras.preprocessing.image.array_to_img(brightened)
    final_image.save(picture_patch)

def load_data(path):
    """Load every picture found in the class sub-folders of ``path``.

    Each immediate sub-directory of ``path`` must be named after an integer
    class id; every file it contains is read with ``image.imread``.
    Directories and files are visited in sorted order so the result is
    reproducible.

    Args:
        path: folder whose sub-directories are the class folders.

    Returns:
        ``(data, labels)``: ``data`` is a float32 array with one picture per
        row, ``labels`` a float32 array holding the class id of each picture.
        When no picture is found, ``data`` has shape
        ``(0, IMAGE_HEIGHT, IMAGE_WIDTH, 4)`` and ``labels`` is empty.

    Raises:
        ValueError: if a non-empty sub-directory name is not an integer, or if
            the pictures do not all share the same shape.
    """
    pictures = []
    labels = []
    # Only the immediate sub-directories are class folders; walking deeper
    # would pair nested directory names with the wrong parent path.
    for directory in sorted(os.listdir(path)):
        directory_path = os.path.join(path, directory)
        if not os.path.isdir(directory_path):
            continue
        for file in sorted(os.listdir(directory_path)):
            labels.append(int(directory))
            pictures.append(image.imread(os.path.join(directory_path, file)))

    if not pictures:
        return (np.zeros((0, IMAGE_HEIGHT, IMAGE_WIDTH, 4), dtype=np.float32),
                np.zeros(0, dtype=np.float32))

    # Stack once at the end: concatenating inside the loop copies the whole
    # dataset for every picture. The channel count follows the pictures
    # themselves (RGB or RGBA) instead of being fixed to 4.
    data = np.stack(pictures).astype(np.float32, copy=False)
    return data, np.array(labels, dtype=np.float32)


def split_array(data_to_split, percent):
    """Split a sequence in two at ``percent`` of its length.

    Args:
        data_to_split: sequence supporting ``len`` and slicing.
        percent: fraction (between 0 and 1 inclusive) of the elements that go
            to the first part; the cut index is ``int(len * percent)``.

    Returns:
        A pair of NumPy arrays ``(first_part, second_part)``.

    Raises:
        ValueError: if ``percent`` is not between 0 and 1. Negative values are
            rejected too: they used to yield a negative cut index, which
            wrapped around and duplicated elements in the second part.
    """
    if not 0 <= percent <= 1:
        raise ValueError("percent parameter need to be between 0 and 1")

    percent_indice = int(len(data_to_split) * percent)
    return np.array(data_to_split[:percent_indice]), np.array(data_to_split[percent_indice:])


def build_x_y(x, y):
    """Return ``x`` unchanged together with ``y`` one-hot encoded over ``len_classes`` classes."""
    one_hot_labels = tf.keras.utils.to_categorical(y, len_classes)
    return x, one_hot_labels


def count_csv_lines(path):
    """Return the number of rows ``csv.DictReader`` yields for the file at ``path``.

    The first line is consumed as the header, so it is not counted.
    """
    with open(path, mode='r') as file:
        return sum(1 for _row in csv.DictReader(file))


def plot_all_logs(logs):
    """Print ``logs`` then show one curve per (metric, log) pair.

    Args:
        logs: iterable of dicts with a ``'title'`` string and a ``'value'``
            object exposing a ``history`` mapping (presumably a Keras
            ``History`` - confirm against the training cell) that contains
            the four metrics plotted here.
    """
    print(logs)
    for metric in ('loss', 'val_loss', 'categorical_accuracy', 'val_categorical_accuracy'):
        for log in logs:
            values = log['value'].history[metric]
            epochs = list(range(len(values)))
            plt.plot(epochs, values)
            title = log['title']
            timestamp = datetime.now().strftime("%Hh:%Mm:%Ss")
            plt.title(" - ".join((title, timestamp, metric)))
            # Shown inside the inner loop: every log gets its own figure.
            plt.show()


In [15]:
# Audio augmentation pipelines (audiomentations); the conversion code below
# picks one of them by index.
augmentations = [
    # Pipeline 0: Gaussian noise (applied with probability 0.5), then
    # AddGaussianSNR with its default parameters.
    Compose([
        AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
        AddGaussianSNR()
    ]),
    # Pipeline 1: FrequencyMask with its default parameters.
    Compose([
        FrequencyMask()
    ])  # ,
    # TODO : AddBackgroundNoise
]

# Sample rate used below to turn seconds into sample indices.
# NOTE(review): presumably the rate of the .flac recordings - the rate returned
# by sf.read is not checked against it.
initial_freq = 48000
# CSV read by create_spectro_dataset (columns used: recording_id, species_id, t_min, t_max).
metadata_inpath = os.path.join(ORIGINAL_DATASET_DIRECTORY, 'train_tp.csv')
# Folder holding the "<recording_id>.flac" training recordings.
audio_inpath = os.path.join(ORIGINAL_DATASET_DIRECTORY, 'train')
# Global counter of processed extracts; used to sub-sample the empty class.
number_extract_created = 0


def determine_class_directory(t_min, t_max, current_duration, duration, species_id, is_train):
    """Return the folder an audio window must be saved to, or "" to skip it.

    The window is ``[current_duration, current_duration + duration]`` and the
    annotated call is ``[t_min, t_max]`` (same time unit).

    * The species folder is returned when the window holds the whole call, or
      when the call is present in the window for at least
      ``MINIMAL_ANIMAL_PRESENCE`` (or for the whole window if it is shorter).
    * The empty-class folder (``len_classes - 1``) is returned when
      ``USE_EMPTY_CLASS`` is set and the window does not intersect the call.
    * "" is returned otherwise (the call is present, but too briefly).

    The presence used to be measured as the annotation time left *after* the
    window, so a window that merely touched the start of a call was labelled
    as the species, and a window with a long overlap could be dropped.

    Args:
        t_min: start of the annotated call.
        t_max: end of the annotated call.
        current_duration: start of the window.
        duration: length of the window.
        species_id: class id of the annotated species.
        is_train: True to target the training folder, False for validation.
    """
    dataset_directory = DATASET_TRAIN_DIRECTORY if is_train else DATASET_VAL_DIRECTORY
    window_end = current_duration + duration

    # Time during which the call is actually inside the window (<= 0 if disjoint).
    overlap = min(window_end, t_max) - max(current_duration, t_min)
    contains_whole_call = current_duration <= t_min and window_end >= t_max
    enough_presence = overlap >= min(MINIMAL_ANIMAL_PRESENCE, duration)

    if contains_whole_call or enough_presence:
        return os.path.join(dataset_directory, str(species_id))

    if USE_EMPTY_CLASS and (window_end <= t_min or current_duration >= t_max):
        return os.path.join(dataset_directory, str(len_classes - 1))

    return ""


def _cut_extract(samples, sample_rate, start_seconds, duration):
    """Return the part of ``samples`` covering ``duration`` seconds from ``start_seconds``."""
    first = int(start_seconds * sample_rate)
    # Same clamping as before: the end index never exceeds len(samples) - 1.
    last = min(int((start_seconds + duration) * sample_rate), len(samples) - 1)
    return samples[first:last]


def process_data_and_save_spectrogramm(row_data, is_train):
    """Cut one annotated recording into windows and save a picture per kept window.

    The recording ``<recording_id>.flac`` is read from ``audio_inpath`` and cut
    into consecutive windows of ``DURATION_CUT`` seconds (a single window
    covering the whole recording when ``DURATION_CUT`` is 0). Each window is
    routed by ``determine_class_directory``:

    * "" -> the window is skipped;
    * empty-class folder -> kept only when the global
      ``number_extract_created`` counter is a multiple of ``RATIO_EMPTY_CLASS``;
    * species folder -> always kept.

    Kept windows are saved as ``<recording_id>_<index>.png`` in the class
    folder. A trailing window shorter than ``MINIMAL_DURATION`` is not saved.

    Fixes over the previous version:

    * the empty class is recognised from the folder name instead of looking
      for the substring "24" anywhere in the path;
    * sample indices are computed with the rate returned by ``sf.read``
      instead of a hard-coded 48000;
    * ``MINIMAL_DURATION`` is enforced inside the loop (the former tail branch
      after the loop could never run), and ``DURATION_CUT = 0`` no longer
      loops forever;
    * windows are sliced instead of being copied sample by sample.

    Args:
        row_data: mapping with the keys ``t_min``, ``t_max``, ``recording_id``
            and ``species_id``.
        is_train: True to write in the training folder, False for validation.
    """
    global number_extract_created
    t_min = row_data["t_min"]
    t_max = row_data["t_max"]
    recording_id = row_data["recording_id"]
    row_species = row_data["species_id"]

    data, sample = sf.read(os.path.join(audio_inpath, recording_id + ".flac"))
    end_audio = (len(data) - 1) / sample

    # DURATION_CUT == 0 means "no cutting": one window spanning the recording.
    duration = DURATION_CUT if DURATION_CUT > 0 else end_audio
    if duration <= 0:
        return

    empty_class_name = str(len_classes - 1)
    current_duration = 0
    it = 0
    to_data_aug = 0

    while current_duration < end_audio:
        # Too little audio left to make a meaningful picture.
        if end_audio - current_duration < MINIMAL_DURATION:
            break

        window_start = current_duration
        current_duration += duration

        class_directory = determine_class_directory(t_min, t_max, window_start, duration, row_species, is_train)
        if class_directory == "":
            continue

        is_empty_extract = USE_EMPTY_CLASS and os.path.basename(class_directory) == empty_class_name
        if is_empty_extract and number_extract_created % RATIO_EMPTY_CLASS != 0:
            # Sub-sample the empty class so it does not dwarf the species classes.
            number_extract_created += 1
            continue

        extract_path = os.path.join(class_directory, recording_id)
        extract_path_da = os.path.join(class_directory, recording_id)

        save_mel_spectrogramm(_cut_extract(data, sample, window_start, duration),
                              sample,
                              extract_path + "_" + str(it) + ".png")

        # NOTE(review): augmentation is only applied when is_train is False,
        # i.e. to the validation split - confirm this is not inverted.
        # NOTE(review): to_data_aug only grows inside this branch, so with
        # RATIO_DATA_AUG > 1 at most one window per recording is augmented and
        # augmentations[0] is always the pipeline used - confirm the intent.
        if USE_DATA_AUGMENTATION and is_train is False and to_data_aug % RATIO_DATA_AUG == 0:
            new_data = augmentations[to_data_aug % 2](samples=data, sample_rate=sample)
            extract_path += F"_{str(it)}__{to_data_aug}.png"
            extract_path_da += F"_{str(it)}__{to_data_aug}_.png"

            augmented_extract = _cut_extract(new_data, sample, window_start, duration)
            save_mel_spectrogramm(augmented_extract, sample, extract_path)
            save_random_brig(augmented_extract, sample, extract_path_da)

            to_data_aug += 1

        it += 1
        number_extract_created += 1


def create_spectro_dataset():
    """Build the train and validation spectrogram folders from the annotation CSV.

    The CSV at ``metadata_inpath`` is sorted by ``recording_id`` and split with
    ``train_test_split`` (``validation_split`` as test size, stratified on
    ``species_id``, fixed random state). Every row of each part is then handed
    to ``process_data_and_save_spectrogramm``; a ``index/total`` line is
    printed before each row.
    """
    annotations = pd.read_csv(metadata_inpath).sort_values("recording_id")
    species = annotations["species_id"]

    df_train, df_test, _, _ = train_test_split(annotations,
                                               species,
                                               test_size=validation_split,
                                               random_state=50,
                                               stratify=species)

    # Training rows first, then validation rows.
    for subset, subset_is_train in ((df_train, True), (df_test, False)):
        subset_size = len(subset.index)
        for counter, (_, row) in enumerate(subset.iterrows()):
            print(F"{counter}/{subset_size}")
            process_data_and_save_spectrogramm(row, is_train=subset_is_train)

    print('100%')


In [16]:
# Folder holding the recordings converted by create_test_spectro_dataset.
test_path = os.path.join(ORIGINAL_DATASET_DIRECTORY, 'test')
# Sample rate used to turn seconds into sample indices (same value as the one
# assigned in the previous cell).
initial_freq = 48000


def create_test_spectro_dataset():
    """Convert every recording of ``test_path`` into a folder of spectrogram pictures.

    For a file ``<name>.flac`` the pictures are written to
    ``DATASET_TEST_DIRECTORY/<name>/<name>_<index>.png``, one per consecutive
    window of ``DURATION_CUT`` seconds (a single window covering the whole
    recording when ``DURATION_CUT`` is 0). A trailing window shorter than
    ``MINIMAL_DURATION`` is not saved. Progress is printed every
    ``PERCENT_PRINT`` percent.

    Fixes over the previous version:

    * fewer than 100 files no longer raises ``ZeroDivisionError`` in the
      progress computation;
    * sample indices use the rate returned by ``sf.read`` instead of a
      hard-coded 48000;
    * ``MINIMAL_DURATION`` is enforced inside the loop (the former tail branch
      after the loop could never run), and ``DURATION_CUT = 0`` no longer
      loops forever;
    * windows are sliced instead of being copied sample by sample.

    NOTE(review): pictures are produced with ``save_spectrogramm`` whereas the
    train / val pictures come from ``save_mel_spectrogramm`` - confirm the
    model is meant to be evaluated on a different rendering.
    """
    files = os.listdir(test_path)
    # At least 1 so the modulo below is always defined.
    one_percent = max(1, len(files) // 100)
    percent = 0

    for line_count, file in enumerate(files):
        data, sample = sf.read(os.path.join(test_path, file))
        end_audio = (len(data) - 1) / sample

        recording_name = file.replace(".flac", "")
        directory_music = os.path.join(DATASET_TEST_DIRECTORY, recording_name)
        os.makedirs(directory_music, exist_ok=True)
        new_file_path = os.path.join(directory_music, recording_name)

        if line_count % one_percent == 0:
            if percent % PERCENT_PRINT == 0:
                print(str(percent) + "%")
            percent += 1

        # DURATION_CUT == 0 means "no cutting": one window spanning the recording.
        duration = DURATION_CUT if DURATION_CUT > 0 else end_audio
        if duration <= 0:
            continue

        current_duration = 0
        it = 0
        while current_duration < end_audio:
            # Too little audio left to make a meaningful picture.
            if end_audio - current_duration < MINIMAL_DURATION:
                break

            first = int(current_duration * sample)
            # Same clamping as before: the end index never exceeds len(data) - 1.
            last = min(int((current_duration + duration) * sample), len(data) - 1)

            save_spectrogramm(data[first:last], sample, new_file_path + "_" + str(it) + ".png")
            current_duration += duration
            it += 1

    print("100%")

In [28]:
# Dataset kinds understood by the cleaning / creation helpers:
# index 0 -> train and validation folders, index 1 -> test folder.
dataset_type = ["val_train", "test"]


def clean_dataset(dataset):
    """Delete the generated content of the requested dataset.

    Args:
        dataset: ``dataset_type[0]`` removes every file of the existing class
            folders (names taken from ``destination_classes``) under the
            training and validation directories; any other value removes every
            entry of the test directory.
    """
    if dataset != dataset_type[0]:
        for entry in os.listdir(DATASET_TEST_DIRECTORY):
            shutil.rmtree(os.path.join(DATASET_TEST_DIRECTORY, entry), ignore_errors=True)
        return

    for class_name in destination_classes:
        for split_directory in (DATASET_TRAIN_DIRECTORY, DATASET_VAL_DIRECTORY):
            folder = f'{split_directory}/{class_name}'
            if not os.path.exists(folder):
                continue
            for filename in os.listdir(folder):
                os.remove(f'{folder}/{filename}')


def clean_or_create_empty_folder():
    """Create or remove the "24" (empty class) folder of the val and train directories.

    When ``USE_EMPTY_CLASS`` is set the two folders are created, together with
    any missing parent directory. Otherwise they are removed along with their
    content: ``clean_dataset`` only empties the folders listed in
    ``destination_classes``, so a "24" folder left by a previous run may still
    hold pictures, which made the former ``os.rmdir`` fail.
    """
    empty_paths = [os.path.join(DATASET_VAL_DIRECTORY, "24"), os.path.join(DATASET_TRAIN_DIRECTORY, "24")]

    for path in empty_paths:
        if USE_EMPTY_CLASS:
            os.makedirs(path, exist_ok=True)
        elif os.path.isdir(path):
            shutil.rmtree(path)


def clean_and_create_dataset(dataset):
    """Prepare the output folders, wipe previous results and regenerate a dataset.

    Args:
        dataset: ``dataset_type[0]`` rebuilds the train / validation folders
            with ``create_spectro_dataset``; any other value rebuilds the test
            folder with ``create_test_spectro_dataset``.

    The output folders are created with their parents when missing. For the
    train / validation case one folder per entry of ``destination_classes`` is
    created as well: the conversion writes pictures straight into those
    folders and nothing else creates them.
    """
    if dataset == dataset_type[0]:
        for split_directory in (DATASET_TRAIN_DIRECTORY, DATASET_VAL_DIRECTORY):
            os.makedirs(split_directory, exist_ok=True)
            for class_name in destination_classes:
                os.makedirs(os.path.join(split_directory, class_name), exist_ok=True)
    else:
        os.makedirs(DATASET_TEST_DIRECTORY, exist_ok=True)

    print(F"Cleaning {dataset} dataset...")
    clean_dataset(dataset)

    clean_or_create_empty_folder()

    print(F"Creating {dataset} dataset...")

    if dataset == dataset_type[0]:
        create_spectro_dataset()
    else:
        create_test_spectro_dataset()



# Change the dataset_type index to generate either the train/val or the test dataset
# 0 == train & val
# 1 == test
clean_and_create_dataset(dataset_type[0])

Cleaning val_train dataset...
Creating val_train dataset...


FileNotFoundError: [Errno 2] File C:\Users\quent\Desktop\ESGI\DL\Species audio detection\Jupyter\dataset\rfcx-species-audio-detection\train_tp.csv does not exist: 'C:\\Users\\quent\\Desktop\\ESGI\\DL\\Species audio detection\\Jupyter\\dataset\\rfcx-species-audio-detection\\train_tp.csv'