# Projet

## Imports

In [2]:
import copy
import wave
from pathlib import Path
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Input, Conv1D, AvgPool1D, MaxPool1D, ZeroPadding1D, BatchNormalization, Flatten, Dense, Activation
from keras.utils.np_utils import to_categorical
import os
import requests
import librosa
import soundfile as sf


## Récupération des données

In [3]:
def download_file(link, directory, filename):
    if os.path.exists(os.path.join(directory, filename)):
        print("File exists, skipping")
        return

    if not os.path.exists(directory):
        path = directory.split(os.sep)
        for i in range(1, len(path) + 1):
            sub_dir = os.path.join(*path[:i])
            if not os.path.exists(sub_dir):
                os.makedirs(sub_dir)

    with open(os.path.join(directory, filename), 'wb') as f:
        f.write(requests.get(link).content)
    f.close()


def get_bird_json(bird_name, recording_quality, number_of_recordings):
    api_base_link = "https://xeno-canto.org/api/2/recordings?query="
    quality_header = "q:"
    bird_name = "+".join(bird_name.split(" "))

    final_json = []

    for quality in recording_quality:
        print("Getting recordings data for", bird_name, "with quality", quality)
        api_link = api_base_link + bird_name + "+" + quality_header + quality
        json_recording = requests.get(api_link).json()["recordings"]
        final_json.extend(json_recording)

    final_json = [recording for recording in final_json if "song" in recording["type"]]
    return final_json[:number_of_recordings]


def download_from_bird_json_infos(recordings_folder, bird_json):
    len_bird_recordings = len(bird_json)
    for index, recording in enumerate(bird_json):
        print("Downloading file:", index + 1, "out of", len_bird_recordings, "(", recording["file-name"], ")")
        download_file(recording["file"], os.path.join(recordings_folder, recording["gen"] + "_" + recording["sp"]),
                      recording["q"] + "_" + recording["file-name"])

In [4]:
def split_audio_file(audio_file_path, output_folder, output_file_name, split_length=1):
    # if filename starts with splitted_ then skip
    if output_file_name.startswith("splitted_"):
        print("file already splitted, skipping")
        return
    if any("splitted_"+output_file_name in f for f in os.listdir(output_folder)):
        print("file already exists, skipping")
        return
    audio_signal, sample_rate = librosa.load(audio_file_path, sr=None)

    len_audio_signal = len(audio_signal)

    split_length_samples = split_length * sample_rate
    audio_signal = audio_signal[:len_audio_signal - len_audio_signal % split_length_samples]
    if len(audio_signal) > split_length:
        audio_signal = np.split(audio_signal, len(audio_signal) / split_length_samples)

    number_of_files = str(int(len_audio_signal / sample_rate))
    for index, y_split in enumerate(audio_signal):
        sf.write(
            os.path.join(output_folder,
                         "splitted_" + output_file_name + "_" + str(index + 1) + "_of_" + number_of_files + ".wav"),
            y_split, sample_rate)
    # os.remove(audio_file_path)


In [55]:
# parameters
main_bird = "Parus major"
main_bird_quality = ("A", "B", "C")
number_of_main_bird_recordings = 200

test_bird_1 = "Turdus merula"
test_bird_1_quality = ("A", "B", "C")
number_of_test_bird_1_recordings = 200

test_bird_2 = "Fringilla coelebs"
test_bird_2_quality = ("A", "B", "C")
number_of_test_bird_2_recordings = 200

recordings_folder = "recordings"

# dl data
main_bird_recordings_json = get_bird_json(main_bird, main_bird_quality, number_of_main_bird_recordings)
download_from_bird_json_infos(recordings_folder, main_bird_recordings_json)

test_bird_1_recordings_json = get_bird_json(test_bird_1, test_bird_1_quality, number_of_test_bird_1_recordings)
download_from_bird_json_infos(recordings_folder, test_bird_1_recordings_json)

test_bird_2_recordings_json = get_bird_json(test_bird_2, test_bird_2_quality, number_of_test_bird_2_recordings)
download_from_bird_json_infos(recordings_folder, test_bird_2_recordings_json)

# clean data

print("splitting audio into multiple recordings")
for bird_type in os.listdir(recordings_folder):
    bird_folder = os.path.join(recordings_folder, bird_type)
    print("looking into folder:", bird_folder)
    if os.path.isdir(bird_folder):
        number_of_recordings = len(os.listdir(bird_folder))
        for index, recording in enumerate(os.listdir(bird_folder)):
            print("splitting recording", str(index+1), recording, "out of", number_of_recordings, "for bird", bird_type)
            recording_path = os.path.join(bird_folder, recording)
            split_audio_file(recording_path, bird_folder, recording.split(".")[0], 3)




[1;30;43mLe flux de sortie a été tronqué et ne contient que les 5000 dernières lignes.[0m
splitting recording 5447 splitted_A_XC783413-Turdus-merula_2023_78_of_375.wav out of 5895 for bird Turdus_merula
file already splitted, skipping
splitting recording 5448 splitted_A_XC770775-Merel-zang---Bodegraven---2022-04-03---0915-copy_108_of_707.wav out of 5895 for bird Turdus_merula
file already splitted, skipping
splitting recording 5449 splitted_A_XC788378-Amsel_Aichtal_20230319_1853_Gesang_6_of_70.wav out of 5895 for bird Turdus_merula
file already splitted, skipping
splitting recording 5450 splitted_A_XC772543-55_25_of_114.wav out of 5895 for bird Turdus_merula
file already splitted, skipping
splitting recording 5451 splitted_A_XC788737-turdus-merula_2023_16_of_142.wav out of 5895 for bird Turdus_merula
file already splitted, skipping
splitting recording 5452 splitted_A_XC787811-AMSEL,-dawn--23_106_of_1931.wav out of 5895 for bird Turdus_merula
file already splitted, skipping
splitting 

## Création des jeux de données

In [56]:
# create train, test and text pointers
recordings_folder = "recordings"
main_bird = "Parus major"

file_name = "testing_list.txt"

count = 0
with open(os.path.join(recordings_folder, file_name), "w", encoding="utf-8") as f:
    for bird_type in os.listdir(recordings_folder):
        bird_folder = os.path.join(recordings_folder, bird_type)
        if os.path.isdir(bird_folder):
            for recording in os.listdir(bird_folder):
                sound_filename = os.path.join(bird_folder, recording)
                if os.path.isfile(sound_filename) and "splitted_" in sound_filename:
                  if count < 1000:
                        if np.random.rand() > 0.3:
                            f.write(sound_filename + "\n")
                            count += 1
            count = 0
print("Done")

Done


## Création des jeux de données

In [57]:
dataset_dir = Path('recordings')

CLASSES = ["Parus_major", "Turdus_merula", "Fringilla_coelebs"]

with (dataset_dir/ 'testing_list.txt').open(encoding='utf-8') as f:
    testing_list = f.read().splitlines()

x_train = []
y_train = []
x_test = []
y_test = []

for recording in dataset_dir.glob('**/*.wav'):
    if not recording.parent.name in CLASSES:
        continue
    if "splitted_" not in str(recording):
        continue
    label = CLASSES.index(recording.parent.name)

    with wave.open(str(recording)) as f :
        data = np.frombuffer(f.readframes(f.getnframes()), dtype=np.int16).copy()

    data = data.astype(np.float32)
    data.resize((16000, 1))

    if "splitted_" in str(recording):
        if str(recording) in testing_list:
            x_test.append(data)
            y_test.append(label)
        elif y_train.count(label) < 2400:
            x_train.append(data)
            y_train.append(label)

x_train = np.array(x_train)
y_train = to_categorical(np.array(y_train))
x_test = np.array(x_test)
y_test = to_categorical(np.array(y_test))

In [58]:
print(x_test.shape)
print(y_test.shape)
print(x_train.shape)
print(y_train.shape)

(3000, 16000, 1)
(3000, 3)
(7200, 16000, 1)
(7200, 3)


In [None]:
unique_labels, label_counts = np.unique(y_train, return_counts=True)
for label, count in zip(unique_labels, label_counts):
    print(f"Label {label}: {count} occurrences")

## Normalize data

In [59]:
x_mean = x_train.mean()
x_std = x_train.std()

x_train -= x_mean
x_test -= x_mean
x_train /= x_std
x_test /= x_std

## Exporter les données

In [60]:
np.savetxt('x_test.csv', x_test.reshape(x_test.shape[0], -1), delimiter=',', fmt='%s')
np.savetxt('y_test.csv', y_test, delimiter=',', fmt='%s')

## Build model M5

In [61]:
# Modifier

model = Sequential()
model.add(Input(shape=(16000, 1)))
model.add(MaxPool1D(pool_size=4, strides=3, padding='valid'))
model.add(Conv1D(filters=128, kernel_size=80, activation='relu'))
model.add(MaxPool1D(pool_size=4, strides=3, padding='valid'))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(MaxPool1D(pool_size=4, strides=3, padding='valid'))
model.add(Conv1D(filters=32, kernel_size=7, activation='relu'))
model.add(MaxPool1D(pool_size=4, strides=3, padding='valid'))
model.add(AvgPool1D())
# model.add(Conv1D(filters=128, kernel_size=80, strides=4, activation='relu'))
# model.add(MaxPool1D(pool_size=4, strides=1, padding='valid'))
# model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
# model.add(MaxPool1D(pool_size=4, strides=1, padding='valid'))
# model.add(Conv1D(filters=256, kernel_size=3, activation='relu'))
# model.add(MaxPool1D(pool_size=4, strides=1, padding='valid'))
# model.add(Conv1D(filters=512, kernel_size=3, activation='relu'))
# model.add(MaxPool1D(pool_size=4, strides=1, padding='valid'))
# model.add(AvgPool1D())
model.add(Flatten())
model.add(Dense(units=3))
model.add(Activation('softmax'))  # SoftMax activation needs to be separate from Dense to remove it later on
# EXPLORE Learning Rate
opt = tf.keras.optimizers.Adam(learning_rate=10e-4)
model.summary()
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 max_pooling1d_20 (MaxPoolin  (None, 5333, 1)          0         
 g1D)                                                            
                                                                 
 conv1d_20 (Conv1D)          (None, 5254, 128)         10368     
                                                                 
 max_pooling1d_21 (MaxPoolin  (None, 1751, 128)        0         
 g1D)                                                            
                                                                 
 conv1d_21 (Conv1D)          (None, 1749, 64)          24640     
                                                                 
 max_pooling1d_22 (MaxPoolin  (None, 582, 64)          0         
 g1D)                                                            
                                                      

## Train model

In [62]:
model.fit(x_train, y_train, epochs=5, batch_size=100, validation_data=(x_test, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f238b7ea1f0>

## Evaluate model on test dataset

In [63]:
model.evaluate(x_test, y_test, verbose=2)
pred_test = model.predict(x_test)
print(tf.math.confusion_matrix(y_test.argmax(axis=1), pred_test.argmax(axis=1)))

94/94 - 1s - loss: 0.8692 - categorical_accuracy: 0.5900 - 1s/epoch - 13ms/step
tf.Tensor(
[[455 151 394]
 [105 622 273]
 [130 177 693]], shape=(3, 3), dtype=int32)


## Save trained model

In [64]:
model.save('lab_gsc.h5')

## Remove SoftMax layer

In [65]:
model = tf.keras.Model(model.input, model.layers[-2].output, name=model.name)

## Install MicroAI for C inference code generation (kerascnn2c module)

In [66]:
!pip install https://bitbucket.org/edge-team-leat/microai_public/get/6adfbcb347d3.zip#subdirectory=third_party/kerascnn2c_fixed
import kerascnn2c

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting https://bitbucket.org/edge-team-leat/microai_public/get/6adfbcb347d3.zip#subdirectory=third_party/kerascnn2c_fixed
  Downloading https://bitbucket.org/edge-team-leat/microai_public/get/6adfbcb347d3.zip (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: kerascnn2c
  Building wheel for kerascnn2c (setup.py) ... [?25l[?25hdone
  Created wheel for kerascnn2c: filename=kerascnn2c-1.0.0-py3-none-any.whl size=21352 sha256=dc04e03924450f41b58b4325dff6a7343b528ddaa78861644d1112a6a5d3d87b
  Stored in directory: /tmp/pip-ephem-wheel-cache-ayyb125c/wheels/3c/1c/42/9f345b7c4830c9565167986fa8eeeb048f713a52913c845c66
Successfully built kerascnn2c
Installing collected packages: kerascnn2c
Successfully installed kerascnn

## Generate C code for the trained model with 16-bit fixed-point representation

In [67]:
res = kerascnn2c.Converter(output_path=Path('gsc_output_fixed'),
                           fixed_point=9, # Number of bits for the fractional part, Q7.9 format
                           number_type='int16_t', # Data type for weights/activations (16 bits quantization)
                           long_number_type='int32_t', # Data type for intermediate results
                           number_min=-(2**15), # Minimum value for 16-bit signed integers
                           number_max=(2**15)-1 # Maximum value for 16-bit signed integers
                          ).convert_model(copy.deepcopy(model))
with open('gsc_model_fixed.h', 'w') as f:
    f.write(res)

———————————————————————————————————————————————————————————————————————————————————————————————————————
Inputs                           | Layer                            | Outputs                         
———————————————————————————————————————————————————————————————————————————————————————————————————————
                                 | input_6                          | max_pooling1d_20                
-------------------------------------------------------------------------------------------------------
input_6                          | max_pooling1d_20                 | conv1d_20                       
-------------------------------------------------------------------------------------------------------
max_pooling1d_20                 | conv1d_20                        | max_pooling1d_21                
-------------------------------------------------------------------------------------------------------
conv1d_20                        | max_pooling1d_21                 

## Compile the 16-bit fixed-point C code for x86 and evaluate on small dataset

In [None]:
!g++ -Wall -Wextra -pedantic -Ofast -o gsc_fixed -Igsc_output_fixed/ gsc_output_fixed/model.c main.cpp
!./gsc_fixed x_test_gsc_250.csv y_test_gsc_250.csv

gsc_output_fixed/model.c: In function â€˜void cnn(const number_t (*)[16000], number_t*)â€™:
  114 |     activations2.max_pooling1d_68_output,
      |     ~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~
cc1plus: fatal error: main.cpp: No such file or directory
compilation terminated.
'.' n'est pas reconnu en tant que commande interne
ou externe, un programme ex‚cutable ou un fichier de commandes.
