# Projet

## Imports

In [1]:
import copy
import wave
from pathlib import Path
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Input, Conv1D, AvgPool1D, MaxPool1D, ZeroPadding1D, BatchNormalization, Flatten, Dense, Activation
from keras.utils.np_utils import to_categorical
import os
from download_dataset import get_bird_json, download_from_bird_json_infos
from prepare_and_clean_dataset import split_audio_file
import librosa

## Récupération des données

In [2]:
# parameters
main_bird = "Parus major"
main_bird_quality = ("A", "B", "C")
number_of_main_bird_recordings = 100

test_bird_1 = "Turdus merula"
test_bird_1_quality = ("A", "B", "C")
number_of_test_bird_1_recordings = 100

test_bird_2 = "Fringilla coelebs"
test_bird_2_quality = ("A", "B", "C")
number_of_test_bird_2_recordings = 100

recordings_folder = "recordings"

# dl data
main_bird_recordings_json = get_bird_json(main_bird, main_bird_quality, number_of_main_bird_recordings)
download_from_bird_json_infos(recordings_folder, main_bird_recordings_json)

test_bird_1_recordings_json = get_bird_json(test_bird_1, test_bird_1_quality, number_of_test_bird_1_recordings)
download_from_bird_json_infos(recordings_folder, test_bird_1_recordings_json)

test_bird_2_recordings_json = get_bird_json(test_bird_2, test_bird_2_quality, number_of_test_bird_2_recordings)
download_from_bird_json_infos(recordings_folder, test_bird_2_recordings_json)

# clean data

print("splitting audio into multiple recordings")
for bird_type in os.listdir(recordings_folder):
    bird_folder = os.path.join(recordings_folder, bird_type)
    print("looking into folder:", bird_folder)
    if os.path.isdir(bird_folder):
        number_of_recordings = len(os.listdir(bird_folder))
        for index, recording in enumerate(os.listdir(bird_folder)):
            print("splitting recording", str(index+1), recording, "out of", number_of_recordings, "for bird", bird_type)
            recording_path = os.path.join(bird_folder, recording)
            split_audio_file(recording_path, bird_folder, recording.split(".")[0], 3)




Getting recordings data for Parus+major with quality A
Getting recordings data for Parus+major with quality B
Getting recordings data for Parus+major with quality C
Downloading file: 1 out of 100 ( XC795108-230404_11Parus-major-song-10.30-Camiño-galegas,-Biobra,-Rubiá.mp3 )
Downloading file: 2 out of 100 ( XC794967-230403_41-Parus-major-14.30-Covas,-Rubiá.mp3 )
Downloading file: 3 out of 100 ( XC794645-MVI_5842.mp3 )
Downloading file: 4 out of 100 ( XC793834-pmajor_230415_aluvium.wav )
Downloading file: 5 out of 100 ( XC793240-230413_1596-1739-Kohlmeise.-CZ,-Becov-nad-Teplou.-Stephan-Risch.wav )
Downloading file: 6 out of 100 ( XC793160-Kohlmeise--Würmseeplatz--München.mp3 )
Downloading file: 7 out of 100 ( XC793158-Kohlmeise--Ambacher-Straße--München.mp3 )
Downloading file: 8 out of 100 ( XC792834-Tit,-Great-(Nové-Mlyny)-19.4.22-SF.mp3 )
Downloading file: 9 out of 100 ( XC792650-20230411_171921-cinciallegra.mp3 )
Downloading file: 10 out of 100 ( XC792271-P.-major--230406_012б-(3-50),

## Création des jeux de données

In [39]:
# create train, test and text pointers
recordings_folder = "recordings"
main_bird = "Parus major"

file_name = "testing_list.txt"

count = 0
with open(os.path.join(recordings_folder, file_name), "w", encoding="utf-8") as f:
    for bird_type in os.listdir(recordings_folder):
        bird_folder = os.path.join(recordings_folder, bird_type)
        if os.path.isdir(bird_folder):
            for recording in os.listdir(bird_folder):
                sound_filename = os.path.join(bird_folder, recording)
                if os.path.isfile(sound_filename) and "splitted_" in sound_filename:
                    if count < 1200:
                        if np.random.rand() > 0.3:
                            f.write(sound_filename + "\n")
                            count += 1
            count = 0
print("Done")

Done


## Création des jeux de données

In [40]:
dataset_dir = Path('recordings')

CLASSES = ["Parus_major", "Turdus_merula", "Fringilla_coelebs"]

with (dataset_dir/ 'testing_list.txt').open(encoding='utf-8') as f:
    testing_list = f.read().splitlines()

x_train = []
y_train = []
x_test = []
y_test = []

for recording in dataset_dir.glob('**/*.wav'):
    if not recording.parent.name in CLASSES:
        continue
    if "splitted_" not in str(recording):
        continue
    label = CLASSES.index(recording.parent.name)

    with wave.open(str(recording)) as f :
        data = np.frombuffer(f.readframes(f.getnframes()), dtype=np.int16).copy()

    data = data.astype(np.float32)
    data.resize((16000, 1))

    if "splitted_" in str(recording):
        if str(recording) in testing_list:
            x_test.append(data)
            y_test.append(label)
        else:
            x_train.append(data)
            y_train.append(label)

x_train = np.array(x_train)
y_train = to_categorical(np.array(y_train))
x_test = np.array(x_test)
y_test = to_categorical(np.array(y_test))

## Normalize data

In [41]:
x_mean = x_train.mean()
x_std = x_train.std()

x_train -= x_mean
x_test -= x_mean
x_train /= x_std
x_test /= x_std

## Exporter les données

In [42]:
np.savetxt('x_test.csv', x_test.reshape(x_test.shape[0], -1), delimiter=',', fmt='%s')
np.savetxt('y_test.csv', y_test, delimiter=',', fmt='%s')

## Build model M5

In [47]:
# Modifier

model = Sequential()
model.add(Input(shape=(16000, 1)))
# model.add(MaxPool1D(pool_size=4, strides=3, padding='valid'))
# model.add(Conv1D(filters=128, kernel_size=80, activation='relu'))
# model.add(MaxPool1D(pool_size=4, strides=3, padding='valid'))
# model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
# model.add(MaxPool1D(pool_size=4, strides=3, padding='valid'))
# model.add(Conv1D(filters=32, kernel_size=7, activation='relu'))
# model.add(MaxPool1D(pool_size=4, strides=3, padding='valid'))
# model.add(AvgPool1D())
model.add(Conv1D(filters=128, kernel_size=80, strides=4, activation='relu'))
model.add(MaxPool1D(pool_size=4, strides=1, padding='valid'))
model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
model.add(MaxPool1D(pool_size=4, strides=1, padding='valid'))
model.add(Conv1D(filters=256, kernel_size=3, activation='relu'))
model.add(MaxPool1D(pool_size=4, strides=1, padding='valid'))
model.add(Conv1D(filters=512, kernel_size=3, activation='relu'))
model.add(MaxPool1D(pool_size=4, strides=1, padding='valid'))
model.add(AvgPool1D())
model.add(Flatten())
model.add(Dense(units=3))
model.add(Activation('softmax'))  # SoftMax activation needs to be separate from Dense to remove it later on
# EXPLORE Learning Rate
opt = tf.keras.optimizers.Adam(learning_rate=10e-3)
model.summary()
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_36 (Conv1D)          (None, 3981, 128)         10368     
                                                                 
 max_pooling1d_38 (MaxPoolin  (None, 3978, 128)        0         
 g1D)                                                            
                                                                 
 conv1d_37 (Conv1D)          (None, 3976, 128)         49280     
                                                                 
 max_pooling1d_39 (MaxPoolin  (None, 3973, 128)        0         
 g1D)                                                            
                                                                 
 conv1d_38 (Conv1D)          (None, 3971, 256)         98560     
                                                                 
 max_pooling1d_40 (MaxPoolin  (None, 3968, 256)      

## Train model

In [48]:
model.fit(x_train, y_train, epochs=5, batch_size=10, validation_data=(x_test, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x134a2b480d0>

## Evaluate model on test dataset

In [49]:
model.evaluate(x_test, y_test, verbose=2)
pred_test = model.predict(x_test)
print(tf.math.confusion_matrix(y_test.argmax(axis=1), pred_test.argmax(axis=1)))

113/113 - 75s - loss: 1.5546 - categorical_accuracy: 0.3333 - 75s/epoch - 662ms/step
tf.Tensor(
[[   0 1200    0]
 [   0 1200    0]
 [   0 1200    0]], shape=(3, 3), dtype=int32)


## Save trained model

In [11]:
model.save('lab_gsc.h5')

## Remove SoftMax layer

In [59]:
model = tf.keras.Model(model.input, model.layers[-2].output, name=model.name)

## Install MicroAI for C inference code generation (kerascnn2c module)

In [60]:
!pip install https://bitbucket.org/edge-team-leat/microai_public/get/6adfbcb347d3.zip#subdirectory=third_party/kerascnn2c_fixed
import kerascnn2c

Collecting https://bitbucket.org/edge-team-leat/microai_public/get/6adfbcb347d3.zip#subdirectory=third_party/kerascnn2c_fixed
  Downloading https://bitbucket.org/edge-team-leat/microai_public/get/6adfbcb347d3.zip (1.9 MB)
     ---------------------------------------- 1.9/1.9 MB 4.1 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: kerascnn2c
  Building wheel for kerascnn2c (setup.py): started
  Building wheel for kerascnn2c (setup.py): finished with status 'done'
  Created wheel for kerascnn2c: filename=kerascnn2c-1.0.0-py3-none-any.whl size=21348 sha256=a72fad2a5a6a6c183f5bf47a13180b1d0e52ece833b9fcbebe52d0117e0f24fa
  Stored in directory: C:\Users\Vinh\AppData\Local\Temp\pip-ephem-wheel-cache-4rvvczlm\wheels\29\df\9b\d62a64e871a29555dc13bc0c189d46297cdf80a3332230aaa1
Successfully built kerascnn2c
Installing collected packages: kerascnn2c
Successfully installed kerascnn2c-1.0.0



## Generate C code for the trained model with 16-bit fixed-point representation

In [61]:
res = kerascnn2c.Converter(output_path=Path('gsc_output_fixed'),
                           fixed_point=9, # Number of bits for the fractional part, Q7.9 format
                           number_type='int16_t', # Data type for weights/activations (16 bits quantization)
                           long_number_type='int32_t', # Data type for intermediate results
                           number_min=-(2**15), # Minimum value for 16-bit signed integers
                           number_max=(2**15)-1 # Maximum value for 16-bit signed integers
                          ).convert_model(copy.deepcopy(model))
with open('gsc_model_fixed.h', 'w') as f:
    f.write(res)

Keras weights file (<HDF5 file "variables.h5" (mode r+)>) saving:
...layers\conv1d
......vars
.........0
.........1
...layers\conv1d_1
......vars
.........0
.........1
...layers\conv1d_2
......vars
.........0
.........1
...layers\dense
......vars
.........0
.........1
...layers\flatten
......vars
...layers\input_layer
......vars
...layers\max_pooling1d
......vars
...layers\max_pooling1d_1
......vars
...layers\max_pooling1d_2
......vars
...vars
Keras model archive saving:
File Name                                             Modified             Size
config.json                                    2023-04-23 16:54:45         3844
metadata.json                                  2023-04-23 16:54:45           64
variables.h5                                   2023-04-23 16:54:45       335984
Keras model archive loading:
File Name                                             Modified             Size
config.json                                    2023-04-23 16:54:44         3844
metadata.json  

## Compile the 16-bit fixed-point C code for x86 and evaluate on small dataset

In [62]:
!g++ -Wall -Wextra -pedantic -Ofast -o gsc_fixed -Igsc_output_fixed/ gsc_output_fixed/model.c main.cpp
!./gsc_fixed x_test_gsc_250.csv y_test_gsc_250.csv

gsc_output_fixed/model.c: In function â€˜void cnn(const number_t (*)[16000], number_t*)â€™:
  114 |     activations2.max_pooling1d_68_output,
      |     ~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~
cc1plus: fatal error: main.cpp: No such file or directory
compilation terminated.
'.' n'est pas reconnu en tant que commande interne
ou externe, un programme ex‚cutable ou un fichier de commandes.
