# Project

## Imports

In [1]:
import copy
import socket
import wave
from pathlib import Path
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Input, Conv1D, AvgPool1D, MaxPool1D, ZeroPadding1D, BatchNormalization, Flatten, Dense, Activation
from keras.activations import softmax
from keras.utils import get_file
from keras.utils import to_categorical
import xenocanto
import random
import os
import librosa
import soundfile
from pydub import AudioSegment 

## Download, cache and extract birds data from Xeno-Canto

In [2]:
birds = ['Passer domesticus','Emberiza calandra','Fringilla coelebs']
dataset_dir = Path('dataset')
CLASSES = []
if not (dataset_dir).exists(): # Assume dataset already downloaded/extracted if directory is present
    for bird in birds : 
        xenocanto.metadata([bird,"type:song","q:A"])
        xenocanto.download([bird,"type:song","q:B"], 2)
        await xenocanto.download([bird,"type:song","q:A"], 2)
        await xenocanto.download([bird,"type:song","q:B"], 2)

# Remove silent parts

In [3]:
if not (dataset_dir/'testing_list.txt').exists():
    CLASSES = [c for c in os.listdir(dataset_dir/"audio") if os.path.isdir(dataset_dir/"audio"/c)]
    
    for c in CLASSES:
        files = [f for f in os.listdir(dataset_dir/"audio"/c) if f.endswith('.mp3')]
        for file in files:
            try:
                waveform, sample_rate = librosa.load(dataset_dir/"audio"/c/str(file))
                print(dataset_dir/"audio"/c/str(file))
                waveform = librosa.effects.trim(waveform, top_db=10)[0]
                soundfile.write("dataset/audio/"+c+"/"+str(file), waveform, sample_rate)
                
            except Exception as e:
                os.remove(str(dataset_dir)+"/audio/"+c+"/"+str(file))
                print(f"Error processing {file}: {e}")

## Train & Test

In [4]:
CLASSES = [c for c in os.listdir(dataset_dir/"audio") if os.path.isdir(dataset_dir/"audio"/c)]

if not (dataset_dir/'testing_list.txt').exists():
    
    for c in CLASSES:
        files = [f for f in os.listdir(dataset_dir/"audio"/c) if f.endswith('.mp3')]
        for file in files:
            try:
                sound = AudioSegment.from_mp3(dataset_dir/"audio"/c/str(file))
                sound.export(dataset_dir/"audio"/c/str(file).replace('.mp3','.wav'), format="wav")
                os.remove(dataset_dir/"audio"/c/str(file))
            except Exception as e:
                os.remove(dataset_dir/"audio"/c/str(file))
                print(f"Error processing {file}: {e}")
    
num_rec = min(len(os.listdir(os.path.join(dataset_dir, "audio", c))) for c in CLASSES)
num_test = int(num_rec * 0.1)

for c in CLASSES :
    files = os.listdir(dataset_dir/"audio"/c)
    for f in files[num_rec:] :
        os.remove(dataset_dir/"audio"/c/f)
os.open(dataset_dir/'testing_list.txt', os.O_CREAT)
os.open(dataset_dir/'validation_list.txt', os.O_CREAT)
for c in CLASSES :
    recs = [ rec for rec in os.listdir(dataset_dir/'audio'/c) if rec.endswith('.wav') ]
    randomrecs = random.sample(recs, num_test*2)
    for rec in randomrecs:
        if randomrecs.index(rec) < num_test:
                with open(dataset_dir/'testing_list.txt', 'a') as f:
                    f.write(c + '/' + rec +'\n')
        else : 
            with open(dataset_dir/'validation_list.txt', 'a') as f:
                    f.write(c + '/' + rec +'\n')

with (dataset_dir/'testing_list.txt').open() as f:
    testing_list = f.read().splitlines()
    
x_train = []
y_train = []
x_test = []
y_test = []
audiopath = dataset_dir/'audio'

for recording in audiopath.glob(f'**/*.wav'):
    if recording.parent.name not in CLASSES:
        continue
    
    label = CLASSES.index(recording.parent.name)
    with wave.open(str(recording)) as f:
        data = np.frombuffer(f.readframes(f.getnframes()), dtype=np.int16).copy()
    
    data = data.astype(np.float32)
    data.resize((16000, 1))
    
    if str(recording.relative_to(audiopath)).replace('\\','/') in testing_list:
        x_test.append(data)
        y_test.append(label)
    else:
        x_train.append(data)
        y_train.append(label)

x_train = np.array(x_train)
y_train = to_categorical(np.array(y_train))
x_test = np.array(x_test)
y_test = to_categorical(np.array(y_test))


In [5]:


if not (dataset_dir/'testing_list.txt').exists(): # Assume dataset already downloaded/extracted if testing list is present

    CLASSES = os.listdir(dataset_dir/"audio")
    
    for c in CLASSES :
        files = os.listdir(dataset_dir/"audio"/c)
        for file in files:
            if file.endswith('.wav'):
                try:
                    wave.open(str(dataset_dir)+"/audio/"+c+"/"+str(file)) 
                except:
                    os.remove(str(dataset_dir)+"/audio/"+c+"/"+str(file)) 
                
    numOfrec =min([len(os.listdir(dataset_dir/"audio"/c)) for c in CLASSES])
    num_test = int(numOfrec*0.1)
    for c in CLASSES :
        files = os.listdir(dataset_dir/"audio"/c)
        for f in files[numOfrec:] :
            os.remove(dataset_dir/"audio"/c/f)
    os.open(dataset_dir/'testing_list.txt', os.O_CREAT)
    os.open(dataset_dir/'validation_list.txt', os.O_CREAT)
    for c in CLASSES :
        recs = [ rec for rec in os.listdir(dataset_dir/'audio'/c) if rec.endswith('.wav') ]
        randomrecs = random.sample(recs, num_test*2)
        for rec in randomrecs:
            if randomrecs.index(rec) < num_test:
                 with open(dataset_dir/'testing_list.txt', 'a') as f:
                        f.write(c + '/' + rec +'\n')
            else : 
                with open(dataset_dir/'validation_list.txt', 'a') as f:
                        f.write(c + '/' + rec +'\n')
# Classes to handle, ordered by label
with (dataset_dir/'testing_list.txt').open() as f:
    testing_list = f.read().splitlines()
CLASSES = os.listdir(dataset_dir/"audio")
x_train = []
y_train = []
x_test = []
y_test = []
audiopath = dataset_dir/'audio'
for recording in audiopath.glob(f'**/*.wav'):
    if not recording.parent.name in CLASSES: # Ignore unused classes
        continue
    
    label = CLASSES.index(recording.parent.name) # Assign class number
    with wave.open(str(recording)) as f: # Read wave file
        data = np.frombuffer(f.readframes(f.getnframes()), dtype=np.int16).copy() # As 16-bit signed integer

    data = data.astype(np.float32) # Convert to 32-bit floating-point
    data.resize((16000, 1)) # Resize to 2s (10kHz) with zero-padding, 1 channel
    if str(recording.relative_to(audiopath)).replace('\\','/') in testing_list: # Assign to test set if file in test list
        x_test.append(data)
        y_test.append(label)
    else:
        x_train.append(data)
        y_train.append(label)

print(f'Loaded {len(x_train)} training samples and {len(x_test)} testing samples')
x_train = np.array(x_train)
y_train = to_categorical(np.array(y_train))
x_test = np.array(x_test)
y_test = to_categorical(np.array(y_test))

Loaded 1293 training samples and 231 testing samples


## Prepare for inference with fixed-point Q7.9 samples by scaling input data accordingly

In [6]:
FIXED_POINT = 9
x_train /= 2**FIXED_POINT
x_test  /= 2**FIXED_POINT

## Export small dataset (250 random vectors)

In [7]:
perms = np.random.permutation(len(y_test))[0:250]
x_test_250 = x_test[perms]
y_test_250 = y_test[perms]
np.savetxt('x_test_gsc_250.csv', x_test_250.reshape((x_test_250.shape[0], -1)), delimiter=',', fmt='%s')
np.savetxt('y_test_gsc_250.csv', y_test_250, delimiter=',', fmt='%s')

## Build model M5

In [8]:
model = Sequential()
model.add(Input(shape=(16000, 1)))
model.add(Conv1D(filters=8, kernel_size=20, strides=10,activation='relu'))
model.add(MaxPool1D(pool_size=2))
model.add(Conv1D(filters=16, kernel_size=8, strides=4, activation='relu'))
model.add(MaxPool1D(pool_size=2))
model.add(Conv1D(filters=32, kernel_size=4, strides=2, activation='relu'))
model.add(MaxPool1D(pool_size=2))
model.add(Conv1D(filters=64, kernel_size=2, activation='relu'))
model.add(AvgPool1D(4))
model.add(Flatten())
model.add(Dense(units=3))
model.add(Activation('softmax')) 
opt = tf.keras.optimizers.Adam(learning_rate=10e-3)

model.summary()
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 1599, 8)           168       
                                                                 
 max_pooling1d (MaxPooling1  (None, 799, 8)            0         
 D)                                                              
                                                                 
 conv1d_1 (Conv1D)           (None, 198, 16)           1040      
                                                                 
 max_pooling1d_1 (MaxPoolin  (None, 99, 16)            0         
 g1D)                                                            
                                                                 
 conv1d_2 (Conv1D)           (None, 48, 32)            2080      
                                                                 
 max_pooling1d_2 (MaxPoolin  (None, 24, 32)            0

## Train model

In [9]:
model.fit(x_train, y_train, epochs=20, batch_size=384, validation_data=(x_test, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x24e8b482d50>

## Evaluate model on test dataset

In [10]:
model.evaluate(x_test, y_test, verbose=2)
pred_test = model.predict(x_test)
print(tf.math.confusion_matrix(y_test.argmax(axis=1), pred_test.argmax(axis=1)))

8/8 - 0s - loss: 0.3765 - categorical_accuracy: 0.8701 - 73ms/epoch - 9ms/step
tf.Tensor(
[[46  2 12]
 [ 2 64  9]
 [ 4  1 91]], shape=(3, 3), dtype=int32)


## Evaluate model on small dataset

In [11]:
model.evaluate(x_test_250, y_test_250, verbose=2)
pred_test_250 = model.predict(x_test_250)
print(tf.math.confusion_matrix(y_test_250.argmax(axis=1), pred_test_250.argmax(axis=1)))

8/8 - 0s - loss: 0.3765 - categorical_accuracy: 0.8701 - 74ms/epoch - 9ms/step
tf.Tensor(
[[46  2 12]
 [ 2 64  9]
 [ 4  1 91]], shape=(3, 3), dtype=int32)


## Save trained model

In [12]:
model.save('lab_gsc.h5')

  saving_api.save_model(


## Remove SoftMax layer

In [13]:
if isinstance(model.layers[-1], Activation) and model.layers[-1].activation == softmax:
    model = tf.keras.Model(model.input, model.layers[-2].output, name=model.name)
else:
    print('Error: last layer is not SoftMax Activation')

## Install Qualia-CodeGen for C inference code generation

In [14]:
%pip install qualia_codegen_core
import qualia_codegen_core
from qualia_codegen_core.graph.KerasModelGraph import KerasModelGraph
from qualia_codegen_core.graph.Quantization import Quantization
from qualia_codegen_core.graph.RoundMode import RoundMode

from importlib.resources import files
main_path = str((files('qualia_codegen_core.examples')/'Linux'/'main.cpp').resolve())


[notice] A new release of pip is available: 23.3.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip
Cannot find PyTorch, PyTorch framework will be unavailable


Note: you may need to restart the kernel to use updated packages.


## Convert Keras Model to Qualia-CodeGen's internal representation

In [15]:
modelgraph = KerasModelGraph(model).convert()
print(modelgraph)

—————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
Inputs                                           | Layer                                            | Outputs                                          | Input shape                                      | Output shape                                    
—————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
                                                 | input_1                                          | conv1d                                           | (1, 16000, 1)                                    | ((1, 16000, 1),)                   

## Generate C code for the trained model with 32-bit floating-point representation

In [16]:
float_modelgraph = copy.deepcopy(modelgraph)

# layer quantization annotations for float32
for node in float_modelgraph.nodes:
    # No scale factor if not fixed-point quantization on integers
    node.q = Quantization(
            number_type=float,
            width=32,
            long_width=32,
            weights_scale_factor=0,
            output_scale_factor=0,
            weights_round_mode=RoundMode.NONE,
            output_round_mode=RoundMode.NONE,
            )

float_res = qualia_codegen_core.Converter(output_path=Path('gsc_output_floating')).convert_model(float_modelgraph)

with open('gsc_model_floating.h', 'w') as f:
    f.write(float_res)

Graphviz not available


## Compile the 32-bit floating-point C code for x86 and evaluate on small dataset

In [None]:
!g++ -std=c++17 -Wall -Wextra -pedantic -Ofast -o gsc_floating -include gsc_output_floating/include/defines.h -Igsc_output_floating/include gsc_output_floating/model.c {main_path}
!./gsc_floating x_test_gsc_250.csv y_test_gsc_250.csv

'g++' is not recognized as an internal or external command,
operable program or batch file.
'.' is not recognized as an internal or external command,
operable program or batch file.


## Generate C code for the trained model with 16-bit fixed-point representation

In [None]:
fixed_modelgraph = copy.deepcopy(modelgraph)

# layer quantization annotations for int16 Q9.7
for node in fixed_modelgraph.nodes:
    node.q = Quantization(
            number_type=int,
            width=16,
            long_width=32,
            weights_scale_factor=7,
            output_scale_factor=7,
            weights_round_mode=RoundMode.FLOOR,
            output_round_mode=RoundMode.FLOOR,
            )

fixed_res = qualia_codegen_core.Converter(output_path=Path('gsc_output_fixed')).convert_model(fixed_modelgraph)

with open('gsc_model_fixed.h', 'w') as f:
    f.write(fixed_res)

Graphviz not available


## Compile the 16-bit fixed-point C code for x86 and evaluate on small dataset

In [17]:
!g++ -std=c++17 -Wall -Wextra -pedantic -Ofast -o gsc_fixed -include gsc_output_fixed/include/defines.h -Igsc_output_fixed/include gsc_output_fixed/model.c {main_path}
!./gsc_fixed x_test_gsc_250.csv y_test_gsc_250.csv

'g++' is not recognized as an internal or external command,
operable program or batch file.
'.' is not recognized as an internal or external command,
operable program or batch file.
