<a href="https://colab.research.google.com/github/Nicordaro/ml4iot_hw3/blob/main/big_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#pip install tensorflow_model_optimization

Collecting tensorflow_model_optimization
[?25l  Downloading https://files.pythonhosted.org/packages/55/38/4fd48ea1bfcb0b6e36d949025200426fe9c3a8bfae029f0973d85518fa5a/tensorflow_model_optimization-0.5.0-py2.py3-none-any.whl (172kB)
[K     |██                              | 10kB 16.3MB/s eta 0:00:01[K     |███▉                            | 20kB 21.4MB/s eta 0:00:01[K     |█████▊                          | 30kB 8.5MB/s eta 0:00:01[K     |███████▋                        | 40kB 8.8MB/s eta 0:00:01[K     |█████████▌                      | 51kB 4.4MB/s eta 0:00:01[K     |███████████▍                    | 61kB 5.1MB/s eta 0:00:01[K     |█████████████▎                  | 71kB 5.1MB/s eta 0:00:01[K     |███████████████▏                | 81kB 5.7MB/s eta 0:00:01[K     |█████████████████               | 92kB 5.4MB/s eta 0:00:01[K     |███████████████████             | 102kB 5.8MB/s eta 0:00:01[K     |████████████████████▉           | 112kB 5.8MB/s eta 0:00:01[K     |█████

In [2]:
import os
# Clone github repository with data
# if os.path.isdir('./Project_MLDL'):
!rm -rf Project_MLDL
if not os.path.isdir('./resources'):
  !git clone https://github.com/Nicordaro/ml4iot_hw3
DATA_DIR = './ml4iot_hw3'

Cloning into 'ml4iot_hw3'...
remote: Enumerating objects: 26, done.[K
remote: Counting objects: 100% (26/26), done.[K
remote: Compressing objects: 100% (25/25), done.[K
remote: Total 26 (delta 12), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (26/26), done.


In [5]:
import argparse
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
import tensorflow_model_optimization as tfmot
import zlib
import shutil

# Command-line parsing is disabled (kept below as a string literal for
# reference), so `version` has to be assigned explicitly: every later
# 'Group16_kws_' + version expression would otherwise raise NameError.
'''
parser = argparse.ArgumentParser()
parser.add_argument('--version', type=str, required=False,
        help='model version: a,b or c')
args = parser.parse_args()

version = args.version
'''
# NOTE(review): 'big' mirrors the 'Group16_big' checkpoint name; confirm the
# tag expected for the deliverable (the CLI help above lists a, b or c).
version = 'big'

# Seed both TensorFlow and NumPy so that runs are repeatable.
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

# Download and extract the mini speech-commands archive under ./data.
download_options = dict(
    origin = 'http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip',
    fname = 'mini_speech_commands.zip',
    extract = True,
    cache_dir = '.',
    cache_subdir = 'data',
)
zip_path = tf.keras.utils.get_file(**download_options)

# Folder whose sub-directories are listed to build LABELS.
data_dir = os.path.join('.', 'data', 'mini_speech_commands')

def filenames(path) :
    """Read a split file and return its entries as a NumPy array of byte strings.

    Every non-blank line of the file is treated as one file path. Forward
    slashes are rewritten to the separator of the running platform
    (``os.sep``), because ``SignalGenerator.read`` splits paths on
    ``os.path.sep`` to recover the label folder. On Windows this yields
    backslashes; on Linux/Colab the paths are left as they are.

    Args:
        path: location of the text file listing one audio file per line.

    Returns:
        1-D ``np.ndarray`` of ``bytes`` paths (empty, but still byte-typed,
        when the file has no entries).
    """
    separator = os.sep.encode()
    with open(path, "rb") as fp:
        entries = [line.replace(b"/", separator)
                   for line in fp.read().splitlines()
                   if line.strip()]  # ignore blank lines
    return np.array(entries, dtype=bytes)
    
# Load the three split lists shipped with the cloned repository.
split_paths = [DATA_DIR+"/kws_train_split.txt",
               DATA_DIR+"/kws_val_split.txt",
               DATA_DIR+"/kws_test_split.txt"]
train_files, val_files, test_files = [filenames(split_path) for split_path in split_paths]

# Class names are the entries of the dataset folder, minus the README.
# NOTE(review): the order is whatever listdir returns, and that order fixes
# the integer class ids; confirm it is stable across machines.
dataset_entries = np.array(tf.io.gfile.listdir(str(data_dir)))
LABELS = dataset_entries[dataset_entries != 'README.md']

In [7]:
# Show the label array; an entry's position is the integer class id that
# SignalGenerator.read derives via argmax(label == labels).
LABELS

array(['down', 'no', 'yes', 'stop', 'go', 'up', 'right', 'left'],
      dtype='<U9')

In [None]:
class SignalGenerator:
    """Turn WAV file paths into batched ``tf.data`` datasets of (features, label_id).

    Depending on ``mfcc`` the features are either MFCCs or a resized STFT
    magnitude spectrogram. The selected front-end is exposed as
    ``self.preprocess`` and is what ``make_ds`` maps over the file names.

    Args:
        labels: array of class names; the index of a name is its label id.
        sampling_rate: target number of samples per clip (shorter clips are
            zero-padded, longer ones are clipped) and the sample rate given
            to the mel filter bank.
        frame_length: STFT window length (also used as FFT length), in samples.
        frame_step: STFT hop, in samples.
        num_mel_bins: number of mel bins (MFCC front-end only).
        lower_frequency: lower edge of the mel filter bank (MFCC only).
        upper_frequency: upper edge of the mel filter bank (MFCC only).
        num_coefficients: number of leading MFCCs kept (MFCC only).
        mfcc: truthy selects the MFCC front-end, falsy the STFT one.
    """

    def __init__(self, labels, sampling_rate, frame_length, frame_step, num_mel_bins=None, lower_frequency=None,
                upper_frequency=None, num_coefficients=None, mfcc=False):

        self.labels = labels
        self.sampling_rate = sampling_rate
        self.frame_length = frame_length
        self.frame_step = frame_step
        self.num_mel_bins = num_mel_bins
        self.lower_frequency = lower_frequency
        self.upper_frequency = upper_frequency
        self.num_coefficients = num_coefficients
        self.mfcc = mfcc

        # Plain truthiness is used here and in preprocess_with_stft, so both
        # places always agree on which front-end is active.
        if mfcc:
            self.preprocess = self.preprocess_with_mfcc
        else:
            self.preprocess = self.preprocess_with_stft

    def read(self, file_path):
        """Load one WAV file and return ``(audio, label_id)``.

        The label is the second-to-last component of ``file_path`` when split
        on ``os.path.sep``; its id is the position of that name in
        ``self.labels``.
        """
        parts = tf.strings.split(file_path, os.path.sep)
        label = parts[-2]
        label_id = tf.argmax(label == self.labels)

        audio_binary = tf.io.read_file(file_path)
        audio, _ = tf.audio.decode_wav(audio_binary)
        # Drops axis 1 of the decoded audio; assumes single-channel files (TODO confirm).
        audio = tf.squeeze(audio, axis=1)
        audio = self.pad(audio)

        return audio, label_id

    def pad(self, audio):
        """Return ``audio`` with exactly ``sampling_rate`` samples.

        Clips longer than ``sampling_rate`` are cut first, so the amount of
        zero padding computed below can never be negative.
        """
        audio = audio[:self.sampling_rate]
        zero_padding = tf.zeros([self.sampling_rate] - tf.shape(audio), dtype=tf.float32)
        audio = tf.concat([audio, zero_padding], 0)
        # Make the static shape known to downstream ops.
        audio.set_shape([self.sampling_rate])

        return audio

    def preprocess_with_stft(self, file_path):
        """Return ``(spectrogram, label_id)`` for one file.

        The spectrogram is the STFT magnitude. When the MFCC front-end is not
        selected, a trailing channel axis is added and the result is resized
        to 50x50; otherwise the raw magnitude is returned for
        ``preprocess_with_mfcc`` to consume.
        """
        audio, label = self.read(file_path)

        stft = tf.signal.stft(audio, frame_length=self.frame_length, frame_step=self.frame_step,
                              fft_length=self.frame_length)
        spectrogram = tf.abs(stft)

        if not self.mfcc:
            spectrogram = tf.expand_dims(spectrogram, -1)
            spectrogram = tf.image.resize(spectrogram, [50, 50])

        return spectrogram, label

    def preprocess_with_mfcc(self, file_path):
        """Return ``(mfccs, label_id)`` for one file.

        Steps: STFT magnitude -> mel filter bank -> log (with a 1e-6 offset to
        avoid log(0)) -> MFCCs, keeping the first ``num_coefficients`` and
        adding a trailing channel axis.
        """
        spectrogram, label = self.preprocess_with_stft(file_path)

        num_spectrogram_bins = spectrogram.shape[-1]
        linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(self.num_mel_bins, num_spectrogram_bins,
                                                                            self.sampling_rate,
                                                                            self.lower_frequency, self.upper_frequency)

        mel_spectrogram = tf.tensordot(spectrogram, linear_to_mel_weight_matrix, 1)
        log_mel_spectrogram = tf.math.log(mel_spectrogram + 1e-6)
        mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrogram)[..., :self.num_coefficients]
        mfccs = tf.expand_dims(mfccs, -1)

        return mfccs, label

    def make_ds(self, filenames, train):
        """Build the dataset for ``filenames``.

        The pipeline maps ``self.preprocess`` over the paths, groups them in
        batches of 32 and caches the batched result. When ``train`` is truthy
        a shuffle (buffer of 100, reshuffled every iteration) is appended; it
        runs on the already batched dataset.
        """
        ds = tf.data.Dataset.from_tensor_slices(filenames)
        ds = ds.map(self.preprocess, num_parallel_calls=4)
        ds = ds.batch(32)
        ds = ds.cache()

        if train:
            ds = ds.shuffle(100, reshuffle_each_iteration=True)

        return ds

In [None]:
# Big model parameters
sampling_rate = 16_000                        # samples per clip / per second
frame_length = 40 * sampling_rate // 1000     # 640 samples
frame_step = frame_length // 2                # 320 samples, i.e. half a frame
num_mel_bins = 40
lower_frequency, upper_frequency = 20, 4000   # edges of the mel filter bank
num_mfccs = 20                                # MFCCs kept per frame

In [None]:
# MFCC
generator = SignalGenerator(
    labels = LABELS,
    sampling_rate = sampling_rate,
    frame_length = frame_length,
    frame_step = frame_step,
    num_mel_bins = num_mel_bins,
    lower_frequency = lower_frequency,
    upper_frequency = upper_frequency,
    num_coefficients = num_mfccs,
    mfcc = True,
)

# Only the training split gets the shuffle stage.
train_ds = generator.make_ds(train_files, train = True)
val_ds = generator.make_ds(val_files, train = False)
test_ds = generator.make_ds(test_files, train = False)

In [None]:
# Sanity check: feature shape of the first example in the first training batch.
sample_batch = next(iter(train_ds))
sample_batch[0][0].shape

TensorShape([49, 20, 1])

In [None]:
def model_creation(name, alpha, mfcc=True):
    """Build the (uncompiled) Keras model identified by ``name``.

    Args:
        name: architecture identifier; only ``"cnn"`` is supported.
        alpha: width multiplier; every convolution has ``int(128*alpha)`` filters.
        mfcc: accepted for interface compatibility; not used by the builder.

    Returns:
        A ``keras.Sequential`` of three Conv2D + BatchNormalization stages,
        global average pooling and a final ``Dense(8)`` with no activation
        (it outputs logits).

    Raises:
        ValueError: if ``name`` is not a supported architecture.
    """
    if name != "cnn":
        # Fail clearly instead of hitting an unbound `model` at the return.
        raise ValueError("Unknown model name %r; supported names: 'cnn'" % (name,))

    n_filters = int(128*alpha)
    # (stride along axis 0, stride along axis 1) for each convolution.
    conv_strides = ([2,1], [1,2], [1,2])

    layers = []
    for strides in conv_strides:
        layers.append(keras.layers.Conv2D(filters = n_filters, kernel_size=[3,3], strides=strides,
                                          use_bias=False, activation='relu'))
        # NOTE(review): momentum=0.1 is far below the Keras default; confirm it is intended.
        layers.append(keras.layers.BatchNormalization(momentum=0.1))
    layers.append(keras.layers.GlobalAveragePooling2D())
    layers.append(keras.layers.Dense(8))

    model = keras.Sequential(layers)

    return model

In [None]:
# Model selection
model_name = 'cnn'
alpha = 1          # width multiplier passed to model_creation
mfcc = True
#number_of_clusters = 8

# Optimisation setup; from_logits=True because the network ends in Dense(8) without activation.
n_epochs = 20
optimizer = tf.keras.optimizers.Adam(learning_rate = 0.02)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy()

def scheduler(epoch, lr):
    """Step decay: scale ``lr`` by 0.1 at epoch indices 10 and 17, else keep it.

    Args:
        epoch: epoch index supplied by the LearningRateScheduler callback.
        lr: learning rate currently in use.

    Returns:
        The learning rate to use for this epoch.
    """
    decay_epochs = (10, 17)
    return lr*0.1 if epoch in decay_epochs else lr

In [None]:
# Callbacks
checkpoint_path = './solutions Group16/Group16_big'

# Stop when val_loss has not improved by at least 0.005 for 5 epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss',
    mode = 'min',
    min_delta = 0.005,
    patience = 5,
)

# Keep only the weights of the epoch with the lowest val_loss.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    save_best_only = True,
    save_weights_only = True,
    monitor = 'val_loss',
    mode = 'min',
)

schedule_callback = tf.keras.callbacks.LearningRateScheduler(schedule = scheduler)

In [None]:
# Build, compile and train the network.
cnn = model_creation(model_name, alpha, mfcc = mfcc)
cnn.compile(optimizer = optimizer, loss = loss, metrics = metric)

training_callbacks = [schedule_callback, checkpoint]
cnn.fit(train_ds, epochs = n_epochs, validation_data=val_ds, callbacks = training_callbacks)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x186f03ee710>

In [None]:
# Restore the best weights from the path the ModelCheckpoint callback writes
# to. No 'Group16_kws_...' weights exist yet at this point: that folder is
# only created by the later cnn.save cell.
cnn.load_weights('./solutions Group16/Group16_big')
test_loss, test_accuracy = cnn.evaluate(test_ds, verbose=2)
test_accuracy

25/25 - 2s - loss: 0.3046 - sparse_categorical_accuracy: 0.9100


0.9100000262260437

In [None]:
# Disabled experiment: the weight-clustering fine-tuning below is wrapped in a
# string literal, so none of it is executed. Re-enabling it also requires
# `number_of_clusters`, which is commented out in the hyper-parameter cell.
'''
# Weights clustering
cnn_clustered = tfmot.clustering.keras.cluster_weights(cnn, 
                                            number_of_clusters = number_of_clusters,
                                            cluster_centroids_init = tfmot.clustering.keras.CentroidInitialization.LINEAR)

cnn_clustered.compile(optimizer = optimizer,
            loss = loss,              
            metrics = metric)

cnn_clustered.fit(train_ds, epochs = n_epochs, validation_data=val_ds, callbacks = [checkpoint, schedule_callback, early_stopping])
'''

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20


<tensorflow.python.keras.callbacks.History at 0x18680b4da58>

In [None]:
# Disabled: evaluation of the clustered model (string literal, not executed).
# It needs `cnn_clustered`, which is only created by the disabled clustering cell.
'''
cnn_clustered.load_weights('./solutions Group16/Group16_kws_'+version)
test_loss, test_accuracy = cnn_clustered.evaluate(test_ds, verbose=2)
test_accuracy
'''

25/25 - 0s - loss: 0.3760 - sparse_categorical_accuracy: 0.9175


0.9175000190734863

In [None]:
# Disabled: stripping the clustering wrappers and saving the clustered model
# (string literal, not executed). It needs `cnn_clustered` from the disabled
# clustering cell.
'''
# Save the clustered model
cnn_clustered = tfmot.clustering.keras.strip_clustering(cnn_clustered)
cnn_clustered.save('./solutions Group16/Group16_kws_'+version)
'''

INFO:tensorflow:Assets written to: ./solutions Group16/Group16_kws_c\assets


INFO:tensorflow:Assets written to: ./solutions Group16/Group16_kws_c\assets


In [None]:
# Save the model
# This is the folder the TFLite converter reads from afterwards.
saved_model_dir = './solutions Group16/Group16_kws_'+version
cnn.save(saved_model_dir)

In [None]:
# Save the tflite
converter = tf.lite.TFLiteConverter.from_saved_model('./solutions Group16/Group16_kws_'+version)

tflite_model = converter.convert()

# These bytes are the raw, uncompressed model, so they go in a plain .tflite
# file. The .tflite.zlib name is reserved for the zlib-compressed copy that
# the evaluation step decompresses.
with open('./Group16_kws_'+version+'.tflite', 'wb') as fp:
    fp.write(tflite_model)

In [None]:
# Convert, zlib-compress and store the model as the .tflite.zlib artifact.
tflite_model = converter.convert()
tflite_compressed = zlib.compress(tflite_model)

with open('./Group16_kws_'+version+'.tflite.zlib', 'wb') as fp:
    fp.write(tflite_compressed)

In [None]:
# Delete the directory with keras models and checkpoints
# ignore_errors=True makes this a no-op when the folder is already gone.
keras_artifacts_dir = 'solutions Group16'
shutil.rmtree(keras_artifacts_dir, ignore_errors=True)

In [None]:
# Test the model
# Re-batch the test set one example at a time for the TFLite interpreter loop.
test_examples = test_ds.unbatch()
test_ds_tflite = test_examples.batch(1)

def acc_inference(interpreter):
    """Run ``interpreter`` over ``test_ds_tflite`` and return its accuracy.

    Args:
        interpreter: a TFLite interpreter; its tensors are allocated here.

    Returns:
        Sparse categorical accuracy over the module-level ``test_ds_tflite``
        dataset, as a NumPy scalar.
    """
    interpreter.allocate_tensors()
    input_index = interpreter.get_input_details()[0]["index"]
    output_index = interpreter.get_output_details()[0]["index"]

    accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

    for features, label in test_ds_tflite:
        interpreter.set_tensor(input_index, features)
        interpreter.invoke()
        prediction = interpreter.get_tensor(output_index)
        accuracy.update_state(label, prediction)

    return accuracy.result().numpy()

In [None]:
# Decompress the stored model, score it, and report (accuracy, compressed size in bytes).
compressed_model_path = 'Group16_kws_'+version+'.tflite.zlib'

with open(compressed_model_path, 'rb') as fp:
    model_zip = zlib.decompress(fp.read())

interpreter = tf.lite.Interpreter(model_content=model_zip)
tflite_acc = acc_inference(interpreter)
tflite_acc, os.path.getsize(compressed_model_path)

(0.9175, 40190)