Reference websites:
* https://www.hackster.io/news/easy-tinyml-on-esp32-and-arduino-a9dbc509f26c
* https://github.com/eloquentarduino/EloquentTinyML
* https://github.com/atomic14/tensorflow-lite-esp32
* https://github.com/eloquentarduino/tinymlgen
* https://www.tensorflow.org/lite/performance/post_training_quantization#full_integer_quantization
* https://medium.com/mlearning-ai/optimizing-tflite-models-for-on-edge-machine-learning-for-efficiency-a-comparison-of-quantization-2c0123959cb6

Links to check out:
* https://www.tensorflow.org/model_optimization

#### Code below is used to generate the FFT TFLite Model 
* Healthy data: Own dataset
* Unhealthy data: Online dataset

1. Loading of dataset

In [1]:
import pandas as pd
import numpy as np
import glob

# Read in only the vibration data from the Excel spreadsheet
col_indices = [1,2,3]

# Load the Excel spreadsheet
normal_filename = glob.glob("C:/Users/jared/OneDrive - National University of Singapore/Y2/S2/3301R ML/Data_processing/Own_data/*.csv")
imbalance_data = glob.glob("C:/Users/jared/OneDrive - National University of Singapore/Y2/S2/3301R ML/Data_processing/Online_data/Machinary_Fault_dataset/imbalance/imbalance/35g/*.csv")

def dataReader(path_names, col_indices):
    data_n = pd.DataFrame()
    for i in path_names:
        # Read only columns 0 to 6 which contains rotational frequency (1 column) + vibration data (6 columns)
        low_data = pd.read_csv(i,header=None,usecols=col_indices) 
        data_n = pd.concat([data_n,low_data],ignore_index=True)
    return data_n


raw_data_norm = dataReader(normal_filename, col_indices)
raw_data_norm.iloc[:, [1, 2]] = raw_data_norm.iloc[:, [2, 1]] # Swap columns for radial and tangential data
raw_data_norm = raw_data_norm / 1000 # Convert to g

raw_data_imbalance = dataReader(imbalance_data, col_indices)

# Normalise the data
def normalise(df):
    df_normalized = df.apply(lambda x: (x - x.mean()) / x.std(), axis=0)
    return df_normalized

# Testing without normalisation
# data_norm = raw_data_norm
# data_imbalance = raw_data_imbalance

# Testing with normalisation
data_norm = normalise(raw_data_norm)
data_imbalance = normalise(raw_data_imbalance)

2. Checking if data is loaded in properly

In [2]:
print(data_norm.info())
print(data_imbalance.info())

print(data_norm.head())
print(data_imbalance.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5098741 entries, 0 to 5098740
Data columns (total 3 columns):
 #   Column  Dtype  
---  ------  -----  
 0   1       float64
 1   2       float64
 2   3       float64
dtypes: float64(3)
memory usage: 116.7 MB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11250000 entries, 0 to 11249999
Data columns (total 3 columns):
 #   Column  Dtype  
---  ------  -----  
 0   1       float64
 1   2       float64
 2   3       float64
dtypes: float64(3)
memory usage: 257.5 MB
None
          1         2         3
0  1.620864 -0.940332  0.801613
1  1.462940 -0.219391 -0.682770
2  0.836181 -0.630328  0.642449
3  0.327864  0.119451 -1.184484
4 -0.229804  0.703413 -0.855775
          1         2         3
0 -1.508340 -0.250733 -0.164959
1  0.791054  0.237543  0.107908
2 -1.069483 -0.239004 -0.094518
3  0.032145  0.158992  0.125634
4 -0.093121 -0.092139  0.039919


3. Downsampling to reduce size

In [9]:
def downSampler(data, a, b):
    """
    data = data
    a = start index
    b = sampling rate
    """
    x = b
    downsampled_data = [data.iloc[a:b,:].sum()/x for i in range(int(len(data)/x))]
    return pd.DataFrame(downsampled_data)

# Create donwsampled datasets for excluding microphone data
ds_data_norm = downSampler(data_norm, 0, 2500)
ds_data_imbalance = downSampler(data_imbalance, 0, 5000)

4. Checking that data is downsampled properly

In [10]:
print(ds_data_norm.shape)
print(ds_data_imbalance.shape)

(2039, 3)
(2250, 3)


5. Data processing. FFTConolve method is used here

In [11]:
from scipy import signal
def FFTConvolve(data):
    autocorr = signal.fftconvolve(data,data[::-1],mode='full')
    return pd.DataFrame(autocorr)

# Create FFTConvolved datasets for excluding microphone data
ds_data_norm_fftconvole = FFTConvolve(ds_data_norm)
ds_data_imbalance_fftconvole = FFTConvolve(ds_data_imbalance)

6. Checking that the data processing step is done correctly

In [12]:
print(ds_data_norm_fftconvole.shape) # Check if data is FFTConvolved correctly
print(ds_data_imbalance_fftconvole.shape) # Check if data is FFTConvolved correctly

(4077, 5)
(4499, 5)


7. Data labelling

In [13]:
# Setting up labels for both datasets
y_0 = pd.DataFrame(np.zeros(int(len(ds_data_norm_fftconvole)),dtype=int))
y_1 = pd.DataFrame(np.ones(int(len(ds_data_imbalance_fftconvole)),dtype=int))
y = pd.concat([y_0,y_1],axis=0)
y # Check if labels are set correctly

Unnamed: 0,0
0,0
1,0
2,0
3,0
4,0
...,...
4494,1
4495,1
4496,1
4497,1


8. Preparing data to train model

In [15]:
data_x = pd.concat([ds_data_norm_fftconvole, ds_data_imbalance_fftconvole], ignore_index=True) # Concatenate all the data
data_x # Check if data is concatenated correctly

Unnamed: 0,0,1,2,3,4
0,0.000626,0.000637,0.000390,0.000116,0.000021
1,0.001253,0.001273,0.000781,0.000232,0.000042
2,0.001879,0.001910,0.001171,0.000349,0.000063
3,0.002505,0.002546,0.001562,0.000465,0.000084
4,0.003131,0.003183,0.001952,0.000581,0.000104
...,...,...,...,...,...
8571,0.821996,0.310837,0.129171,0.018867,0.003028
8572,0.657597,0.248670,0.103336,0.015093,0.002423
8573,0.493198,0.186502,0.077502,0.011320,0.001817
8574,0.328799,0.124335,0.051668,0.007547,0.001211


In [20]:
data_x_to_csv = pd.concat([data_x.reset_index(drop=True), y.reset_index(drop=True)], axis=1) # Concatenate data and labels
data_x_to_csv # Check if data and labels are concatenated correctly

Unnamed: 0,0,1,2,3,4,0.1
0,0.000626,0.000637,0.000390,0.000116,0.000021,0
1,0.001253,0.001273,0.000781,0.000232,0.000042,0
2,0.001879,0.001910,0.001171,0.000349,0.000063,0
3,0.002505,0.002546,0.001562,0.000465,0.000084,0
4,0.003131,0.003183,0.001952,0.000581,0.000104,0
...,...,...,...,...,...,...
8571,0.821996,0.310837,0.129171,0.018867,0.003028,1
8572,0.657597,0.248670,0.103336,0.015093,0.002423,1
8573,0.493198,0.186502,0.077502,0.011320,0.001817,1
8574,0.328799,0.124335,0.051668,0.007547,0.001211,1


In [None]:
data_x_to_csv.to_csv('C:/Users/jared/Desktop/TFLite_FFTtestdata.csv',index=False,header=False) # Save to csv file

9. Splitting the data

In [12]:
from sklearn.model_selection import train_test_split

# Split the dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(data_x, y, test_size=0.25, shuffle=True)

10. Training of model

In [13]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential

early_stop = EarlyStopping(monitor='loss', patience=2)

def representative_dataset():
    for val in x_test:
        yield [np.array(val, dtype=np.float32)]

def get_model():
    model = Sequential()
    model.add(Dense(32, activation='relu', input_shape=(5,)))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(2, activation='softmax')) # Output layer needs to correspond to the number of classes for softmax
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(x_train, y_train, epochs=10, validation_split=0.2)
    model.summary()
    return model

FFTmodel = get_model()
converter = tf.lite.TFLiteConverter.from_keras_model(FFTmodel)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
tflite_model = converter.convert()

# Save the model to disk
open("FFT_model_quantized.tflite", "wb").write(tflite_model)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 32)                192       
                                                                 
 dense_1 (Dense)             (None, 64)                2112      
                                                                 
 dense_2 (Dense)             (None, 2)                 130       
                                                                 
Total params: 2434 (9.51 KB)
Trainable params: 2434 (9.51 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
INFO:tensorflow:Assets written to: C:\Users\jared\AppData\Local\Temp\tmpuxid15o7\assets


INFO:tensorflow:Assets written to: C:\Users\jared\AppData\Local\Temp\tmpuxid15o7\assets


11832

11. Converting of model to C array (Run below line on bash)

`xxd -i FFT_model_fullint_quantized.tflite > FFT_model_fullint_quantized.cc`

#### Building the multiple models using different quantization methods

In [27]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tinymlgen import port

from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dropout
from tensorflow.keras.models import Sequential
early_stop = EarlyStopping(monitor='loss', patience=2)

## This method to get representative dataset results in lower accuracy, need to look into it
# def representative_dataset():
#     for data in tf.data.Dataset.from_tensor_slices(x_te).batch(1).take(x_test.size):
#         yield [tf.dtypes.cast(data, tf.float32)]

def representative_dataset():
    for val in x_test:
        yield [np.array(val, dtype=np.float32)]


def get_model():
    # Initialising ANN model for 2 columns
    model = Sequential()
    model.add(Dense(32, activation='relu', input_shape=(5,)))
    # model.add(Dense(64, activation='relu',kernel_initializer='random_uniform'))
    model.add(Dense(64, activation='relu'))
    # model.add(Dense(64, activation='relu',kernel_initializer='random_uniform'))
    # model.add(Dense(32, activation='relu',kernel_initializer='random_uniform'))
    model.add(Dense(2, activation='softmax'))
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(x_train, y_train, epochs=10, validation_split=0.2)
    model.summary()
    return model

FFTmodel = get_model()

# Dyanmic range quantization
dyanmic_converter = tf.lite.TFLiteConverter.from_keras_model(FFTmodel)
dyanmic_converter.optimizations = [tf.lite.Optimize.DEFAULT]
dyanmic_tflite_model = dyanmic_converter.convert()

# Float16 quantization
float16_converter = tf.lite.TFLiteConverter.from_keras_model(FFTmodel)
float16_converter.optimizations = [tf.lite.Optimize.DEFAULT]
float16_converter.target_spec.supported_types = [tf.float16]
float16_tflite_model = float16_converter.convert()

# Full integer quantization
# Only this one able to load on the ESP32 currently !!
fullint_converter = tf.lite.TFLiteConverter.from_keras_model(FFTmodel)
fullint_converter.optimizations = [tf.lite.Optimize.DEFAULT]
fullint_converter.representative_dataset = representative_dataset
fullint_tflite_model = fullint_converter.convert()

# Save the models to disk
open("FFT_model_dynamic_quantized.tflite", "wb").write(dyanmic_tflite_model)
open("FFT_model_float16_quantized.tflite", "wb").write(float16_tflite_model)
open("FFT_model_fullint_quantized.tflite", "wb").write(fullint_tflite_model)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_21 (Dense)            (None, 32)                192       
                                                                 
 dense_22 (Dense)            (None, 64)                2112      
                                                                 
 dense_23 (Dense)            (None, 2)                 130       
                                                                 
Total params: 2434 (9.51 KB)
Trainable params: 2434 (9.51 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
INFO:tensorflow:Assets written to: C:\Users\jared\AppData\Local\Temp\tmp72cpcw8r\assets


INFO:tensorflow:Assets written to: C:\Users\jared\AppData\Local\Temp\tmp72cpcw8r\assets


INFO:tensorflow:Assets written to: C:\Users\jared\AppData\Local\Temp\tmpanwq2smc\assets


INFO:tensorflow:Assets written to: C:\Users\jared\AppData\Local\Temp\tmpanwq2smc\assets


INFO:tensorflow:Assets written to: C:\Users\jared\AppData\Local\Temp\tmpt3sl6fh3\assets


INFO:tensorflow:Assets written to: C:\Users\jared\AppData\Local\Temp\tmpt3sl6fh3\assets


11904

#### Evaluating the models built using different quantization methods

In [28]:
# Testing the baseline model on the test dataset.

# Evaluating the model on the test dataset.
_, baseline_model_accuracy = FFTmodel.evaluate(x=x_test, y=y_test, verbose=0)

# Printing the baseline test accuracy in percentage.
print('The Baseline test accuracy:', baseline_model_accuracy * 100)

The Baseline test accuracy: 99.95335936546326


In [29]:
# A helper function to evaluate the TF Lite model using "test" dataset.
def evaluate_model(interpreter):
    # Get input and output tensors.
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    input_shape = input_details[0]['shape']
    num_test_samples = len(x_test)

    # Run predictions on every set in the "test" dataset.
    prediction_y = []
    for i in range(num_test_samples):

        # Pre-processing the data to fit it with the model's input.
        input_data = np.array(x_test.iloc[i,:], dtype=np.float32)
        input_data = np.expand_dims(input_data, axis=0)
        interpreter.set_tensor(input_details[0]['index'], input_data)

        # Run inference.
        interpreter.invoke()

        # Post-processing: remove batch dimension and find the digit with highest
        # probability.
        output_data = interpreter.get_tensor(output_details[0]['index'])
        prediction_y.append(output_data.argmax())

    # Compare prediction results with ground truth labels to calculate accuracy.
    accurate_count = 0
    for index in range(len(prediction_y)):
        if prediction_y[index] == y_test.iloc[index][0]:
            accurate_count += 1
    accuracy = accurate_count * 1.0 / len(prediction_y)

    return accuracy


# Passing the FP-16 TF Lite model to the interpreter.
interpreter = tf.lite.Interpreter('FFT_model_float16_quantized.tflite')

# Allocating tensors.
interpreter.allocate_tensors()

# Evaluating the model on the test dataset.
test_accuracy_fp_16 = evaluate_model(interpreter)

# Printing the test accuracy for the FP-16 quantized TFLite model and the baseline Keras model.
print('Float 16 Quantized TFLite Model Test Accuracy:', test_accuracy_fp_16*100)

# Testing the full integer quantized model on the test dataset.

# Passing the full integer quantized TF Lite model to the interpreter.
interpreter = tf.lite.Interpreter('FFT_model_fullint_quantized.tflite')

# Allocating tensors.
interpreter.allocate_tensors()

# Evaluating the model on the test dataset.
test_accuracy_int = evaluate_model(interpreter)

# Printing the test accuracy for the full integer quantized TFLite model and the baseline Keras model.
print('Full Integer Quantized TFLite Model Test Accuracy:', test_accuracy_int*100)

# Testing the dynamic quantized model on the test dataset.

# Passing the dynamic quantized TF Lite model to the interpreter.
interpreter = tf.lite.Interpreter('FFT_model_dynamic_quantized.tflite')

# Allocating tensors.
interpreter.allocate_tensors()

# Evaluating the model on the test dataset.
test_accuracy_dynamic = evaluate_model(interpreter)

# Printing the test accuracy for the dynamic quantized TFLite model and the baseline Keras model.
print('Dynamic Quantized TFLite Model Test Accuracy:', test_accuracy_dynamic*100)

# Printing the test accuracy for the baseline Keras model.
print('Baseline Keras Model Test Accuracy:', baseline_model_accuracy*100)

Float 16 Quantized TFLite Model Test Accuracy: 99.95335820895522
Full Integer Quantized TFLite Model Test Accuracy: 99.95335820895522
Dynamic Quantized TFLite Model Test Accuracy: 99.95335820895522
Baseline Keras Model Test Accuracy: 99.95335936546326


Current to-dos:
* Find way to build FFTConvolve model without overfitting
* Find way to do hyperparameters testing for models
* Create template to test TF model and TFLite models quickly