Reference websites:
* https://www.hackster.io/news/easy-tinyml-on-esp32-and-arduino-a9dbc509f26c
* https://github.com/eloquentarduino/EloquentTinyML
* https://github.com/atomic14/tensorflow-lite-esp32
* https://github.com/eloquentarduino/tinymlgen
* https://www.tensorflow.org/lite/performance/post_training_quantization#full_integer_quantization

#### Code below is used to generate the FFT TFLite Model 
* Healthy data: Own dataset
* Unhealthy data: Online dataset

1. Loading of dataset

In [None]:
import pandas as pd
import numpy as np
import glob

# Column positions to read from every CSV file (only the vibration data)
col_indices = [1, 2, 3]

# Collect the CSV file paths of both datasets. glob returns paths in an
# arbitrary, filesystem-dependent order, so they are sorted to keep the
# concatenated data in the same order on every run and machine.
normal_filename = sorted(glob.glob("./Own_data/*.csv"))
imbalance_data = sorted(glob.glob("./Online_data/Machinary_Fault_dataset/imbalance/imbalance/35g/*.csv"))

def dataReader(path_names, col_indices):
    """Read the selected columns of several CSV files into one DataFrame.

    Parameters
    ----------
    path_names : iterable of str
        Paths of the header-less CSV files, in the order they are stacked.
    col_indices : list of int
        Positional indices of the columns to keep from each file.

    Returns
    -------
    pandas.DataFrame
        The rows of all files stacked in order with a fresh 0..n-1 index.
        An empty DataFrame is returned when ``path_names`` is empty.
    """
    frames = [
        pd.read_csv(path, header=None, usecols=col_indices)
        for path in path_names
    ]
    if not frames:
        # pd.concat raises ValueError on an empty list; keep the
        # "no files -> empty frame" behaviour explicit instead.
        return pd.DataFrame()
    # Concatenate once: growing a DataFrame inside the loop copies all
    # previously read rows on every iteration.
    return pd.concat(frames, ignore_index=True)


raw_data_norm = dataReader(normal_filename, col_indices)
# Swap the second and third loaded columns (radial and tangential data).
# The right-hand side is converted to a plain array so the assignment is
# purely positional: when a DataFrame is assigned, pandas may align it on
# column labels first (version dependent), which silently undoes the swap.
raw_data_norm.iloc[:, [1, 2]] = raw_data_norm.iloc[:, [2, 1]].to_numpy()
raw_data_norm = raw_data_norm / 1000 # Convert to g

raw_data_imbalance = dataReader(imbalance_data, col_indices)

# Normalise the data
def normalise(df):
    """Return a copy of *df* with every column standardised (z-score).

    Each column has its own mean subtracted and is then divided by its own
    standard deviation (pandas' default ``std``). The input is not modified.
    """
    def _zscore(column):
        centred = column - column.mean()
        return centred / column.std()

    return df.apply(_zscore, axis=0)

# To test without normalisation, assign the raw frames directly instead:
# data_norm = raw_data_norm
# data_imbalance = raw_data_imbalance

# Testing with normalisation: each dataset is standardised on its own,
# using its own per-column mean and standard deviation.
data_norm, data_imbalance = (
    normalise(frame) for frame in (raw_data_norm, raw_data_imbalance)
)

2. Checking if data is loaded in properly

In [None]:
# Column dtypes / non-null counts of both datasets
for frame in (data_norm, data_imbalance):
    print(frame.info())

# First rows of both datasets
for frame in (data_norm, data_imbalance):
    print(frame.head())

3. Downsampling to reduce size

In [None]:
def downSampler(data, a, b):
    """Downsample *data* by averaging consecutive, non-overlapping windows.

    Parameters
    ----------
    data : pandas.DataFrame
        The samples to downsample, one sample per row.
    a : int
        Row index at which the first window starts (expected to be >= 0).
    b : int
        Sampling rate, i.e. the number of rows averaged into one output row.

    Returns
    -------
    pandas.DataFrame
        One row per complete window, with the same columns as *data*.
        Rows left over after the last complete window are dropped.
    """
    rows_available = max(len(data) - a, 0)
    window_count = rows_available // b
    # Window i covers rows [a + i*b, a + (i+1)*b). The window has to advance
    # with i; a fixed a:b slice would give identical output rows.
    downsampled_data = [
        data.iloc[a + i * b:a + (i + 1) * b, :].sum() / b
        for i in range(window_count)
    ]
    return pd.DataFrame(downsampled_data, columns=data.columns)

# Create downsampled datasets for excluding microphone data.
# Both start at row 0; the two datasets use different sampling rates.
ds_data_norm = downSampler(data_norm, a=0, b=2500)
ds_data_imbalance = downSampler(data_imbalance, a=0, b=5000)

4. Checking that data is downsampled properly

In [None]:
# (rows, columns) of each downsampled dataset
for downsampled in (ds_data_norm, ds_data_imbalance):
    print(downsampled.shape)

5. Data processing. The FFTConvolve method is used here

In [None]:
from scipy import signal
def FFTConvolve(data):
    """Convolve *data* with its row-reversed copy using FFT convolution.

    *data* is passed to ``scipy.signal.fftconvolve`` as-is, so a 2-D input is
    convolved along both axes. In ``'full'`` mode the output therefore has
    ``2 * rows - 1`` rows and ``2 * columns - 1`` columns.

    Returns the result wrapped in a new DataFrame with default labels.
    """
    reversed_rows = data[::-1]
    convolved = signal.fftconvolve(data, reversed_rows, mode='full')
    return pd.DataFrame(convolved)

# Create FFTConvolved datasets for excluding microphone data
ds_data_norm_fftconvole, ds_data_imbalance_fftconvole = map(
    FFTConvolve, (ds_data_norm, ds_data_imbalance)
)

6. Checking that the data processing step is done correctly

In [None]:
# Check if data is FFTConvolved correctly: print (rows, columns) of each result
for convolved in (ds_data_norm_fftconvole, ds_data_imbalance_fftconvole):
    print(convolved.shape)

7. Data labelling

In [None]:
# Setting up labels for both datasets: 0 for the normal data, 1 for the
# imbalance data, one label per row of the corresponding processed frame
y_0 = pd.DataFrame(np.zeros(len(ds_data_norm_fftconvole), dtype=int))
y_1 = pd.DataFrame(np.ones(len(ds_data_imbalance_fftconvole), dtype=int))
# ignore_index gives the labels one continuous 0..n-1 index instead of two
# overlapping ranges, so label-based lookups on y stay unambiguous.
y = pd.concat([y_0, y_1], axis=0, ignore_index=True)
y # Check if labels are set correctly

8. Preparing data to train model

In [None]:
# Stack the processed normal rows on top of the processed imbalance rows and
# renumber the index from 0
feature_frames = [ds_data_norm_fftconvole, ds_data_imbalance_fftconvole]
data_x = pd.concat(feature_frames, ignore_index=True)
data_x # Check if data is concatenated correctly

9. Splitting the data

In [None]:
from sklearn.model_selection import train_test_split

# Split the dataset into training (75 %) and testing (25 %) sets.
# random_state makes the shuffle reproducible between runs; stratify keeps
# the ratio of the two classes the same in both sets.
x_train, x_test, y_train, y_test = train_test_split(
    data_x, y, test_size=0.25, shuffle=True, random_state=42, stratify=y
)

10. Training of model

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential

# Early-stopping callback: monitors the training loss ('loss') with a patience of 2 epochs
early_stop = EarlyStopping(monitor='loss', patience=2)

# Needed for quantization, used to calibrate the range of all floating-point tensors in the model
# batch needs to be 1 in order to work, 1000 samples are enough to calibrate the range
def representative_dataset():
    """Yield calibration samples for the TFLite converter.

    Takes at most 1000 single-sample batches from the module-level
    ``x_train`` and yields each one as a one-element list holding a
    float32 tensor.
    """
    calibration_batches = (
        tf.data.Dataset.from_tensor_slices(x_train).batch(1).take(1000)
    )
    for sample in calibration_batches:
        yield [tf.dtypes.cast(sample, tf.float32)]

def get_model():
    """Build, compile and train the dense classifier, then return it.

    The network is 32 -> 64 -> 2 dense units with ReLU activations and a
    softmax output. It is trained on the module-level ``x_train`` /
    ``y_train`` for up to 10 epochs, holding out 20 % of the training data
    for validation. The module-level ``early_stop`` callback is passed to
    ``fit`` so training can end before the epoch limit.

    Returns
    -------
    tensorflow.keras.models.Sequential
        The trained model.
    """
    model = Sequential()
    # Five input features. NOTE(review): presumably the column count of the
    # FFT-convolved frames (2 * 3 - 1 for three vibration columns) - confirm.
    model.add(Dense(32, activation='relu', input_shape=(5,)))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(2, activation='softmax')) # Output layer needs to correspond to the number of classes for softmax
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    # early_stop was created for this training run but never handed to fit()
    model.fit(x_train, y_train, epochs=10, validation_split=0.2, callbacks=[early_stop])
    model.summary()
    return model

FFTmodel = get_model()

# Convert the trained Keras model to TFLite with the default optimizations;
# the representative dataset is supplied to the converter for calibration.
converter = tf.lite.TFLiteConverter.from_keras_model(FFTmodel)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
tflite_model = converter.convert()

# Save the model to disk; the context manager makes sure the file is flushed
# and closed even if the write fails.
with open("FFT_model_quantized.tflite", "wb") as model_file:
    model_file.write(tflite_model)

11. Converting the model to a C array (run the line below in bash)

`xxd -i FFT_model_quantized.tflite > FFT_model_quantized.cc`