In [1]:
import pandas as pd
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
column_names = ["TPSA(Tot)", "SAacc", "H-050", "MLOGP", "RDCHI", "GATS1p", "nN", "C-040", "quantitative response"]
df = pd.read_csv('qsar_aquatic_toxicity.csv', names=column_names, delimiter=';')
df.head()

Unnamed: 0,TPSA(Tot),SAacc,H-050,MLOGP,RDCHI,GATS1p,nN,C-040,quantitative response
0,0.0,0.0,0,2.419,1.225,0.667,0,0,3.74
1,0.0,0.0,0,2.638,1.401,0.632,0,0,4.33
2,9.23,11.0,0,5.799,2.93,0.486,0,0,7.019
3,9.23,11.0,0,5.453,2.887,0.495,0,0,6.723
4,9.23,11.0,0,4.068,2.758,0.695,0,0,5.979


In [3]:
train_df, test_df = train_test_split(df, test_size=0.2)
x_train, y_train = train_df.drop('quantitative response', axis=1), train_df['quantitative response']
x_test, y_test = test_df.drop('quantitative response', axis=1), test_df['quantitative response']

In [4]:
train_df.head()

Unnamed: 0,TPSA(Tot),SAacc,H-050,MLOGP,RDCHI,GATS1p,nN,C-040,quantitative response
438,0.0,0.0,0,3.374,2.075,1.204,0,0,3.792
359,67.79,107.839,2,2.347,3.243,1.4,1,1,3.917
447,45.34,34.106,0,1.297,2.042,1.933,0,0,2.829
37,40.46,85.367,2,5.255,3.001,0.485,0,0,4.838
248,0.0,0.0,0,6.47,2.821,0.469,0,0,8.44


In [5]:
DENSE1_SIZE = 128

model = tf.keras.models.Sequential([
  tf.keras.layers.Input(shape=(x_train.shape[1:])),
  tf.keras.layers.Dense(DENSE1_SIZE, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(1)
])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               1152      
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 1)                 129       
                                                                 
Total params: 1,281
Trainable params: 1,281
Non-trainable params: 0
_________________________________________________________________


In [6]:
predictions = model(x_test.to_numpy())

In [7]:
loss_fn = tf.keras.losses.MeanSquaredError()

In [8]:
loss_fn(y_test.to_numpy(), predictions).numpy()

231.00876

In [9]:
model.compile(optimizer='adam',
              loss=loss_fn,
              metrics=['accuracy', 'mse', 'mae'])

In [10]:
NUM_OF_EPOCHS = 5
model.fit(x_train, y_train,validation_data=(x_test,y_test), epochs=NUM_OF_EPOCHS)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x12341cfd6c8>

In [11]:
model.evaluate(x_test,  y_test, verbose=2)

4/4 - 0s - loss: 7.5775 - accuracy: 0.0000e+00 - mse: 7.5775 - mae: 2.2290 - 31ms/epoch - 8ms/step


[7.577507972717285, 0.0, 7.577507972717285, 2.2290146350860596]

In [12]:
def representative_dataset():
    for _ in range(100):
      data =  x_train
      yield [data.astype(np.float32)]

print(representative_dataset())

<generator object representative_dataset at 0x0000012340AAF9C8>


In [13]:
tf.saved_model.save(model, "saved_QSAR_model_keras_dir")

INFO:tensorflow:Assets written to: saved_QSAR_model_keras_dir\assets


In [14]:
import tensorflow as tf

converter = tf.lite.TFLiteConverter.from_saved_model("saved_QSAR_model_keras_dir")
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
tflite_model = converter.convert()

In [15]:
with open('QSARClassifyModel_new.tflite', 'wb') as f:
  f.write(tflite_model)

In [16]:
interpreter = tf.lite.Interpreter(model_path="QSARClassifyModel_new.tflite")
interpreter.allocate_tensors()

In [17]:
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print('input_details:\n', input_details)
print('output_details:\n', output_details)

input_details:
 [{'name': 'serving_default_input_1:0', 'index': 0, 'shape': array([1, 8]), 'shape_signature': array([-1,  8]), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
output_details:
 [{'name': 'StatefulPartitionedCall:0', 'index': 8, 'shape': array([1, 1]), 'shape_signature': array([-1,  1]), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]


In [18]:
x_test_np = x_test.to_numpy()
y_test_np = y_test.to_numpy()

In [19]:
# Test the model on random input data.
input_shape = input_details[0]['shape']
print(input_shape)
input_data = [x_test_np[0]]
#print(input_data)
input_data = np.array(input_data, dtype=np.float32)
interpreter.set_tensor(input_details[0]['index'], input_data)

interpreter.invoke()

# The function `get_tensor()` returns a copy of the tensor data.
# Use `tensor()` in order to get a pointer to the tensor.
output_data = interpreter.get_tensor(output_details[0]['index'])
print(output_data)
print(y_test_np[0])

[1 8]
[[6.6315546]]
2.072


In [23]:
# Function to convert some hex values into an array for C programming
import time, sys

# Function to convert some hex values into an array for C programming
def hex_to_c_array(hex_data, var_name):
    c_str = ""

    # Create header guard
    c_str += '#ifndef ' + var_name.upper() + '_H\n'
    c_str += "#define " + var_name.upper() + '_H\n\n'

    c_str += "/*\n Author: Mouli Sankaran \n"
    c_str += " CAUTION: This is an auto generated file.\n DO NOT EDIT OR MAKE ANY CHANGES TO IT.\n"

# Time stamping of this model data in the generated file
    localtime = time.asctime( time.localtime(time.time()) )
    c_str += " This model data was generated on " + localtime+ '\n\n'
    print("This model data was generated on:", localtime)

# Add information about the verisons of tools and packages used in generating this header file
    c_str += " Tools used:\n Python:" + str(sys.version) + "\n Numpy:" \
            + str(np.version.version) + "\n TensorFlow:" + str(sys.version) \
            + "\n Keras: "+ str(tf.keras.__version__) + "\n\n"
    print("Tools used: Python:", sys.version, "\n Numpy:", np.version.version, \
          "\n TensorFlow:", sys.version, "\n Keras: ", tf.keras.__version__, "\n\n")

# Training details of the model
    c_str += ' Model details are:\n'
    c_str += ' NUM_OF_EPOCHS  = ' + str(NUM_OF_EPOCHS) + '\n*/\n'

# Generate 'C' constants for the no. of nodes in each layer
    c_str +=   'const int ' + 'DENSE1_SIZE' + ' = ' + str(DENSE1_SIZE) + ';\n'

    # Add array length at the top of the file
    c_str += '\nalignas(8) const unsigned int ' + var_name + '_len = '\
            + str(len(hex_data)) + ';\n'

    # Declare C variable
    c_str += 'const unsigned char ' + var_name + '[] = {'
    hex_array = []
    for i, val in enumerate(hex_data):
        # Construct string from hex
        hex_str = format(val, '#04x')

        # Add formating so each line stays within 80 characters
        if (i + 1) < len(hex_data):
          hex_str += ','
        if (i + 1) % 12 == 0:
          hex_str += '\n'
        hex_array.append(hex_str)

    # Add closing brace
    c_str += '\n' + format(''.join(hex_array)) + '\n};\n\n'

    # Close out header guard
    c_str += '#endif //' + var_name.upper() + '_H'

    return c_str

In [24]:
with open("QSAR_model_esp32_new" + '.h', 'w') as file:
  file.write(hex_to_c_array(tflite_model, "QSAR_model_esp32_new"))

This model data was generated on: Wed Nov  6 00:20:26 2024
Tools used: Python: 3.7.16 (default, Jan 17 2023, 16:06:28) [MSC v.1916 64 bit (AMD64)] 
 Numpy: 1.19.5 
 TensorFlow: 3.7.16 (default, Jan 17 2023, 16:06:28) [MSC v.1916 64 bit (AMD64)] 
 Keras:  2.7.0 




In [29]:
size = len(x_test_np[0])
x_train_np = x_train.to_numpy()

In [31]:
def gen_x_test0_hex(data, name):
#    print('\n ', name, ':\n {')
    c_str = 'float ' + name + '[784] = { '
    for i in range(size):
        for j in range(size):
#            print(' ', data[i][j], 'f,', sep='', end='')
            c_str += "%s%f%s" % (' ', data[i][j], 'f,')

    c_str += '\n};\n'
#    print('\n};\n')
    return c_str


# Write TFLite model to a C source (or header) file
with open("QSAR_x_test0_data_new" + '.h', 'w') as file:
    file.write(gen_x_test0_hex(x_test_np, 'x_test0'))

# Write TFLite model to a C source (or header) file
with open("QSAR_x_train0_data_new" + '.h', 'w') as file:
    file.write(gen_x_test0_hex(x_train_np, 'x_train0'))