In [2]:
import tensorflow as tf
import pandas as pd
import numpy as np

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
def normalize_data(data):
    if isinstance(data, pd.DataFrame):
        normalized_df = data.copy()
        for column in data.columns:
            max_value = data[column].max()
            normalized_df[column] = data[column] / max_value
        return normalized_df
    else:
        max_value = data.max()
        normalized_series = data / max_value
        return normalized_series

In [5]:
data = pd.read_csv('Dataset_challenge.csv')

X_data = data.drop(columns=['Outcome'])  
y_data = data['Outcome'] 


X_data = normalize_data(X_data)

print(X_data)


     Pregnancies   Glucose  BloodPressure  SkinThickness   Insulin       BMI  \
0       0.352941  0.743719       0.590164       0.353535  0.000000  0.500745   
1       0.058824  0.427136       0.540984       0.292929  0.000000  0.396423   
2       0.470588  0.919598       0.524590       0.000000  0.000000  0.347243   
3       0.058824  0.447236       0.540984       0.232323  0.111111  0.418778   
4       0.000000  0.688442       0.327869       0.353535  0.198582  0.642325   
..           ...       ...            ...            ...       ...       ...   
763     0.588235  0.507538       0.622951       0.484848  0.212766  0.490313   
764     0.117647  0.613065       0.573770       0.272727  0.000000  0.548435   
765     0.294118  0.608040       0.590164       0.232323  0.132388  0.390462   
766     0.058824  0.633166       0.491803       0.000000  0.000000  0.448584   
767     0.058824  0.467337       0.573770       0.313131  0.000000  0.453055   

     DiabetesPedigreeFunction       Age

In [7]:
X_train = X_data.iloc[1:594]
X_test = X_data.iloc[595:]
y_train = y_data.iloc[1:594]
y_test = y_data.iloc[595:]
print(X_train)

     Pregnancies   Glucose  BloodPressure  SkinThickness   Insulin       BMI  \
1       0.058824  0.427136       0.540984       0.292929  0.000000  0.396423   
2       0.470588  0.919598       0.524590       0.000000  0.000000  0.347243   
3       0.058824  0.447236       0.540984       0.232323  0.111111  0.418778   
4       0.000000  0.688442       0.327869       0.353535  0.198582  0.642325   
5       0.294118  0.582915       0.606557       0.000000  0.000000  0.381520   
..           ...       ...            ...            ...       ...       ...   
589     0.000000  0.366834       0.000000       0.000000  0.000000  0.314456   
590     0.647059  0.557789       0.688525       0.404040  0.000000  0.697466   
591     0.117647  0.562814       0.639344       0.505051  0.165485  0.587183   
592     0.176471  0.663317       0.655738       0.000000  0.000000  0.512668   
593     0.117647  0.412060       0.426230       0.222222  0.135934  0.424739   

     DiabetesPedigreeFunction       Age

In [8]:
#Imbalance in the dataset
from collections import Counter
from sklearn.datasets import make_classification
from imblearn.over_sampling import SMOTE
print('Original dataset shape %s' % Counter(y_train))

sm = SMOTE(random_state=42)
X_train_sm, y_train_sm = sm.fit_resample(X_train, y_train)
print('Resampled dataset shape %s' % Counter(y_train_sm))


Original dataset shape Counter({0: 388, 1: 205})
Resampled dataset shape Counter({0: 388, 1: 388})


In [9]:
X_train, X_val, y_train, y_val = train_test_split(X_train_sm, y_train_sm, test_size=0.2, random_state=42)
print(X_train)

     Pregnancies   Glucose  BloodPressure  SkinThickness   Insulin       BMI  \
754     0.018990  0.677245       0.460049       0.386145  0.207703  0.544034   
55      0.411765  0.939698       0.557377       0.393939  0.359338  0.561848   
405     0.235294  0.577889       0.590164       0.000000  0.000000  0.430700   
673     0.447248  0.616501       0.596669       0.000000  0.000000  0.470799   
363     0.235294  0.738693       0.606557       0.252525  0.346336  0.520119   
..           ...       ...            ...            ...       ...       ...   
71      0.764706  0.633166       0.737705       0.000000  0.000000  0.646796   
106     0.235294  0.723618       0.475410       0.282828  0.165485  0.439642   
270     0.117647  0.542714       0.508197       0.323232  0.066194  0.375559   
435     0.705882  0.703518       0.696721       0.333333  0.000000  0.557377   
102     0.058824  0.407035       0.590164       0.181818  0.047281  0.396423   

     DiabetesPedigreeFunction       Age

In [9]:
def model_builder(hp):
  model = tf.keras.Sequential()
  model.add(tf.keras.layers.Flatten(input_shape=(8, )))

  hp_activation = hp.Choice('activation', values=['relu', 'sigmoid'])
  hp_layer_1 = hp.Int('layer_1', min_value=32, max_value=1024, step=8)
  hp_layer_2 = hp.Int('layer_2', min_value=32, max_value=1024, step=8)
  hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

  model.add(tf.keras.layers.Dense(units=hp_layer_1, activation=hp_activation))
  model.add(tf.keras.layers.Dense(units=hp_layer_2, activation=hp_activation))
  model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

  model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp_learning_rate),
                loss='binary_crossentropy',
                metrics=['accuracy'])
  
  return model

In [10]:
import keras_tuner as kt

tuner = kt.Hyperband(model_builder,
                     objective='val_accuracy',
                     max_epochs=200,
                     factor=3,
                     directory='dir',
                     project_name='BTL_AI')

Reloading Tuner from dir\BTL_AI\tuner0.json


In [11]:
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

In [12]:
tuner.search(X_train, y_train, epochs=200, validation_split=0.2, callbacks=[early_stopping])

In [13]:
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

In [14]:
best_model = tuner.get_best_models(num_models=1)[0]
print(best_model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 8)                 0         
                                                                 
 dense (Dense)               (None, 864)               7776      
                                                                 
 dense_1 (Dense)             (None, 864)               747360    
                                                                 
 dense_2 (Dense)             (None, 1)                 865       
                                                                 
Total params: 756001 (2.88 MB)
Trainable params: 756001 (2.88 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [15]:
model = tuner.hypermodel.build(best_hps)
history = model.fit(X_train, y_train, epochs=50, validation_split=0.2,
                    callbacks=[early_stopping])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50


In [16]:
weights = best_model.get_weights()
print(weights)

[array([[ 0.08722449, -0.06033567, -0.07263881, ..., -0.04102999,
        -0.04065112,  0.02813848],
       [-0.11356807, -0.05025801, -0.06280798, ...,  0.07084446,
         0.01555599, -0.09209627],
       [-0.03268648,  0.00149031,  0.03181622, ..., -0.07889625,
         0.04198453,  0.0613853 ],
       ...,
       [ 0.03600121,  0.07062747,  0.01881161, ..., -0.03458525,
         0.04421854,  0.05278911],
       [ 0.09497949,  0.02924438, -0.01719861, ..., -0.06155599,
        -0.00596125,  0.00202936],
       [-0.00821695, -0.08559094,  0.0167903 , ...,  0.01711818,
         0.05130079,  0.00742987]], dtype=float32), array([-1.02262367e-02,  4.00574543e-02,  7.45215220e-03, -4.13251631e-02,
        0.00000000e+00,  3.17711271e-02,  1.34358145e-02, -3.26653458e-02,
        1.97532903e-02, -3.58848646e-02,  9.27947927e-03, -3.12144216e-03,
       -8.37935321e-03, -2.96257623e-02, -3.92575143e-03, -3.97920310e-02,
       -2.48953197e-02, -2.91972347e-02,  1.80339813e-02,  3.92031036e

In [25]:
from keras.models import load_model
best_model.save("best_model.h5")

  saving_api.save_model(


In [26]:
import tensorflow_model_optimization as tfmot

quantize_model = tfmot.quantization.keras.quantize_model

# q_aware stands for for quantization aware.
q_aware_model = quantize_model(best_model)

# `quantize_model` requires a recompile.
q_aware_model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

q_aware_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 quantize_layer_1 (Quantize  (None, 8)                 3         
 Layer)                                                          
                                                                 
 quant_flatten (QuantizeWra  (None, 8)                 1         
 pperV2)                                                         
                                                                 
 quant_dense (QuantizeWrapp  (None, 864)               7781      
 erV2)                                                           
                                                                 
 quant_dense_1 (QuantizeWra  (None, 864)               747365    
 pperV2)                                                         
                                                                 
 quant_dense_2 (QuantizeWra  (None, 1)                 8

In [27]:
q_aware_model.fit(X_train, y_train, epochs=1)



<keras.src.callbacks.History at 0x2785d709ee0>

In [28]:
q_aware_model.evaluate(X_test, y_test)



[0.5255584120750427, 0.7745664715766907]

In [29]:
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

tflite_qaware_model = converter.convert()

INFO:tensorflow:Assets written to: C:\Users\PHONG\AppData\Local\Temp\tmpedqtcjki\assets


INFO:tensorflow:Assets written to: C:\Users\PHONG\AppData\Local\Temp\tmpedqtcjki\assets


In [30]:
len(tflite_qaware_model)

764664

In [31]:
with open("tflite_qaware_model.tflite", 'wb') as f:
    f.write(tflite_qaware_model)