In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, mixed_precision, optimizers

from sklearn.model_selection import train_test_split

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import os

import keras_tuner as kt

seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)
print('Compute dtype:', policy.compute_dtype)
print('Variable dtype:', policy.variable_dtype)

root_dir = "E:\\Repositories\\personal-projects\\ai-ml-projects\\bmi-classification"
os.chdir(root_dir)

data_path = os.path.join(root_dir, "data", "processed")
log_dir = os.path.join(root_dir, "logs")

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3060 Laptop GPU, compute capability 8.6
Compute dtype: float16
Variable dtype: float32


In [2]:
class Dataset:
    def __init__(self, data_path):
        self.data_path = data_path

    def load_data(self):
        data_frames = []
        for file in os.listdir(self.data_path):
            if file.endswith(".csv"):
                data_buffer = pd.read_csv(os.path.join(self.data_path, file))
                data_frames.append(data_buffer)
        
        if not data_frames:
            print("No CSV files found in the specified directory.")
            return None
        
        # Combine all data frames into a single data frame
        combined_data = pd.concat(data_frames, ignore_index=True)
        
        return combined_data

dataset = Dataset(data_path)
data = dataset.load_data()

if data is not None:
    print(data.head())


     Gender    Height    Weight  Index_0  Index_1  Index_2  Index_3  Index_4  \
0  1.011174  0.235303 -0.310062      0.0      0.0      0.0      0.0      1.0   
1  1.011174  1.147330 -0.588376      0.0      0.0      1.0      0.0      0.0   
2 -0.988950  0.904123  0.122870      0.0      0.0      0.0      0.0      1.0   
3 -0.988950  1.512141 -0.062672      0.0      0.0      0.0      1.0      0.0   
4  1.011174 -1.284742 -1.392394      0.0      0.0      0.0      1.0      0.0   

   Index_5  
0      0.0  
1      0.0  
2      0.0  
3      0.0  
4      0.0  


In [37]:
# Ensure 'X' contains all feature columns except the one-hot encoded 'Index' columns
X = data.drop(columns=data.columns[-6:]).astype(np.float32)
y = data[data.columns[-6:]].astype(np.float32)

X_train, X_subset, y_train, y_subset = train_test_split(X, y, test_size=0.2, random_state=seed)
X_val, X_test, y_val, y_test = train_test_split(X_subset, y_subset, test_size=0.5, random_state=seed)


In [59]:
X_train

Unnamed: 0,Gender,Height,Weight
10,1.011173,1.512141,-0.773919
334,-0.988950,-0.798328,-1.547013
244,-0.988950,0.782519,-1.732556
678,-0.988950,1.329736,1.019660
306,1.011173,0.782519,1.452593
...,...,...,...
106,1.011173,-0.251112,1.669059
270,1.011173,-1.527950,0.741346
860,-0.988950,1.086528,-0.804843
435,1.011173,-0.129508,1.607211


In [4]:
def plot_loss_and_accuracy(history):
    plt.figure(figsize=(12, 5))

    # Plotting Loss
    plt.subplot(1, 2, 1)
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.title('Loss Over Epochs')

    # Plotting Accuracy
    plt.subplot(1, 2, 2)
    plt.plot(history.history['mae'], label='Train mae')
    plt.plot(history.history['val_mae'], label='Validation mae')
    plt.xlabel('Epoch')
    plt.ylabel('mae')
    plt.legend()
    plt.grid(True)
    plt.title('mae Over Epochs')

    plt.tight_layout()
    plt.show()

In [5]:
class HyperModel(kt.HyperModel):
    def build(self, hp):
        model = keras.Sequential()
        model.add(layers.Input(shape=(X_train.shape[1],)))
        
        for i in range(hp.Int('num_layers', 2, 5)):
            model.add(layers.Dense(units=hp.Int(f'units_{i}', min_value=32, max_value=512, step=32),
                                   activation='relu'))
            if hp.Boolean(f'use_batchnorm_{i}'):
                model.add(layers.BatchNormalization())
            
        model.add(layers.Dropout(hp.Float('dropout', 0.1, 0.5, step=0.1)))
        model.add(layers.Dense(y_train.shape[1], activation='softmax'))

        optimizer_choice = hp.Choice('optimizer', values=['adam', 'sgd', 'rmsprop'])
        lr = hp.Float('learning_rate', 1e-7, 1e-3, sampling='log')

        if optimizer_choice == 'adam':
            optimizer = optimizers.Adam(learning_rate=lr)
        elif optimizer_choice == 'sgd':
            optimizer = optimizers.SGD(learning_rate=lr)
        else:
            optimizer = optimizers.RMSprop(learning_rate=lr)
        
        model.compile(optimizer=optimizer,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        
        return model
    
    def fit(self, hp, model, *args, **kwargs):
        return model.fit(
            *args, 
            batch_size=hp.Int('batch_size', 32, 128, step=32),
            shuffle=hp.Boolean('shuffle'),
            **kwargs)

In [6]:
# Function to clear GPU memory
def clear_gpu_memory():
    keras.backend.clear_session()
    
# Custom callback to clear session after each trial
class ClearMemory(tf.keras.callbacks.Callback):
    def on_train_end(self, logs=None):
        clear_gpu_memory()

# Define a Keras Tuner tuner
tuner = kt.BayesianOptimization(
    HyperModel(),
    objective='val_accuracy',
    max_trials=250,
    executions_per_trial=1,
    directory='logs/keras_tuner',
    project_name='BMI_Classification'
)

Reloading Tuner from logs/keras_tuner\BMI_Classification\tuner0.json


In [7]:
tuner.search_space_summary()

Search space summary
Default search space size: 16
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 2, 'max_value': 5, 'step': 1, 'sampling': 'linear'}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
use_batchnorm_0 (Boolean)
{'default': False, 'conditions': []}
units_1 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
use_batchnorm_1 (Boolean)
{'default': False, 'conditions': []}
dropout (Float)
{'default': 0.1, 'conditions': [], 'min_value': 0.1, 'max_value': 0.5, 'step': 0.1, 'sampling': 'linear'}
optimizer (Choice)
{'default': 'adam', 'conditions': [], 'values': ['adam', 'sgd', 'rmsprop'], 'ordered': False}
learning_rate (Float)
{'default': 1e-07, 'conditions': [], 'min_value': 1e-07, 'max_value': 0.001, 'step': None, 'sampling': 'log'}
units_2 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sa

In [8]:
# Perform the hyperparameter search
tuner.search(X_train, y_train, epochs=50, validation_data=(X_val, y_val), callbacks=[ClearMemory()])

Trial 250 Complete [00h 00m 16s]
val_accuracy: 0.9222221970558167

Best val_accuracy So Far: 0.9777777791023254
Total elapsed time: 00h 58m 06s


In [21]:
tuner.results_summary()

Results summary
Results in logs/keras_tuner\BMI_Classification
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 032 summary
Hyperparameters:
num_layers: 5
units_0: 480
use_batchnorm_0: False
units_1: 416
use_batchnorm_1: False
dropout: 0.2
optimizer: adam
learning_rate: 0.0006250560171450277
units_2: 64
use_batchnorm_2: False
units_3: 480
use_batchnorm_3: False
batch_size: 128
shuffle: False
units_4: 352
use_batchnorm_4: False
Score: 0.9777777791023254

Trial 129 summary
Hyperparameters:
num_layers: 2
units_0: 512
use_batchnorm_0: False
units_1: 512
use_batchnorm_1: True
dropout: 0.1
optimizer: adam
learning_rate: 0.001
units_2: 512
use_batchnorm_2: False
units_3: 32
use_batchnorm_3: False
batch_size: 32
shuffle: False
units_4: 32
use_batchnorm_4: False
Score: 0.9777777791023254

Trial 211 summary
Hyperparameters:
num_layers: 5
units_0: 512
use_batchnorm_0: False
units_1: 512
use_batchnorm_1: False
dropout: 0.1
optimizer: adam
learning_rate: 0.001
units_2: 

In [58]:
# Retrieve the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=10)[1]

# Print best hyperparameters
print(f"""
The optimal number of layers is {best_hps.get('num_layers')}.
The optimal number of units is {[best_hps.get(f'units_{i}') for i in range(best_hps.get('num_layers'))]}.
The optimal dropout rate is {best_hps.get('dropout')}.
The optimal learning rate for the optimizer is {best_hps.get('learning_rate')}.
The optimal optimizer is {best_hps.get('optimizer')}.
The optimal batch size is {best_hps.get('batch_size')}.
The optimal shuffle value is {best_hps.get('shuffle')}.
""")

# Build the model with the optimal hyperparameters and train it on the data
model = tuner.hypermodel.build(best_hps)
model.summary()


The optimal number of layers is 2.
The optimal number of units is [512, 512].
The optimal dropout rate is 0.1.
The optimal learning rate for the optimizer is 0.001.
The optimal optimizer is adam.
The optimal batch size is 32.
The optimal shuffle value is False.

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 512)               2048      
                                                                 
 dense_1 (Dense)             (None, 512)               262656    
                                                                 
 batch_normalization (BatchN  (None, 512)              2048      
 ormalization)                                                   
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                        

In [26]:
mixed_precision.set_global_policy('float32')

# Build the model with the best hyperparameters and train it
model = tuner.hypermodel.build(best_hps)
history = model.fit(X_train, y_train, epochs=50, validation_data=(X_val, y_val), callbacks=[ClearMemory()], batch_size=best_hps.get('batch_size'))

# Evaluate the model
val_loss, val_accuracy = model.evaluate(X_test, y_test, verbose=2)
print(f'Validation accuracy: {val_accuracy}')

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
3/3 - 0s - loss: 0.1631 - accuracy: 0.9444 - 52ms/epoch - 17ms/step
Validation accuracy: 0.9444444179534912
