# Hyperparameter Tuning for the above model
## Prerequisite: pip install keras-tuner

In [None]:
import sqlite3 as sql
from keras.src.models import Sequential 
from keras.src.layers import Dense, Dropout, Input
from keras_tuner import RandomSearch 
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, OrdinalEncoder
from keras.src.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from keras.src.utils import to_categorical
import pandas as pd 
import pickle as pkl

In [24]:
## Load the raw data and build the preprocessing pipeline

# Read the raw data from the SQL table. The connection is closed in a
# `finally` so the database handle is released even if the query fails.
conn = sql.connect('../DataBase/TrainingData.db')
try:
    query = "SELECT * FROM HealthData"
    df = pd.read_sql_query(query, conn)
finally:
    conn.close()

df = df.dropna(subset=['NObeyesdad'])  # Remove rows where the target is missing


## Defining the numerical, ordinal and nominal features.
target_col = ['NObeyesdad']

# Select numeric columns by dtype family instead of comparing against 'object',
# and exclude the target explicitly so it can never end up among the inputs.
num_cols = [col for col in df.select_dtypes(include='number').columns
            if col not in target_col]

nominal_cols = ['Gender', 'family_history_with_overweight', 'FAVC', 'SMOKE', 'SCC', 'MTRANS']

ordinal_cols = ['CALC', 'CAEC']

## Building pipeline for data transformation

# For numerical features: impute with the median, then standardise
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# For nominal features: impute with the most frequent value, then one-hot encode
nominal_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('OneHotEncoder', OneHotEncoder(handle_unknown='ignore'))
])

# For ordinal features: impute with the most frequent value, then integer-encode.
# NOTE(review): no explicit `categories` are passed, so the integer codes
# presumably follow the encoder's sorted category order rather than the
# semantic level of each answer -- confirm this is intended.
ordinal_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('OrdinalEncoder', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1))
])


# ColumnTransformer to apply different transformations to different columns
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, num_cols),
        ('nom', nominal_transformer, nominal_cols),
        ('ord', ordinal_transformer, ordinal_cols),
    ]
)

# Integer code assigned to each target class (codes start at 1, not 0)
target_category_rank = {
    'Normal_Weight': 1,
    'Insufficient_Weight': 2,
    'Overweight_Level_I': 3,
    'Overweight_Level_II': 4,
    'Obesity_Type_I': 5,
    'Obesity_Type_II': 6,
    'Obesity_Type_III': 7
}

# Fail loudly on labels the mapping does not know about instead of letting
# them pass through as strings and break the one-hot encoding later.
unknown_labels = set(df['NObeyesdad'].unique()) - set(target_category_rank)
if unknown_labels:
    raise ValueError(
        f"Unmapped target labels in NObeyesdad: {sorted(map(str, unknown_labels))}"
    )

# `map` gives a clean integer column without the downcasting FutureWarning
# that `replace` emits on object columns.
df['NObeyesdad'] = df['NObeyesdad'].map(target_category_rank).astype('int64')

# Fit the preprocessor on the full dataset and transform the features.
# NOTE(review): the fit happens before the train/test split, so imputation and
# scaling statistics also see the rows that later become the test set.
df_transformed = preprocessor.fit_transform(df)


  df['NObeyesdad'] = df['NObeyesdad'].replace(target_category_rank)


In [25]:
# Recover column names for the transformed matrix, in the same order the
# ColumnTransformer emits them: numerical, one-hot nominal, then ordinal.

# Numerical features keep their original names
num_feature_names = num_cols

# Nominal features: ask the fitted OneHotEncoder for the expanded names.
# The transformer is looked up by its name ('nom') rather than by position,
# so reordering the ColumnTransformer entries cannot silently pick the wrong one.
nominal_feature_names = (
    preprocessor.named_transformers_['nom']
    .named_steps['OneHotEncoder']
    .get_feature_names_out(nominal_cols)
)

# Ordinal features are encoded in place, one output column per input column
ordinal_feature_names = ordinal_cols

# Combine all feature names
inputs_feature_names = num_feature_names + nominal_feature_names.tolist() + ordinal_feature_names

# Guard against a mismatch between the reconstructed names and the matrix width
assert df_transformed.shape[1] == len(inputs_feature_names), (
    f"expected {len(inputs_feature_names)} feature columns, "
    f"got {df_transformed.shape[1]}"
)

# Reuse df's index so the features stay label-aligned with df['NObeyesdad']
# even when dropna removed rows and left gaps in the index.
input_df = pd.DataFrame(df_transformed, columns=inputs_feature_names, index=df.index)
input_df.head()

Unnamed: 0,Age,Height,Weight,FCVC,NCP,CH2O,FAF,TUE,Gender_Female,Gender_Male,...,SMOKE_yes,SCC_no,SCC_yes,MTRANS_Automobile,MTRANS_Bike,MTRANS_Motorbike,MTRANS_Public_Transportation,MTRANS_Walking,CALC,CAEC
0,-0.522124,-0.875589,-0.862558,-0.785019,0.404153,-0.013073,-1.188039,0.561997,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,3.0,2.0
1,-0.522124,-1.947599,-1.168077,1.088342,0.404153,1.618759,2.33975,-1.080625,1.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,2.0,2.0
2,-0.206889,1.054029,-0.36609,-0.785019,0.404153,-0.013073,1.16382,0.561997,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,2.0
3,0.423582,1.054029,0.015808,1.088342,0.404153,-0.013073,1.16382,-1.080625,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0
4,-0.364507,0.839627,0.12274,-0.785019,-2.167023,-0.013073,-1.188039,-1.080625,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,2.0


In [31]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical between runs.
X, y = input_df, df['NObeyesdad']
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [44]:

# One-hot encode the integer class labels for categorical cross-entropy.
# The width is fixed explicitly so the train and test targets always have the
# same number of columns, even if the highest class is missing from one split.
# The class codes in target_category_rank run from 1 to 7, so column 0 is never
# set and the encoded width is 8.
num_classes = int(max(y_train.max(), y_test.max())) + 1

y_train_hp_cat = to_categorical(y_train, num_classes=num_classes)
y_test_hp_cat = to_categorical(y_test, num_classes=num_classes)

y_test_hp_cat.shape

(423, 8)

In [38]:
## Model-building function handed to the tuner

def build_model(hp):
    """Build and compile one candidate network for the hyperparameter search.

    The network is a stack of Dense layers: a first ReLU layer, three
    ReLU + Dropout hidden stages, and a softmax output. Layer widths,
    dropout rates and the Adam learning rate are all drawn from `hp`.

    Reads the notebook globals `x_train` (for the input width) and
    `y_train_hp_cat` (for the number of output units).

    Args:
        hp: hyperparameter container supplied by the tuner; must provide
            `Int`, `Float` and `Choice`.

    Returns:
        The compiled Sequential model.
    """
    n_features = x_train.shape[1]
    n_outputs = y_train_hp_cat.shape[1]

    # (units hyperparameter, dropout hyperparameter, min units, max units)
    # NOTE(review): for 'unit_hidden3' a step of 32 starting at 16 presumably
    # makes the stated maximum of 64 unreachable -- confirm the intended step.
    hidden_stages = [
        ('units_hidden1', 'dropout_hidden1', 64, 256),
        ('unit_hidden2', 'unit_dropout2', 32, 128),
        ('unit_hidden3', 'unit_dropout3', 16, 64),
    ]

    model = Sequential()

    # Input followed by the first tunable Dense layer
    model.add(Input(shape=(n_features,)))
    first_width = hp.Int('units_input', min_value=32, max_value=256, step=32)
    model.add(Dense(units=first_width, activation='relu'))

    # Hidden stages: a tunable Dense layer followed by tunable Dropout
    for units_name, dropout_name, low, high in hidden_stages:
        width = hp.Int(units_name, min_value=low, max_value=high, step=32)
        model.add(Dense(units=width, activation='relu'))
        drop_rate = hp.Float(dropout_name, min_value=0.2, max_value=0.5, step=0.1)
        model.add(Dropout(rate=drop_rate))

    # Output layer: one softmax unit per one-hot target column
    model.add(Dense(n_outputs, activation='softmax'))

    # Compile with a tunable learning rate
    step_size = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4, 1e-5])
    model.compile(
        optimizer=Adam(learning_rate=step_size),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    return model

In [39]:
## Initialize the tuner

# Random search over the space declared in build_model: 5 hyperparameter
# combinations, each trained 3 times, ranked by validation accuracy.
# The seed fixes which combinations are sampled so the search can be repeated;
# it matches the random_state used for the train/test split.
# NOTE(review): the seed presumably covers hyperparameter sampling only, not
# weight initialisation -- per-trial scores may still vary between runs.
tuner = RandomSearch(build_model,
                     objective='val_accuracy',
                     max_trials=5,
                     executions_per_trial=3,
                     seed=42,
                     directory='hyperparameter_tuning',
                     project_name='Obesity Classification')

In [40]:
## Run the search: every trial trains on x_train for 100 epochs and is
## scored on a validation slice holding 20% of the training rows.

tuner.search(
    x_train,
    y_train_hp_cat,
    epochs=100,
    validation_split=0.2,
)

Trial 5 Complete [00h 01m 17s]
val_accuracy: 0.5670611262321472

Best val_accuracy So Far: 0.9773175319035848
Total elapsed time: 00h 06m 25s


In [46]:
## Retrieve the top-ranked trial: its model and its hyperparameter set.

top_models = tuner.get_best_models(num_models=1)
best_model = top_models[0]

top_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)
best_hyperparameters = top_hyperparameters[0]

print("Best hyperparameters:", best_hyperparameters.values)


Best hyperparameters: {'units_input': 32, 'units_hidden1': 224, 'dropout_hidden1': 0.2, 'unit_hidden2': 32, 'unit_dropout2': 0.4, 'unit_hidden3': 48, 'unit_dropout3': 0.4, 'learning_rate': 0.01}


  saveable.load_own_variables(weights_store.get(inner_path))


In [47]:
## Score the selected model on the held-out test split
evaluation = best_model.evaluate(x_test, y_test_hp_cat)
test_loss, test_accuracy = evaluation  # loss first, then the accuracy metric
print(f"Test accuracy of the best model: {test_accuracy:.2f}")


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9441 - loss: 0.1972
Test accuracy of the best model: 0.95
