In [25]:
import random
import numpy as np
import os
import tensorflow as tf

# Set the global seed
_GLOBAL_SEED = 42
random.seed(_GLOBAL_SEED)

# 1. Set `PYTHONHASHSEED` environment variable at a fixed value
os.environ['PYTHONHASHSEED']=str(_GLOBAL_SEED)

# 2. Set `python` built-in pseudo-random generator at a fixed value
random.seed(_GLOBAL_SEED)

# 3. Set `numpy` pseudo-random generator at a fixed value
np.random.seed(_GLOBAL_SEED)

# 4. Set `tensorflow` pseudo-random generator at a fixed value
tf.random.set_seed(_GLOBAL_SEED)


In [26]:
import pandas as pd

# Read the training dataset
train_df = pd.read_csv('../_Dataset/train_dataset.csv')

# Read the test dataset
test_df = pd.read_csv('../_Dataset/test_dataset.csv')


In [27]:
train_genetic_disorder_df = train_df.drop("disorder_subclass", axis=1)
test_genetic_disorder_df = test_df.drop("disorder_subclass", axis=1)

In [28]:
train_genetic_disorder_x = train_genetic_disorder_df.drop("genetic_disorder",axis=1)
train_genetic_disorder_y = train_genetic_disorder_df["genetic_disorder"]

test_genetic_disorder_x = test_genetic_disorder_df.drop("genetic_disorder",axis=1)
test_genetic_disorder_y = test_genetic_disorder_df["genetic_disorder"]

Normalize the dataset

In [29]:
from sklearn.preprocessing import StandardScaler

# Initialize the scaler
scaler = StandardScaler()

# Fit the scaler and transform the training data
train_genetic_disorder_x = scaler.fit_transform(train_genetic_disorder_x)

# Use the same scaler to transform the test data
test_genetic_disorder_x = scaler.transform(test_genetic_disorder_x)

Split the data in training and validation

In [30]:
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical

# Split data into training and validation sets
train_genetic_disorder_x, val_genetic_disorder_x, train_genetic_disorder_y, val_genetic_disorder_y = train_test_split(train_genetic_disorder_x, train_genetic_disorder_y, test_size=0.1, random_state=_GLOBAL_SEED)

train_genetic_disorder_y = to_categorical(train_genetic_disorder_y)
val_genetic_disorder_y = to_categorical(val_genetic_disorder_y)
test_generic_disorder_y = to_categorical(test_genetic_disorder_y)

Create the model

In [31]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

_NUM_CLASSES = 3
_TOTAL_EPOCHES = 10

# Define a function that creates a model
def build_model(hp):
    model = keras.Sequential()
    model.add(layers.Dense(units=train_genetic_disorder_x.shape[1], activation='relu'))  # First layer with number of neurons equal to number of input features
    for i in range(hp.Int('num_layers', 1, 20)):
        model.add(layers.Dense(units=hp.Int('units_' + str(i),
                                            min_value=32,
                                            max_value=256,
                                            step=32),
                               activation='relu'))
    model.add(layers.Dense(_NUM_CLASSES, activation='softmax'))  # _NUM_CLASSES is the number of classes
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate', [2e-2, 2e-3, 2e-4])), # , 2e-2, 2e-3, 2e-4
        loss='categorical_crossentropy',  # or 'sparse_categorical_crossentropy'
        metrics=['accuracy'])
    return model

In [32]:
from tensorflow.keras.callbacks import LearningRateScheduler


# Define a learning rate scheduler
def scheduler(epoch, lr):
  if epoch < int(0.2 * _TOTAL_EPOCHES):  # 10% of total epochs
    return lr
  else:
    return lr * tf.math.exp(-0.1)  # decrease the learning rate

lr_callback = LearningRateScheduler(scheduler)


Start parameters tunning and train the model

In [33]:
from keras_tuner.tuners import RandomSearch

# Define a tuner
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=3,
    directory='nn_models',
    project_name='genetic_disorder',
    seed=_GLOBAL_SEED
)

# Perform hyperparameter search
tuner.search(
    train_genetic_disorder_x, train_genetic_disorder_y,
    epochs=_TOTAL_EPOCHES,
    validation_data=(val_genetic_disorder_x, val_genetic_disorder_y),
    # callbacks=[lr_callback]
)

# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

# Build the model with the optimal hyperparameters and train it on the data
model = tuner.hypermodel.build(best_hps)
history = model.fit(train_genetic_disorder_x, train_genetic_disorder_y, epochs=_TOTAL_EPOCHES, validation_data=(val_genetic_disorder_x, val_genetic_disorder_y))

Reloading Tuner from nn_models\genetic_disorder\tuner0.json
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [35]:
import numpy as np
from sklearn.metrics import recall_score, mean_squared_error

# Evaluate the model on the testing dataset
test_loss, test_accuracy = model.evaluate(test_genetic_disorder_x, test_genetic_disorder_y)

# Make predictions on the testing dataset
test_predictions = model.predict(test_genetic_disorder_x)

# Convert the predictions to class labels
test_predictions = np.argmax(test_predictions, axis=1)

# Calculate the recall
test_recall = recall_score(test_genetic_disorder_y, test_predictions, average='macro')

# Calculate the mean squared error
test_mse = mean_squared_error(test_genetic_disorder_y, test_predictions)

# Print the results
print("Test Accuracy:", test_accuracy)
print("Test Recall:", test_recall)
print("Test Mean Squared Error:", test_mse)


Test Accuracy: 0.567313015460968
Test Recall: 0.49316649040113614
Test Mean Squared Error: 1.2839335180055402
