In [1]:
# Import our dependencies
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
from pathlib import Path
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, classification_report

# File to Load (Remember to Change These)
train_data = "cleaned_train.csv"

In [2]:
df_train_clean = pd.read_csv(train_data)

In [3]:
df_train_clean[0:5]

Unnamed: 0,A1_Score,A2_Score,A3_Score,A4_Score,A5_Score,A6_Score,A7_Score,A8_Score,A9_Score,A10_Score,...,country_of_residence_United States,country_of_residence_Viet Nam,used_app_before_no,used_app_before_yes,test_taker_?,test_taker_Health care professional,test_taker_Others,test_taker_Parent,test_taker_Relative,test_taker_Self
0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [4]:
# Split our preprocessed data into our features and target arrays
y = df_train_clean.Autism_Diagnosis.values
X = df_train_clean.drop(columns="Autism_Diagnosis").values


# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [5]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [6]:
# Import the RandomOverSampler module form imbalanced-learn
from imblearn.over_sampling import RandomOverSampler

# Instantiate the random oversampler model
# # Assign a random_state parameter of 1 to the model
ROS = RandomOverSampler(random_state=1)

# Fit the original training data to the random_oversampler model
X_resampled, y_resampled= ROS.fit_resample(X_train_scaled, y_train)

In [7]:
# Count the distinct values of the resampled labels data
y_resampled[:5]

array([0., 1., 0., 1., 0.])

In [8]:
# X_train_scaled, X_test_scaled, y_train, y_test = train_test_split(X_resampled, y_resampled, random_state=1)

In [9]:
series = pd.Series(y_resampled)
series.value_counts()

0.0    464
1.0    464
dtype: int64

In [10]:
!pip install keras-tuner

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting keras-tuner
  Downloading keras_tuner-1.3.5-py3-none-any.whl (176 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.1/176.1 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.3.5 kt-legacy-1.0.5


In [11]:
# Create a method that creates a new Sequential model with hyperparameter options
input_features = X_resampled.shape[1]

def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])

    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value= 100,
        step=10), activation=activation, input_dim=input_features))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 5)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=50,
            step=10),
            activation=activation))

    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

    return nn_model

  # Import the kerastuner library
import keras_tuner as kt

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=50,
    hyperband_iterations=2)

# Run the kerastuner search for best hyperparameters
tuner.search(X_resampled,y_resampled,epochs=50,validation_data=(X_test_scaled,y_test))

# Top 3 model hyperparameters
top_hyper = tuner.get_best_hyperparameters(3)
for param in top_hyper:
  print(param.values)

# Top 3 models
top_model = tuner.get_best_models(3)
for model in top_model:
  model_loss, model_accuracy = model.evaluate(X_test_scaled,y_test,verbose=2)
  print(f'Loss: {model_loss}, Accuracy: {model_accuracy}')

# Get best model hyperparameters
best_hyper = tuner.get_best_hyperparameters()[0]
best_hyper.values

# Evaluate best model against full test data
best_model = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")



Trial 180 Complete [00h 00m 11s]
val_accuracy: 0.8500000238418579

Best val_accuracy So Far: 0.8899999856948853
Total elapsed time: 00h 11m 23s
{'activation': 'relu', 'first_units': 81, 'num_layers': 3, 'units_0': 41, 'units_1': 31, 'units_2': 1, 'units_3': 31, 'units_4': 1, 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 3, 'tuner/round': 3, 'tuner/trial_id': '0046'}
{'activation': 'relu', 'first_units': 31, 'num_layers': 4, 'units_0': 31, 'units_1': 31, 'units_2': 31, 'units_3': 11, 'units_4': 31, 'tuner/epochs': 17, 'tuner/initial_epoch': 6, 'tuner/bracket': 3, 'tuner/round': 2, 'tuner/trial_id': '0037'}
{'activation': 'relu', 'first_units': 91, 'num_layers': 3, 'units_0': 21, 'units_1': 41, 'units_2': 1, 'units_3': 1, 'units_4': 11, 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 2, 'tuner/round': 2, 'tuner/trial_id': '0157'}
7/7 - 0s - loss: 0.5376 - accuracy: 0.8900 - 256ms/epoch - 37ms/step
Loss: 0.5375719666481018, Accuracy: 0.8899999856948853
7/

In [12]:
# Export our model to HDF5 file
best_model.save("Neural_Network_Optimized_Resampled.h5")

In [13]:
y_predict = best_model.predict(X_test_scaled)

import numpy as np
Y_predict = np.round(y_predict)

from sklearn.metrics import classification_report
# Create and save the training classification report
training_report = classification_report(y_test, Y_predict)

# Print the training classification report
print(training_report)

              precision    recall  f1-score   support

         0.0       0.94      0.91      0.93       151
         1.0       0.75      0.84      0.79        49

    accuracy                           0.89       200
   macro avg       0.85      0.87      0.86       200
weighted avg       0.90      0.89      0.89       200

