In [None]:
!wget -q http://archive.ics.uci.edu/static/public/45/heart+disease.zip
!unzip -q heart+disease.zip -d dataset

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

# The file is read without a header row (header=None); '?' entries are parsed as NaN.
df = pd.read_csv('dataset/processed.cleveland.data', header=None, na_values='?')
columns = [
    'age', # the person's age in years
    'sex', # the person's sex (0 = female, 1 = male)
    'cp', # the chest pain experienced (1 = typical angina, 2 = atypical angina, 3 = non-anginal pain, 4 = asymptomatic)
    'trestbps', # the person's resting blood pressure in mm Hg
    'chol', # the person's cholesterol measurement in mg/dl
    'fbs', # the person's fasting blood sugar (> 120 mg/dl, 0 = false, 1 = true)
    'restecg', # resting electrocardiographic measurement (0 = normal, 1 = having ST-T wave abnormality, 2 = showing probable or definite left ventricular hypertrophy by Estes' criteria)
    'thalach', # the person's maximum heart rate achieved
    'exang', # exercise induces angina (0 = no, 1 = true)
    'oldpeak', # ST depression induced by exercise relative to rest
    'slope', # the slope of the peak exercise ST segment (1 = upsloping, 2 = flat, 3 = downsloping)
    'ca', # the number of major vessels (0-3)
    'thal', # a blood disorder called thalassemia (3 = normal, 6 = fixed defect, 7 = reversible defect)
    'target', # heart disease (0 = no, 1,2,3,4 = yes, different types)
]

# the column types are defined by the dataset description
column_types = ['float32', 'int64', 'int64', 'float32', 'float32', 'int64', 'int64',
                'float32', 'int64', 'float32', 'int64', 'int64', 'int64', 'int64']

# Map the positional labels produced by header=None onto the readable names above.
columns_dict = dict(zip(df.columns, columns))
df = df.rename(columns=columns_dict)

# Impute 'ca' and 'thal' with their most frequent value.
# - .iloc[0] takes the first mode, so a tie between several modes no longer raises
#   (.item() requires exactly one element).
# - The fill is assigned back to the frame: df[col].fillna(..., inplace=True) acts on an
#   intermediate Series and does not update df under pandas Copy-on-Write.
# NOTE(review): the modes are computed on the full dataset, i.e. before the train/test
# split below, so the imputation also sees validation/test rows.
mode_ca = df['ca'].mode().iloc[0]
mode_thal = df['thal'].mode().iloc[0]
df = df.fillna({'ca': mode_ca, 'thal': mode_thal})

# Cast only after imputation: a column that still holds NaN cannot become int64.
column_types_dict = dict(zip(df.columns, column_types))
df = df.astype(column_types_dict)

# Class distribution of the original target values; bound to a name because a bare
# expression in the middle of a cell is never displayed.
target_counts = df['target'].value_counts()

# Collapse the target to binary: 0 stays 0, every positive value becomes 1.
df['target'] = (df['target'] > 0).astype('int64')

X = df.drop('target', axis=1).values # features
y = df['target'].values # label

# Hold out 20% for testing, then take 10% of the remaining rows for validation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then push every split through that
# same fitted transformation.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_val, X_test = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

In [None]:
# és aztán telepítsük magát a keras-tuner-t is

!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.6-py3-none-any.whl (128 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m128.9/128.9 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.6 kt-legacy-1.0.5


In [None]:
import tensorflow as tf
import numpy as np

from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam, SGD


In [None]:
def build_model(hp):
  """Build and compile a binary classifier whose shape is driven by `hp`.

  The network is three Dense + Dropout stages followed by a single sigmoid unit.
  For each stage `hp` supplies the unit count (8-64), the activation
  ('tanh' or 'sigmoid') and the dropout rate (0.3-0.7); it also picks the SGD
  learning rate. The input width is taken from the notebook-level `X_train`.

  Args:
    hp: hyperparameter container exposing `Int`, `Choice` and `Float`.

  Returns:
    The compiled Sequential model (binary cross-entropy loss, accuracy metric).
  """
  # (units, activation, dropout) hyperparameter names for each hidden stage, in order.
  stage_names = (
      ('dense_1_units', 'act_1', 'dropout_1'),
      ('dense_2_units', 'act_2', 'dropout_2'),
      ('dense_3_units', 'act_3', 'dropout_3'),
  )

  model = Sequential()
  for position, (units_name, act_name, drop_name) in enumerate(stage_names):
    n_units = hp.Int(units_name, min_value=8, max_value=64)
    act_fn = hp.Choice(act_name, values=['tanh', 'sigmoid'])
    # Only the first layer declares the input shape.
    first_layer_kwargs = {'input_shape': (X_train.shape[1], )} if position == 0 else {}
    model.add(Dense(n_units, activation=act_fn, **first_layer_kwargs))

    drop_rate = hp.Float(drop_name, min_value=0.3, max_value=0.7)
    model.add(Dropout(drop_rate))

  # Output layer: one sigmoid unit for the binary target.
  model.add(Dense(1, activation='sigmoid'))

  step_size = hp.Choice('learning_rate', values=[0.01, 0.001, 0.0001])
  model.compile(
      optimizer=SGD(step_size),
      loss='binary_crossentropy',
      metrics=['accuracy']
  )
  return model


In [None]:
from keras_tuner.tuners import Hyperband

# Hyperband search over the space defined in build_model, maximising validation accuracy.
# overwrite=True discards any project already stored under output/: without it, trials
#   saved by an earlier run (possibly with a different search space) are reloaded and
#   mixed into the reported results.
# seed fixes the tuner's hyperparameter sampling so that a re-run proposes the same trials.
tuner = Hyperband(
    build_model,
    objective='val_accuracy',
    factor=5,
    max_epochs=25,
    seed=42,
    directory='output',
    project_name='HyperparameterOptimalizationDemo',
    overwrite=True
)

In [None]:
# Print each hyperparameter in the tuner's search space together with its range or choices.
tuner.search_space_summary()

Search space summary
Default search space size: 10
dense_1_units (Int)
{'default': None, 'conditions': [], 'min_value': 8, 'max_value': 64, 'step': 1, 'sampling': 'linear'}
act_1 (Choice)
{'default': 'tanh', 'conditions': [], 'values': ['tanh', 'sigmoid'], 'ordered': False}
dropout_1 (Float)
{'default': 0.3, 'conditions': [], 'min_value': 0.3, 'max_value': 0.7, 'step': None, 'sampling': 'linear'}
dense_2_units (Int)
{'default': None, 'conditions': [], 'min_value': 8, 'max_value': 64, 'step': 1, 'sampling': 'linear'}
act_2 (Choice)
{'default': 'tanh', 'conditions': [], 'values': ['tanh', 'sigmoid'], 'ordered': False}
dropout_2 (Float)
{'default': 0.3, 'conditions': [], 'min_value': 0.3, 'max_value': 0.7, 'step': None, 'sampling': 'linear'}
dense_3_units (Int)
{'default': None, 'conditions': [], 'min_value': 8, 'max_value': 64, 'step': 1, 'sampling': 'linear'}
act_3 (Choice)
{'default': 'tanh', 'conditions': [], 'values': ['tanh', 'sigmoid'], 'ordered': False}
dropout_3 (Float)
{'default

In [None]:
# Run the search: trials are fitted on the training split and evaluated on the
# validation pair passed as validation_data.
validation_split = (X_val, y_val)
tuner.search(X_train, y_train, epochs=25, validation_data=validation_split)

Trial 44 Complete [00h 00m 04s]
val_accuracy: 0.5600000023841858

Best val_accuracy So Far: 0.8399999737739563
Total elapsed time: 00h 01m 27s


In [None]:
# Ask the tuner for its single best model and print that model's layer summary.
best_models = tuner.get_best_models(num_models=1)
model = best_models[0]
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 31)                434       
                                                                 
 dropout (Dropout)           (None, 31)                0         
                                                                 
 dense_1 (Dense)             (None, 47)                1504      
                                                                 
 dropout_1 (Dropout)         (None, 47)                0         
                                                                 
 dense_2 (Dense)             (None, 23)                1104      
                                                                 
 dropout_2 (Dropout)         (None, 23)                0         
                                                                 
 dense_3 (Dense)             (None, 1)                 2

In [None]:
# Take the top-ranked hyperparameter set and show its name -> value mapping.
best_hp_sets = tuner.get_best_hyperparameters(num_trials=1)
params_best = best_hp_sets[0]
params_best.get_config()['values']

{'dense_1_units': 31,
 'act_1': 'relu',
 'dropout_1': 0.48487879341822315,
 'dense_2_units': 47,
 'act_2': 'relu',
 'dropout_2': 0.4782119557575787,
 'dense_3_units': 23,
 'act_3': 'swish',
 'dropout_3': 0.5932163354594445,
 'learning_rate': 0.001,
 'tuner/epochs': 25,
 'tuner/initial_epoch': 5,
 'tuner/bracket': 2,
 'tuner/round': 2,
 'tuner/trial_id': '0025'}

In [None]:
# Print the best trials found by the search, with their hyperparameters and scores.
tuner.results_summary()

Results summary
Results in output/HyperparameterOptimalizationDemo
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 0030 summary
Hyperparameters:
dense_1_units: 31
act_1: relu
dropout_1: 0.48487879341822315
dense_2_units: 47
act_2: relu
dropout_2: 0.4782119557575787
dense_3_units: 23
act_3: swish
dropout_3: 0.5932163354594445
learning_rate: 0.001
tuner/epochs: 25
tuner/initial_epoch: 5
tuner/bracket: 2
tuner/round: 2
tuner/trial_id: 0025
Score: 0.8799999952316284

Trial 0025 summary
Hyperparameters:
dense_1_units: 31
act_1: relu
dropout_1: 0.48487879341822315
dense_2_units: 47
act_2: relu
dropout_2: 0.4782119557575787
dense_3_units: 23
act_3: swish
dropout_3: 0.5932163354594445
learning_rate: 0.001
tuner/epochs: 5
tuner/initial_epoch: 1
tuner/bracket: 2
tuner/round: 1
tuner/trial_id: 0016
Score: 0.8399999737739563

Trial 0031 summary
Hyperparameters:
dense_1_units: 10
act_1: relu
dropout_1: 0.563305004905992
dense_2_units: 53
act_2: swish
dropout_2: 0.35883