In [1]:
import os
import autokeras as ak
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split

import logging
# Raise the "tensorflow" logger threshold so only ERROR-and-above records are emitted.
logging.getLogger("tensorflow").setLevel(logging.ERROR)


Using TensorFlow backend


# Structured Data

In [2]:
# Load the dataset: every column except the last is used as a feature and the
# last column is used as the label.
X_y = pd.read_csv(os.path.join(os.path.pardir, 'autotrain', 'datasets', 'structured-data-classification.csv'))
# Total column count (features + label). Read from `.shape[1]` rather than
# unpacking into `_`, which would shadow IPython's "last output" variable.
features_nums = X_y.shape[1]
X = X_y.iloc[:, :features_nums - 1].to_numpy()
y = X_y.iloc[:, -1].to_numpy()

# Hold out 20% for testing, then 20% of the remainder for validation.
# A fixed `random_state` makes both splits identical across kernel restarts,
# so the metrics reported further down can be reproduced.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=42)

In [3]:
# Architecture search for tabular classification, limited to 5 trials.
# `seed` pins the tuner's random choices so a re-run explores the same trials;
# `overwrite=True` starts a fresh project instead of reusing earlier results
# (per the AutoKeras documentation).
clf = ak.StructuredDataClassifier(
    max_trials=5,
    overwrite=True,
    seed=42,
)

# Run the search, validating each candidate on the held-out validation split.
# The returned object exposes per-epoch metrics via `history.history`.
history = clf.fit(
    x=x_train,
    y=y_train,
    epochs=20,
    validation_data=(x_val, y_val),
)

Trial 5 Complete [00h 00m 01s]
val_accuracy: 0.42500001192092896

Best val_accuracy So Far: 0.44999998807907104
Total elapsed time: 00h 00m 05s




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [4]:
# Score the fitted classifier on the untouched test split; `return_dict=True`
# yields a metric-name -> value mapping, displayed as the cell output.
evaluate_result = clf.evaluate(x=x_test, y=y_test, return_dict=True)
evaluate_result



{'loss': 1.2779545783996582, 'accuracy': 0.3400000035762787}

In [5]:
from typing import Dict, List, Any, Optional
# from pydantic import BaseModel
from dataclasses import dataclass
from keras.utils import plot_model

# Fetch the best model from the tuner and try to render its architecture as a
# PNG under the tuner's best-model path. Rendering is best-effort: on failure
# `model_file_path` is set to None instead of aborting the notebook.
best_keras_model = clf.tuner.get_best_model()
try:
    model_file_path = os.path.join(clf.tuner.best_model_path, 'model.png')
    plot_model(best_keras_model, to_file=model_file_path, show_layer_activations=True, show_dtype=True, show_shapes=True, show_layer_names=False)
    # NOTE(review): plot_model can apparently return without writing anything
    # (e.g. when graphviz/pydot is missing inside a notebook) - only report
    # the path when the image is actually on disk.
    if not os.path.isfile(model_file_path):
        model_file_path = None
except Exception as exc:
    # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
    # propagate, and the reason for the failure is logged rather than lost.
    logging.warning("Could not render the best model graph: %s", exc)
    model_file_path = None

@dataclass
class BestModelTracker:
    """Plain record describing the best model found by the search."""

    # Metric name -> list of per-epoch values (taken from `history.history`).
    history: Dict[str, Any]
    # Configuration dict of the best hyperparameter set.
    hyperparameters: Dict[str, Any]
    # Filesystem path of the rendered model graph, or None when unavailable.
    model_graph_url: Optional[str]

# Bundle the fit history, the best hyperparameter configuration and the
# (possibly missing) graph image path into one record, then show its fields.
best_model_tracker = BestModelTracker(
    history=history.history,
    # Take the last entry of the returned list of best hyperparameter sets.
    hyperparameters=clf.tuner.get_best_hyperparameters()[-1].get_config(),
    model_graph_url=model_file_path,
)
vars(best_model_tracker)

{'history': {'loss': [164.90606689453125,
   74.91258239746094,
   23.970430374145508,
   12.627298355102539,
   5.86357307434082,
   4.157534599304199,
   2.820606231689453,
   1.8201239109039307,
   1.480838418006897,
   1.3322709798812866,
   1.3129148483276367,
   1.3206045627593994,
   1.2836964130401611,
   1.270628571510315,
   1.288847804069519,
   1.3144731521606445,
   1.3208401203155518,
   1.32156240940094,
   1.3346840143203735,
   1.3481805324554443],
  'accuracy': [0.3687500059604645,
   0.3187499940395355,
   0.3343749940395355,
   0.30937498807907104,
   0.34687501192092896,
   0.3375000059604645,
   0.28125,
   0.31562501192092896,
   0.265625,
   0.34687501192092896,
   0.31562501192092896,
   0.33125001192092896,
   0.34062498807907104,
   0.3375000059604645,
   0.29374998807907104,
   0.28437501192092896,
   0.29374998807907104,
   0.3031249940395355,
   0.3031249940395355,
   0.30937498807907104],
  'val_loss': [97.84558868408203,
   47.17424011230469,
   10.93836

# Trial

In [6]:
# Inspect the raw state dict of the trial with id '1'; the keys shown in the
# output are the ones the `Trial` dataclass below is built from.
clf.tuner.oracle.get_trial('1').get_state()

{'trial_id': '1',
 'hyperparameters': {'space': [{'class_name': 'Boolean',
    'config': {'name': 'structured_data_block_1/normalize',
     'default': False,
     'conditions': []}},
   {'class_name': 'Boolean',
    'config': {'name': 'structured_data_block_1/dense_block_1/use_batchnorm',
     'default': False,
     'conditions': []}},
   {'class_name': 'Choice',
    'config': {'name': 'structured_data_block_1/dense_block_1/num_layers',
     'default': 2,
     'conditions': [],
     'values': [1, 2, 3],
     'ordered': True}},
   {'class_name': 'Choice',
    'config': {'name': 'structured_data_block_1/dense_block_1/units_0',
     'default': 32,
     'conditions': [],
     'values': [16, 32, 64, 128, 256, 512, 1024],
     'ordered': True}},
   {'class_name': 'Choice',
    'config': {'name': 'structured_data_block_1/dense_block_1/dropout',
     'default': 0.0,
     'conditions': [],
     'values': [0.0, 0.25, 0.5],
     'ordered': True}},
   {'class_name': 'Choice',
    'config': {'name'

In [7]:
from dataclasses import dataclass
from typing import Any, Optional, List, Dict


@dataclass
class Trial:
    """Flat record of one tuner trial plus the path of its model graph.

    All fields except `model_graph_url` are filled by unpacking the dict
    returned by the trial's `get_state()`.
    """

    # Identifier of the trial, e.g. '1'.
    trial_id: str
    # Hyperparameter state of the trial (search space and related data).
    hyperparameters: Dict[str, Any]
    # Metric state recorded for the trial.
    metrics: Dict[str, Any]
    # Objective score of the trial.
    score: float
    # Step at which the trial's best score was recorded.
    best_step: int
    # Trial status string as reported by the tuner.
    status: str
    # Filesystem path of the rendered model graph, or None when unavailable.
    model_graph_url: Optional[str]
    # Message attached to the trial state; type is not constrained here.
    message: Any

@dataclass
class TrialsTracker:
    """Container holding the collected `Trial` records."""

    # Trial records in the order they were appended.
    trials: List[Trial]

# Collect a `Trial` record for each of the best trials and, where possible,
# render the matching model's architecture into that trial's directory.
max_trials = 5
trials = []
models = clf.tuner.get_best_models(max_trials)
# NOTE(review): assumes `get_best_models` returns models in the same order as
# `get_best_trials` - confirm against the tuner implementation.
for index, trial in enumerate(clf.tuner.oracle.get_best_trials(max_trials)):
    try:
        model_file_path = os.path.join(clf.tuner.get_trial_dir(trial_id=trial.trial_id), 'model.png')
        plot_model(model=models[index], to_file=model_file_path, show_layer_activations=True, show_dtype=True, show_shapes=True, show_layer_names=False)
        # NOTE(review): plot_model can apparently return without writing a
        # file (e.g. graphviz/pydot missing in a notebook) - only keep the
        # path when the image is actually on disk.
        if not os.path.isfile(model_file_path):
            model_file_path = None
    except Exception as exc:
        # Best-effort rendering: keep the trial, drop the graph, and log why.
        # `Exception` (not a bare except) lets KeyboardInterrupt propagate.
        logging.warning("Could not render model graph for trial %s: %s", trial.trial_id, exc)
        model_file_path = None
    # The trial state dict supplies every `Trial` field except the graph path.
    trials.append(Trial(
        **trial.get_state(),
        model_graph_url=model_file_path
    ))
trials_tracker = TrialsTracker(trials=trials)
trials_tracker.__dict__




{'trials': [Trial(trial_id='2', hyperparameters={'space': [{'class_name': 'Boolean', 'config': {'name': 'structured_data_block_1/normalize', 'default': False, 'conditions': []}}, {'class_name': 'Boolean', 'config': {'name': 'structured_data_block_1/dense_block_1/use_batchnorm', 'default': False, 'conditions': []}}, {'class_name': 'Choice', 'config': {'name': 'structured_data_block_1/dense_block_1/num_layers', 'default': 2, 'conditions': [], 'values': [1, 2, 3], 'ordered': True}}, {'class_name': 'Choice', 'config': {'name': 'structured_data_block_1/dense_block_1/units_0', 'default': 32, 'conditions': [], 'values': [16, 32, 64, 128, 256, 512, 1024], 'ordered': True}}, {'class_name': 'Choice', 'config': {'name': 'structured_data_block_1/dense_block_1/dropout', 'default': 0.0, 'conditions': [], 'values': [0.0, 0.25, 0.5], 'ordered': True}}, {'class_name': 'Choice', 'config': {'name': 'structured_data_block_1/dense_block_1/units_1', 'default': 32, 'conditions': [], 'values': [16, 32, 64, 12