In [1]:
import os
import autokeras as ak
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split

import logging
# Raise the "tensorflow" logger threshold to ERROR so that lower-severity
# records from that logger are not emitted into the notebook output.
logging.getLogger("tensorflow").setLevel(logging.ERROR)


Using TensorFlow backend


# Structured Data

In [2]:
# Fixed seed so the train / validation / test partition is identical on every
# run (the original calls were unseeded, so each run produced a new split).
SPLIT_SEED = 42

# Load the CSV: every column except the last is used as a feature, the last
# column is used as the target.
X_y = pd.read_csv(os.path.join(os.path.pardir, 'autotrain', 'datasets', 'structured-data-classification.csv'))
_, features_nums = X_y.shape  # total column count (features + target)
X = X_y.iloc[:, :-1].to_numpy()
y = X_y.iloc[:, -1].to_numpy()

# Hold out 20% of the rows for testing, then 20% of the remainder for validation.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SPLIT_SEED)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=SPLIT_SEED)

In [3]:
# Configure the search with max_trials=5 and overwrite=True, then fit on the
# training split with epochs=20, scoring against the explicit validation split.
clf = ak.StructuredDataClassifier(overwrite=True, max_trials=5)

history = clf.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_val, y_val),
    epochs=20,
)

Trial 5 Complete [00h 00m 01s]
val_accuracy: 0.4375

Best val_accuracy So Far: 0.4375
Total elapsed time: 00h 00m 05s




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [4]:
# Evaluate the fitted classifier on the held-out test split; return_dict=True
# makes the result a {metric name: value} mapping, shown as the cell output.
evaluate_result = clf.evaluate(x_test, y_test, return_dict=True)
evaluate_result





{'loss': 1.1072760820388794, 'accuracy': 0.3499999940395355}

In [5]:
from typing import Dict, List, Any, Optional
# from pydantic import BaseModel
from dataclasses import dataclass
from keras.utils import plot_model

best_keras_model = clf.tuner.get_best_model()

# Rendering the architecture diagram is best-effort: when it fails the reason
# is logged and the graph URL is left empty instead of aborting the cell.
# `except Exception` replaces the former bare `except:`, which also trapped
# KeyboardInterrupt / SystemExit and hid the cause of the failure.
try:
    model_file_path = os.path.join(clf.tuner.best_model_path, 'model.png')
    plot_model(best_keras_model, to_file=model_file_path, show_layer_activations=True, show_dtype=True, show_shapes=True, show_layer_names=False)
except Exception as exc:
    logging.warning("Could not render the best-model graph: %s", exc)
    model_file_path = None


@dataclass
class BestModelTracker:
    """Summary of the best model produced by the search.

    Attributes:
        history: Per-epoch metric lists taken from the ``history`` attribute of
            the object returned by ``clf.fit``.
        hyperparameters: Configuration mapping of the best hyperparameter set
            reported by the tuner.
        model_graph_url: Path of the rendered model image, or ``None`` when
            the image could not be produced.
    """
    history: Dict[str, Any]
    hyperparameters: Dict[str, Any]
    model_graph_url: Optional[str]


best_model_tracker = BestModelTracker(
    history=history.history,
    # Called with its defaults, so the first entry is the one that is used.
    hyperparameters=clf.tuner.get_best_hyperparameters()[0].get_config(),
    model_graph_url=model_file_path
)
best_model_tracker.__dict__



{'history': {'loss': [1.1160433292388916,
   1.0939342975616455,
   1.0847777128219604,
   1.084676742553711,
   1.06989324092865,
   1.0823984146118164,
   1.0605242252349854,
   1.0604743957519531,
   1.0702459812164307,
   1.0424621105194092,
   1.0393513441085815,
   1.0317610502243042,
   1.0327776670455933,
   1.0421007871627808,
   1.0250943899154663,
   1.0374301671981812,
   1.0041248798370361,
   1.0292603969573975,
   0.9935396909713745,
   1.0245945453643799],
  'accuracy': [0.3375000059604645,
   0.34687501192092896,
   0.3687500059604645,
   0.38749998807907104,
   0.45625001192092896,
   0.39375001192092896,
   0.4375,
   0.4468750059604645,
   0.4124999940395355,
   0.46562498807907104,
   0.4468750059604645,
   0.503125011920929,
   0.4906249940395355,
   0.4625000059604645,
   0.5,
   0.46875,
   0.5406249761581421,
   0.4749999940395355,
   0.5375000238418579,
   0.4937500059604645],
  'val_loss': [1.1074541807174683,
   1.104579210281372,
   1.1042073965072632,
   1

# Trial

In [6]:
# Fetch the trial with id '1' from the tuner's oracle and display its state mapping.
inspected_trial = clf.tuner.oracle.get_trial('1')
inspected_trial.get_state()

{'trial_id': '1',
 'hyperparameters': {'space': [{'class_name': 'Boolean',
    'config': {'name': 'structured_data_block_1/normalize',
     'default': False,
     'conditions': []}},
   {'class_name': 'Boolean',
    'config': {'name': 'structured_data_block_1/dense_block_1/use_batchnorm',
     'default': False,
     'conditions': []}},
   {'class_name': 'Choice',
    'config': {'name': 'structured_data_block_1/dense_block_1/num_layers',
     'default': 2,
     'conditions': [],
     'values': [1, 2, 3],
     'ordered': True}},
   {'class_name': 'Choice',
    'config': {'name': 'structured_data_block_1/dense_block_1/units_0',
     'default': 32,
     'conditions': [],
     'values': [16, 32, 64, 128, 256, 512, 1024],
     'ordered': True}},
   {'class_name': 'Choice',
    'config': {'name': 'structured_data_block_1/dense_block_1/dropout',
     'default': 0.0,
     'conditions': [],
     'values': [0.0, 0.25, 0.5],
     'ordered': True}},
   {'class_name': 'Choice',
    'config': {'name'

In [7]:
from dataclasses import dataclass
from typing import Any, Optional, List, Dict


@dataclass
class Trial:
    """Flat record describing one tuner trial and its rendered model graph.

    In this notebook every field except ``model_graph_url`` is populated by
    unpacking the mapping returned by a tuner trial's ``get_state()``, so the
    field names must match that mapping's keys. ``model_graph_url`` is the path
    of the plotted model image, or ``None`` when no image was produced.
    """

    # Fields supplied by the trial state mapping.
    trial_id: str
    hyperparameters: Dict[str, Any]
    metrics: Dict[str, Any]
    score: float
    best_step: int
    status: str
    # Path of the rendered model image; None when rendering failed.
    model_graph_url: Optional[str]
    # Also supplied by the trial state mapping; kept last to preserve field order.
    message: Any

@dataclass 
class TrialsTracker:
    """Container holding the list of collected ``Trial`` records."""
    trials: List[Trial]

# Number of top-ranked trials to export; equals the max_trials value passed to
# the classifier when the search was configured.
max_trials = 5
trials = []
models = clf.tuner.get_best_models(max_trials)
for index, trial in enumerate(clf.tuner.oracle.get_best_trials(max_trials)):
    # Rendering the graph is best-effort: on failure (including a missing
    # model at this position) the reason is logged and the trial is still
    # recorded with an empty graph URL. `except Exception` replaces the former
    # bare `except:`, which also trapped KeyboardInterrupt / SystemExit.
    try:
        model_file_path = os.path.join(clf.tuner.get_trial_dir(trial_id=trial.trial_id), 'model.png')
        plot_model(model=models[index], to_file=model_file_path, show_layer_activations=True, show_dtype=True, show_shapes=True, show_layer_names=False)
    except Exception as exc:
        logging.warning("Could not render the model graph for trial %s: %s", trial.trial_id, exc)
        model_file_path = None
    # The trial state mapping supplies every Trial field except model_graph_url.
    trials.append(Trial(
        **trial.get_state(),
        model_graph_url=model_file_path
    ))
trials_tracker = TrialsTracker(trials=trials)
trials_tracker.__dict__




{'trials': [Trial(trial_id='4', hyperparameters={'space': [{'class_name': 'Boolean', 'config': {'name': 'structured_data_block_1/normalize', 'default': False, 'conditions': []}}, {'class_name': 'Boolean', 'config': {'name': 'structured_data_block_1/dense_block_1/use_batchnorm', 'default': False, 'conditions': []}}, {'class_name': 'Choice', 'config': {'name': 'structured_data_block_1/dense_block_1/num_layers', 'default': 2, 'conditions': [], 'values': [1, 2, 3], 'ordered': True}}, {'class_name': 'Choice', 'config': {'name': 'structured_data_block_1/dense_block_1/units_0', 'default': 32, 'conditions': [], 'values': [16, 32, 64, 128, 256, 512, 1024], 'ordered': True}}, {'class_name': 'Choice', 'config': {'name': 'structured_data_block_1/dense_block_1/dropout', 'default': 0.0, 'conditions': [], 'values': [0.0, 0.25, 0.5], 'ordered': True}}, {'class_name': 'Choice', 'config': {'name': 'structured_data_block_1/dense_block_1/units_1', 'default': 32, 'conditions': [], 'values': [16, 32, 64, 12

In [2]:
import os
import pandas as pd

# Source CSV, addressed relative to the parent of the working directory.
inputs = os.path.join(os.path.pardir, 'autotrain', 'datasets', 'structured-data-classification.csv')
datasets = pd.read_csv(inputs)

# NOTE(review): both entries are -1, so the last column is selected twice (the
# printed frame shows two identical columns) — confirm whether real feature
# indices were meant to precede the trailing -1.
best_feature_index = [-1, -1]
extracted_datasets = datasets.iloc[:, best_feature_index]
print(extracted_datasets)

# The extract is written next to the source file as "extracted-<source name>".
parent_dir, source_file_name = os.path.split(inputs)
extracted_file_name = '-'.join(['extracted', source_file_name])
extracted_file_path = os.path.join(parent_dir, extracted_file_name)
print(extracted_file_path)

extracted_datasets.to_csv(extracted_file_path, index=False)

     淬透性  淬透性
0      0    0
1      1    1
2      0    0
3     -1   -1
4     -1   -1
..   ...  ...
495   -1   -1
496    0    0
497    0    0
498    1    1
499    0    0

[500 rows x 2 columns]
../autotrain/datasets/extracted-structured-data-classification.csv
