In [5]:
import warnings
from collections import namedtuple
from pathlib import Path

import autokeras as ak
import keras.backend as K
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Silence every warning raised from this point on (no category or module filter
# is given, so the "ignore" action applies to all of them).
# NOTE(review): this also hides deprecation and numerical warnings from the
# libraries imported above — consider narrowing it to a specific category.
warnings.filterwarnings("ignore")

In [6]:
# Folder that holds one rescaled workbook per dataset version.
base_path = Path('dataset/dataset_versions')

# Each entry is the filename prefix of a '<name>_rescaled_dataset.xlsx' workbook.
datasets_names = (
    'bfill_ffill',
    'linear_interpolation',
    'cubic_interpolation',
    'quadratic_interpolation',
    'polynomial_5_interpolation',
    'polynomial_7_interpolation',
    'polynomial_9_interpolation',
    'polynomial_11_interpolation',
)

# Maps dataset name -> DataFrame read from the matching workbook.
datasets = {}
for dataset_name in datasets_names:
    workbook_path = base_path.joinpath(f'{dataset_name}_rescaled_dataset.xlsx')
    dataset = pd.read_excel(workbook_path)
    # Keep every column except the first one
    # (presumably an index column written on export — TODO confirm).
    datasets[dataset_name] = dataset.iloc[:, 1:]

In [7]:
# Hold-out configuration applied to every dataset version.
test_size = 0.2
seed = 7
target_feature_name = 'GDP per capita (current US$)'

# One train/test partition of a single dataset version.
SplittedDataset = namedtuple('SplittedDataset', ['name', 'x_train', 'y_train', 'x_test', 'y_test'])
splited_datasets = []

for dataset_name, dataset in datasets.items():
    # Separate the regression target from the predictor columns.
    data_x = dataset.drop(columns=[target_feature_name])
    data_y = dataset[target_feature_name]

    # train_test_split returns (x_train, x_test, y_train, y_test), which is not
    # the field order of SplittedDataset, so the tuple is built with keywords
    # instead of going through an intermediate dict and positional arguments.
    x_train, x_test, y_train, y_test = train_test_split(
        data_x, data_y, test_size=test_size, random_state=seed)
    splited_datasets.append(SplittedDataset(
        name=dataset_name,
        x_train=x_train,
        y_train=y_train,
        x_test=x_test,
        y_test=y_test,
    ))

In [4]:
def r2_score_custom(y_true, y_pred):
    """Return the coefficient of determination built from Keras backend ops.

    Computes ``1 - SS_res / (SS_tot + epsilon)`` where ``SS_res`` is the sum of
    squared differences between ``y_true`` and ``y_pred`` and ``SS_tot`` is the
    sum of squared deviations of ``y_true`` from its mean. ``K.epsilon()`` is
    added to the denominator so a constant ``y_true`` does not divide by zero.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values, compatible with ``y_true`` for subtraction.

    Returns:
        The R^2 value computed over all elements passed in.
    """
    residual_sum_squares = K.sum(K.square(y_true - y_pred))
    deviation_from_mean = y_true - K.mean(y_true)
    total_sum_squares = K.sum(K.square(deviation_from_mean))
    return 1 - residual_sum_squares / (total_sum_squares + K.epsilon())

def r2_loss(y_true, y_pred):
    """Return the negated R^2 so that minimising the loss maximises R^2.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values.

    Returns:
        ``-r2_score_custom(y_true, y_pred)``.
    """
    score = r2_score_custom(y_true, y_pred)
    return -score

# Run a separate AutoKeras architecture search for every dataset version.
# Each search uses its own project name ('models_for_<dataset name>'), and the
# negated R^2 is the loss, so the tuner's default "lower val_loss is better"
# objective selects the model with the highest validation R^2.
for dataset in splited_datasets:
    clf = ak.StructuredDataRegressor(
        max_trials=500,
        project_name=f'models_for_{dataset.name}',
        loss=r2_loss,
        metrics=[r2_score_custom],
        # Seed the search with the same notebook-wide seed used for the
        # train/test split; without it every run explores different trials.
        seed=seed)
    clf.fit(dataset.x_train, dataset.y_train)

Trial 2 Complete [00h 00m 45s]
val_loss: -0.9201511144638062

Best val_loss So Far: -0.9201511144638062
Total elapsed time: 00h 01m 20s

Search: Running Trial #3

Value             |Best Value So Far |Hyperparameter
False             |False             |structured_data_block_1/normalize
False             |False             |structured_data_block_1/dense_block_1/use_batchnorm
1                 |2                 |structured_data_block_1/dense_block_1/num_layers
32                |32                |structured_data_block_1/dense_block_1/units_0
0                 |0                 |structured_data_block_1/dense_block_1/dropout
32                |32                |structured_data_block_1/dense_block_1/units_1
0                 |0                 |regression_head_1/dropout
adam              |adam              |optimizer
0.001             |0.001             |learning_rate



KeyboardInterrupt: 

In [None]:
# Disabled evaluation snippet: the code below is wrapped in a string literal,
# so none of it executes. If re-enabled it would load the best model saved for
# the 'polynomial_11_interpolation' search (passing the custom loss and metric
# through `custom_objects`), predict on a test split, and print R^2 and RMSE.
# The inner comment reads "Loading the model".
# NOTE(review): `dataset` would be whatever the training loop bound last —
# confirm it is the polynomial_11_interpolation split before re-enabling.
"""
from keras.models import load_model

# Загрузка модели
model = load_model('models_for_polynomial_11_interpolation/best_model', custom_objects={"r2_loss": r2_loss, "r2_score_custom": r2_score_custom})
test_predict = model.predict(dataset.x_test)

rmse = np.sqrt(mean_squared_error(dataset.y_test, test_predict))
r2 = r2_score(dataset.y_test, test_predict)
print(r2, rmse)
"""