In [1]:
from __future__ import print_function
from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform

from keras.models import Sequential
from keras.layers import Input
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import RMSprop
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.datasets import mnist
from keras.utils import np_utils

import pandas as pd
import numpy as np

from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import VarianceThreshold

import matplotlib.pyplot as plt

Using TensorFlow backend.


In [2]:
from pylab import rcParams
rcParams['figure.figsize'] = 35, 28

In [3]:
import keras.callbacks as kc

class Metrics(kc.Callback):
    def on_epoch_end(self, batch, logs={}):
        predict = np.asarray(self.model.predict(self.validation_data[0]))
        targ = self.validation_data[1]
        self.ginis=np.sqrt(np.square(np.log(predict + 1) - np.log(targ + 1)).mean())
        return
metrics = Metrics()

In [4]:
df_train = pd.read_csv("../data/train_new.csv")
np.log(df_train['formation_energy_ev_natom']+1)
scaler = StandardScaler()
scaled_features = scaler.fit_transform(df_train.values)
scaled_features_df = pd.DataFrame(scaled_features, index=df_train.index, columns=df_train.columns)
features = list(set(df_train.columns.tolist()) - set(['formation_energy_ev_natom',
                                                'bandgap_energy_ev', 
                                                'id',
                                                'lattice_angle_alpha_degree_r',
                                                'lattice_angle_beta_degree_r',
                                                'lattice_angle_alpha_degree_r']))

X = scaled_features_df[features].values

In [5]:
X.shape

(2400, 397)

In [6]:
Xr = VarianceThreshold(threshold=0.001).fit_transform(X)

In [7]:
Xr.shape

(2400, 296)

In [8]:
def data():
    '''
    Data providing function:

    This function is separated from model() so that hyperopt
    won't reload data for each evaluation run.
    '''
    df_train = pd.read_csv("../data/train_new.csv")
    df_train['formation_energy_ev_natom'] = np.log(df_train['formation_energy_ev_natom']+1)
    df_train['bandgap_energy_ev'] = np.log(df_train['bandgap_energy_ev']+1)

    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(df_train.values)
    scaled_features_df = pd.DataFrame(scaled_features, index=df_train.index, columns=df_train.columns)
    

    # store these off for predictions with unseen data
    Xmeans = scaler.mean_
    Xstds = scaler.scale_

    features = list(set(df_train.columns.tolist()) - set(['formation_energy_ev_natom',
                                                'bandgap_energy_ev', 
                                                'id',
                                                'lattice_angle_alpha_degree_r',
                                                'lattice_angle_beta_degree_r',
                                                'lattice_angle_alpha_degree_r']))
    targets = ['formation_energy_ev_natom', 'bandgap_energy_ev']

    y_e = scaled_features_df['formation_energy_ev_natom']
    y_be = scaled_features_df['bandgap_energy_ev']

    X = scaled_features_df[features].values
    X = VarianceThreshold(threshold=0.5).fit_transform(X)
    
    #train_size = int(0.7 * X.shape[0])
    #X_train, X_test, Y_train, Y_test = X[0:train_size], X[train_size:], y_be[0:train_size], y_be[train_size:]
    X_train, X_test, Y_train, Y_test = train_test_split(X, y_be, test_size=0.3, random_state=23)
    return X_train, X_test, Y_train, Y_test, Xmeans, Xstds

In [9]:
def model():
#     # Model
#     class Metrics(kc.Callback):
#         def on_epoch_end(self, batch, logs={}):
#             predict = np.asarray(self.model.predict(self.validation_data[0]))
#             targ = self.validation_data[1]
#             self.ginis=np.sqrt(np.square(np.log(predict + 1) - np.log(targ + 1)).mean())
#             return
#     metrics = Metrics()
    model = Sequential()
    model.add(Dense({{choice([1224,1512,1748])}}, input_dim=296, kernel_initializer='glorot_uniform',bias_initializer='zeros', activation='tanh'))#{{choice(['relu', 'tanh', 'linear'])}}
    #model.add(Dropout(0.5))
    model.add(Dense({{choice([32, 64,88, 128,225,500])}}, kernel_initializer='glorot_uniform',bias_initializer='zeros', activation='relu'))
    model.add(Dense({{choice([16,25, 32, 45])}}, kernel_initializer='glorot_uniform',bias_initializer='zeros', activation='tanh'))
    model.add(Dense({{choice([32, 64,90, 128])}}, kernel_initializer='glorot_uniform',bias_initializer='zeros', activation='relu'))
    model.add(Dense({{choice([2,4, 8, 16])}}, kernel_initializer='glorot_uniform',bias_initializer='zeros', activation='tanh'))
    model.add(Dense(1,activation='linear', kernel_initializer='glorot_uniform'))
    
    # Compile model
    model.compile(loss='mean_squared_logarithmic_error', optimizer='adam' ,metrics=['mean_squared_logarithmic_error']) #{{choice(['rmsprop', 'adam', 'sgd'])}}
    
    earlystopper = EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=0, mode='auto')
    #checkpointer = ModelCheckpoint(filepath='/tmp/model.hdf5',monitor='val_loss', verbose=0, save_best_only=True, period=1, mode='min')
    
    history = model.fit(X_train, Y_train, batch_size=25, epochs=50, validation_data=(X_test, Y_test), callbacks=[earlystopper], verbose=0)
    score, acc = model.evaluate(X_test, Y_test, verbose=0)
    print('_______________')
    print('Test accuracy:', np.sqrt(acc))
    print('_______________')
    return {'loss': -acc, 'status': STATUS_OK, 'model': model}

In [10]:
X_train, X_test, Y_train, Y_test, Xmeans, Xstds = data()

In [11]:
best_run, best_model = optim.minimize(model=model,
                                      data=data,
                                      algo=tpe.suggest,
                                      max_evals=750,
                                      trials=Trials(),
                                      notebook_name='DL_regression')

>>> Imports:
#coding=utf-8

from __future__ import print_function

try:
    from hyperopt import Trials, STATUS_OK, tpe
except:
    pass

try:
    from hyperas import optim
except:
    pass

try:
    from hyperas.distributions import choice, uniform
except:
    pass

try:
    from keras.models import Sequential
except:
    pass

try:
    from keras.layers import Input
except:
    pass

try:
    from keras.layers.core import Dense, Dropout, Activation
except:
    pass

try:
    from keras.optimizers import RMSprop
except:
    pass

try:
    from keras.callbacks import EarlyStopping, ModelCheckpoint
except:
    pass

try:
    from keras.datasets import mnist
except:
    pass

try:
    from keras.utils import np_utils
except:
    pass

try:
    import pandas as pd
except:
    pass

try:
    import numpy as np
except:
    pass

try:
    from keras.wrappers.scikit_learn import KerasRegressor
except:
    pass

try:
    from sklearn.model_selection import cross_val_score, train_test_split
exc

_______________
Test accuracy: 0.0977542712158
_______________
_______________
Test accuracy: 0.0994191579797
_______________
_______________
Test accuracy: 0.0925730075832
_______________
_______________
Test accuracy: 0.0979812899817
_______________
_______________
Test accuracy: 0.0936901654371
_______________
_______________
Test accuracy: 0.101093104088
_______________
_______________
Test accuracy: 0.0899680001357
_______________
_______________
Test accuracy: 0.107552075617
_______________
_______________
Test accuracy: 0.109040746861
_______________
_______________
Test accuracy: 0.11394835032
_______________
_______________
Test accuracy: 0.0881028660653
_______________
_______________
Test accuracy: 0.0997320162963
_______________
_______________
Test accuracy: 0.101371800986
_______________
_______________
Test accuracy: 0.0915349100352
_______________
_______________
Test accuracy: 0.11494254563
_______________
_______________
Test accuracy: 0.0882821907989
_______________


_______________
Test accuracy: 0.449095851654
_______________
_______________
Test accuracy: 0.0910223586034
_______________
_______________
Test accuracy: 0.101991605662
_______________
_______________
Test accuracy: 0.0898228934113
_______________
_______________
Test accuracy: 0.0956020608757
_______________
_______________
Test accuracy: 0.088753336363
_______________
_______________
Test accuracy: 0.115677843927
_______________
_______________
Test accuracy: 0.0909922907896
_______________
_______________
Test accuracy: 0.0876819551217
_______________
_______________
Test accuracy: 0.093365265194
_______________
_______________
Test accuracy: 0.0892607982413
_______________
_______________
Test accuracy: 0.0921283137596
_______________
_______________
Test accuracy: 0.0895888001253
_______________
_______________
Test accuracy: 0.119102046145
_______________
_______________
Test accuracy: 0.0905249619676
_______________
_______________
Test accuracy: 0.136363087376
_______________

_______________
Test accuracy: 0.0908242828659
_______________
_______________
Test accuracy: 0.0980394189063
_______________
_______________
Test accuracy: 0.0960631921827
_______________
_______________
Test accuracy: 0.102397790401
_______________
_______________
Test accuracy: 0.0957179342974
_______________
_______________
Test accuracy: 0.0918406949223
_______________
_______________
Test accuracy: 0.100333413318
_______________
_______________
Test accuracy: 0.0893766451064
_______________
_______________
Test accuracy: 0.10565699484
_______________
_______________
Test accuracy: 0.0861935850109
_______________
_______________
Test accuracy: 0.100015909987
_______________
_______________
Test accuracy: 0.103359967586
_______________
_______________
Test accuracy: 0.0991411469652
_______________
_______________
Test accuracy: 0.0903941410632
_______________
_______________
Test accuracy: 0.0995934653926
_______________
_______________
Test accuracy: 0.0965128716576
______________

_______________
Test accuracy: 0.09534394166
_______________
_______________
Test accuracy: 0.0997277808427
_______________
_______________
Test accuracy: 0.0970295338458
_______________
_______________
Test accuracy: 0.117998089231
_______________
_______________
Test accuracy: 0.0935062225839
_______________
_______________
Test accuracy: 0.103507549343
_______________
_______________
Test accuracy: 0.112734266908
_______________
_______________
Test accuracy: 0.110475846579
_______________
_______________
Test accuracy: 0.0912431587284
_______________
_______________
Test accuracy: 0.0987254082141
_______________
_______________
Test accuracy: 0.104459276642
_______________
_______________
Test accuracy: 0.0908829528835
_______________
_______________
Test accuracy: 0.10130422108
_______________
_______________
Test accuracy: 0.101136546311
_______________
_______________
Test accuracy: 0.0992936003701
_______________
_______________
Test accuracy: 0.0897102549942
_______________
__

_______________
Test accuracy: 0.0908568163629
_______________
_______________
Test accuracy: 0.448860418805
_______________
_______________
Test accuracy: 0.0868415530356
_______________
_______________
Test accuracy: 0.0909944715394
_______________
_______________
Test accuracy: 0.1326344349
_______________
_______________
Test accuracy: 0.107401093582
_______________
_______________
Test accuracy: 0.0953371456359
_______________
_______________
Test accuracy: 0.0904426126395
_______________
_______________
Test accuracy: 0.103088862487
_______________
_______________
Test accuracy: 0.093012557196
_______________
_______________
Test accuracy: 0.449095851654
_______________
_______________
Test accuracy: 0.0931150271925
_______________
_______________
Test accuracy: 0.0944437819772
_______________
_______________
Test accuracy: 0.0991057682143
_______________
_______________
Test accuracy: 0.0892965241057
_______________
_______________
Test accuracy: 0.0840110959317
_______________


_______________
Test accuracy: 0.0927029183555
_______________
_______________
Test accuracy: 0.0942832764691
_______________
_______________
Test accuracy: 0.0866577456283
_______________
_______________
Test accuracy: 0.106956286734
_______________
_______________
Test accuracy: 0.103615239934
_______________
_______________
Test accuracy: 0.10272693176
_______________
_______________
Test accuracy: 0.0961070473716
_______________
_______________
Test accuracy: 0.121071324613
_______________
_______________
Test accuracy: 0.108743982984
_______________
_______________
Test accuracy: 0.101820068633
_______________
_______________
Test accuracy: 0.0921045797776
_______________
_______________
Test accuracy: 0.0870159219927
_______________
_______________
Test accuracy: 0.089246976254
_______________
_______________
Test accuracy: 0.0932028172239
_______________
_______________
Test accuracy: 0.0869867116747
_______________
_______________
Test accuracy: 0.0959292364397
_______________


In [12]:
best_run

{'Dense': 0,
 'Dense_1': 0,
 'Dense_2': 2,
 'Dense_3': 1,
 'Dense_4': 1,
 'Dense_5': 3,
 'Dense_6': 2}

In [16]:
np.sqrt(0.008)

0.089442719099991588

In [14]:
best_model.get_config()

[{'class_name': 'Dense',
  'config': {'activation': 'tanh',
   'activity_regularizer': None,
   'batch_input_shape': (None, 296),
   'bias_constraint': None,
   'bias_initializer': {'class_name': 'Zeros', 'config': {}},
   'bias_regularizer': None,
   'dtype': 'float32',
   'kernel_constraint': None,
   'kernel_initializer': {'class_name': 'VarianceScaling',
    'config': {'distribution': 'uniform',
     'mode': 'fan_avg',
     'scale': 1.0,
     'seed': None}},
   'kernel_regularizer': None,
   'name': 'dense_505',
   'trainable': True,
   'units': 1224,
   'use_bias': True}},
 {'class_name': 'Dense',
  'config': {'activation': 'relu',
   'activity_regularizer': None,
   'bias_constraint': None,
   'bias_initializer': {'class_name': 'Zeros', 'config': {}},
   'bias_regularizer': None,
   'kernel_constraint': None,
   'kernel_initializer': {'class_name': 'VarianceScaling',
    'config': {'distribution': 'uniform',
     'mode': 'fan_avg',
     'scale': 1.0,
     'seed': None}},
   'kern

In [15]:
print("Evalutation of best performing model:")
print(best_model.evaluate(X_test, Y_test, verbose=1))

Evalutation of best performing model:
[0.008896124611298243, 0.008896124611298243]


# 0.007
[{'class_name': 'Dense',
  'config': {'activation': 'tanh',
   'activity_regularizer': None,
   'batch_input_shape': (None, 296),
   'bias_constraint': None,
   'bias_initializer': {'class_name': 'Zeros', 'config': {}},
   'bias_regularizer': None,
   'dtype': 'float32',
   'kernel_constraint': None,
   'kernel_initializer': {'class_name': 'VarianceScaling',
    'config': {'distribution': 'uniform',
     'mode': 'fan_avg',
     'scale': 1.0,
     'seed': None}},
   'kernel_regularizer': None,
   'name': 'dense_187',
   'trainable': True,
   'units': 1512,
   'use_bias': True}},
 {'class_name': 'Dense',
  'config': {'activation': 'relu',
   'activity_regularizer': None,
   'bias_constraint': None,
   'bias_initializer': {'class_name': 'Zeros', 'config': {}},
   'bias_regularizer': None,
   'kernel_constraint': None,
   'kernel_initializer': {'class_name': 'VarianceScaling',
    'config': {'distribution': 'uniform',
     'mode': 'fan_avg',
     'scale': 1.0,
     'seed': None}},
   'kernel_regularizer': None,
   'name': 'dense_188',
   'trainable': True,
   'units': 128,
   'use_bias': True}},
 {'class_name': 'Dense',
  'config': {'activation': 'tanh',
   'activity_regularizer': None,
   'bias_constraint': None,
   'bias_initializer': {'class_name': 'Zeros', 'config': {}},
   'bias_regularizer': None,
   'kernel_constraint': None,
   'kernel_initializer': {'class_name': 'VarianceScaling',
    'config': {'distribution': 'uniform',
     'mode': 'fan_avg',
     'scale': 1.0,
     'seed': None}},
   'kernel_regularizer': None,
   'name': 'dense_189',
   'trainable': True,
   'units': 32,
   'use_bias': True}},
 {'class_name': 'Dense',
  'config': {'activation': 'relu',
   'activity_regularizer': None,
   'bias_constraint': None,
   'bias_initializer': {'class_name': 'Zeros', 'config': {}},
   'bias_regularizer': None,
   'kernel_constraint': None,
   'kernel_initializer': {'class_name': 'VarianceScaling',
    'config': {'distribution': 'uniform',
     'mode': 'fan_avg',
     'scale': 1.0,
     'seed': None}},
   'kernel_regularizer': None,
   'name': 'dense_190',
   'trainable': True,
   'units': 64,
   'use_bias': True}},
 {'class_name': 'Dense',
  'config': {'activation': 'tanh',
   'activity_regularizer': None,
   'bias_constraint': None,
   'bias_initializer': {'class_name': 'Zeros', 'config': {}},
   'bias_regularizer': None,
   'kernel_constraint': None,
   'kernel_initializer': {'class_name': 'VarianceScaling',
    'config': {'distribution': 'uniform',
     'mode': 'fan_avg',
     'scale': 1.0,
     'seed': None}},
   'kernel_regularizer': None,
   'name': 'dense_191',
   'trainable': True,
   'units': 8,
   'use_bias': True}},
 {'class_name': 'Dense',
  'config': {'activation': 'linear',
   'activity_regularizer': None,
   'bias_constraint': None,
   'bias_initializer': {'class_name': 'Zeros', 'config': {}},
   'bias_regularizer': None,
   'kernel_constraint': None,
   'kernel_initializer': {'class_name': 'VarianceScaling',
    'config': {'distribution': 'uniform',
     'mode': 'fan_avg',
     'scale': 1.0,
     'seed': None}},
   'kernel_regularizer': None,
   'name': 'dense_192',
   'trainable': True,
   'units': 1,
   'use_bias': True}}]