In [None]:
import numpy as np
import pickle
import time
import argparse
import sys

import keras
from keras.preprocessing import sequence
from keras.models import Sequential, Model, load_model
from keras.optimizers import Adam
from keras.layers import Dense, Dropout, Activation, Input, Reshape, BatchNormalization
from keras.layers import (
    Conv1D,
    GlobalAveragePooling1D,
    MaxPooling1D,
    GlobalAveragePooling1D,
    Reshape,
    AveragePooling1D,
    Flatten,
    Concatenate,
)
from keras import backend
from keras.callbacks import TensorBoard, LearningRateScheduler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error,r2_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler

In [None]:
def create_model_combined(Featurizer, channels):
    """Build the four-input regression network on top of a shared featurizer.

    Four inputs of shape (2000, channels) are each passed through the same
    ``Featurizer`` object (so all four branches use the same featurizer
    instance), concatenated along the last axis, flattened, and fed through
    a dropout layer and a dense head ending in a single linear output.

    Args:
        Featurizer: callable (e.g. the model returned by ``dos_featurizer``)
            applied to each input tensor.
        channels: size of the last axis of every input.

    Returns:
        A keras ``Model`` taking a list of four inputs and producing one value.
    """
    dos_inputs = [Input(shape=(2000, channels)) for _ in range(4)]
    # Fix: the original fed the not-yet-defined `output4` into the featurizer
    # for the fourth branch (UnboundLocalError); every branch must consume its
    # own input tensor.
    dos_features = [Featurizer(dos_input) for dos_input in dos_inputs]

    output = Concatenate(axis=-1)(dos_features)
    output = Flatten()(output)
    output = Dropout(0.2)(output)
    output = Dense(200, activation="linear")(output)
    output = Dense(1000, activation="relu")(output)
    output = Dense(1000, activation="relu")(output)
    out = Dense(1, activation="linear")(output)

    model = Model(inputs=dos_inputs, outputs=out)
    return model

# the model for feature extraction
def dos_featurizer(channels):
    """Build the convolutional feature-extraction model for one DOS input.

    Args:
        channels: size of the last axis of the input; the input shape is
            (2000, channels).

    Returns:
        A keras ``Model`` mapping the input to the output of the final
        Conv1D layer (150 filters).
    """
    # the input data is a 2000*channels matrix
    input_DOS = Input(shape=(2000, channels))

    # Average the input over three window sizes. All three use the same stride
    # and "same" padding so their outputs can be concatenated channel-wise.
    pooled = [
        AveragePooling1D(pool_size=window, strides=4, padding="same")(input_DOS)
        for window in (4, 25, 200)
    ]
    Feature = Concatenate(axis=-1)(pooled)                                          # splicing
    Feature = Conv1D(50, 20, activation="relu", padding="same", strides=2)(Feature)  # convolution
    Feature = BatchNormalization()(Feature)                                         # normalization

    # Three identical stages (kernel-3/stride-2 convolution followed by
    # size-3/stride-2 average pooling) with an increasing number of filters.
    for filters in (75, 100, 125):
        Feature = Conv1D(filters, 3, activation="relu", padding="same", strides=2)(Feature)
        Feature = AveragePooling1D(pool_size=3, strides=2, padding="same")(Feature)

    Feature = Conv1D(150, 3, activation="relu", padding="same", strides=1)(Feature)  # convolution

    # Fix: the original referenced the undefined name `input_dos` (NameError);
    # the input tensor is `input_DOS`.
    Featurizer_model = Model(input_DOS, Feature)                                    # modeling
    return Featurizer_model

def decay_schedule(epoch, lr):
    """Return the learning rate to use for ``epoch``.

    At the milestone epochs listed below the learning rate is replaced by the
    associated value; at every other epoch the incoming ``lr`` is returned
    unchanged.
    """
    milestones = {
        0: 0.00100,
        15: 0.00050,
        35: 0.00010,
        45: 0.00005,
        55: 0.00001,
    }
    return milestones.get(epoch, lr)

In [None]:
# Run configuration.
save_model = 0      # NOTE(review): not consulted by any cell visible in this notebook — confirm intent
epochs     = 60     # number of training epochs passed to model.fit
batch_size = 32     # mini-batch size passed to model.fit
channels   = 9      # size of the last axis of each model input
split_ratio= 0.2    # fraction of the samples held out by train_test_split
# Use an integer upper bound (randint is an integer RNG; the original passed
# the float 1e6) and report the drawn seed so a given split can be reproduced.
seed       = np.random.randint(1, 10**6)
print("seed:", seed)

In [None]:
# The data file holds three consecutive pickled objects, read in this order:
# surface DOS, adsorbate DOS, regression targets.
# NOTE: pickle.load can execute arbitrary code — only load files you trust.
with open('DataToPredictAdsorptionEnergy', 'rb') as data_file:
    x_surface_dos, x_adsorbate_dos, y_targets = (
        pickle.load(data_file) for _ in range(3)
    )

In [None]:
# Hold out `split_ratio` of the samples; the three arrays are split consistently.
x_train, x_test, ads_train, ads_test, y_train, y_test = train_test_split(
    x_surface_dos,
    x_adsorbate_dos,
    y_targets,
    test_size=split_ratio,
    random_state=seed,
)

# The surface array stacks three blocks of `channels` columns along its last
# axis. Derive the slices from `channels` once instead of hard-coding
# 0:9 / 9:18 / 18:27 in every call, then append the adsorbate array as the
# fourth model input.
surface_slices = [slice(i * channels, (i + 1) * channels) for i in range(3)]
train_inputs = [x_train[:, :, s] for s in surface_slices] + [ads_train]
test_inputs = [x_test[:, :, s] for s in surface_slices] + [ads_test]

shared_conv = dos_featurizer(channels)
lr_scheduler = LearningRateScheduler(decay_schedule, verbose=0)
tensorboard = TensorBoard(log_dir="logs/{}".format(time.time()), histogram_freq=1)
model = create_model_combined(shared_conv, channels)
model.compile(loss="logcosh", optimizer=Adam(0.001), metrics=["mean_absolute_error"])
model.summary()

# NOTE(review): the held-out test split is also used as validation data during
# training, so the "test" metrics printed below are not from unseen data in the
# strict sense — confirm this is intended.
model.fit(
    train_inputs,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(test_inputs, y_test),
    callbacks=[tensorboard, lr_scheduler],
)

# Flatten the predictions to 1-D so they line up with the targets.
train_out = model.predict(train_inputs).ravel()
test_out = model.predict(test_inputs).ravel()

print("train MAE: ", mean_absolute_error(y_train, train_out))
print("train RMSE: ", np.sqrt(mean_squared_error(y_train, train_out)))
print("test MAE: ", mean_absolute_error(y_test, test_out))
print("test RMSE: ", np.sqrt(mean_squared_error(y_test, test_out)))

# NOTE(review): the model is saved unconditionally; the `save_model` flag from
# the configuration cell is not checked here.
print("Saving model...")
model.save("Model/predict_AdsorptionEnergy.h5")

In [None]:
# Provide the input data for prediction.
# As described in the paper, the input is the DOS of 9 electron orbitals
# (s, py, pz, px, dxy, dyz, dz2, dxz, dx2) for 4 atoms, of which 3 are from
# the alloy and 1 is from the adsorbate. The DOS is given at 0.01 eV intervals
# within ±10 eV, which makes a 2000*27 matrix (alloy) and a 2000*9 matrix (adsorbate).

# Bind the reloaded network to its own name. The original assigned it to
# `Model`, which rebinds the keras `Model` class that create_model_combined and
# dos_featurizer look up when they are called, so re-running the training cell
# in the same kernel would fail.
trained_model = load_model('Model/predict_AdsorptionEnergy.h5')
predict_AdsorptionEnergy = trained_model.predict(
    [
        x_surface_dos[:, :, 0:9],
        x_surface_dos[:, :, 9:18],
        x_surface_dos[:, :, 18:27],
        x_adsorbate_dos,
    ]
)