In [1]:
import os
import time
from datetime import datetime

import h5py              as h5
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from matplotlib import ticker
from sklearn import metrics
from sklearn import model_selection
from sklearn.model_selection import KFold
from sklearn.preprocessing import normalize
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense,Activation
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model

In [3]:
# Variables produced by this cell:
#   xTr - training inputs            yTr - training targets, reshaped to (n, 1)
#   xTe - testing inputs             yTe - testing targets,  reshaped to (p, 1)
# The original notes describe the inputs as (n x d) / (p x d) matrices.

# Each file is opened in a `with` block so the HDF5 handle is closed as soon as
# the arrays have been copied into memory (`[...]` reads the whole dataset).
with h5.File('training_data_10000.hdf5', 'r') as Training:
    xTr = Training['inputs'][...]
    yTr = np.reshape(Training['target'][...], (-1, 1))

with h5.File('test_data_1000.hdf5', 'r') as Testing:
    xTe = Testing['inputs'][...]
    yTe = np.reshape(Testing['target'][...], (-1, 1))

In [4]:
# Hold out 20% of the training data for validation. The fixed seed (same value
# the K-fold cell uses) makes the split reproducible across kernel restarts.
X_train, X_val, y_train, y_val = model_selection.train_test_split(
    xTr, yTr, test_size=0.2, random_state=42
)

# Single ANN
---
Training the single ANN on a CPU takes only about 3-4 seconds, so running it on a GPU will probably not give a significant speed-up.

In [None]:
# Single feed-forward regressor: input -> 100 tanh -> 200 tanh -> 1 linear unit.
# The input width is taken from the training data instead of being hard-coded
# to 256, matching how the ensemble cell sizes its first layer.
modelGPU = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(100, activation='tanh', name='dense_1'),
    Dense(200, activation='tanh', name='dense_2'),
    Dense(1, name='output'),
])
modelGPU.compile(optimizer='Adam', loss='mse')

# Stop training once the validation loss has not improved for 2 epochs in a row.
early_stopping = EarlyStopping(monitor='val_loss', patience=2, verbose=2)

# Wall-clock timing of the whole fit call.
startTime = datetime.now()
history = modelGPU.fit(
    X_train,
    y_train,
    epochs=1000,  # upper bound; early stopping normally ends training sooner
    validation_data=(X_val, y_val),
    verbose=2,
    shuffle=False,
    callbacks=[early_stopping],
)
First_Trial = datetime.now() - startTime
print("\nTime taken: ", First_Trial)

# Ensemble Method
---
Training the ensemble (one network per fold of a 5-fold cross-validation) on a CPU takes about 20 minutes.

In [None]:
# 5-fold cross-validation: one network is trained per fold and written to
# 'models/model_<fold>.h5' so the members can be reloaded later as an ensemble.
kf = KFold(5, shuffle=True, random_state=42)
oos_y = []     # held-out targets, one array per fold
oos_pred = []  # out-of-sample predictions matching oos_y
startTime = datetime.now()

# NOTE: y_train / y_val are rebound on every iteration and share their names
# with the hold-out split created by train_test_split earlier in the notebook;
# re-run that cell before reusing the hold-out split after this loop.
for fold, (train, val) in enumerate(kf.split(xTr), start=1):
    print(f'Fold#{fold}')

    x_train = xTr[train]
    y_train = yTr[train]
    x_val = xTr[val]
    y_val = yTr[val]

    # Every layer gets its OWN initializer instance. Keras warns against calling
    # one unseeded initializer instance several times: recent versions return
    # the same random values on each call, which would correlate the layers.
    model = Sequential([
        tf.keras.Input(shape=(xTr.shape[1],)),
        Dense(100,
              kernel_initializer=tf.keras.initializers.RandomNormal(mean=0., stddev=1.),
              activation='tanh'),
        Dense(100,
              kernel_initializer=tf.keras.initializers.RandomNormal(mean=0., stddev=1.),
              activation='tanh'),
        Dense(1,
              kernel_initializer=tf.keras.initializers.RandomNormal(mean=0., stddev=1.),
              activation='linear'),
    ])
    model.compile(loss='mean_squared_error', optimizer='adam')

    model.fit(x_train, y_train, validation_data=(x_val, y_val), verbose=0, epochs=500)

    # Persist this fold's model so the ensemble can be rebuilt without retraining.
    filename = f'models/model_{fold}.h5'
    model.save(filename)

    pred = model.predict(x_val)
    oos_y.append(y_val)
    oos_pred.append(pred)

    # Measure this fold's MSE (scikit-learn's argument order is y_true, y_pred).
    score = metrics.mean_squared_error(y_val, pred)
    print(f'Fold score (MSE):{score}')

First_Trial = datetime.now() - startTime
print("\nTime taken: ", First_Trial)

In [None]:
# load models from file
def load_all_models(n_models, model_dir='models'):
    """Load the saved ensemble members from disk.

    Files are expected at ``<model_dir>/model_<k>.h5`` for ``k = 1 .. n_models``,
    which is the naming used when the fold models are saved.

    Parameters
    ----------
    n_models : int
        Number of member models to load; values <= 0 yield an empty list.
    model_dir : str, optional
        Directory containing the saved models. Defaults to ``'models'``.

    Returns
    -------
    list
        The loaded models, ordered by member number.
    """
    all_models = []
    # Member files are numbered from 1, not 0.
    for member_idx in range(1, n_models + 1):
        filename = f'{model_dir}/model_{member_idx}.h5'
        # `load_model` is Keras' loader, imported at the top of the notebook.
        all_models.append(load_model(filename))
        print('>loaded %s' % filename)
    return all_models
# Rebuild the ensemble from the five models saved to disk.
n_members = 5
members = load_all_models(n_models=n_members)
print('Loaded %d models' % len(members))

In [None]:
# Ensemble prediction: average the members' predictions on the test inputs.
# Dividing by the number of predictions actually collected (via np.mean) keeps
# the average correct even if `members` and `n_members` ever get out of sync.
preds = [member.predict(xTe) for member in members]
Final_preds = np.mean(preds, axis=0)

# NOTE(review): `relativeLoss` and `to_percent` are not defined in the cells
# shown here; they must be defined in an earlier cell before this one runs.
# Judging by their use below, `a` looks like the per-sample relative loss and
# `b` like a (minimum, maximum, mean) triple - TODO confirm against the helper.
a, b = relativeLoss(yTe, Final_preds)

fig, ax = plt.subplots(figsize=(7, 5))
ax.set_title('Relative Loss(%) on Testing set')
ax.scatter(range(len(a)), a, label='relative loss')
ax.set_xlabel('sample')
ax.yaxis.set_major_formatter(ticker.FuncFormatter(to_percent))
ax.set_ylim([0, 6])

# Summary box; the position is given in data coordinates (x=680, y=5.7).
string = 'minimum: %f%%\n maximum:%f%%\n mean:%f%%'%(b[0],b[1],b[2])
ax.text(680, 5.7, string, verticalalignment="top",
        bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

# savefig does not create missing directories, so make sure the target exists.
figure_path = 'Pictures/5 fold cv.png'
os.makedirs(os.path.dirname(figure_path), exist_ok=True)
fig.savefig(figure_path)
plt.show()