In [0]:
# Mount Google Drive at /drive; later cells save and reload the trained
# models under '/drive/My Drive/Colab Notebooks/'.
from google.colab import drive
drive.mount('/drive', force_remount=False)

In [0]:
# load data
!wget https://github.com/jetanaso/datasets/raw/master/S50IF_CON.xls

In [0]:
# import necessary libraries
%tensorflow_version 1.x
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from keras.models import load_model, Sequential, Model
from keras.layers import Dense, Flatten, Dropout
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.layers.merge import concatenate
from keras.utils import to_categorical, plot_model
from keras import regularizers
from tensorflow import set_random_seed

In [0]:
def split_sequences(sequences, n_steps):
	"""Cut a 2-D array into overlapping windows of ``n_steps`` rows.

	Every column except the last is treated as an input feature; the last
	column is the target. The target of a window is the last-column value
	on the window's final row.

	Args:
		sequences: 2-D array indexed as [row, column].
		n_steps: number of consecutive rows per window.

	Returns:
		Tuple ``(X, y)`` of numpy arrays: the stacked feature windows and
		the matching targets, one entry per window start position.
	"""
	total = len(sequences)
	# start positions whose window still fits inside the data
	starts = [s for s in range(total) if s + n_steps <= total]
	X = [sequences[s:s + n_steps, :-1] for s in starts]
	y = [sequences[s + n_steps - 1, -1] for s in starts]
	return np.array(X), np.array(y)

In [0]:
# data preparation
# The file carries an .xls name but is parsed here as tab-delimited text;
# the first line is skipped and the column names are supplied explicitly.
cols = ['Date','Open','High','Low','Close','Volume']
df = pd.read_csv('S50IF_CON.xls', names=cols, index_col=False, skiprows=1, delimiter='\t')
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
# every non-date column is read as text: strip the commas, then cast to float
for numeric_col in cols[1:]:
	df[numeric_col] = df[numeric_col].str.replace(',','').astype('float')
df = df.set_index('Date', drop=True)

In [0]:
n_steps = 88            # rows per input window
forecast_horizon = 22   # rows ahead used to build the label

dataset = df.copy()

# 1 when the value is higher than on the previous row, else 0
# (the first row has no predecessor, so the comparison is False -> 0)
binary_columns = {
	'Open': 'Open_binary',
	'High': 'High_binary',
	'Low': 'Low_binary',
	'Close': 'Close_binary',
	'Volume': 'Volume_binary',
}
for source_col, binary_col in binary_columns.items():
	previous = dataset[source_col].shift(1)
	dataset[binary_col] = np.where(previous < dataset[source_col], 1, 0)

# 1 when Close is higher `forecast_horizon` rows later than it is now
future_close = dataset['Close'].shift(-forecast_horizon)
dataset['Label'] = np.where(future_close > dataset['Close'], 1, 0)

# the last `forecast_horizon` rows have no future Close to compare with
dataset = dataset.iloc[:-forecast_horizon]
dataset.tail(10)

In [0]:
# define input sequence
# NOTE: `open` shadows the Python builtin of the same name; the name is kept
# because the following cell reads these variables.
sequence_columns = ['Open_binary', 'High_binary', 'Low_binary',
                    'Close_binary', 'Volume_binary', 'Label']
open, high, low, close, volume, label = [dataset[c].values for c in sequence_columns]

# show each series together with its shape
for series in (open, high, low, close, volume, label):
	print(series, series.shape)

In [0]:
# convert to [rows, columns] structure
open = open.reshape((len(open), 1))
high = high.reshape((len(high), 1))
low = low.reshape((len(low), 1))
# FIX: this column was previously built from `low`, which silently replaced
# the Close feature with a second copy of Low.
close = close.reshape((len(close), 1))
volume = volume.reshape((len(volume), 1))
label = label.reshape((len(label), 1))

# horizontally stack columns; the label goes last because split_sequences
# treats the final column as the target
dataset = np.hstack((open, high, low, close, volume, label))

# convert into input/output
X, y = split_sequences(dataset, n_steps)
# one-hot encode the 0/1 labels
y = to_categorical(y)
print(X.shape, y.shape)

# summarize the data, for example: first window and its target
print(X[0], y[0])

In [0]:
# split train/validate/test datasets (90:5:5)
# The three parts are contiguous slices taken in the existing order (no shuffling).
n_windows = X.shape[0]
train_end = int(n_windows*.9)
validate_end = int(n_windows*.95)

trainX, trainy = X[:train_end], y[:train_end]
validateX, validatey = X[train_end:validate_end], y[train_end:validate_end]
testX, testy = X[validate_end:], y[validate_end:]

for features, targets in ((trainX, trainy), (validateX, validatey), (testX, testy)):
	print(features.shape, targets.shape)

In [0]:
def fit_model(trainX, trainy, validateX, validatey, epochs=20, batch_size=16):
  """Build, train and return one 1D-CNN classifier, plotting its learning curves.

  Args:
    trainX: training inputs; axes 1 and 2 of its shape (steps, features)
      define the network's input shape.
    trainy: training targets; expected to match the 2-unit softmax output.
    validateX: validation inputs, passed to Keras as validation_data.
    validatey: validation targets, passed to Keras as validation_data.
    epochs: number of training epochs (default 20).
    batch_size: mini-batch size (default 16).

  Returns:
    The fitted Keras Sequential model.
  """
  # Take the input shape from the data passed in rather than from notebook
  # globals, so the function does not depend on hidden state.
  n_timesteps, n_features = trainX.shape[1], trainX.shape[2]

  # define model architecture
  model = Sequential()
  model.add(Conv1D(filters=24, kernel_size=5, activation='relu',
                  kernel_regularizer=regularizers.l2(0.007), input_shape=(n_timesteps, n_features)))
  model.add(MaxPooling1D(pool_size=2))
  model.add(Dropout(rate=0.35))
  model.add(Flatten())
  model.add(Dense(50, activation='relu'))
  model.add(Dropout(rate=0.15))
  model.add(Dense(2, activation='softmax'))
  model.compile(optimizer='RMSprop', loss='mse', metrics=['accuracy'])

  # FIX: `model.summary` without parentheses was a no-op; call it.
  model.summary()

  # fit model
  history = model.fit(trainX, trainy, validation_data=(validateX, validatey),
                      epochs=epochs, batch_size=batch_size, verbose=0)

  # The accuracy history key is 'acc' in older Keras and 'accuracy' in newer
  # releases; use whichever this run produced.
  acc_key = 'acc' if 'acc' in history.history else 'accuracy'

  # plotting learning curves (train vs. validate) for loss and accuracy
  for metric, title in (('loss', 'Loss Development'), (acc_key, 'Accuracy Development')):
    plt.plot(history.history[metric], label='train')
    plt.plot(history.history['val_' + metric], label='validate')
    plt.title(title)
    plt.xticks(np.arange(0, epochs + 1, step=2))
    plt.legend()
    plt.show()

  return model

################################################################################

# number of independently trained ensemble members
nums_model = 30
for model_no in range(1, nums_model + 1):
	# train one member on the same train/validate data
	model = fit_model(trainX, trainy, validateX, validatey)
	# persist it to Drive under a 1-based file name
	filename = '/drive/My Drive/Colab Notebooks/1dcnn_model_' + str(model_no) + '.h5'
	model.save(filename)
	print('>Saved %s' % filename)

In [0]:
def load_all_models(n_models):
	"""Load the saved ensemble members numbered 1..n_models from Drive.

	Args:
		n_models: how many model files to load.

	Returns:
		List of loaded models, ordered by file number.
	"""
	loaded = []
	for model_no in range(1, n_models + 1):
		path = '/drive/My Drive/Colab Notebooks/1dcnn_model_' + str(model_no) + '.h5'
		loaded.append(load_model(path))
		print('>loaded %s' % path)
	return loaded

################################################################################

# load all models at once
# FIX: the member count is stored in `nums_model` (set in the training cell);
# the previous `num_models` was never defined and raised NameError.
members = load_all_models(nums_model)
print('Loaded %d models' % len(members))

# evaluate standalone models on test dataset
acc_list = []
for i,model in enumerate(members):
  # evaluate() returns [loss, accuracy] for the compiled metrics; keep accuracy
  _, acc = model.evaluate(testX, testy, verbose=0)
  acc_list.append(acc)
  print('Model#%s Accuracy on test dataset: %.3f%%' % (i+1, acc*100))
print('='*42)
print('Average 1DCNN Accuracy on test dataset: %.3f%%' % (np.mean(acc_list)*100))

In [0]:
def define_stacked_model(members):
	"""Build a stacking ensemble on top of already-trained member models.

	Each member's layers are marked non-trainable and renamed with an
	``ensemble_<n>_`` prefix (this mutates the passed-in models). The member
	outputs are concatenated and fed through a 60-unit sigmoid layer and a
	2-unit softmax layer. A diagram of the combined model is written to Drive.

	Args:
		members: list of Keras models to combine.

	Returns:
		The compiled multi-input ensemble model.
	"""
	for position, member in enumerate(members, start=1):
		# the prefix keeps layer names unique across members
		prefix = 'ensemble_' + str(position) + '_'
		for layer in member.layers:
			layer.trainable = False
			layer.name = prefix + layer.name
	# one input head and one output per member
	ensemble_visible = []
	ensemble_outputs = []
	for member in members:
		ensemble_visible.append(member.input)
		ensemble_outputs.append(member.output)
	# meta-learner stacked on the concatenated member outputs
	merge = concatenate(ensemble_outputs)
	hidden = Dense(60, activation='sigmoid')(merge)
	output = Dense(2, activation='softmax')(hidden)
	model = Model(inputs=ensemble_visible, outputs=output)
	# plot graph of ensemble
	plot_model(model, show_shapes=True, to_file='/drive/My Drive/Colab Notebooks/model_graph.png')
	model.compile(loss='mse', optimizer='RMSprop', metrics=['accuracy'])
	return model
 
def fit_stacked_model(model, inputX, inputy):
	"""Train the stacked ensemble for 30 epochs.

	The same ``inputX`` is supplied once per input head of ``model``.

	Args:
		model: multi-input ensemble model (``model.input`` is a list of heads).
		inputX: input data fed to every head.
		inputy: training targets.
	"""
	n_heads = len(model.input)
	# every head receives the very same input object
	replicated = [inputX] * n_heads
	model.fit(replicated, inputy, epochs=30, verbose=0)
 
def predict_stacked_model(model, inputX):
	"""Return the stacked ensemble's predictions for ``inputX``.

	The same ``inputX`` is supplied once per input head of ``model``.

	Args:
		model: multi-input ensemble model (``model.input`` is a list of heads).
		inputX: input data fed to every head.

	Returns:
		Whatever ``model.predict`` returns for the replicated inputs.
	"""
	replicated = [inputX] * len(model.input)
	return model.predict(replicated, verbose=0)

################################################################################

# define ensemble model
stacked_model = define_stacked_model(members)

# fit stacked model on validate dataset (the members were trained on the train split)
fit_stacked_model(stacked_model, validateX, validatey)

# make predictions and evaluate: turn per-class scores and one-hot targets
# into class indices before comparing
class_scores = predict_stacked_model(stacked_model, testX)
yhat = class_scores.argmax(axis=1)
true_classes = testy.argmax(axis=1)
acc = accuracy_score(true_classes, yhat)
print('Ensemble 1DCNN Accuracy on test dataset: %.3f%%' % (acc*100))