## Univariate model for conditional volatility
 - Baseline model against which the other models are compared
 - 30-day rolling-window estimate


In [2]:
import pandas as pd
from sklearn .preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import LSTM

Using TensorFlow backend.


In [3]:
from math import sqrt
from numpy import split
from numpy import array
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot
from keras.layers import RepeatVector
from keras.layers import TimeDistributed

In [4]:
from matplotlib import pyplot as plt
import numpy as np
# Print NumPy floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)
import pandas as pd
# Render pandas floats with thousands separators and 10 decimal places.
pd.options.display.float_format = '{:,.10f}'.format

In [5]:
# Read the workbook, drop the 'Date' column and index the rows by 'day'.
data = (
    pd.read_excel('emotion.xls')
    .drop(columns='Date')
    .set_index("day")
)

In [19]:
# Preview the modelled series (last column) straight from `data`.
# `df` is only created in a later cell, so `df.head()` here fails on a fresh
# kernel (Restart & Run All); slicing `data` shows the same frame without it.
data.iloc[:, [-1]].head()

Unnamed: 0_level_0,Conditional Volatility
day,Unnamed: 1_level_1
2007-04-19,1.924018
2007-04-23,1.929078
2007-04-30,2.165012
2007-05-07,2.009764
2007-05-08,1.849752


In [18]:
# Keep only the last column of `data` as a single-column DataFrame.
df = data.iloc[:, -1].to_frame()

In [21]:
# Summarise the frame: index range, row count, non-null count and dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3168 entries, 2007-04-19 to 2020-03-16
Data columns (total 1 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Conditional Volatility  3168 non-null   float64
dtypes: float64(1)
memory usage: 49.5 KB


In [22]:
# Positional split: rows 18..3137 (3,120 rows, a multiple of the 30-step window)
# are used for training and the final 30 rows (3138..3167) are held out.
train, test = df.iloc[18:3138], df.iloc[3138:3168]

# Fit the min-max scaler on the training rows only and keep the scaled values
# in a DataFrame with the original index and column labels.
# `test` is left on its original scale.
scaler = MinMaxScaler()
train = pd.DataFrame(
    scaler.fit_transform(train),
    index=train.index,
    columns=train.columns,
)

In [23]:
# Group the scaled training rows into consecutive, non-overlapping 30-step
# windows: [n_windows, 30, n_features].
# A reshape is used instead of split(train.values, len(train)/30) because that
# form passed a float section count and broke when the cell was re-run (train
# is an ndarray by then and has no `.values`). This version is idempotent and
# still raises ValueError if the row count is not a multiple of 30.
train = array(train)
train = train.reshape((-1, 30, train.shape[-1]))

In [24]:
# Group the held-out rows into consecutive, non-overlapping 30-step windows:
# [n_windows, 30, n_features].
# A reshape is used instead of split(test.values, len(test)/30) because that
# form passed a float section count and broke when the cell was re-run (test
# is an ndarray by then and has no `.values`). This version is idempotent and
# still raises ValueError if the row count is not a multiple of 30.
test = array(test)
test = test.reshape((-1, 30, test.shape[-1]))

In [26]:
def to_supervised(train, n_input, n_out=30):
    """Turn windowed data into overlapping (input, target) training pairs.

    ``train`` is a 3-D array whose first two axes are merged into one
    continuous sequence. A window is then slid along that sequence one step at
    a time: each sample pairs ``n_input`` consecutive rows (all columns) with
    the ``n_out`` values of column 0 that immediately follow them.

    Returns ``(X, y)`` as arrays. When at least one sample fits, ``X`` is
    ``[samples, n_input, n_features]`` and ``y`` is ``[samples, n_out]``.
    """
    n_rows = train.shape[0] * train.shape[1]
    series = train.reshape((n_rows, train.shape[2]))
    span = n_input + n_out
    # Start offsets whose complete input+target span still fits in the series.
    starts = range(min(n_rows, n_rows - span + 1))
    inputs = [series[start:start + n_input, :] for start in starts]
    targets = [series[start + n_input:start + span, 0] for start in starts]
    return array(inputs), array(targets)

In [27]:
def build_model(train, n_input, n_out=30, epochs=55, batch_size=20, verbose=1):
    """Build and fit the LSTM encoder-decoder on sliding windows of ``train``.

    Parameters
    ----------
    train : 3-D array of windowed training data, as accepted by ``to_supervised``.
    n_input : number of past steps fed to the encoder.
    n_out : number of future steps to predict (default 30).
    epochs, batch_size, verbose : passed straight to ``model.fit``; the
        defaults reproduce the previously hard-coded values (55, 20, 1).

    Returns
    -------
    The fitted Keras ``Sequential`` model.

    Raises
    ------
    ValueError
        If ``train`` is too short to produce a single (input, target) window.
    """
    train_x, train_y = to_supervised(train, n_input, n_out)
    if train_x.ndim != 3:
        # to_supervised returns empty 1-D arrays when no window fits.
        raise ValueError(
            'not enough training data for n_input=%d and n_out=%d' % (n_input, n_out)
        )
    n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
    # The TimeDistributed output layer emits [samples, timesteps, 1], so the
    # targets need the same trailing feature axis.
    train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))
    print(train_x.shape)
    print(train_y.shape)

    # Encoder: compress the input window into one 200-unit state vector.
    model = Sequential()
    model.add(LSTM(200, activation='relu', input_shape=(n_timesteps, n_features)))
    # Repeat that vector once per output step to feed the decoder.
    model.add(RepeatVector(n_outputs))
    # Decoder: emit one hidden state per output step, each mapped to one value.
    model.add(LSTM(200, activation='relu', return_sequences=True))
    model.add(TimeDistributed(Dense(100, activation='relu')))
    model.add(TimeDistributed(Dense(1)))
    model.compile(loss='mse', optimizer='adam')
    model.summary()

    model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size, verbose=verbose)
    return model

In [28]:
# Fit the encoder-decoder on 30-step input windows, then persist it to disk.
n_input = 30
model = build_model(train, n_input)
model.save('model7.h5')
print("model saved")

(3061, 30, 1)
(3061, 30, 1)
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 200)               161600    
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 30, 200)           0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 30, 200)           320800    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 30, 100)           20100     
_________________________________________________________________
time_distributed_2 (TimeDist (None, 30, 1)             101       
Total params: 502,601
Trainable params: 502,601
Non-trainable params: 0
_________________________________________________________________
Epoch 1/55
Epoch 2/55
Epoch 3/55
Epoch 4/55
Epoch 5/55
Epoch 6/55
Epoch 7/55
Epoch 8/5

### Prediction and Evaluation

In [33]:
from numpy import loadtxt
from keras.models import load_model  
from keras.utils.vis_utils import plot_model

In [34]:
def forecast(model, history, n_input):
    """Predict the sequence that follows the latest ``n_input`` observations.

    ``history`` is a sequence of equally shaped 2-D windows. They are stacked
    and merged into one continuous series; its trailing ``n_input`` rows are
    handed to ``model.predict`` as a single-sample batch of shape
    ``[1, n_input, n_features]``. The first (only) element of the returned
    batch is the forecast.
    """
    stacked = array(history)
    series = stacked.reshape((stacked.shape[0] * stacked.shape[1], stacked.shape[2]))
    recent = series[-n_input:, :]
    batch = recent.reshape((1, recent.shape[0], recent.shape[1]))
    return model.predict(batch, verbose=1)[0]

In [35]:
def evaluate_forecasts_new(actual, predicted):
    """Compute the overall RMSE and the RMSE of every forecast step.

    Parameters
    ----------
    actual : 2-D array-like ``[windows, steps]`` of observed values.
    predicted : array-like with the same number of elements as ``actual``.
        A trailing singleton axis (``[windows, steps, 1]``, the raw model
        output layout) is accepted and aligned to ``actual``.

    Returns
    -------
    (score, scores) : ``score`` is the RMSE over all elements as a float;
        ``scores`` is a list with one RMSE (float) per step/column.

    Raises
    ------
    ValueError
        If ``actual`` is not 2-D, is empty, or ``predicted`` cannot be aligned
        to its shape.
    """
    actual = array(actual, dtype=float)
    predicted = array(predicted, dtype=float)
    if actual.ndim != 2 or actual.size == 0:
        raise ValueError('actual must be a non-empty 2-D array [windows, steps]')
    if predicted.shape != actual.shape:
        # Without this, element-wise differences become 1-element arrays and
        # the final sqrt relies on deprecated array-to-scalar conversion.
        predicted = predicted.reshape(actual.shape)

    squared_error = (actual - predicted) ** 2
    # One RMSE per forecast step, averaged over the windows.
    scores = [sqrt(step_mse) for step_mse in squared_error.mean(axis=0)]
    # A single RMSE over every window and step.
    score = sqrt(squared_error.mean())
    return score, scores

In [36]:
def evaluate_model_new(train, test, n_input, fitted_model=None, scaler=None):
    """Walk-forward forecasting over the windows in ``test``.

    For each test window, a forecast is made from the most recent ``n_input``
    observations in the history; the observed window is then appended to the
    history so the next forecast can use it.

    Parameters
    ----------
    train : iterable of 2-D training windows used to seed the history.
    test : 3-D array ``[windows, steps, features]`` of held-out windows.
    n_input : number of trailing observations given to the model.
    fitted_model : model to forecast with. When omitted, the notebook-level
        ``model`` variable is used, which keeps three-argument calls working.
    scaler : optional fitted scaler. When given, each observed test window is
        transformed before it joins the history, so the history stays on the
        scale the model was trained on. When omitted, windows are appended
        unchanged (the previous behaviour).

    Returns
    -------
    Array of forecasts, one per test window, as returned by ``forecast``
    (i.e. on the model's output scale).
    """
    # Fall back to the global only when no model is passed explicitly.
    net = model if fitted_model is None else fitted_model

    history = list(train)
    predictions = []
    for window in test:
        predictions.append(forecast(net, history, n_input))
        if scaler is not None:
            window = scaler.transform(window)
        history.append(window)
    return array(predictions)

In [37]:
# Run the walk-forward forecast with a 30-step input window. The function
# forecasts with the notebook-level `model` trained above.
preds = evaluate_model_new(train, test, 30)



In [38]:
# Map the forecast for the first test window back to the original scale.
# preds[0] is the 2-D [steps, 1] slice that inverse_transform expects.
# (The former bare `preds.shape` line was never displayed and has been dropped.)
predictions = scaler.inverse_transform(preds[0])

In [39]:
# Restore the leading window axis so the forecast is [1, steps, 1] again.
predictions = predictions.reshape(1,-1,1)
print(predictions.shape)

(1, 30, 1)


In [41]:
# Pass both arrays as 2-D [windows, steps]. Passing the 3-D `predictions`
# against a 2-D `test` slice made the evaluator accumulate 1-element arrays
# instead of scalars.
score, scores = evaluate_forecasts_new(test[:, :, 0], predictions[:, :, 0])

In [42]:
def summarize_scores(name, score, scores):
    """Print one line: ``name``, the overall score (3 decimals) and the
    per-step scores (1 decimal each, comma-separated)."""
    per_step = ['%.1f' % value for value in scores]
    line = '%s: [%.3f] %s' % (name, score, ', '.join(per_step))
    print(line)

In [43]:
# Report the overall RMSE followed by the RMSE of each forecast step.
summarize_scores('test', score, scores)

test: [2.518] 0.1, 0.4, 0.3, 0.6, 0.5, 0.4, 0.3, 0.2, 0.2, 0.1, 0.1, 0.0, 0.0, 0.0, 0.0, 0.1, 0.2, 0.2, 0.2, 0.1, 1.2, 1.1, 1.0, 0.9, 0.8, 1.1, 3.4, 3.0, 8.8, 9.2
