In [1]:
# Define API key to be able to use the Alpha Vantage API.
# Alpha Vantage API allows us to get any parity's intraday/daily/weekly currency exchange rates. 
api_key="4YOBCFLPOVPZVL3D"

In [3]:
# To be able to import ForeignExchange, the command:
#     pip install alpha_vantage
# must be executed.
from alpha_vantage.foreignexchange import ForeignExchange

# Import other necessary libraries such as numpy, pandas ...
from pprint import pprint
import numpy as np
import pandas as pd
# cc is set to be the instance of the ForeignExchange class.
cc = ForeignExchange(key=api_key,output_format="pandas")


# ForeignExchange class has method named get_currency_exchange_daily that returns the daily dataset of given parity.
data1, _ = cc.get_currency_exchange_daily("USD",'EUR',"pandas")
data2, _ = cc.get_currency_exchange_weekly("USD", "EUR","pandas")

In [4]:
# Then, we create a DataFrame with the returned dataset.
df2 = pd.DataFrame(data1)

In [5]:
# Our features on both SVM model and ANN model are determined to be
# the moving averages of 5 days, 10 days, 20 days, 60 days, 120 days and 
# the closing rate of previous are held in feature columns. 
featurecolumns =["MA5", "MA10", "MA20", "MA60","MA120", "Previous Day"]
featureframe = pd.DataFrame(data = None, columns = featurecolumns)
# Moving averages are calculated by 
featureframe['MA5'] = df2.rolling(5).sum()["4. close"]/5
featureframe['MA10'] = df2.rolling(10).sum()["4. close"]/10
featureframe['MA20'] = df2.rolling(20).sum()["4. close"]/20
featureframe['MA60'] = df2.rolling(60).sum()["4. close"]/60
featureframe['MA120'] = df2.rolling(120).sum()["4. close"]/120
featureframe["Previous Day"] = df2["4. close"]
# We shift the feature frame to be able to predict the next day's closing rate by looking at the moving averages of the day before.
featureframe = featureframe.shift(1)
# After the shift operation, we construct the closing rate.
featureframe["Closing Rate"] = df2["4. close"]

# To calculate the moving average of x days, we need at least the data of first x days and to prevent errors,
# we drop the data of last day.
featureframe = featureframe.drop(featureframe.index[len(featureframe)-1])
featureframe = featureframe.dropna()


In [6]:
# Before we apply the ANN, we should split the data into test and train randomly.
# The model is built as %10train %90test
from sklearn.model_selection import train_test_split

Xtrain, Xtest, ytrain, ytest = train_test_split(featureframe[featurecolumns], featureframe["Closing Rate"], test_size = 0.90)

In [7]:
from sklearn.neural_network import MLPRegressor
# Initialize the Multiple Layer Regressor class instance.
nn = MLPRegressor(hidden_layer_sizes=(6), max_iter=10000,early_stopping=True)

In [8]:
# Fit the train data.
trainFitted = nn.fit(Xtrain,ytrain)

In [8]:
# Make a prediction on the fitted train data on test data.
prediction = trainFitted.predict(Xtest)

In [9]:
# Evaluate the test error.
counter = 0
E_test = 0
TestError= 0
for x, y in np.nditer([prediction, ytest.values],["refs_ok"]):
    E_test  = float(x) - float(y)
    E_test = E_test**2
    counter +=1
E_test = E_test/counter
E_test

3.042637930724378e-08

In [10]:
# Alphas are tested to evaluate the bias-variance tradeoff (regularization term)
alphaList = np.array([1e-6, 1e-5,1e-4,1e-3,1e-2,1e-1])
# Number of nodes in the hidden layer is another performance metric.
hiddenLayerList = np.array([(7),(6),(5),(4),(3)])
TotalEins= []
TotalEvals= []
TotalEval = 0
# For each (# nodes in hiddenlayer, alpha value) tuple, evaluate the cross validation error.
for j, Q in enumerate(alphaList):
    for i, C in enumerate(hiddenLayerList):
        for i in range(int(ytrain.size/10)):
            validtestfeature = Xtrain[(i*10):(i+1)*10]
            validtestlabel = ytrain[(i*10):(i+1)*10]
            validtrainfeature = np.delete(Xtrain.values, np.s_[(i*10):(i+1)*10], 0)
            validtrainlabel = np.delete(ytrain.values, np.s_[(i*10):(i+1)*10], 0)
            prob = MLPRegressor(hidden_layer_sizes=C, max_iter=10000,early_stopping=True, alpha=Q)
            m = prob.fit(validtrainfeature, validtrainlabel)
            p_valtest = prob.predict(validtestfeature)
            counter = 0
            E_val = 0
            for x, y in np.nditer([p_valtest, validtestlabel.values],["refs_ok"]):
                E_val  = float(x) - float(y)
                E_val = E_val**2
                counter +=1
            E_val = E_val/counter
            TotalEval += E_val
        TotalEval=TotalEval/int(ytrain.size/10)
        TotalEvals.append(TotalEval)
        TotalEval = 0

In [11]:
print(TotalEvals.index(min(TotalEvals)))
counter = 0
# Assign the best alpha value and the best hidden layer size by looking at the cross validation error of each.
for j, Q in enumerate(alphaList):
    for i, C in enumerate(hiddenLayerList):
        if TotalEvals.index(min(TotalEvals)) == counter:
            BestCs = C
            BestQ = Q
            print("|||THIS ONE BELOW:|||")
        print("Q = %lf, C = %s , Ecval = %.20e" % (Q, C, TotalEvals[counter]))
        counter += 1
        


20
Q = 0.000001, C = 7 , Ecval = 1.86726082716026479686e-04
Q = 0.000001, C = 6 , Ecval = 8.76516745094052166344e-05
Q = 0.000001, C = 5 , Ecval = 4.92820490891937018217e-05
Q = 0.000001, C = 4 , Ecval = 4.77618313789865341383e-05
Q = 0.000001, C = 3 , Ecval = 1.93625904251402275458e-04
Q = 0.000010, C = 7 , Ecval = 1.12651400422986708977e-04
Q = 0.000010, C = 6 , Ecval = 5.21944708394919359529e-05
Q = 0.000010, C = 5 , Ecval = 1.29382816122523147543e-04
Q = 0.000010, C = 4 , Ecval = 3.61392633909546042228e-04
Q = 0.000010, C = 3 , Ecval = 2.16774520292150626012e-04
Q = 0.000100, C = 7 , Ecval = 7.51264443935591679077e-05
Q = 0.000100, C = 6 , Ecval = 7.38742673859358526915e-05
Q = 0.000100, C = 5 , Ecval = 5.41744782887182825494e-05
Q = 0.000100, C = 4 , Ecval = 1.64723183619531771821e-04
Q = 0.000100, C = 3 , Ecval = 5.21560578387110951749e-05
Q = 0.001000, C = 7 , Ecval = 7.27761609642543587633e-05
Q = 0.001000, C = 6 , Ecval = 1.54759569515152919277e-04
Q = 0.001000, C = 5 , Ecval 

In [12]:
# Finally, predict the rate by applying the best hidden layer node number and best alpha value.
# Make a prediction on the real data of exchange rates.
prob = MLPRegressor(hidden_layer_sizes=(BestCs), max_iter=10000,early_stopping=True, alpha=BestQ)
m = prob.fit(Xtrain, ytrain)
p_test = prob.predict(Xtest)
counter = 0
E_test = 0
TestError= 0
for x, y in np.nditer([p_test, ytest.values],["refs_ok"]):
    E_test  = float(x) - float(y)
    E_test = E_test**2
    counter +=1
E_test = E_test/counter
E_test

7.212439020050681e-07

In [13]:
deneme = prob.predict(featureframe[featurecolumns])

In [14]:
# Plot the actual data, and predicted data alltogether.
plotframe = featureframe["Closing Rate"]
plotframe = plotframe.to_frame()
plotframe2 = pd.DataFrame(deneme,columns=["Predict"])
plotframe.plot()
plotframe2.plot()
plotframe2["Closing Rate"] = plotframe.values
plotframe2.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x1a23883fd0>

In [15]:
# Find out the normalized mean square error.
E_test/ytest.values.astype("float").std()**2
# Equal to normalized mean square error.

0.0005285175165195422

In [16]:
closeT = (plotframe2["Closing Rate"]-plotframe2.shift(1)["Closing Rate"]).dropna().values
predictT = (plotframe2["Predict"]-plotframe2.shift(1)["Predict"]).dropna().values
closeT[closeT > 0] = 1
closeT[closeT < 0] = -1
predictT[predictT > 0] = 1
predictT[predictT < 0] = -1

trendFrame = pd.DataFrame(data = (predictT == closeT), columns = ["Bool"])
trendFrame.describe()

Unnamed: 0,Bool
count,1186
unique,2
top,True
freq,599
