<img src='http://hilpisch.com/taim_logo.png' width="350px" align="right">

# Artificial Intelligence in Finance

## Machine Learning

Dr Yves J Hilpisch | The AI Machine

http://aimachine.io | http://twitter.com/dyjh

## Learning

<blockquote>"A computer program is said to learn from experience 𝐸 with respect to some class of tasks 𝑇 and performance measure 𝑃, if its performance at tasks in 𝑇, as measured by 𝑃, improves with experience 𝐸." — Mitchell (1997)</blockquote>


## Data

In [None]:
import numpy as np
import pandas as pd
from pylab import plt, mpl
# fix the NumPy seed so that shuffles/samples below are reproducible
np.random.seed(100)
# Newer Matplotlib releases ship the seaborn style sheets under the
# 'seaborn-v0_8' name only; use whichever name this installation knows.
plt.style.use('seaborn' if 'seaborn' in plt.style.available
              else 'seaborn-v0_8')
# figure export resolution and default font family for all plots
mpl.rcParams['savefig.dpi'] = 300
mpl.rcParams['font.family'] = 'serif'

In [None]:
# location of the CSV data set (presumably end-of-day prices, judging by
# the file name -- TODO confirm)
url = 'http://hilpisch.com/aiif_eikon_eod_data.csv'

In [None]:
# first column becomes a parsed date index; keep only the 'EUR=' column
raw = pd.read_csv(url, index_col=0, parse_dates=True)['EUR=']

In [None]:
# first rows of the selected series
raw.head()

In [None]:
# last rows of the selected series
raw.tail()

In [None]:
# down-sample to monthly bins, keeping the last observation of each bin
# NOTE(review): newer pandas versions prefer the 'ME' alias over 'M' --
# confirm against the installed version
l = raw.resample('1M').last()

In [None]:
l.tail()

In [None]:
# trailing semicolon suppresses the textual repr of the returned object
l.plot(figsize=(10, 6), title='EUR/USD monthly');

In [None]:
# Turn the monthly series into a plain float array and demean it.
# A new array is built instead of subtracting in place on ``.values``:
# the in-place form writes into the buffer backing the pandas object
# (which fails when that buffer is read-only) and cannot be re-run,
# because ``l`` is an ndarray without ``.values`` after the first run.
l = np.asarray(l, dtype=float)
l = l - l.mean()

In [None]:
# synthetic feature values: evenly spaced grid on [-2, 2] with one
# value per label
f = np.linspace(-2, 2, len(l))

In [None]:
# scatter of the labels against the synthetic features (red dots)
plt.figure(figsize=(10, 6))
plt.plot(f, l, 'ro')
plt.title('Sample Data Set')
plt.xlabel('features')
plt.ylabel('labels');

## Success

In [None]:
def MSE(l, p):
    ''' Returns the mean-squared error between labels and predictions.

    Parameters
    ==========
    l: array-like supporting element-wise arithmetic
        labels (observed values)
    p: array-like supporting element-wise arithmetic
        predictions
    '''
    residuals = l - p
    return np.mean(residuals ** 2)

In [None]:
# least-squares fit of a degree-5 polynomial to the full sample;
# the bare name on the next line displays the coefficients
reg = np.polyfit(f, l, deg=5)
reg

In [None]:
# in-sample predictions of the fitted polynomial
p = np.polyval(reg, f)

In [None]:
# in-sample mean-squared error of the polynomial fit
MSE(l, p)

In [None]:
# sample data (red dots) vs. regression line (dashed)
plt.figure(figsize=(10, 6))
plt.plot(f, l, 'ro', label='sample data')
plt.plot(f, p, '--', label='regression')
plt.legend();

In [None]:
%%time
# Fit a cubic polynomial on a growing prefix of the sample (first i
# points, i = 10, 30, 50, ...) and report the MSE over the *whole*
# sample each time.
for i in range(10, len(f) + 1, 20):
    reg = np.polyfit(f[:i], l[:i], deg=3)
    # predictions are always generated for the complete feature grid
    p = np.polyval(reg, f)
    mse = MSE(l, p)
    print(f'{i:3d} | MSE={mse}')

In [None]:
import logging
import tensorflow as tf
# seed TensorFlow's random number generator for reproducibility
tf.random.set_seed(100)
# only let messages of level ERROR and above through the TensorFlow logger
tf.get_logger().setLevel(logging.ERROR)

In [None]:
from keras.layers import Dense
from keras.models import Sequential

In [None]:
# network with one hidden layer (256 ReLU units) on a single input
# feature and one linear output unit, assembled from a layer list
model = Sequential([
    Dense(256, activation='relu', input_dim=1),
    Dense(1, activation='linear'),
])
# mean-squared error loss, RMSprop optimizer
model.compile(loss='mse', optimizer='rmsprop')

In [None]:
# layer-by-layer overview of the model
model.summary()

In [None]:
# train on the full sample for 1,500 epochs without progress output;
# ``h`` keeps the training history that is inspected further below
%time h = model.fit(f, l, epochs=1500, verbose=False)

In [None]:
# in-sample predictions, flattened to a 1-d array
p = model.predict(f).flatten()

In [None]:
# in-sample mean-squared error of the network
MSE(l, p)

In [None]:
# sample data (red dots) vs. network output (dashed)
plt.figure(figsize=(10, 6))
plt.plot(f, l, 'ro', label='sample data')
plt.plot(f, p, '--', label='DNN approximation')
plt.legend();

In [None]:
import pandas as pd

In [None]:
# per-epoch metrics recorded by ``fit`` as a DataFrame
res = pd.DataFrame(h.history)

In [None]:
# metrics of the final epochs
res.tail()

In [None]:
# plot the history from row 100 onwards (the first 100 epochs are
# left out of the chart)
res.iloc[100:].plot(figsize=(10, 6))
plt.ylabel('MSE')
plt.xlabel('epochs');

## Capacity

In [None]:
# polynomial fits of odd degree 1, 3, ..., 11 on the full sample;
# coefficients are stored per degree, the in-sample MSE is printed
reg = {}
for d in range(1, 12, 2):
    coeffs = np.polyfit(f, l, deg=d)
    reg[d] = coeffs
    p = np.polyval(coeffs, f)
    mse = MSE(l, p)
    print(f'{d:2d} | MSE={mse}')

In [None]:
# sample data plus one dashed regression line per stored degree
plt.figure(figsize=(10, 6))
plt.plot(f, l, 'ro', label='sample data')
for d, coeffs in reg.items():
    p = np.polyval(coeffs, f)
    plt.plot(f, p, '--', label=f'deg={d}')
plt.legend();

In [None]:
def create_dnn_model(hl=1, hu=256):
    ''' Function to create Keras DNN model.

    Parameters
    ==========
    hl: int
        number of hidden layers
    hu: int
        number of hidden units (per layer)

    Returns
    =======
    model: Sequential
        compiled model (MSE loss, RMSprop optimizer) with a single
        input feature and a single linear output unit
    '''
    model = Sequential()
    # only the first layer of the stack declares the input dimension;
    # deeper layers take their input size from the preceding layer
    input_spec = {'input_dim': 1}
    for _ in range(hl):
        model.add(Dense(hu, activation='relu', **input_spec))
        input_spec = {}
    # with hl=0 the output layer is the first layer and gets the input spec
    model.add(Dense(1, activation='linear', **input_spec))
    model.compile(loss='mse', optimizer='rmsprop')
    return model

In [None]:
# network with three hidden layers (default number of units per layer)
model = create_dnn_model(3)

In [None]:
# layer-by-layer overview of the model
model.summary()

In [None]:
# train on the full sample for 2,500 epochs without progress output
%time model.fit(f, l, epochs=2500, verbose=False)

In [None]:
# in-sample predictions, flattened to a 1-d array
p = model.predict(f).flatten()

In [None]:
# in-sample mean-squared error of the deeper network
MSE(l, p)

In [None]:
# sample data as red dots (same marker style as the other sample-data
# plots in this notebook) vs. network output (dashed)
plt.figure(figsize=(10, 6))
plt.plot(f, l, 'ro', label='sample data')
plt.plot(f, p, '--', label='DNN approximation')
plt.legend();

## Evaluation

In [None]:
# sizes of the test and validation sets: 25% of the sample each
# (rounded down); the remainder is used for training
te = int(0.25 * len(f))
va = int(0.25 * len(f))

In [None]:
# reproducible random permutation of the sample indices
np.random.seed(100)
ind = np.arange(len(f))
np.random.shuffle(ind)

In [None]:
# cut the shuffled indices into three disjoint groups and sort each
# group so that the sub-samples keep the original ordering
ind_te = np.sort(ind[:te])
ind_va = np.sort(ind[te:te + va])
ind_tr = np.sort(ind[te + va:])

In [None]:
# features of the test, validation and training sets
f_te = f[ind_te]
f_va = f[ind_va]
f_tr = f[ind_tr]

In [None]:
# labels of the test, validation and training sets
l_te = l[ind_te]
l_va = l[ind_va]
l_tr = l[ind_tr]

In [None]:
# polynomial fits of degree 1, 5, ..., 21 on the training set only;
# per degree the coefficients and the (training, validation) MSE pair
# are stored and the two MSE values are printed
reg, mse = {}, {}
for d in range(1, 22, 4):
    coeffs = np.polyfit(f_tr, l_tr, deg=d)
    reg[d] = coeffs
    mse_tr = MSE(l_tr, np.polyval(coeffs, f_tr))
    p = np.polyval(coeffs, f_va)
    mse_va = MSE(l_va, p)
    mse[d] = mse_tr, mse_va
    print(f'{d:2d} | MSE_tr={mse_tr:7.5f} | MSE_va={mse_va:7.5f}')

In [None]:
# upper panel: training data and fits; lower panel: validation data
# and the same fits evaluated on the validation features
fig, ax = plt.subplots(2, 1, figsize=(10, 8), sharex=True)
ax[0].plot(f_tr, l_tr, 'ro', label='training data')
ax[1].plot(f_va, l_va, 'go', label='validation data')
for d in reg:
    p = np.polyval(reg[d], f_tr)
    ax[0].plot(f_tr, p, '--', label=f'deg={d} (tr)')
    p = np.polyval(reg[d], f_va)
    # address the validation axes explicitly instead of relying on
    # pyplot's implicit "current axes"
    ax[1].plot(f_va, p, '--', label=f'deg={d} (va)')
ax[0].legend()
ax[1].legend();

In [None]:
from keras.callbacks import EarlyStopping

In [None]:
# network with two hidden layers of 256 units each
model = create_dnn_model(2, 256)

In [None]:
# stop training once the monitored loss has not improved for 100
# epochs and roll the weights back to the best epoch
# NOTE(review): the callback monitors the *training* loss although
# validation data is passed to ``fit`` below -- confirm that
# 'val_loss' is not what is intended here
callbacks = [EarlyStopping(monitor='loss',
                           patience=100,
                          restore_best_weights=True)]

In [None]:
%%time
# train on the training set (at most 3,000 epochs, no progress output);
# validation data is passed so that its loss is tracked alongside
h = model.fit(f_tr, l_tr, epochs=3000, verbose=False,
          validation_data=(f_va, l_va),
          callbacks=callbacks)

In [None]:
# one panel per data set: observed labels as dots, network output dashed
fig, ax = plt.subplots(2, 1, sharex=True, figsize=(10, 8))
panels = (
    (ax[0], f_tr, l_tr, 'ro', 'training data', f'DNN (tr)'),
    (ax[1], f_va, l_va, 'go', 'validation data', f'DNN (va)'),
)
for axis, feats, labs, marker, data_label, dnn_label in panels:
    axis.plot(feats, labs, marker, label=data_label)
    p = model.predict(feats)
    axis.plot(feats, p, '--', label=dnn_label)
    axis.legend()

In [None]:
# rebuild the history frame from the fit above; otherwise ``res`` still
# holds the loss history of the earlier single-hidden-layer model
res = pd.DataFrame(h.history)
res.tail()

In [None]:
# thin out the history for plotting: start at row 35 and take every
# 25th row from there
res.iloc[35::25].plot(figsize=(10, 6))
plt.ylabel('MSE')
plt.xlabel('epochs');

In [None]:
# out-of-sample predictions on the test set: degree-5 polynomial
# (as stored in ``reg``) and the trained network
p_ols = np.polyval(reg[5], f_te)
p_dnn = model.predict(f_te).flatten()

In [None]:
# test-set MSE of the polynomial regression
MSE(l_te, p_ols)

In [None]:
# test-set MSE of the network
MSE(l_te, p_dnn)

In [None]:
# test data (red dots) vs. both sets of predictions
plt.figure(figsize=(10, 6))
plt.plot(f_te, l_te, 'ro', label='test data')
plt.plot(f_te, p_ols, '--', label='OLS prediction')
plt.plot(f_te, p_dnn, '-.', label='DNN prediction');
plt.legend();

## Bias & Variance

In [None]:
# training set: every second point among the first 20 (even positions)
f_tr = f[:20:2]
l_tr = l[:20:2]

In [None]:
# validation set: the points in between (odd positions among the first 20)
f_va = f[1:20:2]
l_va = l[1:20:2]

In [None]:
# straight-line fit on the training points (plotted as 'high bias')
reg_b = np.polyfit(f_tr, l_tr, deg=1)

In [None]:
# degree-9 fit on the training points (plotted as 'high variance');
# full=True makes polyfit return a tuple, of which only the
# coefficients ([0]) are kept -- presumably chosen to avoid the
# conditioning warning of the default mode; TODO confirm
reg_v = np.polyfit(f_tr, l_tr, deg=9, full=True)[0]

In [None]:
# dense grid spanning training and validation features for smooth curves
f_ = np.linspace(f_tr.min(), f_va.max(), 75)

In [None]:
# both data sets as dots, both fitted curves dashed
plt.figure(figsize=(10, 6))
plt.plot(f_tr, l_tr, 'ro', label='training data')
plt.plot(f_va, l_va, 'go', label='validation data')
plt.plot(f_, np.polyval(reg_b, f_), '--', label='high bias')
plt.plot(f_, np.polyval(reg_v, f_), '--', label='high variance')
# only the lower y-limit is fixed; the upper one is left unchanged
plt.ylim(-0.2)
plt.legend(loc=2);

In [None]:
from sklearn.metrics import r2_score

In [None]:
def evaluate(reg, f, l):
    ''' Prints performance figures of a polynomial fit on a data set.

    Reported are the mean-squared error, the R^2 score, the mean
    absolute deviation of the predictions from the labels (labelled
    'bias') and the variance of the predictions (labelled 'var').

    Parameters
    ==========
    reg: array-like
        polynomial coefficients as accepted by np.polyval
    f: ndarray
        features
    l: ndarray
        labels
    '''
    p = np.polyval(reg, f)
    abs_err = np.abs(l - p)
    bias, var = abs_err.mean(), p.var()
    print(f'MSE={MSE(l, p):.4f} | R2={r2_score(l, p):9.4f} | '
          f'bias={bias:.4f} | var={var:.4f}')

In [None]:
# straight-line fit, evaluated on the training data
evaluate(reg_b, f_tr, l_tr)

In [None]:
# straight-line fit, evaluated on the validation data
evaluate(reg_b, f_va, l_va)

In [None]:
# degree-9 fit, evaluated on the training data
evaluate(reg_v, f_tr, l_tr)

In [None]:
# degree-9 fit, evaluated on the validation data
evaluate(reg_v, f_va, l_va)

## Cross-Validation

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

In [None]:
def PolynomialRegression(degree=None, **kwargs):
    ''' Creates a pipeline of polynomial feature expansion followed
    by a linear regression.

    Parameters
    ==========
    degree: int or None
        degree of the polynomial features; None selects degree 2,
        the default of PolynomialFeatures itself
    **kwargs:
        passed through to LinearRegression

    Returns
    =======
    pipeline: sklearn Pipeline
        unfitted estimator
    '''
    if degree is None:
        # None is not a valid degree for PolynomialFeatures
        degree = 2
    return make_pipeline(PolynomialFeatures(degree),
                         LinearRegression(**kwargs))

In [None]:
# global NumPy print settings: floats are rendered through the given
# formatter (width 12, two decimals) when arrays are converted to text
np.set_printoptions(suppress=True,
        formatter={'float': lambda x: f'{x:12.2f}'})

In [None]:
# 5-fold cross-validation scores for polynomial regressions of
# degree 0 to 9 on the full sample
print('\nCross-validation scores')
print(74 * '=')
# scikit-learn expects a 2-d feature matrix (samples x features)
f_2d = f.reshape(-1, 1)
for deg in range(0, 10, 1):
    model = PolynomialRegression(deg)
    cvs = cross_val_score(model, f_2d, l, cv=5)
    print(f'deg={deg} | ' + str(cvs.round(2)))

In [None]:
# re-seed NumPy and TensorFlow before the cross-validation runs below
np.random.seed(100)
tf.random.set_seed(100)
# NOTE(review): this wrapper module is not available in all Keras
# releases -- confirm against the installed version
from keras.wrappers.scikit_learn import KerasRegressor

In [None]:
# scikit-learn compatible wrapper around a small network:
# one hidden layer with 36 units, 1,000 training epochs per fit
model = KerasRegressor(build_fn=create_dnn_model,
                      verbose=False, epochs=1000,
                      hl=1, hu=36)

In [None]:
# timed 5-fold cross-validation of the small network on the full sample
%time cross_val_score(model, f, l, cv=5)

In [None]:
# same wrapper around a larger network:
# three hidden layers with 256 units each
model = KerasRegressor(build_fn=create_dnn_model,
                      verbose=False, epochs=1000,
                      hl=3, hu=256)

In [None]:
# timed 5-fold cross-validation of the larger network on the full sample
%time cross_val_score(model, f, l, cv=5)

<img src='http://hilpisch.com/taim_logo.png' width="350px" align="right">

<br><br><br><a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:ai@tpq.io">ai@tpq.io</a>