In [2]:
import numpy as np
import pandas as pd
import plotly.express as px
import string
from ipywidgets import FloatSlider, IntSlider, VBox, Layout
from bqplot import LinearScale, Lines, Axis, Figure

In [3]:
%%html
<!-- Injects a stylesheet rule that draws elements with class "widget-readout" in white text. -->
<!-- NOTE(review): presumably this keeps the slider value readouts legible on a dark theme; confirm. -->

<style> .widget-readout{ color:white; } </style>

### When looking at backtesting, it is important to not overfit a time series. How do we do this?
### By simulating data with similar characteristics, and seeing if our model/strategy still performs well
### Simulation using Cholesky Decomposition in order to introduce correlations between assets

In [4]:
# Target correlation structure for the three-asset example: ones on the
# diagonal and a common pairwise correlation of 0.7 everywhere else.
corr = np.array(
    [
        [1, 0.7, 0.7],
        [0.7, 1, 0.7],
        [0.7, 0.7, 1],
    ]
)

corr

array([[1. , 0.7, 0.7],
       [0.7, 1. , 0.7],
       [0.7, 0.7, 1. ]])

In [5]:
# Cholesky factor of the target correlation matrix (lower-triangular, as the
# displayed output shows). Multiplying independent draws by this factor is
# how the correlations are imposed further down.
chol = np.linalg.cholesky(corr)
chol

array([[1.        , 0.        , 0.        ],
       [0.7       , 0.71414284, 0.        ],
       [0.7       , 0.29405882, 0.65079137]])

corr = chol @ chol.T (a matrix product, not NumPy's element-wise `*`) — verified in the next cell

In [7]:
# Sanity check: the factor times its own transpose should give back corr.
chol @ chol.T

array([[1. , 0.7, 0.7],
       [0.7, 1. , 0.7],
       [0.7, 0.7, 1. ]])

In [10]:
# Seed NumPy's global generator so these draws, and every table and plot
# derived from them, come out the same on each Restart & Run All.
SEED = 42
np.random.seed(SEED)

# Three independent standard-normal series (one row per asset), 1000 draws each.
rand_data = np.random.normal(size=(3, 1000))
rand_data

array([[ 0.78574846, -0.33204333,  1.09074363, ..., -0.03874956,
        -1.16525056,  1.45317142],
       [-1.13906763, -0.62805027,  0.17169944, ..., -0.4850693 ,
        -1.56186606,  0.34461258],
       [ 0.52506423,  0.44377752, -2.69890895, ..., -0.34947124,
         1.72230364, -1.44590233]])

In [11]:
# Empirical correlation of the raw draws (one column per series); since the
# series are generated independently the off-diagonal entries should be near 0.
pd.DataFrame(rand_data).T.corr()

Unnamed: 0,0,1,2
0,1.0,0.009468,0.051643
1,0.009468,1.0,-0.012379
2,0.051643,-0.012379,1.0


In [12]:
# One column per series, labelled A/B/C. Dividing the standard-normal draws
# by 100 gives return-like values with a standard deviation of about 0.01.
no_corr_data = pd.DataFrame(rand_data.T, columns=['A', 'B', 'C']) / 100

In [18]:
# Compound the uncorrelated returns into cumulative growth paths and chart them.
sim_cum_rets_plot_no_corr = px.line(
    no_corr_data.add(1).cumprod(),
    title='Simulated returns with no correlation',
    width=1000,
    height=500,
)
sim_cum_rets_plot_no_corr.show()

In [19]:
# Left-multiplying the independent draws by the Cholesky factor mixes the rows
# so the resulting series carry the target correlations; the result is then
# labelled A/B/C (one column each) and divided by 100 like the uncorrelated case.
sim_corr_rets = pd.DataFrame((chol @ rand_data).T, columns=['A', 'B', 'C']) / 100

In [20]:
# Peek at the first rows of the correlated return series.
sim_corr_rets.head()

Unnamed: 0,A,B,C
0,0.007857,-0.002634,0.005568
1,-0.00332,-0.006809,-0.001283
2,0.010907,0.008861,-0.009424
3,-0.005645,-0.011484,-0.009847
4,-0.005023,0.000521,-0.002443


In [21]:
# Compound the correlated returns into cumulative growth paths and chart them.
sim_cum_rets_plot = px.line(
    sim_corr_rets.add(1).cumprod(),
    title='Simulated returns with correlation',
    width=1000,
    height=500,
)
sim_cum_rets_plot.show()

In [22]:
# Empirical correlations of the simulated returns; these should land close
# to the 0.7 target set in the correlation matrix.
sim_corr_rets.corr()

Unnamed: 0,A,B,C
A,1.0,0.693255,0.721027
B,0.693255,1.0,0.707625
C,0.721027,0.707625,1.0


In [23]:
# Interactive controls for the simulator. With continuous_update=False a
# slider reports its new value once the handle is released rather than on
# every intermediate position.
corr_slider = FloatSlider(
    min=0,
    # Upper bound is kept below 1, presumably so the correlation matrix stays
    # positive definite for the Cholesky step; confirm.
    max=0.99,
    value=0.7,
    step=0.01,
    description='Correlation',
    continuous_update=False,
    # NOTE(review): 'fontcolor' is not a Layout attribute I can find in the
    # ipywidgets documentation; confirm this entry has any effect.
    layout={'fontcolor': 'red'},
)
num_secs_slider = IntSlider(
    min=2,
    max=10,
    value=3,
    description='# Securities',
    continuous_update=False,
)
sample_size_slider = IntSlider(
    min=50,
    max=3000,
    value=1000,
    description='Sample Size',
    continuous_update=False,
)

# Give all three sliders the same orange handle.
corr_slider.style.handle_color = 'orange'
num_secs_slider.style.handle_color = 'orange'
sample_size_slider.style.handle_color = 'orange'

In [24]:
# Initial simulation from the sliders' current settings, so there is data to
# plot before any slider is moved. Note that this rebinds corr, chol,
# rand_data and sim_corr_rets from the fixed three-asset example above.

# Equicorrelation matrix: the slider value off the diagonal, ones on it.
corr = np.full(
    shape=(num_secs_slider.value, num_secs_slider.value),
    fill_value=corr_slider.value,
)
np.fill_diagonal(corr, 1)
chol = np.linalg.cholesky(corr)

# Independent draws, one row per security, mixed through the Cholesky factor.
rand_data = np.random.normal(size=(num_secs_slider.value, sample_size_slider.value))
sim_corr_rets = pd.DataFrame(
    (chol @ rand_data).T,
    columns=list(string.ascii_uppercase)[:num_secs_slider.value],
) / 100
cum_prod_rets = sim_corr_rets.add(1).cumprod()

In [25]:
def _simulate_cum_returns(num_secs, correlation, sample_size):
    """Simulate cumulative growth paths for equally correlated assets.

    Args:
        num_secs: Number of assets (columns) to simulate.
        correlation: Common pairwise correlation placed off the diagonal.
        sample_size: Number of observations (rows) per asset.

    Returns:
        DataFrame of shape (sample_size, num_secs) holding the cumulative
        product of (1 + simulated return), with columns labelled 'A', 'B', ...

    Raises:
        numpy.linalg.LinAlgError: If the implied correlation matrix is not
            positive definite, so no Cholesky factor exists.
    """
    # Equicorrelation matrix: `correlation` everywhere, ones on the diagonal.
    corr = np.full((num_secs, num_secs), correlation)
    np.fill_diagonal(corr, 1)
    chol = np.linalg.cholesky(corr)
    # Independent standard-normal draws, one row per asset; multiplying by the
    # Cholesky factor imposes the target correlations across rows.
    rand_data = np.random.normal(size=(num_secs, sample_size))
    labels = list(string.ascii_uppercase)[:num_secs]
    sim_corr_rets = pd.DataFrame(np.matmul(chol, rand_data), index=labels).T / 100
    return (1 + sim_corr_rets).cumprod()


def update_matrix(caller):
    """Slider callback: re-run the simulation and push the result to the plot.

    Reads the current values of ``num_secs_slider``, ``corr_slider`` and
    ``sample_size_slider`` and updates the notebook-level ``line`` mark. All
    four must exist in the notebook namespace by the time a slider fires.

    Args:
        caller: Change notification passed in by the observed widget; unused.
    """
    cum_prod_rets = _simulate_cum_returns(
        num_secs_slider.value,
        corr_slider.value,
        sample_size_slider.value,
    )
    # Send x and y to the front end together. Assigning them one at a time
    # briefly leaves the mark with mismatched x/y whenever the sample size or
    # the number of securities changes.
    with line.hold_sync():
        line.x = cum_prod_rets.index
        line.y = cum_prod_rets.values.T

# Re-run the simulation whenever any of the three controls changes its value.
num_secs_slider.observe(update_matrix, names='value')
corr_slider.observe(update_matrix, names='value')
sample_size_slider.observe(update_matrix, names='value')

In [27]:
%matplotlib widget
sc_x = LinearScale()
sc_y = LinearScale()
line = Lines(x=cum_prod_rets.index, y=cum_prod_rets.values.T,
             scales={'x': sc_x, 'y': sc_y})
ax_x = Axis(scale=sc_x, label='Index',label_color = 'white')
ax_y = Axis(scale=sc_y, orientation='vertical', label='Cumulative Returns', label_color = 'white')
fig = Figure(marks=[line], axes=[ax_x, ax_y], title='Correlated Returns Simulator', title_style = {'fill': 'white'}, animation_duration=500)

In [28]:
# Stack the three controls above the figure in a single column.
VBox(children=[num_secs_slider, corr_slider, sample_size_slider, fig])

VBox(children=(IntSlider(value=3, continuous_update=False, description='# Securities', max=10, min=2, style=Sl…