# Correlated data

A simplified approach to generating correlated data that is consistent with the simulated zero curves.
Check the correlation between mortgage interest rates and swap rates.

In [None]:
import numpy as np
from scipy.stats import norm

In [None]:
# Load the interest data through the project's `dataset.Interest` reader and
# keep a renamed copy so that `interest.df` itself is left untouched.
# NOTE(review): presumably these are the mortgage interest rates mentioned in
# the introduction - confirm against `dataset.Interest`.
interest = dataset.Interest()
interest.read_data()

# `rename` without `inplace` returns a new frame, so no explicit copy is needed.
df_i = interest.df.rename(columns={'fixed_period': 'type'})


In [None]:
# Build a monthly series for the '10 Years' tenor from `df_z` and plot it
# together with `df_i`.
# NOTE(review): assumes `df_z` has a datetime index named 'rate_dt' (required
# by `resample`, and read back as a column after `reset_index`) - confirm.
df_z10 = (
    df_z.loc[df_z['tenor'] == '10 Years']
    .drop(columns=['value_dt', 'tenor'])
    .resample('M')
    .mean()
    .reset_index()
)

# Truncate the resampled timestamps to month precision.
# NOTE(review): presumably done so the dates line up with `df_i`'s index - confirm.
df_z10['rate_dt'] = df_z10['rate_dt'].to_numpy().astype('datetime64[M]')

# Give the frame the same column names as `df_i`, label it as 'Swap' and
# restrict it to the date range covered by `df_i`.
df_z10 = (
    df_z10.set_index('rate_dt')
    .rename(columns={'rate': 'interest'})
    .assign(type='Swap')
    .loc[df_i.index.min():df_i.index.max()]
)

# Stack both frames and draw one line per value of the 'type' column.
df_report = pd.concat([df_i, df_z10])
sns.lineplot(data=df_report, x=df_report.index, y='interest', hue='type');

In [None]:
# Toy simulation: draw correlated factor values, turn them into zero curves,
# then perturb those curves with correlated log-normal shocks.

# Define the number of time periods and factors/instruments
num_periods = 10
num_factors = 3

# Simulate the factor values using a multivariate normal distribution
mean = np.zeros(num_factors)
covariance = np.array([[1.0, 0.5, 0.3], [0.5, 1.0, 0.2], [0.3, 0.2, 1.0]])
factors = np.random.multivariate_normal(mean, covariance, num_periods)

# Simulate the zero curves based on the simulated factor values.
# Row i is scaled by period number i + 1; the (num_periods, 1) column of
# period numbers broadcasts across the factor columns.
periods = np.arange(1, num_periods + 1).reshape(-1, 1)
zero_curves = np.exp(-0.05 * periods * factors)

# Simulate the data based on the simulated zero curves and assumed correlations
correlation_matrix = np.array([[1.0, 0.8, 0.5], [0.8, 1.0, 0.4], [0.5, 0.4, 1.0]])

# Standard deviation of the log shocks. The previous scaling,
# sqrt(1 - diag(correlation_matrix)), is identically zero for any correlation
# matrix (unit diagonal), which silently discarded the shocks and returned the
# zero curves unchanged. With a scale of 1.0 the covariance of the log shocks
# equals `correlation_matrix`.
# NOTE(review): 1.0 is a placeholder - calibrate to the volatility you need.
shock_scale = 1.0

# The Cholesky factor does not depend on the period, so compute it once.
cholesky_lower = np.linalg.cholesky(correlation_matrix)

# One row of independent standard-normal draws per period; multiplying by the
# transposed Cholesky factor applies `cholesky_lower @ row` to every row.
independent_shocks = np.random.standard_normal((num_periods, num_factors))
correlated_shocks = independent_shocks @ cholesky_lower.T

simulated_data = np.exp(shock_scale * correlated_shocks) * zero_curves

print(simulated_data)