# Correlated data

A simplified approach to generating correlated data that is consistent with the simulated zero curves.
Check the correlation between mortgage interest rates and swap rates.

Several methods and options exist:
- Box-Muller Method
- Iman-Conover Method
- Two-factor Vasicek model
- Geometric Brownian Motion

The simplest option that still preserves the correlation between variables is historical Monte Carlo simulation, i.e. resampling observed historical changes across all variables at once.


In [1]:
import numpy as np
from src.data.zerocurve import Zerocurve
from src.data.interest import Interest

In [5]:
# Create the zero-curve data set and load its data. The object is kept at
# notebook scope because later cells read `zerocurve.yield_data` from it.
# NOTE(review): according to the log output, read_data() pulls the data from
# "ESW" - presumably a remote service; confirm against src.data.zerocurve.
zerocurve = Zerocurve()
zerocurve.read_data()


2023-03-22 13:27:50.690 | INFO     | src.data.dataset:read_data:26 - reading zerocurve data from ESW.
  result = func(self.values, **kwargs)


<Response [200]>

In [57]:
from pandas.tseries.offsets import BDay

# One step of a historical simulation: bootstrap a single observed
# day-over-day change of the whole curve and add it to the latest curve.

# Work on a copy so that re-running this cell does not keep appending
# synthetic rows to the frame held by `zerocurve`.
yield_data = zerocurve.yield_data.copy()

# Absolute (not relative) change per tenor between consecutive rows; the
# first row has no predecessor and is dropped.
yield_data_change = yield_data.diff().dropna()

# Draw one historical row of changes. Sampling a full row keeps the changes
# of all tenors from the same date together. The fixed random_state makes
# the draw reproducible.
chg = yield_data_change.sample(1, replace=True, random_state=1)

# Series + one-row DataFrame aligns the tenor labels with the columns, so
# r1 is a one-row DataFrame holding the simulated next curve.
r1 = yield_data.iloc[-1] + chg
print(r1.values)

last_day = yield_data.index[-1]
print(last_day)

# Store the simulated curve on the next business day after the last
# observation.
next_day = last_day + BDay(1)
yield_data.loc[next_day] = r1.values[0]
yield_data

[[3.05964817 3.05964817 3.37685878 3.39813225 3.29149253 3.14465403
  3.00024876 2.7783612  2.60010097 2.59631963 2.63675165 2.7134645
  2.76050637 2.70284022 2.15733557]]
2023-03-01 00:00:00


tenor,0,3,6,9,12,15,18,24,36,48,60,84,120,180,360
rate_dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2004-09-06,2.121240,2.121240,2.298449,2.476137,2.651565,2.822715,2.988139,3.298175,3.825284,4.232494,4.538927,4.934807,5.217850,5.367339,5.410036
2004-09-07,2.139612,2.139612,2.332966,2.520157,2.700174,2.872345,3.036262,3.338711,3.844763,4.233661,4.527671,4.912476,5.193965,5.346548,5.391362
2004-09-08,2.149441,2.149441,2.352607,2.546828,2.731688,2.906993,3.072722,3.375993,3.877788,4.259989,4.547567,4.922632,5.195932,5.343014,5.385502
2004-09-09,2.129304,2.129304,2.311673,2.489955,2.662770,2.829146,2.988434,3.284360,3.784690,4.173438,4.470124,4.863580,5.158222,5.324286,5.376611
2004-09-10,2.110625,2.110625,2.268949,2.431078,2.593700,2.754329,2.911134,3.208441,3.722313,4.125765,4.433397,4.837977,5.136517,5.302756,5.355005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-02-24,3.045115,3.045115,3.279834,3.234152,3.087703,2.924748,2.780437,2.580853,2.444523,2.447321,2.477391,2.529558,2.560305,2.517924,2.115453
2023-02-27,3.074600,3.074600,3.325308,3.271961,3.111991,2.936755,2.783573,2.576131,2.445001,2.459598,2.500807,2.570856,2.619318,2.589171,2.164195
2023-02-28,3.052459,3.052459,3.369500,3.390191,3.282758,3.135074,2.989885,2.766859,2.588444,2.586636,2.630026,2.712942,2.767047,2.715403,2.173018
2023-03-01,3.057534,3.057534,3.374565,3.396087,3.290019,3.143989,3.000558,2.780898,2.607231,2.607283,2.650462,2.729934,2.777561,2.719192,2.173498


In [None]:
# Build a monthly series of the '10 Years' tenor from df_z and plot it next
# to df_i.
# NOTE(review): df_z, df_i, pd and sns are not defined in the visible cells;
# presumably df_z is a long-format zero-curve frame indexed by rate_dt and
# df_i holds mortgage interest rates with 'interest' and 'type' columns -
# confirm before running.
is_ten_year = df_z['tenor'] == '10 Years'
ten_year = df_z[is_ten_year].drop(columns=['value_dt', 'tenor'])

# Monthly mean of the remaining columns.
monthly = ten_year.resample('M').mean().reset_index()

# Truncate each timestamp to month precision so the index carries the first
# day of the month rather than the resample bin label.
monthly['rate_dt'] = monthly['rate_dt'].to_numpy().astype('datetime64[M]')

# Same column layout as df_i: an 'interest' value plus a 'type' label.
df_z10 = (
    monthly.set_index('rate_dt')
    .rename(columns={'rate': 'interest'})
    .assign(type='Swap')
)

# Restrict the swap series to the date range covered by df_i.
first_dt, last_dt = df_i.index.min(), df_i.index.max()
df_z10 = df_z10.loc[first_dt:last_dt]

df_report = pd.concat([df_i, df_z10])
sns.lineplot(data=df_report, x=df_report.index, y='interest', hue='type');

In [None]:
# Toy simulation in three steps: draw correlated factor values, turn them
# into zero curves, then perturb those curves with a second set of
# correlated log-normal shocks.

# Define the number of time periods and factors/instruments
num_periods = 10
num_factors = 3

# Seeded generator so that re-running the cell reproduces the same numbers.
rng = np.random.default_rng(1)

# Simulate the factor values using a multivariate normal distribution
mean = np.zeros(num_factors)
covariance = np.array([[1.0, 0.5, 0.3], [0.5, 1.0, 0.2], [0.3, 0.2, 1.0]])
factors = rng.multivariate_normal(mean, covariance, size=num_periods)

# Simulate the zero curves based on the simulated factor values:
# row i holds exp(-0.05 * (i + 1) * factors[i, :]).
periods = np.arange(1, num_periods + 1).reshape(-1, 1)
zero_curves = np.exp(-0.05 * periods * factors)

# Simulate the data based on the simulated zero curves and assumed correlations
correlation_matrix = np.array([[1.0, 0.8, 0.5], [0.8, 1.0, 0.4], [0.5, 0.4, 1.0]])

# The Cholesky factor only depends on the correlation matrix, so compute it
# once instead of once per period.
cholesky_lower = np.linalg.cholesky(correlation_matrix)

# Independent standard normal draws, one row per period. Multiplying each row
# z by the lower Cholesky factor (L @ z, written here as Z @ L.T for all rows
# at once) gives shocks with the requested correlation.
independent_shocks = rng.standard_normal((num_periods, num_factors))
correlated_shocks = independent_shocks @ cholesky_lower.T

# Apply the shocks multiplicatively. The previous version scaled them by
# sqrt(1 - diag(correlation_matrix)), which is identically zero, so the
# shocks had no effect and the result was just a copy of zero_curves.
simulated_data = np.exp(correlated_shocks) * zero_curves

print(simulated_data)