### Here we will generate new market scenarios by introducing noise in historical returns, risks and correlations. 

### Remember that we will try to find stable regions in portfolio weight space which perform consistently across these simulated market scenarios.

### The Parameters we will use are:
- Noise of 20, 35, 50% on returns
- Noise of 10, 20% on risks
- Noise of 5, 10% on correlations

These are 3 * 2 * 2 = 12 combinations

We will simulate 100 random scenarios for each of these combinations in each year (2017-2024), i.e. 1,200 scenarios per year and 9,600 scenarios in total


In [31]:
import pandas as pd
import numpy as np
import os
import itertools
from tqdm import tqdm

In [2]:
# Directory the per-year historical inputs (returns, volatilities, correlations) are read from.
input_data_dir = '../data/simulations/market_scenarios_historical'
# Directory the noise-perturbed Monte Carlo scenario files are written to.
output_data_dir = '../data/simulations/market_scenarios_monte_carlo'


In [29]:
# Noise levels, in percent, applied to each kind of input.
ret_noise_list = [20, 35, 50]
risk_noise_list = [10, 20]
corr_noise_list = [5, 10]

# Cartesian product of the three lists: every (return, risk, correlation)
# noise triple, 3 * 2 * 2 = 12 in total.
combinations = [*itertools.product(ret_noise_list, risk_noise_list, corr_noise_list)]

# Preview the first five triples as a quick sanity check of the ordering.
for a, b, c in combinations[:5]:
    print(a, b, c)

20 10 5
20 10 10
20 20 5
20 20 10
35 10 5


In [36]:
# Monte Carlo scenario generation.
#
# For every year and every (return, risk, correlation) noise triple, write
# `total_sim` perturbed copies of that year's historical inputs. Each scenario
# produces three files sharing the prefix 'mc_sim_<n>_year_<year>':
#   ..._ret_noise_<pct>.csv   perturbed annual returns
#   ..._risk_noise_<pct>.csv  perturbed annual volatilities
#   ..._corr_noise_<pct>.csv  perturbed correlation matrix
# `sim_num` runs continuously across years and combinations, so every scenario
# gets a unique number.
# NOTE(review): the random generator is not seeded, so the output differs
# from run to run.


def _apply_sign_noise(values, noise_pct):
    """Scale each entry of *values* by (1 + p) or (1 - p), with p = noise_pct / 100.

    The up/down choice is drawn independently and uniformly for every entry.
    """
    multipliers = np.random.choice(
        [1 + 0.01 * noise_pct, 1 - 0.01 * noise_pct], size=len(values)
    )
    return values * multipliers


def _perturb_correlations(corr_matrix, noise_pct):
    """Return a noisy copy of a square correlation matrix.

    Each off-diagonal pair (i, j) / (j, i) shares one multiplier, (1 + p) or
    (1 - p) with p = noise_pct / 100, so a symmetric input stays symmetric.
    The diagonal is left untouched and results are clipped to [-1, 1] so that
    scaling up a strong correlation cannot produce an invalid value.
    """
    draws = np.random.choice(
        [1 + 0.01 * noise_pct, 1 - 0.01 * noise_pct], size=corr_matrix.shape
    )
    # Keep only the strict upper triangle of the draws and mirror it, so that
    # entry (j, i) reuses the multiplier drawn for (i, j).
    upper = np.triu(draws, k=1)
    multipliers = upper + upper.T
    np.fill_diagonal(multipliers, 1)
    return np.clip(corr_matrix * multipliers, -1.0, 1.0)


year_list = list(range(2017, 2025))
ret_noise_list = [20, 35, 50]
risk_noise_list = [10, 20]
corr_noise_list = [5, 10]
combinations = list(itertools.product(ret_noise_list, risk_noise_list, corr_noise_list))

# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(output_data_dir, exist_ok=True)

sim_num = 1
total_sim = 100
for tyear in tqdm(year_list):
    # The historical inputs depend only on the year: read them once here
    # instead of once per simulated scenario.
    # The return/volatility files have no header row; column 1 holds the
    # values that get perturbed (column 0 is presumably the asset label).
    base_ret_df = pd.read_csv(os.path.join(input_data_dir, str(tyear) + '_annual_ret.csv'), header=None)
    base_risk_df = pd.read_csv(os.path.join(input_data_dir, str(tyear) + '_annual_vol.csv'), header=None)
    corr_df = pd.read_csv(os.path.join(input_data_dir, str(tyear) + '_annual_corr.csv'), index_col=0)
    correlation_matrix = corr_df.values

    for ret_noise, risk_noise, corr_noise in combinations:
        for _ in range(total_sim):
            file_stem = 'mc_sim_' + str(sim_num) + '_year_' + str(tyear)

            # Returns: noise is applied exactly once. (Previously a duplicated
            # block perturbed and re-saved the returns a second time,
            # compounding the noise beyond the stated percentage.)
            ret_df = base_ret_df.copy()
            ret_df[1] = _apply_sign_noise(ret_df[1], ret_noise)
            ret_df.to_csv(os.path.join(output_data_dir, file_stem + '_ret_noise_' + str(ret_noise) + '.csv'), index=False, header=False)

            # Risks (annual volatilities).
            risk_df = base_risk_df.copy()
            risk_df[1] = _apply_sign_noise(risk_df[1], risk_noise)
            risk_df.to_csv(os.path.join(output_data_dir, file_stem + '_risk_noise_' + str(risk_noise) + '.csv'), index=False, header=False)

            # Correlations: symmetric noise, unit diagonal preserved.
            modified_correlation_matrix = _perturb_correlations(correlation_matrix, corr_noise)
            modified_correlation_df = pd.DataFrame(modified_correlation_matrix,
                                                   index=corr_df.index,
                                                   columns=corr_df.columns)
            modified_correlation_df.to_csv(os.path.join(output_data_dir, file_stem + '_corr_noise_' + str(corr_noise) + '.csv'), index=True, header=True)

            sim_num += 1
    


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [01:19<00:00,  9.97s/it]


### We get a total of 9600 additional scenarios covering years from 2017-2024