## This notebook is an example of synthetic data generation

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import norm

In [2]:
# Define the parameters
# Weight on the common factor x when generate_y builds the asset returns
factor_loading = 0.25
# Default threshold: an obligor counts as defaulted when its return y is below gamma
gamma = -2.15
# Seed NumPy's global RNG so the np.random.normal draws in later cells are reproducible
np.random.seed(42)

In [3]:
def generate_y(factor_loading, num_of_obligors):
    """Simulate the normalized asset returns (Y) of the obligors in one bucket.

    A single common-factor draw is shared by every obligor in the bucket;
    each obligor additionally receives its own standard-normal shock.

    Parameters
    ----------
    factor_loading : float
        Weight applied to the common-factor draw.
    num_of_obligors : int
        Number of idiosyncratic shocks to draw; passed as ``size`` to
        ``np.random.normal``.

    Returns
    -------
    numpy.ndarray
        ``factor_loading * x + epsilon * (1 - factor_loading**2) ** 0.5``
        with one entry per obligor.
    """
    # Draw order matters for reproducibility under a fixed seed:
    # the common factor first, then the idiosyncratic shocks.
    common_factor = np.random.normal()
    idiosyncratic = np.random.normal(0, 1, num_of_obligors)
    idiosyncratic_weight = (1 - factor_loading**2) ** 0.5
    return factor_loading * common_factor + idiosyncratic * idiosyncratic_weight

In [4]:
# Quick check: draw Y for a single bucket of 5 obligors
generate_y(factor_loading, 5)

array([-0.0096953 ,  0.75130027,  1.59884586, -0.10253949, -0.1025236 ])

In [5]:
def generate_default(num_of_obligors, factor_loading, gamma):
    """Count the simulated defaults in one bucket for a single draw.

    Asset returns are produced by ``generate_y``; an obligor is counted as
    defaulted when its return is strictly below ``gamma``.

    Parameters
    ----------
    num_of_obligors : int
        Number of obligors in the bucket.
    factor_loading : float
        Weight on the common factor, forwarded to ``generate_y``.
    gamma : float
        Default threshold compared against each return.

    Returns
    -------
    Number of returns below ``gamma`` (the ``.sum()`` of a boolean array).
    """
    asset_returns = generate_y(factor_loading, num_of_obligors)
    is_default = asset_returns < gamma
    return is_default.sum()

In [6]:
# Bucket size; also used by the analytic comparison in a later cell.
n_g = 250
# Repeat the single-bucket simulation 20 times, keeping one default count per run.
d_g_list = [generate_default(n_g, factor_loading, gamma) for _ in range(20)]
# The printed value is the mean default count across the 20 runs.
print("Number of defaults: ", np.mean(d_g_list))

Number of defaults:  3.95


In [7]:
# Default counts of the 20 individual simulation runs
d_g_list

[0, 5, 3, 1, 2, 2, 7, 8, 4, 0, 3, 3, 3, 4, 1, 3, 15, 9, 4, 2]

In [8]:
# Analytic benchmark for the simulated mean: Phi(gamma) * n_g.
# Y = w*x + sqrt(1 - w^2)*eps with x, eps standard normal, so P(Y < gamma) = Phi(gamma).
norm.cdf(gamma) * n_g

3.944401847772625

In [9]:
def generate_default_buckets(factor_loading_list, num_of_obligors_list, gamma_list, time_points=160):
    """Total the simulated defaults of several buckets over a number of periods.

    One common-factor value is drawn per period and shared by all buckets.
    For every bucket and period, fresh idiosyncratic shocks are drawn, the
    asset returns are built, and the obligors whose return falls below the
    bucket's threshold are counted. Counts are summed over the periods.

    Parameters
    ----------
    factor_loading_list : sequence of float
        Common-factor weight of each bucket (indexed like
        ``num_of_obligors_list``).
    num_of_obligors_list : sequence of int
        Number of obligors in each bucket.
    gamma_list : sequence of float
        Default threshold of each bucket.
    time_points : int, optional
        Number of periods to simulate. Defaults to 160, the value that was
        previously hard-coded.

    Returns
    -------
    list
        One total default count per bucket, in bucket order.
    """
    # One common-factor draw per period, shared across all buckets.
    x = np.random.normal(0, 1, time_points)
    defaults_list = []

    for index, num_of_obligors in enumerate(num_of_obligors_list):
        loading = factor_loading_list[index]
        # Weight of the idiosyncratic shock; constant within a bucket.
        idiosyncratic_weight = (1 - loading**2) ** 0.5
        d_g = 0
        for i in range(time_points):
            epsilon = np.random.normal(0, 1, num_of_obligors)
            y = loading * x[i] + epsilon * idiosyncratic_weight
            d_g += (y < gamma_list[index]).sum()
        defaults_list.append(d_g)

    return defaults_list

In [10]:
# Three buckets: same factor loading, 250 obligors each, thresholds rising from -2.9 to -1.6
factor_loading_list = [0.45, 0.45, 0.45]
num_of_obligors_list = [250, 250, 250]
gamma_list = [-2.9, -2.3, -1.6]
# NOTE: d_g_list is rebound here; it previously held the 20 single-bucket counts.
# It now holds one total per bucket, summed over the simulated periods.
d_g_list = generate_default_buckets(factor_loading_list,num_of_obligors_list, gamma_list)
print("Number of defaults: ", d_g_list)

Number of defaults:  [67, 397, 2048]


In [11]:
def generate_default_time_series(factor_loading_list, num_of_obligors_list, gamma_list, time_points=160):
    """Simulate per-period default counts for several grades.

    One common-factor value is drawn per period and shared by all grades.
    For each grade and period, idiosyncratic shocks are drawn and the number
    of obligors whose return is below the grade's threshold is recorded.

    Parameters
    ----------
    factor_loading_list : sequence of float
        Common-factor weight per grade. A one-element sequence is repeated
        so that every grade uses that single value.
    num_of_obligors_list : sequence of int
        Number of obligors in each grade.
    gamma_list : sequence of float
        Default threshold of each grade.
    time_points : int, optional
        Number of periods to simulate (default 160).

    Returns
    -------
    pandas.DataFrame
        One row per period and one column ``d_g_<index>`` per grade.
    """
    loadings = factor_loading_list
    if len(loadings) == 1:
        # Broadcast the single loading to every grade.
        loadings = [loadings[0]] * len(num_of_obligors_list)

    # Common factor: drawn once up front, one value per period.
    common_factor = np.random.normal(0, 1, time_points)
    defaults_df = pd.DataFrame()

    for grade, obligor_count in enumerate(num_of_obligors_list):
        counts = []
        for factor_draw in common_factor:
            shocks = np.random.normal(0, 1, obligor_count)
            loading = loadings[grade]
            returns = loading * factor_draw + shocks * (1 - loading ** 2) ** 0.5
            counts.append((returns < gamma_list[grade]).sum())
        defaults_df[f"d_g_{grade}"] = counts

    return defaults_df

In [12]:
# Same three-bucket setup as before, but keep the per-period counts for 20 periods
factor_loading_list = [0.45, 0.45, 0.45]
num_of_obligors_list = [250, 250, 250]
gamma_list = [-2.9, -2.3, -1.6]
default_table = generate_default_time_series(factor_loading_list,num_of_obligors_list, gamma_list, time_points=20)
# One row per period, one d_g_<index> column per bucket
default_table

Unnamed: 0,d_g_0,d_g_1,d_g_2
0,0,1,8
1,0,3,9
2,0,3,27
3,0,0,4
4,0,1,1
5,1,6,26
6,0,0,11
7,0,1,13
8,0,5,6
9,2,8,22
