# Method of moments parameter estimation in Vasicek model

In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from scipy.optimize import minimize

In [6]:
df = pd.read_csv('data\SP_historical_PD_data.csv', sep=';')

In [7]:
df.head()

Unnamed: 0,Year,Total defaults*,Investment-grade defaults,Speculative-grade defaults,Default rate (%),Investment-grade default rate (%),Speculative-grade default rate (%),Total debt outstanding (bil. $)
0,1981,2,0,2,0.15,0.0,0.63,0.06
1,1982,18,2,15,1.22,0.19,4.46,0.9
2,1983,12,1,10,0.77,0.09,2.98,0.37
3,1984,14,2,12,0.93,0.17,3.31,0.36
4,1985,19,0,18,1.13,0.0,4.37,0.31


In [8]:
# Make pd columns
df["pd_total"] = df["Default rate (%)"] / 100
df["pd_inv"] = df["Investment-grade default rate (%)"] / 100
df["pd_spec"] = df["Speculative-grade default rate (%)"] / 100

In [9]:
# Calculate the number of obligors
df['num_of_inv_grades'] = (df['Investment-grade defaults'] / (df["pd_inv"])).round()
df['num_of_spec_grades'] = (
            df['Speculative-grade defaults'] / (df["pd_spec"])).round().astype(int)
df['num_of_total_grades'] = (df['Total defaults*'] / (df["pd_total"])).round().astype(int)

# Fill-out the missing values in num_of_inv_grades column with the difference between num_of_total_grades and num_of_spec_grades
df['num_of_inv_grades'] = np.where(df['num_of_inv_grades'].isna(), df['num_of_total_grades'] - df['num_of_spec_grades'],
                                   df['num_of_inv_grades']).astype(int)

In [10]:
# Calculate the average
p_total_average = np.mean(df["pd_total"])

In [11]:
print(p_total_average)

0.0147675


In [12]:
# Expected value of 1/n_g
expected_value_of_reciprocal_n_g = np.mean(1 / df['num_of_total_grades'])

In [13]:
print(expected_value_of_reciprocal_n_g)

0.0002769531644134457


In [14]:
# Normalized volatility
normalized_volatility = np.sqrt(np.var(df["pd_total"]))/p_total_average

In [15]:
print(normalized_volatility)

0.6558144744924091


$$
\mathrm{V}\left[p_\zeta(x)\right]=\frac{\mathrm{V}\left[\hat{p}_\zeta\right]-\mathrm{E}\left[1 / \hat{n}_\zeta\right] \bar{p}_\zeta\left(1-\bar{p}_\zeta\right)}{1-\mathrm{E}\left[1 / \hat{n}_\zeta\right]}
$$

In [16]:
variance_of_p_total = (np.var(df["pd_total"]) - expected_value_of_reciprocal_n_g * p_total_average * (1 - p_total_average)) / (1 - expected_value_of_reciprocal_n_g)

In [17]:
print(variance_of_p_total)

8.9789553080102e-05


In [18]:
normalized_varience = np.sqrt(variance_of_p_total) / p_total_average

In [19]:
print(normalized_varience)

0.6416614161184627


In [20]:
cut_off_value_total = stats.norm.ppf(p_total_average)

In [21]:
print(cut_off_value_total)

-2.1762710356300374


$$
V_\zeta \equiv \operatorname{Var}\left[p_\zeta(x)\right]=\operatorname{BIVNOR}\left(C_\zeta, C_\zeta, w_\zeta^2\right)-\bar{p}_\zeta^2
$$

In [22]:
def calc_variance_of_default_rate(w_factor_loading, pd_average = p_total_average):
    
    cut_off_value = stats.norm.ppf(pd_average)
    
    # if w_factor_loading is a np array, take the first element
    if isinstance(w_factor_loading, np.ndarray):
        w_factor_loading = w_factor_loading[0]
    
    # Bivariate normal cdf parameters
    mean = [0, 0]
    cov_matrix = [[1, w_factor_loading], [w_factor_loading, 1]]
    
     # Calculate BIVNOR value
    bivnor_value = stats.multivariate_normal.cdf([cut_off_value, cut_off_value], mean=mean, cov=cov_matrix)
    
    result = bivnor_value - pd_average ** 2
    
    return result

In [23]:
# Test for calc_variance_of_default_rate
calc_variance_of_default_rate(0.7)

0.004096368277893707

## Method of moments parameter estimation for w_factor_loading

In [25]:
# Target value to achieve
target_result = variance_of_p_total

# Initial guess for the minimization algorithm
initial_guess = 0.27

# Define bounds for w_factor_loading
bounds = [(-2, 2)]

# Define object function
object_function = lambda w: abs(calc_variance_of_default_rate(w) - target_result)

# Minimization based on the objective function
result = minimize(object_function, initial_guess, bounds=bounds)

# The found value of w_factor_loading
w_factor_loading_found = result.x[0]

print("Found value of w_factor_loading:", w_factor_loading_found)

Found value of w_factor_loading: 0.056365695073192584


## Packaged function for parameter estimation

In [26]:
def estimate_w_factor_loading(historical_pd, num_of_total_grades, initial_guess=0.27):
    """
    Estimate w_factor_loading using the method of moments
    :param historical_pd: pd series, list of historical default rates
    :param num_of_total_grades: pd series, list of the number of total grades
    :param initial_guess: float, initial guess for the minimization algorithm
    :return: float, estimated w_factor_loading
    """
    
    # Calculate the average
    pd_average = np.mean(historical_pd)
    
    # Calculate the expected value of 1/n_g
    expected_value_of_reciprocal_n_g = np.mean(1 / num_of_total_grades)
    
    # Variance of default rate
    variance_of_p_d = (np.var(historical_pd) - expected_value_of_reciprocal_n_g * pd_average * (1 - pd_average)) / (1 - expected_value_of_reciprocal_n_g)

    # Define bounds for w_factor_loading
    bounds = [(-5, 5)]

    # Define object function
    object_function = lambda w: abs(calc_variance_of_default_rate(w, pd_average) - variance_of_p_d)

    # Minimization based on the objective function
    result = minimize(object_function, initial_guess, bounds=bounds)

    # The found value of w_factor_loading
    w_factor_loading_found = result.x[0]

    return w_factor_loading_found, pd_average

In [27]:
# Test for the packaged function
estimate_w_factor_loading(df["pd_total"], df["num_of_total_grades"])

(0.056365695073192584, 0.0147675)

In [28]:
estimate_w_factor_loading(df["pd_inv"], df["num_of_inv_grades"])

(0.07270379780609139, 0.0008775)

In [29]:
estimate_w_factor_loading(df["pd_spec"], df["num_of_spec_grades"])

(0.07602889962521624, 0.040069999999999995)

# Using MM_estimation function from the package

In [30]:
from src.MM_estimation import MM_estimation

# Estimate w_factor_loading for total grades
w_total, pd_total = MM_estimation(df["Total defaults*"], df["num_of_total_grades"])

print("Estimated w_factor_loading for total grades:", w_total)
print("Estimated pd_average for total grades:", pd_total)

Estimated w_factor_loading for total grades: 0.2374064566828194
Estimated pd_average for total grades: -2.17626718586128


In [31]:
from src.MM_estimation import estimate_w_factor_loading
from scipy.stats import norm

param1, param2 = estimate_w_factor_loading(df["pd_total"], df["num_of_total_grades"])
print(param1, norm.ppf(param2))

0.23741460694825398 -2.1762710356300374


# Generated default time series gives back different w_factor_loading than the input parameter

In [32]:
from src.data_generator import generate_default_time_series
import numpy as np

time_points = 160
num_of_obligors_list = [3000, 3000, 3000]
factor_loading_list = [0.3, 0.3, 0.3]
gamma_list = [-2.9, -2.3, -1.6]

generated_ts_df = generate_default_time_series(factor_loading_list, num_of_obligors_list, gamma_list, time_points)

In [33]:
generated_ts_df

Unnamed: 0,d_g_0,d_g_1,d_g_2
0,0,19,120
1,4,16,95
2,7,27,114
3,1,1,42
4,7,44,214
...,...,...,...
155,8,29,172
156,8,63,313
157,12,104,386
158,11,48,254


In [34]:
num_of_obligors_table = num_of_obligors_list[0] * np.ones(generated_ts_df.shape)

In [35]:
defaulted_all = np.array(generated_ts_df.sum(axis=0))

In [36]:
pds = defaulted_all / (num_of_obligors_list[0] * time_points)

In [37]:
from scipy.stats import norm

norm.ppf(pds)

array([-2.90514472, -2.30611573, -1.59766898])

In [38]:
from src.MM_estimation import MM_estimation

# Estimate w_factor_loading for total grades
w_total, pd_total = MM_estimation(generated_ts_df["d_g_2"], num_of_obligors_list[2])

In [39]:
print(w_total, pd_total)

0.28906850423785657 -1.597668976226097


In [44]:
# Make MM for every grade
MM_params_w = []
MM_params_gamma = []

for i in range(3):
    w_total, pd_total = MM_estimation(generated_ts_df[f'd_g_{i}'], num_of_obligors_list[i])
    print(w_total, pd_total)
    MM_params_w.append(w_total)
    MM_params_gamma.append(pd_total)

0.27003303959670394 -2.9051447155502528
0.2903447393851022 -2.3061157287707283
0.28906850423785657 -1.597668976226097


In [43]:
MM_params_w

[0.27003303959670394, 0.2903447393851022, 0.28906850423785657]

In [57]:
from src.ML_estimation import mle_trapz_g_and_w

mle_results = mle_trapz_g_and_w(generated_ts_df.values, num_of_obligors_table, MM_params_w, MM_params_gamma)

In [60]:
print(mle_results[0])
print(mle_results[1])

[0.2995126  0.29693876 0.29624572]
[-2.90594206 -2.30824816 -1.60187343]


In [47]:
from src.variable_change import a_calc_func, b_calc_func

a_init = np.array(a_calc_func(np.array(MM_params_w), np.array(MM_params_gamma)))
b_init = np.array(b_calc_func(np.array(MM_params_w), np.array(MM_params_gamma)))

In [48]:
a_init, b_init

(array([-0.28045152, -0.30341523, -0.30195964]),
 array([-3.01723168, -2.40993044, -1.66891772]))

# Simulate much time series and calculate the average of the estimated w_factor_loading

In [4]:
from src.MM_estimation import gen_data_and_mm

time_points = 40
num_of_obligors_list = [3000, 3000, 3000]
#num_of_obligors_list = [10000, 10000, 10000]
factor_loading_list = [0.3, 0.3, 0.3]
gamma_list = [-2.9, -2.3, -1.6]

In [None]:
params_df = gen_data_and_mm(time_points, num_of_obligors_list, factor_loading_list, gamma_list)

In [3]:
params_df

Unnamed: 0,w_0,w_1,w_2
0,0.270034,0.301738,0.297079
1,0.270034,0.276182,0.280614
2,0.270028,0.282262,0.284940
3,0.270037,0.307969,0.298105
4,0.269973,0.280232,0.282804
...,...,...,...
95,0.270031,0.307442,0.314267
96,0.269971,0.273184,0.265651
97,0.270040,0.349455,0.338478
98,0.270038,0.285525,0.296211


In [4]:
params_df.mean()

w_0    0.271988
w_1    0.296180
w_2    0.296989
dtype: float64

In [5]:
from src.ML_estimation import gen_data_and_mle_new

params_MLE_df = gen_data_and_mle_new(time_points, num_of_obligors_list, factor_loading_list, gamma_list, sims = 10)

In [6]:
params_MLE_df

Unnamed: 0,w_0,gamma_0,w_1,gamma_1,w_2,gamma_2
0,0.26012,-2.855633,0.312538,-2.256067,0.299646,-1.560199
1,0.340121,-2.871879,0.316171,-2.308389,0.317692,-1.618806
2,0.315537,-2.911386,0.307131,-2.329159,0.307961,-1.636171
3,0.301805,-2.881526,0.265113,-2.295405,0.290286,-1.569395
4,0.35458,-2.840057,0.30961,-2.277577,0.306788,-1.571053
5,0.285144,-2.895235,0.27807,-2.30943,0.280597,-1.61216
6,0.283053,-2.883206,0.294347,-2.260639,0.286213,-1.567112
7,0.27504,-2.759849,0.251183,-2.17233,0.249153,-1.455241
8,0.249514,-2.947472,0.267139,-2.367815,0.267151,-1.655054
9,0.307676,-2.911041,0.28764,-2.324754,0.290169,-1.62299


In [7]:
import pandas as pd
from src.data_generator import generate_default_time_series
from src.ML_estimation import mle_trapz_g_and_w
import numpy as np

grade_num = len(gamma_list)

params_df = pd.DataFrame()

for sim in range(2):
    defaults_df = generate_default_time_series(factor_loading_list, num_of_obligors_list, gamma_list, time_points)
    num_of_obligors_df = np.full_like(defaults_df, num_of_obligors_list[0])
    w_param, pd_param, _ = mle_trapz_g_and_w(defaults_df.values, num_of_obligors_df, factor_loading_list, gamma_list)
    for i in range(grade_num):
        params_df.loc[sim, "w_" + str(i)] = w_param[i]
        params_df.loc[sim, "gamma_" + str(i)] = pd_param[i]

In [8]:
params_df

Unnamed: 0,w_0,gamma_0,w_1,gamma_1,w_2,gamma_2
0,0.270681,-3.091889,0.28167,-2.396085,0.264549,-1.72887
1,0.288844,-2.785243,0.32717,-2.203742,0.322046,-1.488792
