# Method of moments parameter estimation in Vasicek model

In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from scipy.optimize import minimize

In [2]:
df = pd.read_csv('SP_historical_PD_data.csv', sep=';')

In [3]:
df.head()

Unnamed: 0,Year,Total defaults*,Investment-grade defaults,Speculative-grade defaults,Default rate (%),Investment-grade default rate (%),Speculative-grade default rate (%),Total debt outstanding (bil. $)
0,1981,2,0,2,0.15,0.0,0.63,0.06
1,1982,18,2,15,1.22,0.19,4.46,0.9
2,1983,12,1,10,0.77,0.09,2.98,0.37
3,1984,14,2,12,0.93,0.17,3.31,0.36
4,1985,19,0,18,1.13,0.0,4.37,0.31


In [4]:
# Make pd columns
df["pd_total"] = df["Default rate (%)"] / 100
df["pd_inv"] = df["Investment-grade default rate (%)"] / 100
df["pd_spec"] = df["Speculative-grade default rate (%)"] / 100

In [5]:
# Calculate the number of obligors
df['num_of_inv_grades'] = (df['Investment-grade defaults'] / (df["pd_inv"])).round()
df['num_of_spec_grades'] = (
            df['Speculative-grade defaults'] / (df["pd_spec"])).round().astype(int)
df['num_of_total_grades'] = (df['Total defaults*'] / (df["pd_total"])).round().astype(int)

# Fill-out the missing values in num_of_inv_grades column with the difference between num_of_total_grades and num_of_spec_grades
df['num_of_inv_grades'] = np.where(df['num_of_inv_grades'].isna(), df['num_of_total_grades'] - df['num_of_spec_grades'],
                                   df['num_of_inv_grades']).astype(int)

In [6]:
# Calculate the average
p_total_average = np.mean(df["pd_total"])

In [7]:
print(p_total_average)

0.0147675


In [8]:
# Expected value of 1/n_g
expected_value_of_reciprocal_n_g = np.mean(1 / df['num_of_total_grades'])

In [9]:
print(expected_value_of_reciprocal_n_g)

0.0002769531644134457


In [10]:
# Normalized volatility
normalized_volatility = np.sqrt(np.var(df["pd_total"]))/p_total_average

In [11]:
print(normalized_volatility)

0.6558144744924091


$$
\mathrm{V}\left[p_\zeta(x)\right]=\frac{\mathrm{V}\left[\hat{p}_\zeta\right]-\mathrm{E}\left[1 / \hat{n}_\zeta\right] \bar{p}_\zeta\left(1-\bar{p}_\zeta\right)}{1-\mathrm{E}\left[1 / \hat{n}_\zeta\right]}
$$

In [12]:
variance_of_p_total = (np.var(df["pd_total"]) - expected_value_of_reciprocal_n_g * p_total_average * (1 - p_total_average)) / (1 - expected_value_of_reciprocal_n_g)

In [13]:
print(variance_of_p_total)

8.9789553080102e-05


In [14]:
normalized_varience = np.sqrt(variance_of_p_total) / p_total_average

In [15]:
print(normalized_varience)

0.6416614161184627


In [16]:
cut_off_value_total = stats.norm.ppf(p_total_average)

In [17]:
print(cut_off_value_total)

-2.1762710356300374


$$
V_\zeta \equiv \operatorname{Var}\left[p_\zeta(x)\right]=\operatorname{BIVNOR}\left(C_\zeta, C_\zeta, w_\zeta^2\right)-\bar{p}_\zeta^2
$$

In [18]:
def calc_variance_of_default_rate(w_factor_loading, pd_average = p_total_average):
    
    cut_off_value = stats.norm.ppf(pd_average)
    
    # if w_factor_loading is a np array, take the first element
    if isinstance(w_factor_loading, np.ndarray):
        w_factor_loading = w_factor_loading[0]
    
    # Bivariate normal cdf parameters
    mean = [0, 0]
    cov_matrix = [[1, w_factor_loading], [w_factor_loading, 1]]
    
     # Calculate BIVNOR value
    bivnor_value = stats.multivariate_normal.cdf([cut_off_value, cut_off_value], mean=mean, cov=cov_matrix)
    
    result = bivnor_value - pd_average ** 2
    
    return result

In [19]:
# Test for calc_variance_of_default_rate
calc_variance_of_default_rate(0.7)

0.004096368277893707

## Method of moments parameter estimation for w_factor_loading

In [20]:
# Target value to achieve
target_result = variance_of_p_total

# Initial guess for the minimization algorithm
initial_guess = 0.27

# Define bounds for w_factor_loading
bounds = [(-2, 2)]

# Define object function
object_function = lambda w: abs(calc_variance_of_default_rate(w) - target_result)

# Minimization based on the objective function
result = minimize(object_function, initial_guess, bounds=bounds)

# The found value of w_factor_loading
w_factor_loading_found = result.x[0]

print("Found value of w_factor_loading:", w_factor_loading_found)

Found value of w_factor_loading: 0.056365695073192584


## Packaged function for parameter estimation

In [21]:
def estimate_w_factor_loading(historical_pd, num_of_total_grades, initial_guess=0.27):
    """
    Estimate w_factor_loading using the method of moments
    :param historical_pd: pd series, list of historical default rates
    :param num_of_total_grades: pd series, list of the number of total grades
    :param initial_guess: float, initial guess for the minimization algorithm
    :return: float, estimated w_factor_loading
    """
    
    # Calculate the average
    pd_average = np.mean(historical_pd)
    
    # Calculate the expected value of 1/n_g
    expected_value_of_reciprocal_n_g = np.mean(1 / num_of_total_grades)
    
    # Variance of default rate
    variance_of_p_d = (np.var(historical_pd) - expected_value_of_reciprocal_n_g * pd_average * (1 - pd_average)) / (1 - expected_value_of_reciprocal_n_g)

    # Define bounds for w_factor_loading
    bounds = [(-5, 5)]

    # Define object function
    object_function = lambda w: abs(calc_variance_of_default_rate(w, pd_average) - variance_of_p_d)

    # Minimization based on the objective function
    result = minimize(object_function, initial_guess, bounds=bounds)

    # The found value of w_factor_loading
    w_factor_loading_found = result.x[0]

    return w_factor_loading_found, pd_average

In [22]:
# Test for the packaged function
estimate_w_factor_loading(df["pd_total"], df["num_of_total_grades"])

(0.056365695073192584, 0.0147675)

In [23]:
estimate_w_factor_loading(df["pd_inv"], df["num_of_inv_grades"])

(0.07270379780609139, 0.0008775)

In [24]:
estimate_w_factor_loading(df["pd_spec"], df["num_of_spec_grades"])

(0.07602889962521624, 0.040069999999999995)

# Using MM_estimation function from the package

In [6]:
from src.MM_estimation import MM_estimation

# Estimate w_factor_loading for total grades
w_total, pd_total = MM_estimation(df["Total defaults*"], df["num_of_total_grades"])

print("Estimated w_factor_loading for total grades:", w_total)
print("Estimated pd_average for total grades:", pd_total)

Estimated w_factor_loading for total grades: 0.05636182711809641
Estimated pd_average for total grades: -2.17626718586128


In [8]:
from src.MM_estimation import estimate_w_factor_loading
from scipy.stats import norm

param1, param2 = estimate_w_factor_loading(df["pd_total"], df["num_of_total_grades"])
print(param1, norm.ppf(param2))

0.056365695073192584 -2.1762710356300374


# Generated default time series gives back different w_factor_loading than the input parameter

In [10]:
from src.data_generator import generate_default_time_series

time_points = 160
num_of_obligors_list = [3000, 3000, 3000]
factor_loading_list = [0.45, 0.45, 0.45]
gamma_list = [-2.9, -2.3, -1.6]

generated_ts_df = generate_default_time_series(factor_loading_list, num_of_obligors_list, gamma_list, time_points)

In [11]:
generated_ts_df

Unnamed: 0,d_g_0,d_g_1,d_g_2
0,22,96,453
1,3,14,134
2,0,2,28
3,1,11,71
4,8,67,298
...,...,...,...
155,2,13,89
156,3,48,202
157,0,8,63
158,3,7,37


In [18]:
defaulted_all = np.array(generated_ts_df.sum(axis=0))

In [19]:
pds = defaulted_all / (num_of_obligors_list[0] * time_points)

In [21]:
from scipy.stats import norm

norm.ppf(pds)

array([-2.90514472, -2.3090345 , -1.60987568])

In [32]:
from src.MM_estimation import MM_estimation

# Estimate w_factor_loading for total grades
w_total, pd_total = MM_estimation(generated_ts_df["d_g_2"], num_of_obligors_list[2])

In [33]:
print(w_total, pd_total)

0.21363044659236166 -1.6098756765811062
