## This notebook generates simulated default data and uses it for maximum likelihood estimation (MLE)

In [1]:
import numpy as np
from scipy.stats import norm, binom
from scipy.integrate import quad
from scipy.optimize import minimize
from src.sucess_probability import p_g

In [2]:
# Define the parameters
# factor_loading: weight on the shared systematic factor x in generate_y.
factor_loading = 0.45
# gamma: threshold; an obligor whose Y falls below it is counted as a default (see generate_default).
gamma = -1.5

In [3]:
def generate_y(factor_loading, num_of_obligors):
    """Simulate the normalized asset returns (Y) of the obligors in one bucket.

    A single systematic draw is shared by every obligor, and each obligor gets
    its own idiosyncratic draw. Both come from NumPy's global random state,
    the systematic draw first.

    Parameters:
        factor_loading (float): Weight on the shared systematic factor.
        num_of_obligors (int): Number of obligors, i.e. the length of the result.

    Returns:
        numpy.ndarray: One simulated return per obligor.
    """
    systematic = np.random.normal()
    idiosyncratic = np.random.normal(0, 1, num_of_obligors)
    # The idiosyncratic weight is chosen so that the two weights' squares sum to 1.
    idiosyncratic_weight = (1 - factor_loading**2)**0.5
    return factor_loading * systematic + idiosyncratic * idiosyncratic_weight

In [4]:
# Quick check: draw Y for 5 obligors of a single bucket.
generate_y(factor_loading, 5)

array([-2.86287415,  0.17011535, -0.85815945, -0.01743542,  1.16727221])

In [5]:
def generate_default(num_of_obligors, factor_loading, gamma):
    """Simulate one bucket and count how many obligors default.

    Parameters:
        num_of_obligors (int): Number of obligors to simulate.
        factor_loading (float): Factor loading passed on to generate_y.
        gamma (float): Default threshold; a return strictly below it is a default.

    Returns:
        Number of simulated returns that are below ``gamma``.
    """
    returns = generate_y(factor_loading, num_of_obligors)
    is_default = returns < gamma
    return is_default.sum()

In [6]:
# Simulate 160 independent periods of one bucket with n_g obligors each.
n_g = 250
d_g_list = [generate_default(n_g, factor_loading, gamma) for _ in range(160)]
# Note: the value printed below is the mean default count across the 160 runs.
print("Number of defaults: ", np.mean(d_g_list))

Number of defaults:  18.9375


In [7]:
# Expected number of defaults among n_g obligors: generate_y builds Y with unit
# variance, so P(Y < gamma) = norm.cdf(gamma). Compare with the simulated mean above.
norm.cdf(gamma) * n_g

16.701800317214513

In [8]:
# Repeat the single-bucket simulation 100 times and display the average default count.
default_list = [generate_default(n_g, factor_loading, gamma) for _ in range(100)]

np.mean(default_list)

20.17

In [9]:
def generate_default_buckets(factor_loading_list, num_of_obligors_list, gamma_list, num_of_periods=160):
    """Simulate total default counts for several buckets over a number of periods.

    One systematic factor value is drawn per period and shared by all buckets.
    For every bucket and every period, fresh idiosyncratic shocks are drawn for
    the bucket's obligors, and the obligors whose normalized return falls
    strictly below the bucket's threshold are counted as defaults.

    Random numbers come from NumPy's global random state in this order: all
    systematic factors first, then bucket by bucket, period by period.

    Parameters:
        factor_loading_list (sequence of float): Factor loading of each bucket.
        num_of_obligors_list (sequence of int): Obligors per period in each bucket;
            its length determines how many buckets are simulated.
        gamma_list (sequence of float): Default threshold of each bucket.
        num_of_periods (int): Number of simulated periods. Defaults to 160,
            the value that was previously hard-coded.

    Returns:
        list: One entry per bucket, the default count summed over all periods.
    """
    x = np.random.normal(0, 1, num_of_periods)
    defaults_list = []

    for index, num_of_obligors in enumerate(num_of_obligors_list):
        # These per-bucket values do not change across periods, so look them up once.
        factor_loading = factor_loading_list[index]
        gamma = gamma_list[index]
        idiosyncratic_weight = (1 - factor_loading**2)**0.5

        d_g = 0
        for i in range(num_of_periods):
            epsilon = np.random.normal(0, 1, num_of_obligors)
            y = factor_loading * x[i] + epsilon * idiosyncratic_weight
            d_g += (y < gamma).sum()
        defaults_list.append(d_g)

    return defaults_list

In [10]:
# Three buckets with the same factor loading and size but different default thresholds.
factor_loading_list = [0.45] * 3
num_of_obligors_list = [250] * 3
gamma_list = [-2.9, -2.3, -1.6]
d_g_list = generate_default_buckets(
    factor_loading_list,
    num_of_obligors_list,
    gamma_list,
)
print("Number of defaults: ", d_g_list)

Number of defaults:  [64, 426, 2222]


In [11]:
def calculate_my_likelihood_arr(d_g_arr, n_g_arr, p_g, prob_dens_func, w_g_arr, gamma_g_arr,
                                lower=-3, upper=3):
    """
    Numerically calculates the value of L(d_g_arr) for multiple grades.

    For a given value x of the integration variable, the grades' binomial
    probabilities are multiplied together and weighted by prob_dens_func(x);
    the result is integrated over [lower, upper] with scipy.integrate.quad.

    Parameters:
        d_g_arr (array-like of int): Values of d_g's by grades (number of defaults).
        n_g_arr (array-like of int): Values of n_g's by grades (number of obligors).
        p_g (callable): Called as p_g(x, w_g_arr, gamma_g_arr); its return value is
            used as the success probability of the binomial pmf for each grade.
        prob_dens_func (callable): Density of the integration variable x,
            called as prob_dens_func(x).
        w_g_arr (numpy.array(float)): Parameter 'w_g's by grades.
        gamma_g_arr (numpy.array(float)): Parameter 'gamma_g's by grades.
        lower (float): Lower integration limit. Defaults to -3.
        upper (float): Upper integration limit. Defaults to 3.

    Returns:
        float: Numerical approximation of the integral (quad's error estimate
        is discarded).

    Note:
        With the default limits and a standard normal density, roughly 0.27% of
        the probability mass lies outside the integration range; widen the
        limits if that truncation matters.
    """

    def integrand(x):
        # Product over grades of the binomial pmf at x, weighted by the density of x.
        grade_probabilities = binom.pmf(d_g_arr, n_g_arr, p_g(x, w_g_arr, gamma_g_arr))
        return np.prod(grade_probabilities) * prob_dens_func(x)

    result, _ = quad(integrand, lower, upper)

    return result

In [12]:
# Two-grade test vector; the next cell reads entries 0-1 as gammas and entries 2-3 as factor loadings.
initial_guess = np.array([-3.17, -1.78, 0.5, 0.3])

In [13]:
# Evaluate p_g at x = 0 for the two grades (argument order: x, factor loadings, gammas).
p_g(0, initial_guess[2:4], initial_guess[0:2])

array([0.00012591, 0.03102439])

In [14]:
# Show the simulated default totals next to the total obligor counts
# (obligors per period times the 160 simulated periods).
d_g_list, np.array(num_of_obligors_list) * 160

([64, 426, 2222], array([40000, 40000, 40000]))

In [15]:
# Re-simulate the three buckets under a fixed seed so the estimation cells below are reproducible.
np.random.seed(42)
factor_loading_list = [0.45] * 3
num_of_obligors_list = [250] * 3
gamma_list = [-2.9, -2.3, -1.6]
d_g_list = generate_default_buckets(
    factor_loading_list,
    num_of_obligors_list,
    gamma_list,
)

In [16]:
# Case: a separate gamma per grade, one factor loading shared by all grades.
# Parameter vector layout: [gamma_1, gamma_2, gamma_3, w].
#
# NOTE(review): the simulation draws a new systematic factor for each of the 160
# periods, while the likelihood integrates over a single x for the pooled totals
# (n_g = 250 * 160) -- confirm that this pooling is intended.

d_g_arr = d_g_list
n_g_arr = 160 * np.array(num_of_obligors_list)

# Start the search at the values used to simulate the data.
initial_guess = np.array([-2.9, -2.3, -1.6, 0.45])
bounds = [(-5, 5)] * 3 + [(-1, 1)]


def objective_function(params):
    """Negative log-likelihood for three gammas and one shared factor loading."""
    gammas = params[0:3]
    shared_w = np.repeat(params[3], 3)
    likelihood = calculate_my_likelihood_arr(d_g_arr, n_g_arr, p_g, norm.pdf, shared_w, gammas)
    return -np.log(likelihood)


# Method can be Nelder-Mead or Powell
result = minimize(
    objective_function,
    initial_guess,
    method="Nelder-Mead",
    bounds=bounds,
    options={'disp': True},
)

# result.x holds the full estimated vector (gammas followed by w).
optimal_weight = result.x
print(f"The optimal weight parameter is {optimal_weight}")
print(result.message)

Optimization terminated successfully.
         Current function value: 15.975290
         Iterations: 72
         Function evaluations: 136
The optimal weight parameter is [-2.7987036  -2.25997086 -1.63899683  0.46543338]
Optimization terminated successfully.


In [17]:
#[-2.86909645 -2.31877637 -1.70957583  0.44903538] 15.454434, [-2.91256677 -2.32328551 -1.67963831  0.38511863], 15.572930 [-2.87680625 -2.29940442 -1.66884253  0.46237501]

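# TODO: run the estimation
#   - for 1 w (a single factor loading shared by all grades),
#   - for more w (one factor loading per grade),
#   - for fixed gamma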

In [18]:
# Show the seeded default totals next to the total obligor counts
# (obligors per period times the 160 simulated periods).
d_g_list, np.array(num_of_obligors_list) * 160

([73, 431, 2207], array([40000, 40000, 40000]))

In [19]:
# Case: gammas held fixed, one factor loading w shared by all grades.
# Only w is estimated here.

d_g_arr = d_g_list
n_g_arr = 160 * np.array(num_of_obligors_list)
# Each gamma is pinned to the normal quantile of the grade's observed default rate.
gamma_g = norm.ppf(d_g_arr / n_g_arr)

initial_guess = np.array([0.45])
bounds = [(-1, 1)]


def objective_function(param):
    """Negative log-likelihood as a function of the single shared factor loading."""
    shared_w = np.repeat(param, 3)
    likelihood = calculate_my_likelihood_arr(d_g_arr, n_g_arr, p_g, norm.pdf, shared_w, gamma_g)
    return -np.log(likelihood)


# Method can be Nelder-Mead or Powell
result = minimize(
    objective_function,
    initial_guess,
    method="Nelder-Mead",
    bounds=bounds,
    options={'disp': True},
)

# The optimal weight parameter
optimal_weight = result.x
print(f"The optimal weight parameter is {optimal_weight}")
print(result.message)

Optimization terminated successfully.
         Current function value: 32.715524
         Iterations: 9
         Function evaluations: 18
The optimal weight parameter is [0.45298828]
Optimization terminated successfully.


In [20]:
# Case: a separate gamma and a separate factor loading for every grade.
# Parameter vector layout: [gamma_1, gamma_2, gamma_3, w_1, w_2, w_3].

d_g_arr = d_g_list
n_g_arr = 160 * np.array(num_of_obligors_list)

initial_guess = np.array([-2.85, -2.28, -1.73, 0.44, 0.44, 0.44])
bounds = [(-5, 5)] * 3 + [(-1, 1)] * 3


def objective_function(params):
    """Negative log-likelihood for three gammas and three factor loadings."""
    gammas = params[0:3]
    weights = params[3:6]
    likelihood = calculate_my_likelihood_arr(d_g_arr, n_g_arr, p_g, norm.pdf, weights, gammas)
    return -np.log(likelihood)


# Method can be Nelder-Mead or Powell
result = minimize(
    objective_function,
    initial_guess,
    method="Nelder-Mead",
    bounds=bounds,
    options={'disp': True},
)

# result.x holds the full estimated vector (gammas followed by the factor loadings).
optimal_weight = result.x
print(f"The optimal weight parameter is {optimal_weight}")
print(result.message)

Optimization terminated successfully.
         Current function value: 15.696459
         Iterations: 376
         Function evaluations: 622
The optimal weight parameter is [-2.71832229 -2.27685227 -1.68524951  0.57457913  0.51126892  0.20935336]
Optimization terminated successfully.


In [21]:
# Inspect the inputs of the likelihood evaluation: defaults, obligor counts,
# factor loadings (entries 3-5) and gammas (entries 0-2) of the initial guess.
d_g_arr, n_g_arr, initial_guess[3:6], initial_guess[0:3]

([73, 431, 2207],
 array([40000, 40000, 40000]),
 array([0.44, 0.44, 0.44]),
 array([-2.85, -2.28, -1.73]))

In [22]:
# Log-likelihood at the initial guess of the per-grade factor-loading case.
np.log(calculate_my_likelihood_arr(d_g_arr, n_g_arr, p_g, norm.pdf, initial_guess[3:6], initial_guess[0:3]))

-26.39238732015877

In [23]:
# Parameters used to simulate the data: three gammas followed by three factor loadings.
exact_values = np.array([-2.9, -2.3, -1.6, 0.45, 0.45, 0.45])

In [24]:
# Log-likelihood at the parameters that were used to simulate the data.
# NOTE(review): the recorded value here is lower than the one at the initial guess;
# possibly because the likelihood pools all 160 periods under a single x while the
# simulation draws a new x for every period -- confirm.
np.log(calculate_my_likelihood_arr(d_g_arr, n_g_arr, p_g, norm.pdf, exact_values[3:6], exact_values[0:3]))

-30.587448326033556