# Confidence intervals for the ML parameter estimates of the generated data

In [1]:
import pandas as pd
import numpy as np
from src.data_generator import generate_default_buckets
from scipy.stats import norm

In [2]:
# Fix seed for reproducibility
# This seeds NumPy's global (legacy) random state.
# NOTE(review): presumably generate_default_buckets draws from that global state - confirm,
# otherwise this seed has no effect on the simulated defaults.
seed = 42
np.random.seed(seed)

In [3]:
# Grade-level model inputs; position i in every list refers to the same grade.
factor_loading_list = [0.45, 0.45, 0.45]
pd_list = [0.0015, 0.0100, 0.0500]
# Gamma is the standard-normal quantile of each PD.
gamma_list = norm.ppf(pd_list)
num_of_obligors_list = [400, 250, 100]

# Collect the inputs in one table: Grade, Factor Loading, PD, Gamma, Num of Obligors
df = pd.DataFrame(
    {
        'Grade': ['A', 'B', 'C'],
        'Factor Loading': factor_loading_list,
        'PD': pd_list,
        'Gamma': gamma_list,
        'Num of Obligors': num_of_obligors_list,
    }
)
df

Unnamed: 0,Grade,Factor Loading,PD,Gamma,Num of Obligors
0,A,0.45,0.0015,-2.967738,400
1,B,0.45,0.01,-2.326348,250
2,C,0.45,0.05,-1.644854,100


In [4]:
# Generate the default counts with the project helper over the full time series.
length_of_time_series = 160
default_list = generate_default_buckets(
    factor_loading_list,
    num_of_obligors_list,
    gamma_list,
    time_points=length_of_time_series,
)
print("Number of defaults: ", default_list)

# Pooled obligor count: each entry of num_of_obligors_list times the number of time points.
num_of_obligors_over_time = [
    obligor_count * length_of_time_series
    for obligor_count in num_of_obligors_list
]
print("Number of obligors over time: ", num_of_obligors_over_time)

Number of defaults:  [110, 374, 771]
Number of obligors over time:  [64000, 40000, 16000]


In [5]:
# Empirical PD: defaults divided by the pooled obligor count, entry by entry
empirical_pd_list = [
    defaults / obligors
    for defaults, obligors in zip(default_list, num_of_obligors_over_time)
]
print("Empirical PD: ", empirical_pd_list)

Empirical PD:  [0.00171875, 0.00935, 0.0481875]


In [6]:
# NOTE(review): this rebinds `gamma_list`, which an earlier cell set to norm.ppf(pd_list).
# After this cell runs, re-running the data-generation cell would use these hand-written
# values instead; consider a separate name for them.
gamma_list = [-2.9, -2.3, -1.6]
# Evaluate the standard-normal CDF at these values (shown as the cell output).
norm.cdf(gamma_list)

array([0.00186581, 0.01072411, 0.05479929])

In [7]:
from src.ML_estimation import calculate_my_likelihood_arr
from src.sucess_probability import p_g
from scipy.optimize import minimize

def parameter_estimation(default_list, num_of_obligors_over_time, factor_loading_init, gamma_list_init):
    """Fit gammas and factor loading(s) by minimising the negative log-likelihood.

    The objective is ``-log(calculate_my_likelihood_arr(...))`` and is minimised
    with bounded Nelder-Mead. The parameter vector is laid out as
    ``[gamma_1, ..., gamma_G, loading_1, ..., loading_K]``.

    Parameters
    ----------
    default_list : sequence
        Passed unchanged as the first argument of ``calculate_my_likelihood_arr``.
    num_of_obligors_over_time : sequence
        Passed unchanged as the second argument of ``calculate_my_likelihood_arr``.
    factor_loading_init : float or sequence of float
        Starting value(s) for the factor loading. Either a single value, which is
        shared by all grades (repeated once per gamma), or exactly one value per
        gamma.
    gamma_list_init : sequence of float
        Starting values for the gammas; its length defines the number of grades.

    Returns
    -------
    scipy.optimize.OptimizeResult
        Result of ``scipy.optimize.minimize``; ``result.x`` holds the gammas
        followed by the factor loading(s).

    Raises
    ------
    ValueError
        If no gamma is supplied, or if the number of factor loadings is neither
        1 nor the number of gammas.
    """
    # Flatten to float arrays so lists, tuples and ndarrays (e.g. the output of
    # norm.ppf) are all accepted; `list + list` would add ndarrays element-wise.
    gamma_init = np.asarray(gamma_list_init, dtype=float).ravel()
    loading_init = np.asarray(factor_loading_init, dtype=float).ravel()

    num_of_gamma = gamma_init.size
    num_of_factor_loading = loading_init.size

    if num_of_gamma == 0:
        raise ValueError("gamma_list_init must contain at least one value")
    if num_of_factor_loading not in (1, num_of_gamma):
        raise ValueError(
            "factor_loading_init must contain either 1 value or one value per gamma "
            f"({num_of_gamma}), got {num_of_factor_loading}"
        )

    initial_guess = np.concatenate([gamma_init, loading_init])

    # Search box: gammas in [-5, 5], factor loadings in [-1, 1].
    bounds = num_of_gamma * [(-5, 5)] + num_of_factor_loading * [(-1, 1)]

    def objective_function(params):
        """Negative log-likelihood for one parameter vector."""
        gammas = params[:num_of_gamma]
        loadings = params[num_of_gamma:]
        if num_of_factor_loading == 1:
            # A single shared loading is broadcast to one entry per grade.
            loadings = np.repeat(loadings[0], num_of_gamma)
        return -np.log(calculate_my_likelihood_arr(
            default_list, num_of_obligors_over_time, p_g, norm.pdf, loadings, gammas
        ))

    result = minimize(
        objective_function,
        initial_guess,
        method="Nelder-Mead",
        bounds=bounds,
        options={'disp': True},
    )

    return result

In [12]:
# Test the parameter estimation
# Starting point for the optimiser: one shared factor loading and one gamma per grade.
factor_loading_init = [0.40]
gamma_list_init = [-2.9, -2.2, -1.6]
result = parameter_estimation(
    default_list=default_list,
    num_of_obligors_over_time=num_of_obligors_over_time,
    factor_loading_init=factor_loading_init,
    gamma_list_init=gamma_list_init,
)

Optimization terminated successfully.
         Current function value: 11.366573
         Iterations: 217
         Function evaluations: 366


In [13]:
# Fitted parameter vector, in the order the initial guess was built inside
# parameter_estimation: the gammas first, then the factor loading.
# NOTE(review): in the saved output the last entry (factor loading) is about 2e-06,
# far from the 0.45 used to generate the data - worth investigating before building
# confidence intervals on top of this estimator.
result.x

array([-2.92566941e+00, -2.35144527e+00, -1.66264753e+00,  2.07984790e-06])

In [9]:
# Confidence interval for the ML parameter estimation
# Experiment settings: number of simulation runs and the time-series lengths to compare
num_of_simulations = 100
list_of_len_ts = [20, 30, 80, 160]

# Result store keyed by time-series length; every key starts with its own empty list
results = {len_ts: [] for len_ts in list_of_len_ts}

In [10]:
# To-do: make an optimization module which gives back two lists of parameters: one for the factor loadings and one for the gammas.
# Then use these parameters to build the confidence intervals: dict['len_ts']['grade']['factor_loading'] = list of factor loadings
# Do this with MM and the 3 types of ML