In [1]:
import pandas as pd
import numpy as np
from scipy.stats import norm
from scipy.optimize import minimize
from src.sucess_probability import p_g
from src.ML_estimation import multivariate_ml_estimation, calculate_my_likelihood_arr

In [2]:
# Load the data
grades_data = pd.read_csv('data/grades_hist_pd.csv', sep=';', index_col=0)

for col_name in grades_data.columns:
    grades_data[col_name] = grades_data[col_name] / 100
    
grades_cols = grades_data.columns

In [3]:
grades_data.head()

Unnamed: 0_level_0,AAA,AA,A,BBB,BB,B,CCC/C
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1981,0.0,0.0,0.0,0.0,0.0,0.0233,0.0
1982,0.0,0.0,0.0021,0.0035,0.0424,0.0318,0.2143
1983,0.0,0.0,0.0,0.0034,0.0116,0.047,0.0667
1984,0.0,0.0,0.0,0.0068,0.0114,0.0349,0.25
1985,0.0,0.0,0.0,0.0,0.015,0.0653,0.1538


In [4]:
# Assumption every grade has 10000 obligors
n_g_assumption = 10000

In [5]:
for col_name in grades_cols:
    grades_data[str(col_name) + " defaults"] = (n_g_assumption * grades_data[col_name]).astype(int)

In [6]:
grades_data

Unnamed: 0_level_0,AAA,AA,A,BBB,BB,B,CCC/C,AAA defaults,AA defaults,A defaults,BBB defaults,BB defaults,B defaults,CCC/C defaults
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1981,0.0,0.0,0.0,0.0,0.0,0.0233,0.0,0,0,0,0,0,233,0
1982,0.0,0.0,0.0021,0.0035,0.0424,0.0318,0.2143,0,0,21,34,424,318,2143
1983,0.0,0.0,0.0,0.0034,0.0116,0.047,0.0667,0,0,0,34,115,470,667
1984,0.0,0.0,0.0,0.0068,0.0114,0.0349,0.25,0,0,0,68,113,349,2500
1985,0.0,0.0,0.0,0.0,0.015,0.0653,0.1538,0,0,0,0,150,653,1538
1986,0.0,0.0,0.0018,0.0034,0.0133,0.0845,0.2308,0,0,18,34,133,844,2308
1987,0.0,0.0,0.0,0.0,0.0038,0.0313,0.1228,0,0,0,0,38,313,1228
1988,0.0,0.0,0.0,0.0,0.0105,0.0368,0.2037,0,0,0,0,105,368,2037
1989,0.0,0.0,0.0018,0.0061,0.0073,0.034,0.3333,0,0,18,60,73,340,3333
1990,0.0,0.0,0.0,0.0058,0.0357,0.0856,0.3125,0,0,0,57,356,856,3125


In [7]:
grades_data.iloc[:, -4:].sum(axis=0)

BBB defaults        774
BB defaults        3413
B defaults        16757
CCC/C defaults    99675
dtype: int64

In [8]:
# Choose last four columns and calculate the sum of defaults
d_g = grades_data.iloc[:, -4:].sum(axis=0).tolist()
n_g = [n_g_assumption * grades_data.shape[0]] * len(d_g)

In [9]:
d_g, n_g

([774, 3413, 16757, 99675], [400000, 400000, 400000, 400000])

In [10]:
# Make MLE estimation
w_init = 0.09
gamma_init = [-1] * len(d_g)
bounds = [(0, 1)] + [(-10, 10)] * len(d_g)
multivariate_ml_estimation(d_g, n_g, norm.pdf, p_g, w_init, gamma_init, bounds)

array([ 0.09, -1.  , -1.  , -1.  , -1.  ])

In [11]:
# Different gamma and same factor loading parameter

d_g_arr = np.array(d_g)
n_g_arr = np.array(n_g)
gamma_g = norm.ppf(d_g_arr / n_g_arr)

# MLE condition and initial guess
initial_guess = np.array([0.30])
bounds = [(-1, 1)]

# Function to be minimized in weight parameter
objective_function = lambda params: -np.log(calculate_my_likelihood_arr(d_g_arr, n_g_arr, p_g, norm.pdf, np.repeat(params, 4), gamma_g))


result = minimize(objective_function,
                  initial_guess,
                  method="Nelder-Mead",
                  bounds=bounds,
                  options={
                      'disp': True})
# Method can be Nelder-Mead or Powell

# The optimal weight parameter
optimal_weight = result.x
print(f"The optimal weight parameter is {optimal_weight}")
print(result.message)

  objective_function = lambda params: -np.log(calculate_my_likelihood_arr(d_g_arr, n_g_arr, p_g, norm.pdf, np.repeat(params, 4), gamma_g))


Optimization terminated successfully.
         Current function value: 21.520225
         Iterations: 18
         Function evaluations: 36
The optimal weight parameter is [-8.32667268e-16]
Optimization terminated successfully.


In [12]:
gamma_g

array([-2.88856828, -2.38530428, -1.72913464, -0.67704879])

In [13]:
# Different gamma and same factor loading parameter

d_g_arr = np.array(d_g)
n_g_arr = np.array(n_g)
gamma_g = norm.ppf(d_g_arr / n_g_arr)

# MLE condition and initial guess
initial_guess = np.array([-2.85, -2.4, -1.7, -0.6, 0.25])
bounds = [(-5, 5), (-5, 5), (-5, 5), (-5, 5), (-1, 1)]

# Function to be minimized in weight parameter
objective_function = lambda params: -np.log(calculate_my_likelihood_arr(d_g_arr, n_g_arr, p_g, norm.pdf, 
                                                                 np.repeat(params[4], 4), 
                                                                 params[0:4]))

result = minimize(objective_function,
                  initial_guess,
                  method="Nelder-Mead",
                  bounds=bounds,
                  options={
                      'disp': True})
# Method can be Nelder-Mead or Powell

# The optimal weight parameter
optimal_weight = result.x
print(f"The optimal weight parameter is {optimal_weight}")
print(result.message)

  objective_function = lambda params: -np.log(calculate_my_likelihood_arr(d_g_arr, n_g_arr, p_g, norm.pdf,


Optimization terminated successfully.
         Current function value: 25.456491
         Iterations: 109
         Function evaluations: 186
The optimal weight parameter is [-2.79115976 -2.30320575 -1.66699612 -0.6468825   0.24471236]
Optimization terminated successfully.
