In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from scipy.integrate import quad
from scipy.stats import norm, binom
from scipy.optimize import minimize

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
df = pd.read_csv('SP_historical_PD_data.csv', sep=';')

In [3]:
df.shape

(40, 8)

In [4]:
df.head()

Unnamed: 0,Year,Total defaults*,Investment-grade defaults,Speculative-grade defaults,Default rate (%),Investment-grade default rate (%),Speculative-grade default rate (%),Total debt outstanding (bil. $)
0,1981,2,0,2,0.15,0.0,0.63,0.06
1,1982,18,2,15,1.22,0.19,4.46,0.9
2,1983,12,1,10,0.77,0.09,2.98,0.37
3,1984,14,2,12,0.93,0.17,3.31,0.36
4,1985,19,0,18,1.13,0.0,4.37,0.31


In [5]:
# Calculate the number of obligors
df['num_of_inv_grades'] = (df['Investment-grade defaults'] / (df['Investment-grade default rate (%)']/100)).round()
df['num_of_spec_grades'] = (df['Speculative-grade defaults'] / (df['Speculative-grade default rate (%)']/100)).round().astype(int)
df['num_of_total_grades'] = (df['Total defaults*'] / (df['Default rate (%)']/100)).round().astype(int)

# Fill-out the missing values in num_of_inv_grades column with the difference between num_of_total_grades and num_of_spec_grades
df['num_of_inv_grades'] = np.where(df['num_of_inv_grades'].isna(), df['num_of_total_grades'] - df['num_of_spec_grades'], df['num_of_inv_grades']).astype(int)

In [6]:
def p_g(X_g, w_g, gamma_g):
    """
    Calculates the success probability of a Bernoulli trial.

    Parameters:
        X_g (float): Input value.
        w_g (float): Parameter 'w_g'.
        gamma_g (float): Parameter 'gamma_g'.

    Returns:
        float: Result of p_g(X_g).
    """
    modified_result = norm.cdf((gamma_g - w_g * X_g) / np.sqrt(1 - w_g**2))

    return modified_result

In [7]:
def calculate_my_likelihood(d_g, n_g, p_g, prob_dens_func, w_g, gamma_g):
    """
    Numerically calculates the value of L(d_g) based on the given formula.

    Parameters:
        d_g (int): Value of d_g.
        n_g (int): Value of n_g.
        p_g (callable): The p_g function representing the probability density function.
        prob_dens_func (callable): The pdf_g function representing the probability density function.
        w_g (float): Parameter 'w_g'.
        gamma_g (float): Parameter 'gamma_g'.

    Returns:
        float: Numerical approximation of the integral.
    """
    integrand = lambda x: binom.pmf(d_g, n_g, p_g(x, w_g, gamma_g)) * (p_g(x, w_g, gamma_g) ** d_g) * ((1 - p_g(x, w_g, gamma_g)) ** (n_g - d_g)) * prob_dens_func(x)
    
    result, _ = quad(integrand, -np.inf, np.inf)
    
    return result

In [8]:
# An example for the calculation of the likelihood
test_d_g = 5
test_n_g = 100
prob_dens_func = norm.pdf
test_w_g = 0.2
test_gamma_g = -1.6449

L_result = calculate_my_likelihood(test_d_g, test_n_g, p_g, prob_dens_func, test_w_g, test_gamma_g)
print(f"The value of L(d) for d = {test_d_g} and n = {test_n_g} is {L_result}")

The value of L(d) for d = 5 and n = 100 is 2.5312559717345424e-10


In [9]:
def calculate_likelihood_ts(d_g, n_g, p_g, pdf_g, w_g, gamma_g):
    """
    Numerically calculates the time series value of L(d_g) based on the given formula by multiply for each date.

    Parameters:
        d_g (pd.Series): Time series for d_g.
        n_g (pd.Series): Time series for n_g.
        p_g (callable): The p_g function representing the probability density function.
        pdf_g (callable): The pdf_g function representing the probability density function.
        w_g (float): Parameter 'w_g'.
        gamma_g (float): Parameter 'gamma_g'.

    Returns:
        float: Numerical approximation of the integral.
    """
    integrand = lambda x: np.prod(binom.pmf(d_g, n_g, p_g(x, w_g, gamma_g)) * (p_g(x, w_g, gamma_g) ** d_g) * ((1 - p_g(x, w_g, gamma_g)) ** (n_g - d_g)) * pdf_g(x))
    
    result, _ = quad(integrand, -np.inf, np.inf)
    
    return result

In [10]:
# Test the calculate_likelihood_ts function
test_d_series = pd.Series([5, 8, 12, 4, 9])
test_n_series = pd.Series([100, 194, 199, 113, 97])
pd_list = test_d_series / test_n_series
test_ttc_pd = pd_list.mean()
test_gamma = stats.norm.ppf(test_ttc_pd)

test_weight = 0.2

L_result = calculate_likelihood_ts(test_d_series, test_n_series, p_g, norm.pdf, test_w_g, test_gamma)
print(f"The value of L(d) for d = {test_d_series.tolist()} and n = {test_n_series.tolist()} is {L_result}")

The value of L(d) for d = [5, 8, 12, 4, 9] and n = [100, 194, 199, 113, 97] is 3.9149108105755717e-72


In [11]:
weight_list = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45]

for weight in weight_list:
    L_result = calculate_likelihood_ts(test_d_series, test_n_series, p_g, norm.pdf, weight, test_gamma)
    print(f"The value of L(d) is {L_result} with weight {weight}")

The value of L(d) is 5.876929937113723e-72 with weight 0.1
The value of L(d) is 4.822793434223152e-72 with weight 0.15
The value of L(d) is 3.9149108105755717e-72 with weight 0.2
The value of L(d) is 3.1795651147844954e-72 with weight 0.25
The value of L(d) is 2.5879515799771117e-72 with weight 0.3
The value of L(d) is 2.1069567576758481e-72 with weight 0.35
The value of L(d) is 1.7104105901804183e-72 with weight 0.4
The value of L(d) is 1.3794259309641432e-72 with weight 0.45


In [12]:
# Test the calculate_likelihood_ts function
test_d_series = pd.Series([5, 8, 12, 4, 9])
test_n_series = pd.Series([100, 194, 199, 113, 97])

# MLE condition and initial guess
w_g_initial = 0.23
gamma_g_initial = 1.23

# Function to be minimized in weight parameter
objective_function = lambda params: -calculate_likelihood_ts(test_d_series, test_n_series, p_g, norm.pdf, params[0], params[1])

initial_guess = [w_g_initial, gamma_g_initial]
# Bounds for the weight parameter
bounds = [(0, 1), (-np.inf, np.inf)]
result = minimize(objective_function, initial_guess, method='Nelder-Mead', bounds=bounds)
# Method can be Nelder-Mead or Powell

# The optimal weight parameter
optimal_weight = result.x
print(f"The optimal weight parameter is {optimal_weight[0]}")
print(f"The optimal gamma parameter is {optimal_weight[1]}")

The optimal weight parameter is 5.368813615538475e-05
The optimal gamma parameter is -1.606742591320881


In [13]:
# Test the calculate_likelihood_ts function
test_d_series = pd.Series([5, 8, 12, 4, 9, 10, 15, 20])
test_n_series = pd.Series([100, 194, 199, 113, 97, 140, 210, 303])

result = minimize(objective_function, initial_guess, method='Nelder-Mead', bounds=bounds)
# Method can be Nelder-Mead or Powell

# The optimal weight parameter
optimal_weight = result.x
print(f"The optimal weight parameter is {optimal_weight}")
print(f"The optimal gamma parameter is {optimal_weight[1]}")

The optimal weight parameter is [0.23 1.23]
The optimal gamma parameter is 1.23


# MLE example for a slice of the historical time series

In [14]:
# MLE example
short_d_g = df['Total defaults*'].iloc[10:15]
short_n_g = df['num_of_total_grades'].iloc[10:15]
gamma = stats.norm.ppf(df['Default rate (%)'].iloc[10:15].mean() / 100)

# MLE condition and initial guess
initial_guess = [0.23]
bounds = [(0, 1)]

# Function to be minimized in weight parameter
objective_function = lambda params: -calculate_likelihood_ts(short_d_g, short_n_g, p_g, norm.pdf, params[0], gamma)

result = minimize(objective_function, initial_guess, method='Nelder-Mead', bounds=bounds)
# Method can be Nelder-Mead or Powell

# The optimal weight parameter
optimal_weight = result.x
print(f"The optimal weight parameter is {optimal_weight}")

The optimal weight parameter is [0.23]


In [15]:
# MLE example
short_d_g = df['Total defaults*'].iloc[10:15]
short_n_g = df['num_of_total_grades'].iloc[10:15]
gamma = stats.norm.ppf(df['Default rate (%)'].iloc[10:15].mean() / 100)

# MLE condition and initial guess
w_g_initial = 0.23
gamma_g_initial = 1.23

# Function to be minimized in weight parameter
objective_function = lambda params: -calculate_likelihood_ts(short_d_g, short_n_g, p_g, norm.pdf, params[0], params[1])

initial_guess = [w_g_initial, gamma_g_initial]
# Bounds for the weight parameter
bounds = [(0, 1), (-np.inf, np.inf)]
result = minimize(objective_function, initial_guess, method='Nelder-Mead', bounds=bounds)
# Method can be Nelder-Mead or Powel


# The optimal weight parameter
optimal_weight = result.x
print(f"The optimal weight parameter is {optimal_weight}")

The optimal weight parameter is [0.23 1.23]


### Value of objective function not changing with the weight parameter, so the MLE is not working properly
### Almost all the time the likelihood function is 0, so the MLE is not working properly.

# MLE for the whole historical time series

In [16]:
TTC_PD = df['Default rate (%)'].mean() / 100
print(f"The TTC PD is {TTC_PD}")

The TTC PD is 0.014767500000000003


In [17]:
d_g = df['Total defaults*']
n_g = df['num_of_total_grades']
gamma_g = stats.norm.ppf(df['Default rate (%)'].mean() / 100)

# MLE condition and initial guess
initial_guess = 0.23
bounds = [(0, 1)]

# Function to be minimized in weight parameter
objective_function = lambda params: -calculate_likelihood_ts(d_g, n_g, p_g, norm.pdf, params[0], gamma_g)

result = minimize(objective_function, initial_guess, method='Nelder-Mead', bounds=bounds)
# Method can be Nelder-Mead or Powell

# The optimal weight parameter
optimal_weight = result.x
print(f"The optimal weight parameter is {optimal_weight}")

The optimal weight parameter is [0.23]


Value of objective function not changing with the weight parameter, so the MLE is not working properly

# We can aggregate the $d_{g}$ and $n_{g}$ values because the likelihood function is additive in these parameters

In [18]:
d_g_sum = df['Total defaults*'].sum()
n_g_sum = df['num_of_total_grades'].sum()

print(f"The sum of d_g is {d_g_sum}")
print(f"The sum of n_g is {n_g_sum}")

The sum of d_g is 3098
The sum of n_g is 202644


In [19]:
d_g_first3 = df['Total defaults*'][:3].sum()
n_g_first3 = df['num_of_total_grades'][:3].sum()

# Function to be minimized in weight parameter
objective_function = lambda params: -calculate_likelihood_ts(d_g_first3, n_g_first3, p_g, norm.pdf, params[0], params[1])

w_g_initial = 0.17
gamma_g_initial = -1.43
initial_guess = [w_g_initial, gamma_g_initial]
# Bounds for the weight parameter
bounds = [(0, 1), (-np.inf, np.inf)]
result = minimize(objective_function, initial_guess, method='Nelder-Mead', bounds=bounds)
# Method can be Nelder-Mead or Powell

# The optimal weight parameter
optimal_parameters = result.x
print(f"The optimal weight parameter is {optimal_parameters[0]}")
print(f"The optimal gamma parameter is {optimal_parameters[1]}")

The optimal weight parameter is 1.8450109169628783e-05
The optimal gamma parameter is -2.4407377901099583


In [20]:
d_g = df['Total defaults*'].sum()
n_g = df['num_of_total_grades'].sum()

# Function to be minimized in weight parameter
objective_function = lambda params: -calculate_likelihood_ts(d_g, n_g, p_g, norm.pdf, params[0], params[1])

w_g_initial = 0.17
gamma_g_initial = -1.43
initial_guess = [w_g_initial, gamma_g_initial]
# Bounds for the weight parameter
bounds = [(0, 1), (-np.inf, np.inf)]
result = minimize(objective_function, initial_guess, method='Nelder-Mead', bounds=bounds)
# Method can be Nelder-Mead or Powell

# The optimal weight parameter
optimal_parameters = result.x
print(f"The optimal weight parameter is {optimal_parameters[0]}")
print(f"The optimal gamma parameter is {optimal_parameters[1]}")

The optimal weight parameter is 0.17
The optimal gamma parameter is -1.43


## The MLE is not working properly, the likelihood function is very close to zero if $k=3000$, $n=200,000$.

In [21]:
calculate_my_likelihood(31, 2078, p_g, norm.pdf, 0.1, -1.2)

8.761438311240149e-89

In [22]:
calculate_my_likelihood(307, 20005, p_g, norm.pdf, 0.1, -1.2)

0.0

In [23]:
calculate_my_likelihood(3098, 202644, p_g, norm.pdf, 0.1, -1.2)

0.0