# Libraries and Setup

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
os.chdir("/content/drive/MyDrive/CI for PD")

In [3]:
!pip install -qU numba

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
from scipy import linalg as la
import datetime as dt
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm, trange
from numba import jit, njit
try:
    import multiprocess
except:
    import multiprocessing as multiprocess
import warnings
warnings.filterwarnings('ignore')

from numba.experimental import jitclass
from numba import  types, typed, typeof

In [5]:
%load_ext cython

In [6]:
multiprocess.cpu_count()

2

# Reading data

In [7]:

MappingData = pd.read_csv("ratings_mapping.csv")
MappingData.head()

Unnamed: 0,RatingSymbol,RatingNumber
0,AAA,0
1,AA+,1
2,AA,2
3,AA-,3
4,A+,4


In [8]:
TransitionData = pd.read_csv("issuer_credit_ratings_cleaned_local_currency_lt.csv")
TransitionData.head()

Unnamed: 0,rating,rating_action_date,obligor_name,rating_outlook
0,B+,2014-09-15,1011778 B.C. Unlimited Liability Company,Stable
1,B+,2015-06-08,1011778 B.C. Unlimited Liability Company,Stable
2,B+,2016-06-29,1011778 B.C. Unlimited Liability Company,Stable
3,B+,2017-02-14,1011778 B.C. Unlimited Liability Company,Stable
4,B+,2017-02-24,1011778 B.C. Unlimited Liability Company,Stable


In [9]:
TransitionData = TransitionData[["obligor_name", "rating_action_date", "rating"]]
TransitionData.columns = ["Id", "Date", "RatingSymbol"]
TransitionData = TransitionData.sort_values(by=["Id", "Date"])
le = LabelEncoder()
TransitionData.Id = le.fit_transform(TransitionData.Id)

In [10]:
TransitionData.Date = pd.to_datetime(TransitionData.Date)

In [11]:
TransitionData = pd.merge(TransitionData, MappingData, on='RatingSymbol', how='left')
TransitionData.head()

Unnamed: 0,Id,Date,RatingSymbol,RatingNumber
0,0,2014-09-15,B+,13
1,0,2015-06-08,B+,13
2,0,2016-06-29,B+,13
3,0,2017-02-14,B+,13
4,0,2017-02-24,B+,13


### Hazard

In [12]:
def mask_first(x):
  # https://stackoverflow.com/questions/31226142/python-pandas-delete-the-first-row-by-group
  result = np.ones_like(x)
  result[0] = 0
  return result

In [13]:
# https://github.com/dpicone1/Estimating_Credit_Rating_Transition_Matrices_Hazard_Vs_Cohort/blob/master/TransitionClassFile.py
# the default rating symbol is D
# the default rating index  is 20

# the NR rating symbol is NR
# the NR rating index  is 21

# There are 21 initial states AAA, AA+, AA, AA-, A+, A, A-, BBB+, BBB, BBB-, BB+,
#                            BB, BB-, B+, B, B-, CCC+, CCC, CCC-, CC, D
# and 22 final states         AAA, AA+, AA, AA-, A+, A, A-, BBB+, BBB, BBB-, BB+,
#                            BB, BB-, B+, B, B-, CCC+, CCC, CCC-, CC, D, NR


def create_year_data(df, year):
    year_df = df[df.Date.dt.year.isin(year)]
    year_df = year_df.reset_index(drop=True)

    companies_whose_first_rating_is_nr = year_df.groupby("Id")["RatingSymbol"].first()[year_df.groupby("Id")["RatingSymbol"].first()=="NR"].index.tolist()
    year_df_nr_first = year_df[year_df.Id.isin(companies_whose_first_rating_is_nr)]
    mask = year_df_nr_first.groupby(['Id'])['Id'].transform(mask_first).astype(bool)
    year_df_nr_first_cleaned = year_df_nr_first.loc[mask]
    year_df = pd.concat([
        year_df[~year_df.Id.isin(companies_whose_first_rating_is_nr)],
        year_df_nr_first_cleaned
    ])

    # le = LabelEncoder()
    # year_df['Id'] = le.fit_transform(year_df['Id'])

    return year_df

In [14]:
dates = np.array([dt.datetime.fromtimestamp(
            dt.datetime.timestamp(x)) for x in TransitionData.values[:, 1]],
                                                 dtype='datetime64')

In [15]:
years = np.array([x.year for x in TransitionData.Date])
arg1 = TransitionData.values[:, 0]
arg2 = TransitionData.values[:, 1]
arg3 = TransitionData.values[:, 2]
arg4 = TransitionData.values[:, 3]

In [17]:
# @jitclass([('MyRatingsIdArray', typeof(arg1)),
#            ('MyRatingsDateArray', typeof(arg2)),
#            ('MyRatingsValuesArray', typeof(arg3)),
#            ('MyRatingsNumberArray', typeof(arg4)),
#            ('MyRatingsTimeDiffArray', typeof(dates)),
#             ('MyRatingsYearArray', typeof(years)),
#              ('defautYes', typeof(True)),
#            ('NRYes', typeof(True)), ('size', types.int64), ('yend', types.int64),
#             ('ybeg', types.int64), ('TransMatrix', typeof(np.zeros([21, 22]))),
#             ('TransDen', typeof(np.zeros([21]))),
#             ('TransDenLambda', typeof(np.zeros([21]))), ('DefaultYesHazard', typeof(True))])
# class TransitionClass(object):
    # def __init__(self, RatingsIdArray, RatingsDateArray, RatingsValuesArray, RatingsNumberArray, yend):

    #     #RatingsArray is an array containing the info to be processed
    #     #yend  is final year of the analysis
    #     self.MyRatingsIdArray = RatingsIdArray

    #     self.MyRatingsDateArray = RatingsDateArray

    #     self.MyRatingsValuesArray = RatingsValuesArray

    #     self.MyRatingsNumberArray = RatingsNumberArray

    #     self.MyRatingsTimeDiffArray = np.array([0] + [int(np.timedelta64(x, "D")/ np.timedelta64(1, 'D'))\
    #                                              for x in -(self.MyRatingsDateArray[:-1]-self.MyRatingsDateArray[1:])])

    #     self.MyRatingsYearArray = np.array([x.year for x in self.MyRatingsDateArray])

    #     self.defautYes = False #The general status is that the loan/bond has not defaulted for the cohort model
    #     self.NRYes     = False #The general status is that the loan/bond is not NR for the cohort model

    #     self.DefaultedCheck()# check if the data contains a default
    #     self.NRCheck()       # check if the data contains a NR

    #     self.size      = len(self.MyRatingsValuesArray)
    #     self.yend      = yend
    #     self.ybeg      = self.MyRatingsYearArray[0]


    # def DefaultedCheck(self):
    #     # Is there a default?
    #     self.defaulted     = self.MyRatingsValuesArray[self.MyRatingsValuesArray == "D"]
    #     self.defaultedSize = len(self.defaulted)

    #     if (self.defaultedSize > 0):

    #         # remove all the data after the obligor has defaulted
    #         DefaultDate = self.MyRatingsDateArray[self.MyRatingsValuesArray == 'D']
    #         date = DefaultDate[0]
    #         self.MyRatingsIdArray = self.MyRatingsIdArray[self.MyRatingsDateArray <= date] # remove all the data after the default
    #         self.MyRatingsValuesArray = self.MyRatingsValuesArray[self.MyRatingsDateArray <= date] # remove all the data after the default
    #         self.MyRatingsNumberArray = self.MyRatingsNumberArray[self.MyRatingsDateArray <= date] # remove all the data after the default
    #         self.MyRatingsTimeDiffArray = self.MyRatingsTimeDiffArray[self.MyRatingsDateArray <= date] # remove all the data after the default
    #         self.MyRatingsYearArray = self.MyRatingsYearArray[self.MyRatingsDateArray <= date] # remove all the data after the default
    #         self.MyRatingsDateArray = self.MyRatingsDateArray[self.MyRatingsDateArray <= date] # remove all the data after the default


    #         # After removing all addtional data after defaut,
    #         # we should have only one defaulted record, the first one
    #         self.defaulted     = self.MyRatingsValuesArray[self.MyRatingsValuesArray == 'D']
    #         self.defaultedSize = len(self.defaulted)
    #         self.defaultyear = self.MyRatingsYearArray[self.MyRatingsValuesArray == 'D'][0]
    #         self.defaultYes  = True


    # def NRCheck(self):
    #     # Is there a NR without a previous Default?
    #     self.NRSize = 0
    #     self.NR     = self.MyRatingsValuesArray[self.MyRatingsValuesArray == 'NR']

    #     if (len(self.NR) > 0 & self.defaultedSize == 1): # Remove the NR record. We do not need it as we stop
    #         # as soon as the borrower defaults
    #         self.MyRatingsIdArray = self.MyRatingsIdArray[self.MyRatingsValuesArray != "NR"] # remove all the data after the default
    #         self.MyRatingsValuesArray = self.MyRatingsValuesArray[self.MyRatingsValuesArray != "NR"] # remove all the data after the default
    #         self.MyRatingsNumberArray = self.MyRatingsNumberArray[self.MyRatingsValuesArray != "NR"] # remove all the data after the default
    #         self.MyRatingsTimeDiffArray = self.MyRatingsTimeDiffArray[self.MyRatingsValuesArray != "NR"] # remove all the data after the default
    #         self.MyRatingsYearArray = self.MyRatingsYearArray[self.MyRatingsValuesArray != "NR"] # remove all the data after the default
    #         self.MyRatingsDateArray = self.MyRatingsDateArray[self.MyRatingsValuesArray != "NR"] # remove all the data after the default

    #     if (len(self.NR) > 0 & self.defaultedSize == 0): # only if there is no default, otherwise the default will
    #         # overseed and stop the algo as soon as there is a default event

    #         # The obligor might have several NR or after an NR status might come a rating from AAA to C.
    #         # We remove all the data after the first NR.

    #         NRDate = self.MyRatingsDateArray[self.MyRatingsValuesArray == 'NR']
    #         date = NRDate[0]
    #         self.MyRatingsIdArray = self.MyRatingsIdArray[self.MyRatingsDateArray <= date] # remove all the data after the default
    #         self.MyRatingsValuesArray = self.MyRatingsValuesArray[self.MyRatingsDateArray <= date] # remove all the data after the default
    #         self.MyRatingsNumberArray = self.MyRatingsNumberArray[self.MyRatingsDateArray <= date] # remove all the data after the default
    #         self.MyRatingsTimeDiffArray = self.MyRatingsTimeDiffArray[self.MyRatingsDateArray <= date] # remove all the data after the default
    #         self.MyRatingsYearArray = self.MyRatingsYearArray[self.MyRatingsDateArray <= date] # remove all the data after the default
    #         self.MyRatingsDateArray = self.MyRatingsDateArray[self.MyRatingsDateArray <= date] # remove all the data after the default


    #         # After removing all addtional NR, we should have only one NR record, the oldest one
    #         self.NR     = self.MyRatingsValuesArray[self.MyRatingsValuesArray == 'NR']
    #         self.NRSize = len(self.NR)
    #         self.NRyear = self.MyRatingsDateArray[self.MyRatingsValuesArray == 'NR'][0].year
    #         self.NRYes  = True


    # def Cohort(self):

    #     self.RatingsBeg = [self.MyRatingsYearArray[0]] # containing the rating symbol at the beginning of the period
    #     self.RatingsEnd = []                                 # containing the rating symbol at the end of the period

    #     self.RatingsBegIndex = [self.MyRatingsNumberArray[0]]# containing the rating index at the beginning of the period
    #     self.RatingsEndIndex = []                                # containing the rating index at the end of the period

    #     if   (self.defaultedSize == 1):
    #         finalYear = min(self.yend, self.defaultyear)
    #     elif (self.NRSize > 0):
    #         finalYear = min(self.yend, self.NRyear)
    #     else:
    #         finalYear = self.yend

    #     for i in range(self.ybeg, finalYear + 1):

    #         self.MyRatingsDateArrayLoop = self.MyRatingsDateArray[self.MyRatingsYearArray == i]
    #         self.MyRatingsValuesArrayLoop = self.MyRatingsValuesArray[self.MyRatingsYearArray == i]
    #         self.MyRatingsNumberArrayLoop = self.MyRatingsNumberArray[self.MyRatingsYearArray == i]

    #         if(len(self.MyRatingsDateArrayLoop) > 0):
    #             dateCond         = max(self.MyRatingsDateArrayLoop)
    #             self.MyRatingsValuesArrayLoop2 = self.MyRatingsValuesArrayLoop[self.MyRatingsDateArrayLoop == dateCond]
    #             self.MyRatingsNumberArrayLoop2 = self.MyRatingsNumberArrayLoop[self.MyRatingsDateArrayLoop == dateCond]
    #             self.RatingsEnd.append(self.MyRatingsValuesArrayLoop2[0])
    #             self.RatingsEndIndex.append(self.MyRatingsNumberArrayLoop2[0])

    #         else:
    #             self.RatingsEnd.append(self.MyRatingsValuesArrayLoop2[0])
    #             self.RatingsEndIndex.append(self.MyRatingsNumberArrayLoop2[0])

    #         self.RatingsBeg.append(self.MyRatingsValuesArrayLoop2[0])
    #         self.RatingsBegIndex.append(self.MyRatingsNumberArrayLoop2[0])

    #         if(self.RatingsEnd[-1] == 'D' or self.RatingsEnd[-1] == 'NR' ):
    #             break

    #     if(self.defautYes):
    #         self.RatingsEnd[-1]      = 'D'
    #         self.RatingsEndIndex[-1] = 20

    #     if(self.NRYes):
    #         self.RatingsEnd[-1]      = 'NR'
    #         self.RatingsEndIndex[-1] = 21

    #     self.RatingsBeg      = self.RatingsBeg[:-1]
    #     self.RatingsBegIndex = self.RatingsBegIndex[:-1]


    # def CohortTransitionMatrix(self):
    #     self.TransMatrix = np.zeros([21, 22])
    #     self.TransDen    = np.zeros([21])

    #     for i in range(len(self.RatingsEndIndex)):
    #         self.TransMatrix[self.RatingsBegIndex[i], self.RatingsEndIndex[i]] +=1
    #         self.TransDen[self.RatingsBegIndex[i]] +=1


    # def HazardModel(self):

    #     self.TransDenLambda  = np.zeros([21]) # containing the denominator Hazard Model

    #     self.DefaultYesHazard = False

    #     for i in range(1, self.size):

    #         valore = self.MyRatingsTimeDiffArray[i]/365
    #         self.TransDenLambda[self.MyRatingsNumberArray[i - 1]] += valore

    #         if (self.MyRatingsValuesArray[i] == 'D' or self.MyRatingsValuesArray[i] == 'NR'):
    #             self.DefaultYesHazard = True
    #             break

    #     #first period
    #     dbeg = dt.datetime(self.MyRatingsYearArray[0], 1, 1)
    #     valoreBeg = (np.timedelta64(self.MyRatingsDateArray[0] - dbeg, "D")/ np.timedelta64(1, 'D'))/365.0
    #     self.TransDenLambda[self.MyRatingsNumberArray[0]] += valoreBeg

    #     # Last Period Analysis
    #     dfinal = dt.datetime(self.yend, 12, 31)
    #     if(self.DefaultYesHazard == False):
    #         valoreEnd = np.timedelta64(dfinal - self.MyRatingsDateArray[self.size - 1], "D")/ np.timedelta64(1, 'D')# (dfinal - self.MyRatingsArray[self.size - 1, 1])/365.0
    #         self.TransDenLambda[self.MyRatingsNumberArray[self.size - 1]] += valoreEnd



In [18]:
 np.hstack((TransitionData.values, np.zeros((len(TransitionData.values), 2))))

array([[0, Timestamp('2014-09-15 00:00:00'), 'B+', 13, 0.0, 0.0],
       [0, Timestamp('2015-06-08 00:00:00'), 'B+', 13, 0.0, 0.0],
       [0, Timestamp('2016-06-29 00:00:00'), 'B+', 13, 0.0, 0.0],
       ...,
       [8526, Timestamp('2014-08-18 00:00:00'), 'BB', 11, 0.0, 0.0],
       [8526, Timestamp('2014-11-05 00:00:00'), 'BB-', 12, 0.0, 0.0],
       [8526, Timestamp('2015-02-17 00:00:00'), 'NR', 21, 0.0, 0.0]],
      dtype=object)

In [19]:
class TransitionClass(object):
    def __init__(self, RatingsArray, yend):

        #RatingsArray is an array containing the info to be processed
        #yend  is final year of the analysis
        self.MyRatingsArray = np.hstack((RatingsArray, np.zeros((len(RatingsArray), 2))))

        self.MyRatingsArray[:, 4] = ["nan"] + [int(np.timedelta64(x, "D")/ np.timedelta64(1, 'D'))\
                                                 for x in -(self.MyRatingsArray[:, 1][:-1]-self.MyRatingsArray[:, 1][1:])]

        self.MyRatingsArray[:, 5] = np.array([x.year for x in self.MyRatingsArray[:,1]])

        self.defautYes = False #The general status is that the loan/bond has not defaulted for the cohort model
        self.NRYes     = False #The general status is that the loan/bond is not NR for the cohort model

        self.DefaultedCheck()# check if the data contains a default
        self.NRCheck()       # check if the data contains a NR

        self.size      = len(self.MyRatingsArray)
        self.yend      = yend
        self.ybeg      = self.MyRatingsArray[0, 5]


    def DefaultedCheck(self):
        # Is there a default?
        self.defaulted     = self.MyRatingsArray[self.MyRatingsArray[:, 2] == "D"]
        self.defaultedSize = len(self.defaulted)

        if (self.defaultedSize > 0):

            # remove all the data after the obligor has defaulted
            DefaultDate = self.MyRatingsArray[:, 1][self.MyRatingsArray[:, 2] == 'D']
            date = DefaultDate[0]
            self.MyRatingsArray = self.MyRatingsArray[self.MyRatingsArray[:, 1] <= date] # remove all the data after the default

            # After removing all addtional data after defaut,
            # we should have only one defaulted record, the first one
            self.defaulted     = self.MyRatingsArray[self.MyRatingsArray[:, 2] == 'D']
            self.defaultedSize = len(self.defaulted)
            self.defaultyear = self.MyRatingsArray[:, 5][self.MyRatingsArray[:, 2] == 'D'][0]
            self.defaultYes  = True


    def NRCheck(self):
        # Is there a NR without a previous Default?
        self.NRSize = 0
        self.NR     = self.MyRatingsArray[self.MyRatingsArray[:, 2] == 'NR']

        if (len(self.NR) > 0 & self.defaultedSize == 1): # Remove the NR record. We do not need it as we stop
            # as soon as the borrower defaults
            self.MyRatingsArray = self.MyRatingsArray[self.MyRatingsArray[:, 2] != "NR"] # remove all the data after the default

        if (len(self.NR) > 0 & self.defaultedSize == 0): # only if there is no default, otherwise the default will
            # overseed and stop the algo as soon as there is a default event

            # The obligor might have several NR or after an NR status might come a rating from AAA to C.
            # We remove all the data after the first NR.

            NRDate = self.MyRatingsArray[self.MyRatingsArray[:, 2] == 'NR']
            date = NRDate[0, 1]


            # After removing all addtional NR, we should have only one NR record, the oldest one
            self.NR     = self.MyRatingsArray[self.MyRatingsArray[:, 2] == 'NR']
            self.NRSize = len(self.NR)
            self.NRyear = self.NR[0, 5]
            self.NRYes  = True


    def Cohort(self):

        self.RatingsBeg = [self.MyRatingsArray[0, 2]] # containing the rating symbol at the beginning of the period
        self.RatingsEnd = []                                 # containing the rating symbol at the end of the period

        self.RatingsBegIndex = [self.MyRatingsArray[0, 3]]# containing the rating index at the beginning of the period
        self.RatingsEndIndex = []                                # containing the rating index at the end of the period

        if   (self.defaultedSize == 1):
            finalYear = min(self.yend, self.defaultyear)
        elif (self.NRSize > 0):
            finalYear = min(self.yend, self.NRyear)
        else:
            finalYear = self.yend

        for i in range(self.ybeg, finalYear + 1):

            self.MyRatingsArrayLoop = self.MyRatingsArray[self.MyRatingsArray[:, 5] == i]

            if(len(self.MyRatingsArrayLoop) > 0):
                dateCond         = max(self.MyRatingsArray[:, 1])
                self.MyRatingsArrayLoop2 = self.MyRatingsArrayLoop[self.MyRatingsArrayLoop[:, 1] == dateCond]
                self.RatingsEnd.append(self.MyRatingsArrayLoop2[0, 2])
                self.RatingsEndIndex.append(self.MyRatingsArrayLoop2[0, 3])

            else:
                self.RatingsEnd.append(self.MyRatingsArrayLoop2[0, 2])
                self.RatingsEndIndex.append(self.MyRatingsArrayLoop2[0, 3])

            self.RatingsBeg.append(self.MyRatingsArrayLoop2[0, 2])
            self.RatingsBegIndex.append(self.MyRatingsArrayLoop2[0, 3])

            if(self.RatingsEnd[-1] == 'D' or self.RatingsEnd[-1] == 'NR' ):
                break

        if(self.defautYes):
            self.RatingsEnd[-1]      = 'D'
            self.RatingsEndIndex[-1] = 20

        if(self.NRYes):
            self.RatingsEnd[-1]      = 'NR'
            self.RatingsEndIndex[-1] = 21

        self.RatingsBeg      = self.RatingsBeg[:-1]
        self.RatingsBegIndex = self.RatingsBegIndex[:-1]


    def CohortTransitionMatrix(self):
        self.TransMatrix = np.zeros([21, 22])
        self.TransDen    = np.zeros([21])

        for i in range(len(self.RatingsEndIndex)):
            self.TransMatrix[self.RatingsBegIndex[i], self.RatingsEndIndex[i]] +=1
            self.TransDen[self.RatingsBegIndex[i]] +=1


    def HazardModel(self):

        self.TransDenLambda  = np.zeros([21]) # containing the denominator Hazard Model

        self.DefaultYesHazard = False

        for i in range(1, self.size):

            valore = self.MyRatingsArray[i, 4]/365
            self.TransDenLambda[self.MyRatingsArray[i - 1]] += valore

            if (self.MyRatingsArray[i, 2] == 'D' or self.MyRatingsArray[i, 2] == 'NR'):
                self.DefaultYesHazard = True
                break

        #first period
        dbeg = dt.datetime(self.MyRatingsArray[0, 4], 1, 1)
        valoreBeg = (np.timedelta64(self.MyRatingsArray[0, 1] - dbeg, "D")/ np.timedelta64(1, 'D'))/365.0
        self.TransDenLambda[self.MyRatingsArray[0, 3]] += valoreBeg

        # Last Period Analysis
        dfinal = dt.datetime(self.yend, 12, 31)
        if(self.DefaultYesHazard == False):
            valoreEnd = np.timedelta64(dfinal - self.MyRatingsArray[self.size - 1, 1], "D")/ np.timedelta64(1, 'D')# (dfinal - self.MyRatingsArray[self.size - 1, 1])/365.0
            self.TransDenLambda[self.MyRatingsArray[self.size - 1, 3]] += valoreEnd



In [20]:
# @jit(parallel=True, fastmath=True)
# def test_transition_class():
#     trans = TransitionClass(arg1,
#                             arg2,
#                             arg3,
#                             arg4,
#                             2021)
#     trans.DefaultedCheck()
#     print(trans.defaultedSize)

In [21]:
# test_transition_class()

In [25]:
def compute_nijk(RatingsArray):
    size = len(RatingsArray)
    yend = max(RatingsArray[:, 1]).year
    ystart = min(RatingsArray[:, 1]).year
    IDS = np.unique(RatingsArray[:, 0])

    # The arrays containing the outputs
    TransMatrixResults       = np.zeros([21, 22])
    TransDenResults          = np.zeros([21])
    TransDenLambdaResults    = np.zeros([21])

    t = tqdm(IDS,
              desc='Bar desc',
              leave=True
              )

    # Run the algo for all obligors
    for i in t:
        t.set_description(f"Processing ID borrower No: {i}")
        t.refresh() # to show immediately the update
        MyRatingsArray = RatingsArray[RatingsArray[:, 0] == i]
        # print ("processing ID borrower No: ", i)
        if (len(MyRatingsArray) == 1 and MyRatingsArray[0, 3] == 'NR'):
            # when this is true the data is not processed as it contains only one record, equal to state NR
            print("Only one NR Rating event on Borrower No: ", i)

        else:
            trans = TransitionClass(RatingsArray, yend)
            trans.Cohort()
            trans.CohortTransitionMatrix()
            trans.HazardModel()

            TransMatrixResults    = trans.TransMatrix + TransMatrixResults
            TransDenResults       = trans.TransDen    + TransDenResults
            TransDenLambdaResults = trans.TransDenLambda + TransDenLambdaResults

    return TransMatrixResults, TransDenResults, TransDenLambdaResults

In [26]:
compute_nijk(TransitionData.values)

Processing ID borrower No: 0:   0%|          | 0/8527 [00:02<?, ?it/s]


IndexError: list assignment index out of range

In [None]:
def compute_default_probas(result):
  # Unpack result
  TransMatrixResults, TransDenResults, TransDenLambdaResults = result

  RatingMgrationHazard = pd.DataFrame(np.zeros([21, 22]),
                                      index=initial_states,
                                      columns=final_states)
  for i in range(21):
    for j in range(22):

      RatingMgrationHazard[i][j] = TransMatrixResults[i][j] / TransDenLambdaResults[i]

  #Default Category
  RatingMgrationHazard[21, :] = 0.0 # All the others = 0
  RatingMgrationHazard[21, 21] = 1.0 # Absorbinb

  RatingMgrationHazard = np.vstack ((RatingMgrationHazard, np.zeros((22))) )

  #1
  lmax = 0
  for i in range(21):
      if (np.abs(RatingMgrationHazard[i, i]) > lmax):
          lmax = np.abs(RatingMgrationHazard[i, i])

  #2
  mat1 = np.zeros((22, 22))
  np.fill_diagonal(mat1, lmax)


  Lstar = RatingMgrationHazard +  mat1

  tmp = la.expm(Lstar)

  vec1 = np.zeros((22, 22))

  np.fill_diagonal(vec1, np.exp(-lmax))

  mexpgenerator = np.dot(vec1, tmp)

  mexpgeneratorDF = pd.DataFrame(mexpgenerator)

  # Change the column names
  mexpgeneratorDF.columns =["AAA", "AA+", "AA", "AA-", "A+", "A", "A-", "BBB+",
                      "BBB", "BBB-", "BB+", "BB", "BB-", "B+", "B", "B-",
                      "CCC+", "CCC", "CCC-", "CC", 'Default', 'NR']

  # Change the row indexes
  mexpgeneratorDF.index = ["AAA", "AA+", "AA", "AA-", "A+", "A", "A-", "BBB+",
                      "BBB", "BBB-", "BB+", "BB", "BB-", "B+", "B", "B-",
                      "CCC+", "CCC", "CCC-", "CC", 'Default', 'NR']
  return mexpgeneratorDF["Default"]

def compute_nonparametric_bootstrap_pd(ratings, year, Nt, B=10000):

  ratings = create_year_data(ratings, year)

  pds = np.zeros(shape=(22, B))

  for i in tqdm(range(B)):
    # print(f"\n{i}\n")

    rating_sample = ratings.sample(n=int(Nt[str(year[0])].sum()), replace=True)

    rating_sample = create_year_data(rating_sample, year)

    result = postprocess_transition_results(compute_nijk_v2(rating_sample, year))

    pds[:, i] = compute_default_probas(result)

  return pds

In [None]:
# results = compute_nijk(TransitionData, [2010])

Processing Year 2010 and ID borrower No: 8522: 100%|██████████| 1689/1689 [00:50<00:00, 33.63it/s]


In [None]:
## lambda 2010
# results[-1]

array([ 12.96438356,   2.99178082,  18.94794521,  21.93972603,
        71.80273973, 140.61369863, 183.49589041, 182.49863014,
       195.4630137 , 155.57260274,  65.81917808,  81.77534247,
       115.68219178, 144.60273973, 170.53150685,  90.75068493,
        12.96438356,  11.96712329,   2.99178082,   0.99726027,
         0.        ])

In [None]:
## N_R 2010
# results[1]

array([ 13.,   3.,  19.,  22.,  72., 141., 184., 183., 196., 156.,  66.,
        82., 116., 145., 171.,  91.,  13.,  12.,   3.,   1.,   0.])

In [None]:
## nij 2010
# results[0]

array([[ 13.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   3.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,  19.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0.,  22.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.,  72.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   0., 141.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   0.,   0., 184.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0., 

In [None]:
# yearly_results = {f"{year}": compute_nijk(TransitionData, [year])\
#                   for year in sorted(TransitionData.Date.dt.year.unique())}

In [None]:
# yearly_results_postprocessed = {f"{year}": postprocess_transition_results(result)
#                   for year, result in yearly_results.items()}

In [None]:
initial_states = ["AAA", "AA+", "AA", "AA-", "A+", "A", "A-", "BBB+",
                     "BBB", "BBB-", "BB+", "BB", "BB-", "B+", "B", "B-",
                     "CCC+", "CCC", "CCC-", "CC", "Default"]

final_states = ["AAA", "AA+", "AA", "AA-", "A+", "A", "A-", "BBB+",
                    "BBB", "BBB-", "BB+", "BB", "BB-", "B+", "B", "B-",
                    "CCC+", "CCC", "CCC-", "CC", "Default", "NR"]

In [None]:
compute_default_probas(postprocess_transition_results(compute_nijk(TransitionData, [2010])))

Processing Year 2010 and ID borrower No: 8522: 100%|██████████| 1689/1689 [00:48<00:00, 34.92it/s]


AAA        0.000000
AA+        0.000000
AA         0.000000
AA-        0.000000
A+         0.000000
A          0.000000
A-         0.000000
BBB+       0.000000
BBB        0.000000
BBB-       0.000000
BB+        0.000000
BB         0.000000
BB-        0.000000
B+         0.000000
B          0.000000
B-         0.000000
CCC+       0.000000
CCC        0.000000
CCC-       0.000000
CC         0.000000
Default    2.718282
NR         0.000000
Name: Default, dtype: float64

In [None]:
# compute_default_probas(yearly_results_postprocessed["2010"])

In [None]:
# Nts = pd.concat([yearly_results_postprocessed[str(year)][1]\
#            for year in sorted(TransitionData.Date.dt.year.unique())],
#           axis=1)
# Nts.columns = sorted(TransitionData.Date.dt.year.unique())
# Nts

In [None]:
# Nts.to_csv("Nts.csv", index=False)
# TransitionData.to_csv("TransitionData.csv", index=False)

Nts = pd.read_csv("Nts.csv")
TransitionData = pd.read_csv("TransitionData.csv")

In [None]:
TransitionData.Date = pd.to_datetime(TransitionData.Date)

In [None]:
B = 1000
pds_2011 = compute_nonparametric_bootstrap_pd(TransitionData, [2011], Nts, B=B)
pds_2011

  0%|          | 3/1000 [02:17<12:39:59, 45.74s/it]

In [None]:
pd.DataFrame(pds_2011).dropna(axis=1).mean(axis=1)

In [None]:
pd.DataFrame(pds_2011).dropna(axis=1).quantile(axis=1, q=[0.025, 0.975])

In [None]:
pd.DataFrame(pds_2011).to_csv(f"pds_2011_{B}.csv", index=False)

#### Parametric

In [None]:
def compute_parametric_bootstrap_ci(pd, nr, alpha=0.05):
  pass

#### Non parametric