# Quantitative Risk Management

Click <a href="https://colab.research.google.com/github/Lolillosky/QuantRiskManagement/blob/main/NOTEBOOKS/14_CDS_Regression.ipynb">
    <img src="https://upload.wikimedia.org/wikipedia/commons/d/d0/Google_Colaboratory_SVG_Logo.svg" width="30" alt="Google Colab">
</a> to open this notebook in Google Colab.



In this exercise, you are going to fit a regularised (ridge) linear regression that predicts the 5-year clean CDS spread of a name from categorical features: tier, sector, region and average rating. The regularisation strength is selected by cross-validation.

## Import main libraries:

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import pandas as pd


## Import the following libraries from the repository:

In [None]:
# Make the repository's CODE folder importable when the notebook is run
# from its own folder inside a local checkout.
import sys
sys.path.append('../CODE')  # Adjust the path as necessary


from IPython.display import clear_output

# The triple-quoted block below is a bare string expression, so nothing in it
# is executed. It holds the Colab bootstrap commands (remove any previous
# clone, clone the repository, put its CODE folder first on sys.path) and is
# presumably meant to be enabled by removing the quotes when running in Colab.
# NOTE(review): `sys.modules.pop` inside it is only an attribute reference
# (no call, no module name), so it would do nothing even when enabled.
'''
!rm -r {'QuantRiskManagement'}

!git clone https://github.com/Lolillosky/QuantRiskManagement.git

import sys
sys.modules.pop
sys.path.insert(0,'QuantRiskManagement/CODE')
'''
# Clear whatever this cell has printed so far (e.g. the git clone log).
clear_output()


In [None]:
# Location of the CDS quotes file, relative to the notebook folder of a local
# checkout. Swap in the commented path when running from a Colab clone.
cds_csv_path = '../DATA/CDS_Data.csv'
#cds_csv_path = '/content/QuantRiskManagement/DATA/CDS_Data.csv'

# The file is semicolon-separated.
CDS_Data = pd.read_csv(cds_csv_path, sep=';')

# Show which columns are available, then preview the first rows
# (as the last expression of the cell, head() is rendered by the notebook).
print(CDS_Data.columns)
CDS_Data.head()

In [None]:
# Clean the raw CDS quotes into the dataset used for the regression:
# keep the relevant columns, drop unusable rows, normalise the categorical
# labels, convert percentages to floats and add the clean spread.

# We are just interested in the columns that we are using as features
# and the ones needed to build the target we want to predict
CDS_Data = CDS_Data[['Tier', 'Sector', 'Region', 'AvRating', 'Spread5Y', 'Recovery']]

# We print the original length of the dataframe
print('Original dataframe length:', len(CDS_Data))

# We get rid of rows with no rating, no 5yr spread or no recovery rate.
# All three are required below: the rating is a feature and the other two
# define the clean spread. .copy() gives an independent frame, so the column
# assignments that follow do not write into a slice of the raw data
# (which is what triggers pandas' SettingWithCopyWarning).
CDS_Data = CDS_Data.dropna(subset=['Spread5Y', 'AvRating', 'Recovery']).copy()

# For the remaining categorical columns, we fill NAs with the 'Unknown' keyword.
# Only these columns are filled: writing the string 'Unknown' into a numeric
# column such as Recovery would make the float conversion below fail.
categorical_na_cols = ['Tier', 'Sector', 'Region']
CDS_Data[categorical_na_cols] = CDS_Data[categorical_na_cols].fillna('Unknown')

# In the Sector column, there are some examples marked as 'Unclassified'.
# We change it to 'Unknown'
CDS_Data.loc[CDS_Data['Sector'] == 'Unclassified', 'Sector'] = 'Unknown'

# We convert both the CDS spread and the implied recovery rate from
# percentage strings (e.g. '1.25%') to floats (0.0125)
CDS_Data['Spread5Y'] = CDS_Data['Spread5Y'].str.rstrip('%').astype('float') / 100.0
CDS_Data['Recovery'] = CDS_Data['Recovery'].str.rstrip('%').astype('float') / 100.0

# We get rid of defaulted names (again copying, so that adding a column
# below operates on an independent frame)
CDS_Data = CDS_Data[CDS_Data['AvRating'] != 'D'].copy()

# We add the clean spread column: spread divided by loss given default
CDS_Data['Clean Spd'] = CDS_Data['Spread5Y'] / (1 - CDS_Data['Recovery'])

# We print the length of the dataframe we are working with
print('NA filtered dataframe length:', len(CDS_Data))



In [None]:
# All the features used by the model are categorical.
# For each of them we print a banner followed by the number of
# occurrences of every distinct value.
_count_sections = (
    ('----------Tier------------------------------------', 'Tier'),
    ('----------Sector----------------------------------', 'Sector'),
    ('----------Region----------------------------------', 'Region'),
    ('----------AvRating--------------------------------', 'AvRating'),
)

for _banner, _feature in _count_sections:
    print(_banner)
    print(CDS_Data[_feature].value_counts())

In [None]:
# Overlay the clean-spread histograms of a few rating buckets on one axis.
f, ax = plt.subplots(1)

# The rating is used both to filter the rows and as the legend label, so the
# two cannot disagree (the 'AA' series used to be labelled 'AAA').
# Add further buckets (e.g. 'BB') to this tuple to plot them as well.
for plotted_rating in ('AA', 'A', 'BBB'):
    bucket_spreads = CDS_Data.loc[CDS_Data['AvRating'] == plotted_rating, 'Clean Spd']
    bucket_spreads.hist(bins=20, ax=ax, alpha=0.5, label=plotted_rating)

ax.legend();

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.utils import shuffle


# Candidate regularisation strengths: 100 values log-spaced in [1e-4, 1].
# The same array feeds the grid search and the x axis of the score plot.
alphas = np.logspace(-4,0,100)

# Model: one-hot encode the categorical features, then ridge regression.
# The alpha passed to Ridge here is only an initial value; the grid search
# sets 'Ridge__alpha' for every candidate it evaluates.
steps = [
    ('OneHot', OneHotEncoder(sparse_output=False, handle_unknown  = 'error')),
    ('Ridge', Ridge(fit_intercept=True, alpha = 10)),
]
pipeline = Pipeline(steps)

# Hyperparameter grid and 5-fold cross-validated search scored by r2.
# Train scores are kept so that they can be plotted next to the test scores,
# and the best model is refitted on the whole dataset.
parameters = {'Ridge__alpha': alphas}
grid = GridSearchCV(
    pipeline,
    param_grid=parameters,
    cv=5,
    verbose = 0,
    refit=True,
    scoring= 'r2',
    return_train_score = True,
)

# Features are the four categorical columns; the target is the log clean spread.
X = CDS_Data[['Tier', 'Sector','Region','AvRating']]
Y = np.log(CDS_Data['Clean Spd'])

# Rows are shuffled before fitting. No random_state is given, so the
# ordering (and therefore the CV scores) changes from run to run.
X, Y = shuffle(X,Y)

# The grid search object is fitted as if it were a single model.
grid.fit(X,Y);

# Mean cross-validated score per alpha, for both test and train folds.
cv_results = grid.cv_results_
plt.plot(alphas, cv_results['mean_test_score'], label = 'test ' + r'$r^2$')
plt.plot(alphas, cv_results['mean_train_score'], label = 'train ' + r'$r^2$')

# Mark the selected model.
plt.plot(grid.best_params_['Ridge__alpha'], grid.best_score_,'o')

plt.xlabel(r'$\lambda$')
plt.ylabel(r'$r^2$')
plt.xscale('log')
plt.legend();

print('Best param found: ', grid.best_params_)


#@title CDS Spread Prediction { run: "auto" }
Type1 = "SNRFOR" #@param ["SNRFOR", "SUBLT2", "SECDOM", "SNRLAC"]
Sector1 = "Financial" #@param ["Unknown", "Financial", "Industrial", "Consumer Cyclical", "Communications and Technology", "Consumer Stable", "Utilities", "Government", "Energy", "Materials"] 
Region1 = "Europe" #@param ["N.Amer","Europe","Asia","Lat.Amer","MiddleEast","Oceania","E.Eur","India","OffShore","Africa","Supra","Caribbean"] 

Type2 = "SNRFOR" #@param ["SNRFOR", "SUBLT2", "SECDOM", "SNRLAC"]
Sector2 = "Industrial" #@param ["Unknown", "Financial", "Industrial", "Consumer Cyclical", "Communications and Technology", "Consumer Stable", "Utilities", "Government", "Energy", "Materials"] 
Region2 = "Europe" #@param ["N.Amer","Europe","Asia","Lat.Amer","MiddleEast","Oceania","E.Eur","India","OffShore","Africa","Supra","Caribbean"] 

# Compare the predicted clean spread of two (tier, sector, region) profiles
# across the rating scale, using the fitted grid-search model.

# Rating scale as a column vector; it is also the source of the bar count
# and of the x tick labels, so the three always agree.
rating = np.array([['AAA', 'AA', 'A', 'BBB', 'BB', 'B', 'CCC']]).T

# Column names, in the order of the features the model was fitted on.
feature_columns = ['Tier', 'Sector', 'Region', 'AvRating']


def _rating_scenarios(tier, sector, region):
    """Return one row per rating for the given tier, sector and region.

    The frame carries the same column names as the training features, so
    scikit-learn's feature-name check at predict time sees matching names
    instead of the default integer column labels.
    """
    fixed_part = np.repeat(np.array([[tier, sector, region]]), len(rating), axis=0)
    return pd.DataFrame(np.concatenate((fixed_part, rating), axis=1),
                        columns=feature_columns)


data1 = _rating_scenarios(Type1, Sector1, Region1)
data2 = _rating_scenarios(Type2, Sector2, Region2)

f, ax = plt.subplots(1)

# One group of two bars per rating; the bars are shifted by +/-0.2 so that
# both profiles sit side by side around the tick.
bar_positions = np.arange(len(rating))

# The model predicts the log clean spread, hence the exponential.
ax.bar(x = bar_positions - 0.2,
       height = np.exp(grid.predict(data1)), width = 0.4,
       label = 'Data 1')

ax.bar(x = bar_positions + 0.2,
       height = np.exp(grid.predict(data2)), width = 0.4,
       label = 'Data 2')

ax.set_xticks(bar_positions)
ax.set_xticklabels(rating.ravel().tolist());
ax.legend();
# Uncomment to show the spreads on a logarithmic scale:
#ax.set_yscale('log')
