In [80]:
%pylab inline
import pandas as pd
import numpy as np
import fmt
import itertools

from sobol_seq import i4_sobol_generate
from scipy.stats import norm

Populating the interactive namespace from numpy and matplotlib


# Homework Set 7

This homework is to price [synthetic CDO](https://en.wikipedia.org/wiki/Synthetic_CDO) using the one factor Gaussian Copula model. 

A synthetic CDO consists of $n$ CDS, the total loss of the portfolio is defned as:

$$ l(t) = \sum_i^n w_i \tilde {\mathbb{1}}_i(t) (1-r_i(t)) $$

where $w_i$ and $r_i(t)$ are the notional weights and recovery rate of the i-th name in the portfolio. The notional weighs sum up to 1: $\sum_i w_i = 1 $. The $ \tilde {\mathbb{1}}_i(t) $ is the default indicator of the i-th name defaulted before time $t$, the default probability is therefore $p_i(t) = \mathbb E[\tilde {\mathbb{1}}_i(t) ]$

For the purpose of this homework, we consider a simplified synthetic CDO that has no coupon payments, therefore the PV of a \$1 notional synthetic CDO tranche with maturity $t$, attachment $a$ and detachment $d$ is:

$$ v(a, d) = \frac{d(t)}{d-a} \min\left((l(t) - a)^+, d-a\right) $$

where $d(t)$ is the discount factor.

The following are the parameters to the synthetic CDO, and a straight forward Monte Carlo pricer:

In [2]:
n = 125
t = 5.
defProbs = 1 - exp(-(np.random.uniform(size=n)*.03)*t)
recovery = 0.4*np.ones(n)
w = 1./n*np.ones(n)
rho = 0.5
discf = .9
npath = 1000

# a list of attachements and detachements, they pair up by elements
attachements = np.array([0, .03, .07, .1, .15, .3])
detachements = np.array([.03, .07, .1, .15, .3, .6])

#portfolio expected loss
el = np.sum(w*defProbs*(1-recovery))
print("portfolio expected loss is ", el)

portfolio expected loss is  0.04370380263509204


In [3]:
from scipy.stats import norm

class CDO(object) :
    def __init__(self, w, defProbs, recovery, a, d) :
        self.w = w/np.sum(w)
        self.p = defProbs
        self.rec = recovery
        self.rho = rho
        self.a = a
        self.d = d

    def drawDefaultIndicator(self, z, rho) :
        '''return a list of default indicators given common factor z, using one factor Gaussian Copula
        '''
        e = np.random.normal(size=np.shape(self.p))
        x = z*np.sqrt(self.rho) + np.sqrt(1-self.rho)*e
        return np.less(norm.cdf(x), self.p)

    def portfolioLoss(self, defIndicator) :
        '''compute portfolio loss given default indicators'''
        return np.sum(defIndicator*self.w*(1-self.rec))

    def tranchePV(self, portfLoss, discf) :
        '''compute tranche PV from portfolio loss
        Args:
            portfLoss: the total portfolio loss
            discf: discount factor
        Returns:
            tranche PVs'''
        
        sz = self.d - self.a
        return discf/sz*np.minimum(np.maximum(portfLoss - self.a, 0), sz)

    def drawPV(self, z, rho, discf) :
        ''' compute PV and portfolio Loss conditioned on a common factor z'''
        di = self.drawDefaultIndicator(z, rho)
        pfLoss = self.portfolioLoss(di)
        return self.tranchePV(pfLoss, discf), pfLoss
    
    
cdo = CDO(w, defProbs, recovery, attachements, detachements)

In [98]:
regular_normal_sample = np.array([])
antithetic_sample = np.array([])
importance_sample = np.array([])
sobol_sample = np.array([])
stratified_sample = np.array([])

def showReductionTable(reductionSample, vanillaSample):
    def calculatePopulationVariance(sample):
        return ((sample - sample.mean())**2).sum()/(n-1)

    var_r = calculatePopulationVariance(reductionSample)
    var_p = calculatePopulationVariance(vanillaSample)
    columns = [
        '$\sigma^2_{reduction}$',
        '$\sigma^2_{original}$',
        '$\sigma^2_r/\sigma^2_o$'
    ]
    fmt.displayDF(pd.DataFrame([[var_r, var_p, var_r/var_p]], columns=columns))


In [99]:
def regularNormal(n_paths):
    global regular_normal_sample
    regular_normal_sample = np.random.normal(size=[n_paths])
    return regular_normal_sample

def simCDO(cdo, rho, disc, paths, z_creator):
    zs = z_creator(paths)
    pv = np.zeros(np.shape(cdo.a))
    pv2 = np.zeros(np.shape(cdo.d))
    for z in zs:
        thisPV, _ = cdo.drawPV(z, rho, discf)
        pv += thisPV
        pv2 += thisPV*thisPV
        
    v = pv/paths
    var = pv2/paths - v**2
    return pv/paths, np.sqrt(var/paths)

In [100]:
pv_0, err_0 = simCDO(cdo, rho, discf, npath, regularNormal)
basic_df = pd.DataFrame(np.array([cdo.a, cdo.d, pv_0, err_0]), index=['Attach', 'Detach', 'PV', 'MC err'])
pop_var_0 = calculatePopulationVariance(regular_normal_sample)
fmt.displayDF(basic_df, fmt='4g')

Unnamed: 0,0,1,2,3,4,5
Attach,0.0,0.03,0.07,0.1,0.15,0.3
Detach,0.03,0.07,0.1,0.15,0.3,0.6
PV,0.4652,0.2432,0.1524,0.09201,0.03223,0.00396
MC err,0.01217,0.01177,0.01024,0.008092,0.004419,0.001288


## Problem 1

Modify the simCDO function to implement the following variance reduction techniques, and show whether the technique is effective:

For this homework, we only apply the variance reduction in the common market factor $z$, you should not change the random number $e$ that were drew with in the drawDefaultIndicator function, i.e., only modify the simCDO code, re-use but do not modify the CDO class. Unless explicitly mentioned, keep the simulation path the same as the base case above.

Compute the **variance** reduction factor for each technique, and comment on the effectiveness of these variance reduction techniques.

### Anti-thetic variate
Reduce the number of paths by half to account for the 2x increase in computation

In [101]:
def antitheticNormal(n_paths):
    x = np.random.normal(size=[n_paths//2])
    global antithetic_sample
    antithetic_sample = np.append(x, -x)
    return antithetic_sample

pv_a, err_a = simCDO(cdo, rho, discf, npath, antitheticNormal)
antithetic_df = pd.DataFrame(np.array([cdo.a, cdo.d, pv_a, err_a]), index=['Attach', 'Detach', 'PV', 'MC err'])

fmt.displayDF(antithetic_df, fmt='4g')
showReductionTable(antithetic_sample, regular_normal_sample)

Unnamed: 0,0,1,2,3,4,5
Attach,0.0,0.03,0.07,0.1,0.15,0.3
Detach,0.03,0.07,0.1,0.15,0.3,0.6
PV,0.4653,0.254,0.1627,0.09904,0.03498,0.00198
MC err,0.01225,0.01206,0.0105,0.008321,0.004668,0.0008531


Unnamed: 0,$\sigma^2_{reduction}$,$\sigma^2_{original}$,$\sigma^2_r/\sigma^2_o$
0,0.5004,0.4967,1.0076


We know that the reduction in variance here is going to be nil: Gaussian distributions are symmetric across zero. We'll save time in generating the normal random samples, but since this is a very cheap process in the first place, it's probably not worthwhile. Unsurprisingly, MC error is not appreciably different for doing this, and the estimates of PV aren't, either.

### Importance sampling
Shift $z$ by -1

### Sobol Sequence

A Sobol sequence exists to efficiently cover a given space. However, we need to use a normal distribution to get our numbers. The best way of going about this is to use a Sobol sequence to slam out the "random" numbers in the interval $[0, 1]$, and then use the inverse CDF function to get normally distributed values from that.

In [111]:
def sobolSampler(n_paths):
    global sobol_sample
    sobol_sample = norm.ppf(i4_sobol_generate(1, n_paths).T[0])
    return sobol_sample

pv_sobol, err_sobol = simCDO(cdo, rho, discf, npath, sobolSampler)
sobol_df = pd.DataFrame(np.array([cdo.a, cdo.d, pv_sobol, err_sobol]), index=['Attach', 'Detach', 'PV', 'MC err'])

fmt.displayDF(sobol_df, fmt='4g')
showReductionTable(sobol_sample, regular_normal_sample)

Unnamed: 0,0,1,2,3,4,5
Attach,0.0,0.03,0.07,0.1,0.15,0.3
Detach,0.03,0.07,0.1,0.15,0.3,0.6
PV,0.4679,0.2408,0.1538,0.09619,0.03483,0.002102
MC err,0.01226,0.01162,0.01034,0.008303,0.004706,0.0007016


Unnamed: 0,$\sigma^2_{reduction}$,$\sigma^2_{original}$,$\sigma^2_r/\sigma^2_o$
0,0.4922,0.4967,0.9911


We do see a slight reduction in variance here. The reason for this is fairly obvious: the Sobol sequence more accurately represents the population than does raw random sampling. Still, no real change in effect.

### Stratified sampling
Sample $z$ using an equal sized grid

In [114]:
def stratifySampler(n_paths, n_buckets=None):
    n_buckets = float(n_buckets or n_paths//2)
    global stratified_sampler
    stratified_sampler = norm.ppf(np.array([
        (i + x)/n_buckets 
        for i, x in zip(itertools.cycle(np.arange(n_buckets)),
                        np.random.uniform(size=n_paths))
    ]))
    return stratified_sampler

pv_strat, err_strat = simCDO(cdo, rho, discf, npath, stratifySampler)
strat_df = pd.DataFrame(np.array([cdo.a, cdo.d, pv_strat, err_strat]), index=['Attach', 'Detach', 'PV', 'MC err'])

fmt.displayDF(sobol_df, fmt='4g')
showReductionTable(stratified_sample, regular_normal_sample)

[-3.12649550e+00 -2.87447623e+00 -2.54276944e+00 -2.50272822e+00
 -2.35337375e+00 -2.30921728e+00 -2.20514434e+00 -2.14987928e+00
 -2.12825075e+00 -2.05805023e+00 -2.04406093e+00 -1.99265071e+00
 -1.94473620e+00 -1.93158268e+00 -1.90358595e+00 -1.87348865e+00
 -1.84131356e+00 -1.82012350e+00 -1.77598454e+00 -1.76919565e+00
 -1.72839013e+00 -1.71354910e+00 -1.69237375e+00 -1.67793714e+00
 -1.65844955e+00 -1.63611862e+00 -1.62074381e+00 -1.59712390e+00
 -1.58281764e+00 -1.56788574e+00 -1.55218295e+00 -1.53739328e+00
 -1.51532301e+00 -1.50322705e+00 -1.48958973e+00 -1.47259104e+00
 -1.45617496e+00 -1.44268883e+00 -1.41979286e+00 -1.40727405e+00
 -1.39875630e+00 -1.37927118e+00 -1.37025219e+00 -1.36370603e+00
 -1.34188289e+00 -1.33890952e+00 -1.32578304e+00 -1.31065100e+00
 -1.30349806e+00 -1.28403201e+00 -1.28139857e+00 -1.26492317e+00
 -1.25575584e+00 -1.24602660e+00 -1.23066745e+00 -1.22132435e+00
 -1.21589025e+00 -1.20230601e+00 -1.18723324e+00 -1.18182652e+00
 -1.17307085e+00 -1.15936

Unnamed: 0,0,1,2,3,4,5
Attach,0.0,0.03,0.07,0.1,0.15,0.3
Detach,0.03,0.07,0.1,0.15,0.3,0.6
PV,0.4679,0.2408,0.1538,0.09619,0.03483,0.002102
MC err,0.01226,0.01162,0.01034,0.008303,0.004706,0.0007016


  if __name__ == '__main__':
  ret = ret.dtype.type(ret / rcount)


Unnamed: 0,$\sigma^2_{reduction}$,$\sigma^2_{original}$,$\sigma^2_r/\sigma^2_o$
0,0.0,0.4967,0.0


## (Extra Credit) Problem 2

Consider a control variate for the problem above. The large pool model assumes that the portfolio is a large homogeneous pool, using the average default rate: $\bar p = \frac{1}{n}\sum_i p_i$. Then the portfolio loss conditioned on market factor $z$ under the large pool model is a determinsitic scalar:

$$ l(z) = (1-r)\Phi\left(\frac{\Phi^{-1}(\bar p) - \sqrt \rho z}{\sqrt{1-\rho}}\right)$$

where $r$ is the constant recovery of all names. $\Phi()$ is the normal CDF function; $\Phi^{-1}()$ is its inverse. The tranche PVs can then be computed from the $l(z)$.

Please investigate if the large pool model can be used as an effective control variate. Does it work better for some tranches?

Hint: to answer this question, you only need to compute the correlation between the actual and control variates. 