In [108]:
import pandas as pd
import numpy as np
import datetime as dt
from scipy.stats import norm
from pandas.tseries.offsets import *
from scipy import optimize
from tqdm import tqdm


# Register tqdm's pandas integration so that `progress_apply` is available
# (used later when the default probabilities are computed row by row).
tqdm.pandas()

In [79]:
# Data locations, relative to the notebook's working directory.
# `resource` is the folder the input CSV files are read from.
resource = "../../data/pulled/"
# resource = "../../data/generated/"
# `results` is the folder generated files are written to (and, for the CRSP
# extract used below, also read from).
results = "../../data/generated/"

In [4]:
# read crsp pulled data
# crsp_d = pd.read_csv(resource + "wrds_crsp_d.csv", parse_dates=['date'])
# crsp_d = crsp_d.sort_values(by=['permno','permco','date'])
# crsp_d.to_csv(results + 'crsp_d_sorted.csv', index=False)

In [80]:
# Load the Compustat pull (datadate parsed as a timestamp).
comp = pd.read_csv(resource + "wrds_comp.csv", parse_dates=['datadate'])

# Treat missing current debt (dlc) and long-term debt (dltt) as zero, then keep
# only the identifier, the statement date and the two debt items.
comp = comp.fillna({'dlc': 0, 'dltt': 0})[['gvkey', 'datadate', 'dlc', 'dltt']]

In [118]:
# Load the daily CRSP extract used for this run.
# NOTE(review): this reads "prova_default.csv" from the generated-data folder,
# presumably a trial subset of the full pull below -- confirm before a full run.
crsp_d = pd.read_csv(results + "prova_default.csv", parse_dates=['date'])

# read crsp pulled data
# crsp_d = pd.read_csv(resource + "wrds_crsp_d.csv", parse_dates=['date'])

# change variable format to int
crsp_d[['permco','permno','shrcd']] = crsp_d[['permco','permno','shrcd']].astype(int)

# calculate market equity (|price| * shares outstanding)
crsp_d['me'] = crsp_d['prc'].abs() * crsp_d['shrout']
crsp_d = crsp_d.drop(['prc','shrout'], axis=1)
crsp_d = crsp_d.sort_values(by=['date','permco','me'])

### Aggregate Market Cap ###
# Sum of me across the permnos that belong to the same permco on a given date.
# min_count=1 keeps the sum NaN when every me in the group is missing; the
# default would return 0.0, which the dropna at the end of this cell cannot
# catch and which would flow into the default-probability step as zero equity.
crsp_summe = crsp_d.groupby(['date','permco'])['me'].sum(min_count=1).reset_index()

# largest mktcap within a permco/date
crsp_maxme = crsp_d.groupby(['date','permco'])['me'].max().reset_index()

# join on date/permco/me to keep the permno that carries the largest me
crsp = pd.merge(crsp_d, crsp_maxme, how='inner', on=['date','permco','me'])

# drop the permno-level me; it is replaced by the permco-level sum below
crsp = crsp.drop(['me'], axis=1)

# join with sum of me to get the correct market cap info
crsp = pd.merge(crsp, crsp_summe, how='inner', on=['date','permco'])

# sort by permno and date and also drop duplicates
crsp = crsp.sort_values(by=['permno','date']).drop_duplicates()

# calendar components, used later to find the last observed day of each month
crsp['day'] = crsp['date'].dt.day
crsp['month'] = crsp['date'].dt.month
crsp['year'] = crsp['date'].dt.year

# crsp['me'].fillna(method='ffill', inplace=True)
# remove observations without a market equity value
crsp = crsp.dropna(subset=['me'], how='any')

In [84]:
#######################
# CCM Block           #
#######################
# Link table with the validity window (linkdt .. linkenddt) of each link.
ccm = pd.read_csv(resource + "wrds_ccm.csv", parse_dates=['linkdt', 'linkenddt'])

# if linkenddt is missing then set to today date
ccm['linkenddt'] = ccm['linkenddt'].fillna(pd.to_datetime('today'))

# attach every link of a gvkey to each of its Compustat records
ccm = pd.merge(comp, ccm, how='left', on=['gvkey'])

# date from which a record is used: the month end four months after datadate
ccm['start_disclosure_date'] = ccm['datadate'] + MonthEnd(4)

# set link date bounds: keep records whose start date falls inside the link window
ccm = ccm[(ccm['start_disclosure_date']>=ccm['linkdt'])&(ccm['start_disclosure_date']<=ccm['linkenddt'])]
# .copy() so the columns added below are written to an independent frame
ccm = ccm[['gvkey','permno','datadate','start_disclosure_date','dlc','dltt']].copy()

# Debt = current debt + half of long-term debt, converted from millions to
# thousands (the CRSP data is in thousands while Compustat data is in millions).
# The parentheses matter: without them only the dltt term was multiplied by
# 1000 and dlc stayed in millions.
ccm['debt'] = (ccm['dlc'] + .5*ccm['dltt'])*1000
ccm = ccm.drop(['dlc','dltt'], axis=1)

# A record stops being used when the next record of the same gvkey starts;
# the last record of a gvkey is used for twelve more month ends.
# NOTE(review): shift(-1) relies on rows being ordered by datadate within each
# gvkey -- confirm the Compustat file is sorted that way.
ccm['end_public_date'] = ccm.groupby(['gvkey'])['start_disclosure_date'].shift(-1)
ccm['end_public_date'] = ccm['end_public_date'].fillna(ccm['start_disclosure_date']+MonthEnd(12))

In [61]:
# link comp and crsp
# ccm_final=pd.merge(crsp2, ccm2, how='left', on=['permno'])
# ccm_final = ccm_final.loc[(ccm_final.public_date < ccm_final.date) & (ccm_final.date <= ccm_final.end_public_date)]



In [62]:
# Compute the default probability
# default_p = ccm_final.groupby(['permno','year','month'])['day'].max().reset_index()
# default_p['date'] = pd.to_datetime(default_p[["year", "month", "day"]]) + MonthEnd(0)
# default_p=default_p.drop(['year','month','day'], axis=1)

# fred = pd.read_csv(resource + "fred.csv", parse_dates=['date'])
# rf = fred[['date','DGS1']] # 1 year T-bill
# default_p=pd.merge(default_p, rf, how='left', on=['date'])


In [88]:
# One row per permno and calendar month: take the last day observed in CRSP for
# that month, rebuild a timestamp from it and roll it to the month end.
default_p = (
    crsp.groupby(['permno','year','month'])['day']
    .max()
    .reset_index()
)
default_p['date'] = pd.to_datetime(default_p[['year','month', 'day']]) + MonthEnd(0)
default_p = default_p.drop(['year','month','day'], axis=1)

# Attach the debt records of the same permno and keep, for each month, only the
# record whose usage window (start_disclosure_date, end_public_date] contains it.
default_p = default_p.merge(ccm, how='left', on=['permno'])
default_p = default_p.loc[
    default_p['start_disclosure_date'].lt(default_p['date'])
    & default_p['date'].le(default_p['end_public_date'])
]

# Risk-free rate: the DGS1 column of the FRED file, matched on the exact date.
# NOTE(review): rows whose month-end date is absent from fred.csv get a missing
# DGS1 from this left join -- confirm the file contains those dates.
fred = pd.read_csv(resource + "fred.csv", parse_dates=['date'])
rf = fred[['date','DGS1']] # 1 year T-bill
default_p = default_p.merge(rf, how='left', on=['date'])


In [101]:
"""
Script to calculate distance-to-default
"""
def calcVolatility(x):
    """Return the standard deviation of consecutive log ratios of `x`, scaled by sqrt(252).

    x: sequence of values; element i+1 is divided by element i.
    """
    log_changes = np.log(np.divide(x[1:], x[:-1]))
    annualisation = np.sqrt(252)
    return np.std(log_changes) * annualisation

def BlackScholesCallValue(S,X,r,sigma,T):
    """Black-Scholes value of a call and its delta.

    S: value of the underlying, X: strike, r: rate, sigma: volatility,
    T: time to maturity.

    Returns the list [call value, N(d1)].
    """
    vol_sqrt_t = sigma*np.sqrt(T)
    d1 = (np.log(S/X)+(r+0.5*sigma**2)*T)/vol_sqrt_t
    d2 = d1-vol_sqrt_t
    hedge_ratio = norm.cdf(d1)
    discounted_strike = X*np.exp(-r*T)
    call_value = S*hedge_ratio-discounted_strike*norm.cdf(d2)
    return [call_value, hedge_ratio]

def BlackScholesZero(C,X,r,sigma,T):
    """Solve the Black-Scholes call formula for the underlying value S.

    Finds S such that BlackScholesCallValue(S, X, r, sigma, T) returns a call
    value equal to C. The search starts at C and every guess is clamped to
    [C, C + X*exp(-r*T)]. Each step is a Newton step using the call delta, or a
    secant step through the upper bound when the delta is below 0.01.

    C: target call value, X: strike, r: rate, sigma: volatility,
    T: time to maturity.

    Returns:
        C when X == 0; C + X*exp(-r*T) when sigma == 0; otherwise the solved S,
        or np.nan if the pricing error is still above 0.0001*C when the
        iteration stops. When C is 0 or NaN the iteration never starts and the
        starting value C is returned (previously this raised
        UnboundLocalError).
    """
    if X==0:
        return C

    UpperS = C+X*np.exp(-r*T)
    if sigma==0:
        return UpperS

    LowerS = C
    tol = 0.0001*C
    delta = C
    itercount = 0
    x0 = LowerS
    x1 = UpperS
    # Defined up front so the function still returns a value when the loop
    # below is skipped (tol is 0 or NaN).
    xguess = x0
    while abs(delta)>tol and itercount<=100:
        itercount = itercount+1
        bs0, D0 = BlackScholesCallValue(x0, X, r, sigma, T)
        y0 = bs0-C

        if D0<0.01:
            # Delta too flat for a Newton step: use the secant through the
            # upper bound. x1 never changes, so its price is only needed here.
            bs1, _ = BlackScholesCallValue(x1, X, r, sigma, T)
            y1 = bs1-C
            xguess = (x0*y1-x1*y0)/(y1-y0)
        else:
            xguess = x0 - y0/D0

        # keep the guess inside the bracket
        if xguess>UpperS:
            xguess = UpperS
        elif xguess<LowerS:
            xguess = LowerS
        bsxg, _ = BlackScholesCallValue(xguess, X, r, sigma, T)
        delta = bsxg-C
        x0 = xguess

    if abs(delta)>tol:
        return np.nan
    else:
        return xguess
    
def dailyDLIcalcs(Ve,X,r,sigma_a,T):
    """Invert each value in `Ve` with BlackScholesZero and measure the result's volatility.

    Ve: iterable of call (equity) values; X, r, sigma_a, T are passed unchanged
    to BlackScholesZero for every element.

    Returns the list [array of solved values, calcVolatility of that array].
    """
    Va = np.array([BlackScholesZero(v, X, r, sigma_a, T) for v in Ve])
    return [Va, calcVolatility(Va)]

def calc_DD(rf, X, Ve, T):
    """Iterate asset value and asset volatility, then compute distance-to-default.

    Starting from the volatility of `Ve`, repeatedly calls dailyDLIcalcs to
    obtain asset values and their volatility. Stops when the volatility changes
    by at most 0.0001 between passes, becomes NaN, or after 100 passes.

    rf: rate passed to the option formula, X: debt level (strike),
    Ve: array of equity values, T: horizon.

    Returns the list [p_def, DD, number of passes, last volatility change],
    where p_def = norm.cdf(-DD). DD is 100 when X == 0 and NaN when the final
    volatility is 0.
    """
    asset_vol = calcVolatility(Ve)

    vol_change = 1.0
    n_passes = 0
    while n_passes<100 and not np.isnan(vol_change) and vol_change>0.0001:
        n_passes += 1
        prev_vol = asset_vol
        Va, asset_vol = dailyDLIcalcs(Ve, X, rf, prev_vol, T)
        vol_change = abs(asset_vol-prev_vol)

    # Drift: mean log change between consecutive asset values.
    # NOTE(review): this mean is per observation, while calcVolatility scales
    # the volatility by sqrt(252) -- confirm the two are meant to be combined
    # with the same T in the formula below.
    asset_steps = np.divide(Va[1:], Va[:-1])
    mu = np.mean(np.log(asset_steps))

    if X==0:
        # no debt: no probability of default
        DD = 100
    elif asset_vol==0:
        # if stock is not traded
        DD = np.nan
    else:
        numerator = np.log(Va[-1]/X)+(mu-(0.5*asset_vol**2))*T
        DD = numerator / (asset_vol*np.sqrt(T))

    return [norm.cdf(-DD), DD, n_passes, vol_change]

In [112]:
def row_dd(x,market,T):
    """Default probability for one row, from that permno's trailing equity values.

    x: row providing 'date', 'permno', 'DGS1' and 'debt'.
    market: frame with 'permno', 'date' and 'me' columns.
    T: horizon handed to calc_DD.

    Uses the rows of `market` for the same permno dated after the month end
    twelve months before x['date'] and up to x['date'] inclusive.
    NOTE(review): DGS1 is passed to calc_DD as-is; if the series is quoted in
    percent it would need rescaling -- confirm against fred.csv.
    """
    window_end = x['date']
    window_start = window_end + MonthEnd(-12)
    same_firm = market['permno'] == x['permno']
    in_window = (market['date'] > window_start) & (market['date'] <= window_end)
    equity_values = market.loc[same_firm & in_window, 'me'].values
    outcome = calc_DD(x['DGS1'], x['debt'], equity_values, T)
    # calc_DD returns [p_def, DD, itercount, delta]; only p_def is used here
    return outcome[0]

In [115]:
# Horizon passed through row_dd to calc_DD (presumably one year -- confirm).
T = 1
# Row-by-row computation with a tqdm progress bar. Each call to row_dd filters
# the whole crsp frame again, so this is the slow step of the notebook.
default_p['p_def'] = default_p.progress_apply(lambda x:  row_dd(x,crsp,T), axis=1)

# output the prepared data
default_p.to_csv(results + 'p_def.csv', index=False)



















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

In [117]:
# Scratch cell: rebuild market equity from the full CRSP pull for the permnos
# up to 10006.
crsp_d = pd.read_csv(resource + "wrds_crsp_d.csv", parse_dates=['date'])

# .copy() so the column assignments below modify an independent frame instead
# of a filtered slice (avoids SettingWithCopyWarning / lost writes).
crsp_d = crsp_d[(crsp_d['permno']<=10006)].copy()

# change variable format to int
crsp_d[['permco','permno','shrcd']] = crsp_d[['permco','permno','shrcd']].astype(int)

# calculate market equity
crsp_d['me'] = crsp_d['prc'].abs()*crsp_d['shrout']
crsp_d = crsp_d.drop(['prc','shrout'], axis=1)
crsp_d = crsp_d.sort_values(by=['date','permco','me'])

# Carry the last known market equity forward within each permno. The frame is
# ordered by date first, so an ungrouped forward fill would copy another
# security's value into the gap. Assigning the result back also replaces the
# chained `fillna(method='ffill', inplace=True)` call, which is deprecated and
# is not guaranteed to write through to the frame.
crsp_d['me'] = crsp_d.groupby('permno')['me'].ffill()