In [None]:
# This code is prepared by Orhan Erdem.
# Please email orhanerdem at gmail.com for errors, suggestions

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import matplotlib.pylab as plt

%matplotlib inline

In [None]:
# Load the mortgage-default workbook (path is relative to the notebook's
# working directory) and preview the first rows as the cell output.
default_df = pd.read_excel(io='Mortgage_Default.xlsx')
default_df.head(n=5)

In [None]:
# Replace categorical columns with float-valued indicator columns.
# drop_first=True drops the first level of each categorical, and names are
# built as "<column>_<level>" (later cells rely on the `default_Yes` column).
default_df = pd.get_dummies(
    default_df,
    prefix_sep='_',
    drop_first=True,
    dtype=float,
)
default_df.head(n=5)

In [None]:
# Two side-by-side panels: the default indicator against balance (left)
# and against income (right).
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(20, 8))
for predictor, panel in (('balance', ax1), ('income', ax2)):
    sns.scatterplot(data=default_df, x=predictor, y='default_Yes', ax=panel)
plt.show()

In [None]:
# Income/balance scatter, coloured by outcome: defaulted rows in red,
# non-defaulted rows in green, both drawn on the same axes.
ax = default_df.loc[default_df['default_Yes'] == 1].plot.scatter(
    x='income', y='balance', c='red', label='defaulted',
)
default_df.loc[default_df['default_Yes'] == 0].plot.scatter(
    x='income', y='balance', c='green', label='not defaulted', ax=ax,
)

# Hand-picked separating line through (20, 10) and (40, 140).
# x_adhoc / y_adhoc are kept as notebook-level names; a later cell reuses them.
x_adhoc = np.array([20, 40])
y_adhoc = np.array([10, 140])
ax.plot(x_adhoc, y_adhoc, '-', color='blue', label='ad hoc line')
ax.set_xlabel('income ($000s)')
ax.set_ylabel('balance ($000s)')

# Shrink the axes to 80% of their width so the legend fits to the right of the plot.
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.show()

In [None]:
# Discriminant Analysis
# Fit a linear discriminant model that predicts `default_Yes` from every
# other column of default_df.
lda_features = default_df.drop(columns=['default_Yes'])
da_reg = LinearDiscriminantAnalysis()
da_reg.fit(lda_features, default_df['default_Yes'])

# coef_ has one column per feature, in the column order of the frame passed
# to fit(). Look the coefficients up by column name instead of by hard-coded
# position so they cannot be silently swapped if the spreadsheet's column
# order changes.
lda_feature_names = list(lda_features.columns)
c_balance = da_reg.coef_[0, lda_feature_names.index('balance')]
c_income = da_reg.coef_[0, lda_feature_names.index('income')]
intercept = da_reg.intercept_[0]
print('Coefficients', da_reg.coef_)
print('Intercept', da_reg.intercept_)

In [None]:
# Same income/balance scatter as before, now with the LDA class means overlaid.
ax = default_df.loc[default_df['default_Yes'] == 1].plot.scatter(
    x='income', y='balance', c='red', label='defaulted',
)
default_df.loc[default_df['default_Yes'] == 0].plot.scatter(
    x='income', y='balance', c='green', label='not defaulted', ax=ax,
)

# One black cross per class mean.
# NOTE(review): assumes means_ column 0 is balance and column 1 is income,
# i.e. the feature order used when da_reg was fitted - confirm.
ax.plot(
    da_reg.means_[:, 1], da_reg.means_[:, 0], 'x',
    color='black', label='Class means', markersize=15,
)

# Hand-picked separating line through (20, 10) and (40, 140).
x_adhoc = np.array([20, 40])
y_adhoc = np.array([10, 140])
ax.plot(x_adhoc, y_adhoc, '-', color='blue', label='ad hoc line')
ax.set_xlabel('income ($000s)')
ax.set_ylabel('balance ($000s)')

# Shrink the axes to 80% of their width so the legend fits to the right of the plot.
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.show()

In [None]:
# Income/balance scatter with the class means, the fitted discriminant
# boundary and the earlier ad hoc line drawn together for comparison.
ax = default_df.loc[default_df['default_Yes'] == 1].plot.scatter(
    x='income', y='balance', c='red', label='defaulted',
)
default_df.loc[default_df['default_Yes'] == 0].plot.scatter(
    x='income', y='balance', c='green', label='not-defaulted', ax=ax,
)

# NOTE(review): assumes means_ column 0 is balance and column 1 is income - confirm.
ax.plot(
    da_reg.means_[:, 1], da_reg.means_[:, 0], 'x',
    color='black', label='Class means', markersize=5,
)

# Boundary where the decision function is zero:
#   intercept + c_income * income + c_balance * balance = 0
# solved for balance and evaluated at the current x-axis limits.
x_lda = np.array(ax.get_xlim())
y_lda = (-intercept - c_income * x_lda) / c_balance
ax.plot(x_lda, y_lda, '--', color='C1', label='DA line')

# x_adhoc / y_adhoc come from an earlier cell.
ax.plot(x_adhoc, y_adhoc, '-', color='C0', label='ad hoc line')
ax.set_xlabel('Income')
ax.set_ylabel('Balance')

# Shrink the axes to 80% of their width so the legend fits to the right of the plot.
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.show()

In [None]:
# Refit the discriminant model on all columns except the target.
lda_inputs = default_df.drop(columns=['default_Yes'])
da_reg = LinearDiscriminantAnalysis()
da_reg.fit(lda_inputs, default_df['default_Yes'])

# Work on a copy so default_df itself is left untouched; rows are numbered from 1.
result_df = default_df.copy()
result_df.index = result_df.index + 1

# The model outputs are plain arrays, so they are attached by position
# (the shifted index does not affect row alignment).
result_df = result_df.assign(**{
    'Dec. Function': da_reg.decision_function(lda_inputs),
    'Pred': da_reg.predict(lda_inputs),
    # Second column of predict_proba, stored under the name 'p(default)'.
    'p(default)': da_reg.predict_proba(lda_inputs)[:, 1],
})

print(result_df)

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 of the LDA predictions against the observed labels.
lda_report = classification_report(
    y_true=result_df['default_Yes'],
    y_pred=result_df['Pred'],
)
print(lda_report)

# Comparison with Logistic Regression

In [None]:
import statsmodels.formula.api as smf

# Logistic regression of the default indicator on income and balance
# (explicit intercept term); disp=0 silences the optimizer's convergence output.
ret_logit = smf.logit(
    formula='default_Yes~1+income+balance',
    data=default_df,
)
results_logit = ret_logit.fit(disp=0)
logit_summary = results_logit.summary()
print(logit_summary)

In [None]:
# Split the frame into predictors (every column but the target) and the target itself.
X = default_df.drop(columns=['default_Yes'])
y = default_df.loc[:, 'default_Yes']

In [None]:
# Fitted values of the logit model for the rows in X.
pred = results_logit.predict(exog=X)

In [None]:
# Observed default indicator (default markers) versus the logit model's
# fitted values `pred` (red '+'), one panel per predictor.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))

# Left panel: both series plotted against income.
sns.scatterplot(x='income', y='default_Yes', data=default_df, ax=ax1)
sns.scatterplot(x='income', y=pred, data=default_df, color='red', marker='+', ax=ax1)

# Right panel: both series plotted against balance. The fitted values must
# use the same x variable as the observed points; plotting them against
# income here would put income values on a balance axis.
sns.scatterplot(x='balance', y='default_Yes', data=default_df, ax=ax2)
sns.scatterplot(x='balance', y=pred, data=default_df, color='red', marker='+', ax=ax2)
plt.show()

In [None]:

# Store the logit model's fitted values next to the data, then derive a 0/1
# class label by thresholding at 0.50 (>= 0.50 -> 1).
default_df['p(def)'] = results_logit.predict(exog=X)
default_df = default_df.assign(
    pred=default_df['p(def)'].ge(0.50).astype(int),
)
default_df

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 of the thresholded logit predictions
# against the observed labels.
logit_report = classification_report(
    y_true=default_df['default_Yes'],
    y_pred=default_df['pred'],
)
print(logit_report)