In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import numpy as np
import pandas as pd


In [6]:
# Load dataset
df = pd.read_csv("loan_data_created.csv")
df.head()


Unnamed: 0,customer_id,credit_lines_outstanding,loan_amt_outstanding,total_debt_outstanding,income,years_employed,fico_score,default
0,8153374,0,5221.545193,3915.471226,78039.38546,5,605,0
1,7442532,5,1958.928726,8228.75252,26648.43525,2,572,1
2,2256073,0,3363.009259,2027.83085,65866.71246,4,602,0
3,4885975,0,4766.648001,2501.730397,74356.88347,5,612,0
4,4700614,1,1345.827718,1768.826187,23448.32631,6,631,0


In [8]:
# Payment-to-income ratio
df['payment_to_income'] = df['loan_amt_outstanding'] / df['income']

# Debt-to-income ratio
df['debt_to_income'] = df['total_debt_outstanding'] / df['income']



In [9]:
features = ['credit_lines_outstanding','debt_to_income','payment_to_income','years_employed','fico_score']

# Fit logistic regression
clf = LogisticRegression(random_state=0, solver='liblinear', tol=1e-5, max_iter=10000)
clf.fit(df[features], df['default'])

# Display learned coefficients
print("Model Coefficients:", clf.coef_)
print("Intercept:", clf.intercept_)


Model Coefficients: [[ 8.18520373  0.54490854  0.01994244 -2.77630853 -0.02418391]]
Intercept: [-0.09162643]


In [10]:
y_pred = clf.predict(df[features])

# ROC curve and AUC
fpr, tpr, thresholds = metrics.roc_curve(df['default'], y_pred)

print("Misclassification Rate:", (abs(df['default'] - y_pred).sum()) / len(df))
print("ROC-AUC:", metrics.auc(fpr, tpr))


Misclassification Rate: 0.0037
ROC-AUC: 0.9925106069101026
