In [None]:
''' Build a model using Python that will estimate the probability of default for a borrower. A default occurs when a borrower stops 
making the required payments on a debt. The risk team has begun to look at the existing book of loans to see if more defaults should be 
expected in the future and, if so, what the expected loss will be. They have collected data on customers and now want to build a 
predictive model that can estimate the probability of default based on customer characteristics. you are asked if you can try building a 
prototype predictive model, which can then be tested and incorporated into their loss allowances.
You should produce a function that can take in the properties of a loan and output the expected loss.

In [5]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score



In [6]:
df = pd.read_csv('E:\\Shirin Gangal\\Python\\Python_learning\\Udemy ML\\Task 3 and 4_Loan_Data.csv')
df.head()

Unnamed: 0,customer_id,credit_lines_outstanding,loan_amt_outstanding,total_debt_outstanding,income,years_employed,fico_score,default
0,8153374,0,5221.545193,3915.471226,78039.38546,5,605,0
1,7442532,5,1958.928726,8228.75252,26648.43525,2,572,1
2,2256073,0,3363.009259,2027.83085,65866.71246,4,602,0
3,4885975,0,4766.648001,2501.730397,74356.88347,5,612,0
4,4700614,1,1345.827718,1768.826187,23448.32631,6,631,0


In [7]:
data = df.drop(columns=['customer_id', 'default'])
default = df['default']
data.head()

Unnamed: 0,credit_lines_outstanding,loan_amt_outstanding,total_debt_outstanding,income,years_employed,fico_score
0,0,5221.545193,3915.471226,78039.38546,5,605
1,5,1958.928726,8228.75252,26648.43525,2,572
2,0,3363.009259,2027.83085,65866.71246,4,602
3,0,4766.648001,2501.730397,74356.88347,5,612
4,1,1345.827718,1768.826187,23448.32631,6,631


In [68]:
dataTrain, dataTest, defaultTrain, defaultTest = train_test_split(data, default, test_size = 0.2)
model = DecisionTreeClassifier()
models = DecisionTreeClassifier()


In [69]:
model.fit(data, default)
models.fit(dataTrain, defaultTrain)
 
predictions = models.predict(dataTest)

score = accuracy_score(defaultTest, predictions)

In [75]:
print(score)

0.9925


In [78]:
credit = int(input("Credit lines outstanding: "))
loan = int(input("Loan amt outstanding: "))
debt = int(input("Total debt outstanding: "))
income = int(input("Income: "))
years = int(input("Years employed: "))
fisco = int(input("Fisco score: "))

Credit lines outstanding:  4
Loan amt outstanding:  6000
Total debt outstanding:  8000
Income:  40000
Years employed:  3
Fisco score:  450


In [79]:
def defaultPrediction(credit, loan, debt, income, years, fisco):
    sample = pd.DataFrame([[credit, loan, debt, income, years, fisco]], columns = data.columns)
    prediction = model.predict(sample)
    return prediction[0]

defprediction = defaultPrediction(credit, loan, debt, income, years, fisco)

if defprediction == 1:
    print("\nProbability of default is " + str(round((score*100), 4)) + "%")
else:
    print("\nProbability of default is " + str(round((100-score*100), 4)) + "%")




Probability of default is 99.25%


In [80]:
loanLoss = loan*score*(1-0.1)
print("The expected loss of the loan in the event of a default is: $" + str(loanLoss))
#Now that we have the predicted probabilities of default (PD) for each borrower, we can calculate the Expected Loss using the formula mentioned earlier:
#Expected Loss=Loan Amount×PD×(1−Recovery Rate)
print(loan*score*(1- 0.1))

The expected loss of the loan in the event of a default is: $5359.5
5359.5


In [2]:
# using logistic regression: 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import roc_auc_score

In [8]:
X = data[['credit_lines_outstanding', 'loan_amt_outstanding', 'total_debt_outstanding', 'income', 'years_employed', 'fico_score']]
y = df['default']  
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)





In [10]:
model = LogisticRegression(class_weight='balanced')  # Add class weight if needed
model.fit(X_train_scaled, y_train)

# Step 5: Predict probabilities
y_pred_prob = model.predict_proba(X_test_scaled)[:, 1] # this gives prob of class 1 that is prob of default. 
# the first index is prob of class 0 in predict_proba
print(y_pred_prob)

[2.84815932e-08 1.63245191e-02 9.99999997e-01 ... 5.60439281e-05
 2.28091574e-09 4.30756623e-01]


In [11]:
roc_auc = roc_auc_score(y_test, y_pred_prob)
print(f'ROC AUC Score: {roc_auc}')

ROC AUC Score: 0.9999739083242882


In [12]:
credit = int(input("Credit lines outstanding: "))
loan = int(input("Loan amt outstanding: "))
debt = int(input("Total debt outstanding: "))
income = int(input("Income: "))
years = int(input("Years employed: "))
fisco = int(input("Fisco score: "))


Credit lines outstanding:  3
Loan amt outstanding:  5000
Total debt outstanding:  7000
Income:  40000
Years employed:  5
Fisco score:  600


In [13]:
import numpy as np
user_input = np.array([[credit, loan, debt, income, years, fisco]])

In [14]:
user_input

array([[    3,  5000,  7000, 40000,     5,   600]])

In [15]:
user_input_scaled = scaler.transform(user_input)




In [20]:
probability = model.predict_proba(user_input_scaled)[:,1]

In [19]:
probability

array([[0.88109174, 0.11890826]])

In [21]:
print(f"The probability of default on the loan is: {probability[0]:.4f}")

The probability of default on the loan is: 0.1189


In [22]:
loanLoss = loan*probability[0]*(1-0.1)
print("The expected loss of the loan in the event of a default is: $" + str(loanLoss))
#Now that we have the predicted probabilities of default (PD) for each borrower, we can calculate the Expected Loss using the formula mentioned earlier:
#Expected Loss=Loan Amount×PD×(1−Recovery Rate)


The expected loss of the loan in the event of a default is: $535.0871757889189
