In [1]:
# dependencies

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report

# Split the Data into Training and Testing Sets

In [5]:
# Read the lending_data.csv data
df = pd.read_csv("lending_data.csv")
df

Unnamed: 0,loan_size,interest_rate,borrower_income,debt_to_income,num_of_accounts,derogatory_marks,total_debt,loan_status
0,10700.0,7.672,52800,0.431818,5,1,22800,0
1,8400.0,6.692,43600,0.311927,3,0,13600,0
2,9000.0,6.963,46100,0.349241,3,0,16100,0
3,10700.0,7.664,52700,0.430740,5,1,22700,0
4,10800.0,7.698,53000,0.433962,5,1,23000,0
...,...,...,...,...,...,...,...,...
77531,19100.0,11.261,86600,0.653580,12,2,56600,1
77532,17700.0,10.662,80900,0.629172,11,2,50900,1
77533,17600.0,10.595,80300,0.626401,11,2,50300,1
77534,16300.0,10.068,75300,0.601594,10,2,45300,1


In [6]:
# Create labels (y) and features (X)
y = df["loan_status"]
X = df.drop(columns=["loan_status"])

In [7]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Create a Logistic Regression Model with the Original Data

In [8]:
# Fit a logistic regression model
logreg = LogisticRegression()
logreg.fit(X_train, y_train)

In [9]:
# Predictions using the testing data
y_pred = logreg.predict(X_test)

In [10]:
# Evaluate the model's performance
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

In [12]:
print("Confusion Matrix:")
print(conf_matrix)


Confusion Matrix:
[[18699    93]
 [   53   539]]


In [13]:
print("\nClassification Report:")
print(class_report)


Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     18792
           1       0.85      0.91      0.88       592

    accuracy                           0.99     19384
   macro avg       0.93      0.95      0.94     19384
weighted avg       0.99      0.99      0.99     19384



In [28]:
print("Precision for label 0 (healthy loan): 1.00 (High precision indicates that very few healthy loans were incorrectly classified as high-risk.)")
print("")
print("-------------------------------------------------------------------------------------------------------------------------")
print("")
print("Recall for label 0: 0.99 (High recall indicates that the model correctly identified the majority of healthy loans.)")
print("")
print("-------------------------------------------------------------------------------------------------------------------------")
print("")
print("Precision for label 1 (high-risk loan): 0.85 (Precision indicates that 85% of loans classified as high-risk were actually high-risk.)")
print("")
print("-------------------------------------------------------------------------------------------------------------------------")
print("")
print("Recall for label 1: 0.91 (Recall indicates that the model correctly identified 91% of the high-risk loans.)")
print("")
print("-------------------------------------------------------------------------------------------------------------------------")
print("")
print("In summary, the logistic regression model performs really good for classifying healthy loans (label 0) with high precision and recall. It also performs well for identifying high-risk loans (label 1), but with slightly lower precision compared to recall. The model's ability to correctly predict both labels indicates its effectiveness in credit risk assessment")






Precision for label 0 (healthy loan): 1.00 (High precision indicates that very few healthy loans were incorrectly classified as high-risk.)

-------------------------------------------------------------------------------------------------------------------------

Recall for label 0: 0.99 (High recall indicates that the model correctly identified the majority of healthy loans.)

-------------------------------------------------------------------------------------------------------------------------

Precision for label 1 (high-risk loan): 0.85 (Precision indicates that 85% of loans classified as high-risk were actually high-risk.)

-------------------------------------------------------------------------------------------------------------------------

Recall for label 1: 0.91 (Recall indicates that the model correctly identified 91% of the high-risk loans.)

-------------------------------------------------------------------------------------------------------------------------

In sum

# Write a Credit Risk Analysis Report

In [29]:
# Answer in the README file