# Credit Risk Default Classification

This project builds a machine learning classification model to predict loan default risk.

## Overview
The objective is to estimate borrower default probability using structured financial data.

## Methodology
- Logistic Regression
- Random Forest (an ensemble of decision trees)
- Confusion Matrix
- ROC-AUC evaluation

## Outcome
The project demonstrates the application of statistical learning techniques to financial risk assessment problems in regulated lending environments.

## Tools
Python, Pandas, NumPy, Scikit-learn

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score


# Load the loan book from the CSV export in the working directory.
df = pd.read_csv('Task34.csv')


# Borrower attributes used as model inputs; 'default' is the target column.
features = [
    'credit_lines_outstanding',
    'loan_amt_outstanding',
    'total_debt_outstanding',
    'income',
    'years_employed',
    'fico_score',
]
X = df[features]
y = df['default']


# Hold out 20% of the rows for evaluation. Stratifying on the target keeps the
# class proportions the same in both splits, and the fixed seed makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


# Logistic regression, with the iteration cap raised to 1000.
# `fit` returns the estimator itself, so construction and training are chained.
log_reg = LogisticRegression(max_iter=1000).fit(X_train, y_train)


# Column 1 of predict_proba is the probability of the second entry in
# `log_reg.classes_` (presumably the "defaulted" label -- confirm the target
# is encoded as 0/1).
y_pred_prob_log_reg = log_reg.predict_proba(X_test)[:, 1]
auc_log_reg = roc_auc_score(y_true=y_test, y_score=y_pred_prob_log_reg)


# Random forest of 100 trees, seeded so repeated runs build the same forest.
# `fit` returns the estimator itself, so construction and training are chained.
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)


# Score the held-out rows with the second-class probability (column 1).
y_pred_prob_rf = rf_clf.predict_proba(X_test)[:, 1]
auc_rf = roc_auc_score(y_true=y_test, y_score=y_pred_prob_rf)

# Report the hold-out ROC-AUC of both models.
print(f'Logistic Regression AUC: {auc_log_reg}')
print(f'Random Forest AUC: {auc_rf}')

# Fraction of the outstanding loan amount assumed to be recovered after a default.
RECOVERY_RATE = 0.10


def predict_expected_loss(credit_lines_outstanding, loan_amt_outstanding, total_debt_outstanding, income, years_employed, fico_score, *, recovery_rate=None, model=None):
    """Return the expected loss on a single loan.

    The expected loss is computed as
    ``P(default) * loan_amt_outstanding * (1 - recovery_rate)``, where the
    default probability comes from the classifier's ``predict_proba``.

    Args:
        credit_lines_outstanding: Value for the 'credit_lines_outstanding' feature.
        loan_amt_outstanding: Value for the 'loan_amt_outstanding' feature;
            also used as the exposure in the loss formula.
        total_debt_outstanding: Value for the 'total_debt_outstanding' feature.
        income: Value for the 'income' feature.
        years_employed: Value for the 'years_employed' feature.
        fico_score: Value for the 'fico_score' feature.
        recovery_rate: Fraction of the exposure recovered after default, in
            [0, 1]. Defaults to the module-level ``RECOVERY_RATE``.
        model: Fitted classifier exposing ``predict_proba``. Defaults to the
            module-level ``rf_clf``.

    Returns:
        The expected loss, in the same units as ``loan_amt_outstanding``.

    Raises:
        ValueError: If the recovery rate in effect is outside [0, 1].
    """
    # Resolve defaults at call time so later changes to the module-level
    # constant or model are picked up, exactly as before.
    rate = RECOVERY_RATE if recovery_rate is None else recovery_rate
    if not 0.0 <= rate <= 1.0:
        raise ValueError(f"recovery_rate must be between 0 and 1, got {rate!r}")
    classifier = rf_clf if model is None else model

    # Single-row frame whose column names and order match the training features.
    input_data = pd.DataFrame({
        'credit_lines_outstanding': [credit_lines_outstanding],
        'loan_amt_outstanding': [loan_amt_outstanding],
        'total_debt_outstanding': [total_debt_outstanding],
        'income': [income],
        'years_employed': [years_employed],
        'fico_score': [fico_score],
    })

    # Column 1 of predict_proba is the probability of the classifier's second
    # class (presumably "default" -- confirm the target is encoded as 0/1).
    default_probability = classifier.predict_proba(input_data)[:, 1][0]

    return default_probability * loan_amt_outstanding * (1 - rate)

Logistic Regression AUC: 0.9999784447023711
Random Forest AUC: 0.9998598905654121
