In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('/content/Task 3 and 4_Loan_Data.csv')

In [3]:
df.head()

Unnamed: 0,customer_id,credit_lines_outstanding,loan_amt_outstanding,total_debt_outstanding,income,years_employed,fico_score,default
0,8153374,0,5221.545193,3915.471226,78039.38546,5,605,0
1,7442532,5,1958.928726,8228.75252,26648.43525,2,572,1
2,2256073,0,3363.009259,2027.83085,65866.71246,4,602,0
3,4885975,0,4766.648001,2501.730397,74356.88347,5,612,0
4,4700614,1,1345.827718,1768.826187,23448.32631,6,631,0


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [5]:
# Convert default to binary (0/1)
df['default'] = df['default'].apply(lambda x: 1 if x else 0)


# Define features (X) and target (y)
X = df.drop(['default', 'customer_id'], axis=1)
y = df['default']


# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Scale data using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


# Define models
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'Gradient Boosting': GradientBoostingClassifier()
}


# Train and evaluate models
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    print(f"Model: {name}")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.3f}")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print()



Model: Logistic Regression
Accuracy: 0.996
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1652
           1       0.99      0.98      0.99       348

    accuracy                           1.00      2000
   macro avg       0.99      0.99      0.99      2000
weighted avg       1.00      1.00      1.00      2000

Confusion Matrix:
[[1650    2]
 [   7  341]]

Model: Decision Tree
Accuracy: 0.994
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1652
           1       0.98      0.99      0.98       348

    accuracy                           0.99      2000
   macro avg       0.99      0.99      0.99      2000
weighted avg       0.99      0.99      0.99      2000

Confusion Matrix:
[[1645    7]
 [   5  343]]

Model: Random Forest
Accuracy: 0.995
Classification Report:
              precision    recall  f1-score   support

           0 

In [6]:
def calculate_expected_loss(
    loan_amt_outstanding,
    total_debt_outstanding,
    income,
    years_employed,
    fico_score,
    credit_lines_outstanding,
    model
):
    """
    Calculate expected loss for a loan.

    Args:
        loan_amt_outstanding (float): Loan amount outstanding.
        total_debt_outstanding (float): Total debt outstanding.
        income (float): Income.
        years_employed (int): Years employed.
        fico_score (int): FICO score.
        credit_lines_outstanding (int): Credit lines outstanding.
        model: Trained model.

    Returns:
        float: Expected loss.
    """
    # Scale input data
    scaler = StandardScaler()
    data = scaler.fit_transform([
        [loan_amt_outstanding, total_debt_outstanding, income, years_employed, fico_score, credit_lines_outstanding]
    ])

    # Predict probability of default
    prob_default = model.predict_proba(data)[:, 1][0]

    # Calculate expected loss
    expected_loss = prob_default * loan_amt_outstanding

    return expected_loss


# Example usage
loan_amt_outstanding = 5000.0
total_debt_outstanding = 20000.0
income = 50000.0
years_employed = 5
fico_score = 600
credit_lines_outstanding = 2

# Choose a trained model
model = LogisticRegression(max_iter=1000)
model.fit(X_train_scaled, y_train)

expected_loss = calculate_expected_loss(
    loan_amt_outstanding,
    total_debt_outstanding,
    income,
    years_employed,
    fico_score,
    credit_lines_outstanding,
    model
)

print(f"Expected Loss: {expected_loss:.2f}")


Expected Loss: 0.01
