<a href="https://colab.research.google.com/github/Codes-of-Hermit/Finance_codes/blob/main/Credit_Risk_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# 1. Load and Preprocess Data
# Colab-specific step: `files` is imported from google.colab, so this cell
# presumably only runs inside a Colab runtime — TODO confirm for other setups.
# NOTE(review): the output recorded for this cell shows the upload being saved
# as 'Task 3 and 4_Loan_Data (1).csv', while load_data() below defaults to
# 'Task 3 and 4_Loan_Data.csv' — confirm the file that gets read is the
# freshly uploaded one and not an older copy.
from google.colab import files
uploaded = files.upload()  # result kept in `uploaded`; not referenced again in the visible code
def load_data(filename='Task 3 and 4_Loan_Data.csv'):
    """Read the CSV file at ``filename`` and return it as a pandas DataFrame.

    Args:
        filename (str): Path of the CSV file to read. Defaults to
            'Task 3 and 4_Loan_Data.csv'.

    Returns:
        pandas.DataFrame: The parsed contents of the file.
    """
    return pd.read_csv(filename)

# Read the loan data through the helper defined above
df = load_data()

# Target is the 'default' column; features are every column except
# 'customer_id' and the target itself
y = df['default']
X = df.drop(columns=['customer_id', 'default'])

# 80/20 train/test split with a fixed seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features: the scaler is fitted on the training split only and
# then applied unchanged to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 2. Train the Model (Logistic Regression) on the scaled training features
model = LogisticRegression(random_state=42).fit(X_train_scaled, y_train)

# Report accuracy on the held-out 20%
print(f"Model Accuracy on Test Set: {accuracy_score(y_test, model.predict(X_test_scaled)):.4f}")

# 3. The Expected Loss Function
def calculate_expected_loss(loan_properties, model, scaler, recovery_rate=0.10):
    """
    Compute the expected loss of a single loan as EL = PD * LGD * EAD.

    Args:
        loan_properties (dict): A dictionary containing the loan details.
            Required keys: 'credit_lines_outstanding', 'loan_amt_outstanding',
            'total_debt_outstanding', 'income', 'years_employed', 'fico_score'.
            Any additional keys are ignored.
        model: A fitted classifier exposing ``predict_proba``. Column 1 of its
            output is read as the probability of default (this assumes the
            positive/default class is the second class).
        scaler: A fitted transformer exposing ``transform``; it receives a
            one-row DataFrame with the feature columns in training order.
        recovery_rate (float): Fraction of the exposure recovered after a
            default, in [0, 1]. Defaults to 0.10, the value that was
            previously hard-coded.

    Returns:
        float: The expected loss in currency units.

    Raises:
        ValueError: If ``recovery_rate`` is outside [0, 1].
        KeyError: If any required key is missing from ``loan_properties``.
    """
    if not 0.0 <= recovery_rate <= 1.0:
        raise ValueError(
            f"recovery_rate must be between 0 and 1, got {recovery_rate!r}"
        )

    # Column order must match the order used when the scaler/model were fitted
    feature_cols = ['credit_lines_outstanding', 'loan_amt_outstanding',
                    'total_debt_outstanding', 'income', 'years_employed', 'fico_score']

    # Fail early with a message that names the missing inputs instead of
    # surfacing an opaque pandas indexing error
    missing = [col for col in feature_cols if col not in loan_properties]
    if missing:
        raise KeyError(f"loan_properties is missing required keys: {missing}")

    # One-row frame restricted to, and ordered by, the training features
    input_df = pd.DataFrame([loan_properties])[feature_cols]

    # Scale features using the trained scaler
    features_scaled = scaler.transform(input_df)

    # 1. Probability of Default (PD)
    # predict_proba returns [prob_0, prob_1]; we want prob_1 (default)
    pd_probability = model.predict_proba(features_scaled)[:, 1][0]

    # 2. Loss Given Default (LGD): the share of the exposure that is not recovered
    lgd = 1 - recovery_rate

    # 3. Exposure at Default (EAD): the current outstanding loan amount
    ead = loan_properties['loan_amt_outstanding']

    # Expected Loss: EL = PD * LGD * EAD, returned as a plain Python float
    return float(pd_probability * lgd * ead)

# --- Example Usage ---
# A sample loan described by the six feature keys that
# calculate_expected_loss() reads
new_loan = dict(
    credit_lines_outstanding=2,
    loan_amt_outstanding=5000.00,
    total_debt_outstanding=8000.00,
    income=70000.00,
    years_employed=5,
    fico_score=650,
)

# Score the sample loan with the model and scaler fitted earlier in this cell
loss = calculate_expected_loss(new_loan, model, scaler)
print(f"Expected Loss for this loan: ${loss:.2f}")

Saving Task 3 and 4_Loan_Data.csv to Task 3 and 4_Loan_Data (1).csv
Model Accuracy on Test Set: 0.9955
Expected Loss for this loan: $0.03
