In [1]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
import warnings
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
import base64
import requests
import json



# Load the Data

In [3]:

# Replace with your GitHub details
github_token = "your token"
repo_owner = "your username"
repo_name = "your repo name"
file_path = "data/Task 3 and 4_Loan_Data.csv"

# GitHub API URL
github_api_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/contents/{file_path}"

# Request the file with authentication
headers = {"Authorization": f"token {github_token}"}
response = requests.get(github_api_url, headers=headers)

if response.status_code == 200:
    file_info = response.json()
    file_download_url = file_info['download_url']

    # Load the CSV file
    df = pd.read_csv(file_download_url)

else:
    print(f"❌ Error: Unable to fetch file. Status Code: {response.status_code}")


# Train the Machine Learning Model
We train the XGBoost model on the dataset and implement functions for:

- Probability of Default (PD)
- Expected Loss (EL)

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

# ✅ Preprocess Data
df['DTI_ratio'] = df['total_debt_outstanding'] / df['income']  # Debt-to-Income Ratio
df.drop(columns=['customer_id'], inplace=True)  # Drop customer_id

# Define Features (X) and Target (y)
X = df.drop(columns=['default'])
y = df['default']

# Train-Test Split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Scale Numerical Features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ✅ Train XGBoost Model
best_model = XGBClassifier(use_label_encoder=False, eval_metric="logloss")
best_model.fit(X_train_scaled, y_train)

print("✅ XGBoost Model Successfully Trained!")


Parameters: { "use_label_encoder" } are not used.



✅ XGBoost Model Successfully Trained!


# Implement Probability of Default (PD) & Expected Loss (EL) Functions

In [5]:
import numpy as np

def predict_pd(credit_lines, loan_amt, total_debt, income, years_employed, fico_score):
    """
    Function to predict Probability of Default (PD).
    """
    dti_ratio = total_debt / income  # Debt-to-Income Ratio

    # Create input feature array
    customer_features = np.array([[credit_lines, loan_amt, total_debt, income, years_employed, fico_score, dti_ratio]])

    # Scale input features
    customer_features_scaled = scaler.transform(customer_features)

    # Predict Probability of Default
    pd_value = best_model.predict_proba(customer_features_scaled)[:, 1][0]

    return pd_value

def expected_loss(credit_lines, loan_amt, total_debt, income, years_employed, fico_score):
    """
    Function to compute Expected Loss (EL) in GBP.
    """
    recovery_rate = 0.10  # 10% Recovery Rate
    pd_value = predict_pd(credit_lines, loan_amt, total_debt, income, years_employed, fico_score)
    el_value = pd_value * loan_amt * (1 - recovery_rate)

    return el_value


# Generate Predictions & Save to GitHub
We will:

1. Apply the model to the test set to generate PD & EL.
2. Save results to CSV.
3. Upload processed file back to GitHub.

In [6]:
# ✅ Apply Model to Test Data
test_results = X_test.copy()
test_results['PD'] = best_model.predict_proba(X_test_scaled)[:, 1]  # Probability of Default
test_results['EL'] = test_results['loan_amt_outstanding'] * test_results['PD'] * (1 - 0.10)  # Expected Loss (10% Recovery)

# ✅ Save Processed Data
output_filename = "loan_predictions.csv"
test_results.to_csv(output_filename, index=False)
print(f"✅ Predictions saved to {output_filename}")


✅ Predictions saved to loan_predictions.csv


# Upload Predictions to GitHub

In [9]:
import base64
import json

# ✅ Define upload details
upload_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/contents/data/{output_filename}"

def upload_to_github(file_path, upload_url):
    with open(file_path, "rb") as file:
        content = file.read()
        encoded_content = base64.b64encode(content).decode("utf-8")  # Proper Base64 encoding

    # Check if file exists to get `sha`
    response = requests.get(upload_url, headers={"Authorization": f"Bearer {github_token}"})
    sha = response.json().get("sha", None) if response.status_code == 200 else None

    # ✅ Construct the payload
    payload = {
        "message": f"Updating {file_path}",
        "content": encoded_content,
        "branch": "main"
    }

    if sha:
        payload["sha"] = sha  # Required for updating existing file

    # ✅ Upload file
    response = requests.put(upload_url, headers={"Authorization": f"Bearer {github_token}"}, json=payload)

    if response.status_code in [200, 201]:
        print(f"✅ Successfully uploaded {file_path} to GitHub.")
    else:
        print(f"❌ Failed to upload {file_path}. Error: {response.text}")

# ✅ Upload predictions
upload_to_github(output_filename, upload_url)


✅ Successfully uploaded loan_predictions.csv to GitHub.
