In [1]:
# ===============================================================
# South Memphis Community Investment Fund (SMCIF)
# Loan/Funding Approval Probability Model - Demo Version
# ---------------------------------------------------------------
# I'm creating this prototype to show how we can predict the
# probability of approving a loan or business funding request.
# This version uses demo (synthetic) data just for structure.
# Once we get real data from SBA, CFPB, or partner institutions,
# we’ll retrain the model with actual business and approval data.
# ===============================================================


In [2]:
# Importing the essential libraries.
# These will help me generate data, build the model, and evaluate it.

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report


In [3]:
# ---------------------------------------------------------------
# For now, I’m generating a synthetic dataset so I can test the model.
# Later, this section will be replaced with actual loan or funding data
# from SMCIF operations, government data (SBA), or local financial partners.
# ---------------------------------------------------------------

np.random.seed(42)
n_samples = 300  # sample size for demonstration

demo_data = pd.DataFrame({
    "annual_revenue": np.random.randint(50000, 500000, n_samples),
    "revenue_growth": np.random.uniform(-0.2, 0.5, n_samples),
    "debt_to_equity": np.random.uniform(0.1, 1.5, n_samples),
    "business_age": np.random.randint(1, 20, n_samples),
    "owner_experience": np.random.randint(0, 15, n_samples),
    "credit_score": np.random.randint(550, 800, n_samples),
    "num_employees": np.random.randint(1, 50, n_samples),
    "cashflow_coverage": np.random.uniform(0.5, 3.0, n_samples),
    "collateral_value": np.random.randint(10000, 200000, n_samples)
})

# Simulating an “approved” column (1 = approved, 0 = not approved)
# based on logical business indicators.
demo_data["approved"] = (
    (demo_data["credit_score"] > 650) &
    (demo_data["cashflow_coverage"] > 1.2) &
    (demo_data["revenue_growth"] > 0)
).astype(int)

demo_data.head()


Unnamed: 0,annual_revenue,revenue_growth,debt_to_equity,business_age,owner_experience,credit_score,num_employees,cashflow_coverage,collateral_value,approved
0,171958,0.458366,1.386762,15,11,667,24,1.18156,122990,0
1,196867,0.0783,0.454186,4,11,742,20,1.533873,79519,1
2,181932,0.162426,1.073576,12,12,571,37,0.804715,19503,0
3,415838,0.386397,0.205608,13,1,767,4,0.952873,105457,0
4,309178,0.272983,0.332702,2,2,669,28,2.202795,198938,1


In [4]:
# ---------------------------------------------------------------
# Splitting my dataset into features (X) and target (y).
# Then scaling (normalizing) the data to help the model perform better.
# ---------------------------------------------------------------

X = demo_data.drop(columns=["approved"])
y = demo_data["approved"]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)


In [5]:
# ---------------------------------------------------------------
# I’m using Logistic Regression for now because it’s interpretable
# and transparent — important for financial models and fairness.
# ---------------------------------------------------------------

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)


In [6]:
# ---------------------------------------------------------------
# Checking how well my demo model performs using accuracy and ROC-AUC.
# Once we get real-world data, this will give us a more meaningful picture.
# ---------------------------------------------------------------

y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

print("Demo Model Accuracy:", round(accuracy_score(y_test, y_pred), 3))
print("Demo ROC-AUC:", round(roc_auc_score(y_test, y_proba), 3))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Demo Model Accuracy: 0.867
Demo ROC-AUC: 0.923

Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.93      0.91        44
           1       0.79      0.69      0.73        16

    accuracy                           0.87        60
   macro avg       0.84      0.81      0.82        60
weighted avg       0.86      0.87      0.86        60



In [7]:
# ---------------------------------------------------------------
# Trying out the model with a new business example.
# The output will show the probability of approval (in percentage).
# ---------------------------------------------------------------

new_business = pd.DataFrame({
    "annual_revenue": [220000],
    "revenue_growth": [0.15],
    "debt_to_equity": [0.6],
    "business_age": [5],
    "owner_experience": [8],
    "credit_score": [710],
    "num_employees": [6],
    "cashflow_coverage": [1.8],
    "collateral_value": [60000]
})

# Scale the input data (same scaling as the training data)
new_business_scaled = scaler.transform(new_business)

# Predict the approval probability
prob = model.predict_proba(new_business_scaled)[0][1]
print("Predicted Probability of Loan Approval:", round(prob * 100, 2), "%")


Predicted Probability of Loan Approval: 27.01 %


In [8]:
# ===============================================================
# WHEN WE GET REAL DATA (FUTURE STEP)
# ---------------------------------------------------------------
# 1. Remove the synthetic data generation section (Cell 3).
# 2. Load actual business and loan application data instead.
# 3. Make sure it includes the same kinds of columns (financial, credit, etc).
# 4. Retrain and revalidate the model with real-world outcomes.
# 5. Optionally, upgrade to XGBoost or LightGBM for higher accuracy
#    once we have 5,000+ records or complex patterns in data.
# ===============================================================
