In [1]:

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier
import tensorflow as tf
import pandas as pd
import numpy as np
import joblib
import pickle
from pathlib import Path

In [7]:

import pandas as pd
import joblib
import pickle
import tensorflow as tf
from pathlib import Path
import numpy as np
import xgboost as xgb
from catboost import CatBoostClassifier
import lightgbm as lgb
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# ---------------------------------------------------------------------------
# Batch scoring: run every saved base model over the test CSV, append one
# "<Label>_Prediction" column per model, then feed three of those columns to
# the saved meta learner and write everything to Predicted_SBA_Loans.csv.
# ---------------------------------------------------------------------------
df_new = pd.read_csv("Synthetic_SBA_Loans_test.csv")

# NOTE(review): the preprocessor is loaded but never applied below; every model
# is fed the raw selected columns. If the saved models are not pipelines that
# embed their own preprocessing, X_df should go through
# preprocessor.transform(...) first -- confirm against the training notebook.
# SECURITY: joblib.load unpickles arbitrary objects; only load trusted files.
preprocessor = joblib.load("saved_models/preprocessor.pkl")

# Maps artifact file name (inside saved_models/) -> label used for the output column.
model_files = {
    "CatBoost_model.pkl": "CatBoost",
    "GradientBoosting_model.pkl": "GradientBoosting",
    "LightGBM_model.pkl": "LightGBM",
    "LogisticRegression_model.pkl": "LogisticRegression",
    "meta_learner.pkl": "MetaLearner",
    "neural_net_model.h5": "NeuralNet",
    "RandomForest_model.pkl": "RandomForest",
    "xgboost_model.pkl": "XGBoost"
}

# Feature columns passed to every base model, in this order.
selected_columns = [
    'Business Ownership (1)', 'Business Ownership (2)', 'Business Ownership (3)',
    'Business Ownership (4)', 'Business Ownership (5)', 'Annual Revenue (2 years ago)',
    'Annual Revenue (1 year ago)', 'Annual Revenue (latest year)', 'Net Profit Margin',
    'Business Debt (2 years ago)', 'Business Debt (1 year ago)', 'Business Debt (latest year)',
    'NOI (2 years ago)', 'NOI (1 year ago)', 'NOI (latest year)', 'Managerial Experience',
    'Years in Business', 'Collateral Availability', 'Acquisition Request', 'Working Capital',
    'Business Expansion', 'Equipment Purchase or Leasing', 'Inventory Purchase',
    'Real Estate Acquisition or Improvement', 'Business Acquisition or Buyout',
    'Refinancing Existing Debt', 'Emergency Funds', 'Franchise Financing',
    'Contract Financing', 'Licensing or Permits', 'Line of Credit Establishment',
    'Eligibility Score'
]

# Raises KeyError if the CSV lacks any expected feature column.
df_new_cleaned = df_new[selected_columns]
X_df = pd.DataFrame(df_new_cleaned)

# Not populated anywhere in this cell; kept so the name stays defined.
model_preds_dict = {}

for file_name, model_label in model_files.items():
    # The meta learner consumes other models' outputs, so it is run after the
    # loop. Skip it before loading or logging anything for it.
    if model_label == "MetaLearner":
        continue

    try:
        model_path = Path("saved_models") / file_name
        print(f"Predicting with {model_label}...")

        if file_name.endswith(".pkl"):
            # SECURITY: unpickling executes code from the file; trusted artifacts only.
            model = joblib.load(model_path)
            preds = model.predict(X_df)

        elif file_name.endswith(".h5"):
            keras_model = tf.keras.models.load_model(model_path)
            raw_output = np.asarray(keras_model.predict(X_df))
            if raw_output.ndim > 1 and raw_output.shape[1] > 1:
                # One column per class: pick the highest-scoring class.
                preds = raw_output.argmax(axis=1)
            else:
                # Single output column (shape (n, 1) or (n,)). argmax over one
                # column would always give 0, so flatten and round instead.
                preds = raw_output.reshape(-1).round().astype(int)

        else:
            # Never fall through with a stale `preds` from an earlier iteration.
            raise ValueError(f"Unsupported model file type: {file_name}")

        df_new[f"{model_label}_Prediction"] = preds
        print(f"{model_label} prediction complete.")

    except Exception as e:
        # Best effort: report the failure and continue with the remaining models.
        print(f"Error loading or predicting with {model_label}: {e}")

# Meta Learner
# NOTE(review): despite the *_probs names, these columns hold the hard class
# labels written by the loop above, not probabilities. Confirm the meta learner
# was trained on labels in this (NeuralNet, XGBoost, LightGBM) column order.
try:
    nn_probs = df_new["NeuralNet_Prediction"].values
    xgb_probs = df_new["XGBoost_Prediction"].values
    lgb_probs = df_new["LightGBM_Prediction"].values
    meta_input = np.column_stack((nn_probs, xgb_probs, lgb_probs))
    meta_learner = joblib.load("saved_models/meta_learner.pkl")
    meta_preds = meta_learner.predict(meta_input)
    df_new["Meta_Learner_Prediction"] = meta_preds
    print("Meta prediction complete.")
except Exception as e:
    # A KeyError here means one of the three base models failed in the loop above.
    print(f"Meta learner error: {e}")

df_new.to_csv("Predicted_SBA_Loans.csv", index=False)
print("Predictions complete. Output saved to 'Predicted_SBA_Loans.csv'")




Predicting with CatBoost...
CatBoost prediction complete.
Predicting with GradientBoosting...
GradientBoosting prediction complete.
Predicting with LightGBM...
LightGBM prediction complete.
Predicting with LogisticRegression...
LogisticRegression prediction complete.
Predicting with MetaLearner...
Predicting with NeuralNet...
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
NeuralNet prediction complete.
Predicting with RandomForest...
RandomForest prediction complete.
Predicting with XGBoost...
XGBoost prediction complete.
Meta prediction complete.
Predictions complete. Output saved to 'Predicted_SBA_Loans.csv'


In [None]:
# Read back the predictions file. The prediction cell writes it as
# "Predicted_SBA_Loans.csv" relative to the working directory, so the same
# relative path is used here instead of a machine-specific absolute path.
df = pd.read_csv("Predicted_SBA_Loans.csv")

# Number of rows assigned to each predicted class by the XGBoost model.
eligibility_counts = df['XGBoost_Prediction'].value_counts()
eligibility_counts

XGBoost_Prediction
1    3472
0    2594
Name: count, dtype: int64

In [None]:
def check_pickle_header(filepath, num_bytes=8):
    """Print the first bytes of a file to help identify how it was serialized.

    Streams written with pickle protocol 2 or later start with b'\\x80'
    followed by the protocol number, so the header shows whether the file is a
    plain pickle or some other format.

    Args:
        filepath: Path of the file to inspect; opened in binary mode.
        num_bytes: How many leading bytes to read and print (default 8).

    Raises:
        ValueError: If ``num_bytes`` is negative (``read`` would otherwise
            return the whole file).
        OSError: If the file cannot be opened.
    """
    if num_bytes < 0:
        raise ValueError("num_bytes must be non-negative")
    with open(filepath, "rb") as f:
        first_bytes = f.read(num_bytes)
    print(f"{filepath}: {first_bytes}")
# Show the leading bytes of the saved LightGBM artifact.
check_pickle_header(filepath="saved_models/LightGBM_model.pkl")

saved_models/LightGBM_model.pkl: b'\x80\x04\x95\xdb\x01\x00\x00\x00'


In [15]:
from tensorflow.keras.models import load_model

# Load the saved Keras network on its own to see the input shape it expects.
# NOTE: this rebinds the global `model`, which the prediction cell's loop also assigns.
model = load_model("saved_models/neural_net_model.h5")
print(model.input_shape)  # printed (None, 32) when last run; selected_columns also has 32 entries



(None, 32)
