In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import pickle

# Train a Gaussian Naive Bayes loan-default classifier on the 5 features with
# the highest chi-square score, evaluate it on a 25% hold-out set, and persist
# the model, the scaler and the selected feature names for later inference.

# Load dataset and one-hot encode any remaining categorical columns
df = pd.read_csv("Preprocessed_Loan_Default.csv")
df = pd.get_dummies(df, drop_first=True)

# Define features and target
X = df.drop('Loan_Status_Non-Default', axis=1)
y = df['Loan_Status_Non-Default']

# Train-test split comes FIRST: everything that is fitted below (chi2 scaling,
# feature selection, standard scaling, the model) must only see training rows,
# otherwise information from the hold-out set leaks into the pipeline and the
# evaluation metrics are optimistically biased.
X_train_all, X_test_all, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# chi2 requires non-negative inputs, so rescale to [0,1] -- fitted on training rows only
X_train_for_chi2 = MinMaxScaler().fit_transform(X_train_all)

# Select top 5 features using Chi-Square (scores computed on training data only)
selector = SelectKBest(score_func=chi2, k=5)
selector.fit(X_train_for_chi2, y_train)
selected_columns = X.columns[selector.get_support()].tolist()

print("Top 5 features selected using Chi-Square:")
print(selected_columns)

# Use only selected features from original (non-scaled) data.
# X_top5 is kept so that any later cell relying on it keeps working.
X_top5 = X[selected_columns]
X_train = X_train_all[selected_columns]
X_test = X_test_all[selected_columns]

# Standard scaling for model (fit on train, apply to test)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train Naive Bayes model
model = GaussianNB()
model.fit(X_train_scaled, y_train)

# Evaluation
# NOTE: accuracy alone can be misleading when one class dominates the target;
# check the per-class recall in the classification report as well.
y_pred = model.predict(X_test_scaled)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Accuracy Score:", accuracy_score(y_test, y_pred))

# Save model, scaler, and feature names
with open("naive_bayes_model.pkl", "wb") as f:
    pickle.dump(model, f)

with open("scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

with open("feature_names.pkl", "wb") as f:
    pickle.dump(selected_columns, f)


Top 5 features selected using Chi-Square:
['Credit_Score', 'Interest_Rate', 'Gender_Female', 'Employment_Status_Employed', 'Location_Urban']
Confusion Matrix:
 [[   0  501]
 [   0 1999]]
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       501
           1       0.80      1.00      0.89      1999

    accuracy                           0.80      2500
   macro avg       0.40      0.50      0.44      2500
weighted avg       0.64      0.80      0.71      2500

Accuracy Score: 0.7996


  'precision', 'predicted', average, warn_for)


In [3]:
import pandas as pd
import pickle

# Score a single hand-written applicant record with the persisted model.

# Load model, scaler, and selected feature names
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load artifacts produced by the training cell of this notebook (or
# another trusted source).
with open("naive_bayes_model.pkl", "rb") as f:
    model = pickle.load(f)

with open("scaler.pkl", "rb") as f:
    scaler = pickle.load(f)

with open("feature_names.pkl", "rb") as f:
    feature_names = pickle.load(f)

# Create a sample user input — only provide actual values for relevant features
user_input = {
    'Income': 1,
    'Credit_Score': 2,
    'Existing_Loan_Balance': 5,
    'Loan_Amount': 3,
    'Interest_Rate': 1,
    # Ensure dummy variables are provided if selected
    'Employment_Status_Employed': 1,
    'Gender_Female': 0,
    'Location_Urban': 1
}

# Build a one-row frame holding exactly the training features, in training
# order: extra keys are dropped and any feature missing from the input is
# filled with 0. That default is meant for dummy variables; a missing numeric
# feature would also silently become 0, so supply those explicitly.
user_df = pd.DataFrame([user_input]).reindex(columns=feature_names, fill_value=0)

# Scale user input with the scaler fitted during training
user_scaled = scaler.transform(user_df)

# Predict
prediction = model.predict(user_scaled)[0]

# predict_proba columns follow model.classes_, so look up the column of the
# positive class (label 1 == Non-Default) instead of assuming it is column 1.
non_default_idx = list(model.classes_).index(1)
probability = model.predict_proba(user_scaled)[0][non_default_idx]

# Display result
result = "Non-Default" if prediction == 1 else "Default"
print(f"Loan Prediction: {result}")
print(f"Probability of Non-Default: {probability:.2f}")


Loan Prediction: Non-Default
Probability of Non-Default: 0.79
