In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Step 1: Load the balanced dataset.
# The path is relative to the notebook's working directory.
df = pd.read_csv("midoriloan_balanced.csv")

# Step 2: Split features and target.
# Every column except "loan_status" is used as a model feature.
X = df.drop(columns="loan_status")
y = df["loan_status"]

# Step 3: Train-test split.
# 80/20 split, stratified on the target so both parts keep the same class
# ratio; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Step 4: Train model with balanced class weights.
model = RandomForestClassifier(
    n_estimators=100,
    class_weight='balanced',
    random_state=42
)
model.fit(X_train, y_train)

# Step 5: Evaluate performance on the held-out test split.
y_pred = model.predict(X_test)
print("Classification Report:\n")
print(classification_report(y_test, y_pred))

# Step 6: Try a test input.
# Raw feature values for one applicant.
# NOTE(review): the values must follow the column order of X -- confirm
# against the CSV header.
new_user = [[30000, 500000, 550, 0, 0]]
# The model was fitted on a DataFrame, so it remembers its feature names.
# Wrapping the sample in a DataFrame with those same columns lets scikit-learn
# validate the input instead of silently matching by position (and avoids the
# "X does not have valid feature names" warning). A wrong number of values
# raises a ValueError here rather than producing a misaligned prediction.
new_user_df = pd.DataFrame(new_user, columns=X.columns)
pred = model.predict(new_user_df)
# One probability per class, ordered as in model.classes_.
proba = model.predict_proba(new_user_df)[0]

print("\nPrediction:")
# Label 1 is reported as approved; any other label as rejected.
print("Loan Approved" if pred[0] == 1 else "Loan Rejected")
print("Confidence:", proba)


Classification Report:

              precision    recall  f1-score   support

           0       0.55      0.54      0.55       565
           1       0.58      0.58      0.58       600

    accuracy                           0.56      1165
   macro avg       0.56      0.56      0.56      1165
weighted avg       0.56      0.56      0.56      1165


Prediction:
Loan Rejected
Confidence: [0.98 0.02]




In [3]:
import joblib

# Persist the fitted classifier (`model`, trained in the previous cell) to the
# working directory so it can be reloaded later without retraining.
# joblib files are pickle-based: only load them from trusted sources.
# The call is left as the cell's last expression so the list of written
# filenames is displayed as the cell output.
joblib.dump(value=model, filename="loan_model.pkl")


['loan_model.pkl']