In [5]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [6]:
data = pd.read_csv('loan-train.csv')

In [7]:
data.ffill(inplace=True)

label_cols = ['Gender','Married','Education','Self_Employed','Property_Area']
for col in label_cols:
    data[col] = LabelEncoder().fit_transform(data[col])

data.drop(['Loan_ID'], axis=1, inplace=True)
data['Dependents'] = data['Dependents'].replace('3+', 3).astype(int)

X = data.drop(['Loan_Status'], axis=1)
y = LabelEncoder().fit_transform(data['Loan_Status'])


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test) 

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train_scaled, y_train)
y_pred = clf.predict(X_test_scaled)

In [9]:
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.87      0.47      0.61        43
           1       0.77      0.96      0.86        80

    accuracy                           0.79       123
   macro avg       0.82      0.71      0.73       123
weighted avg       0.80      0.79      0.77       123

[[20 23]
 [ 3 77]]


In [10]:
import joblib
joblib.dump(clf, 'loan_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']