## Loading the libraries

In [8]:
!pip install xgboost

Collecting xgboost
  Using cached xgboost-3.0.2-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.2-py3-none-win_amd64.whl (150.0 MB)
   ---------------------------------------- 0.0/150.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/150.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/150.0 MB 325.1 kB/s eta 0:07:42
   ---------------------------------------- 0.1/150.0 MB 465.5 kB/s eta 0:05:23
   ---------------------------------------- 0.1/150.0 MB 581.0 kB/s eta 0:04:18
   ---------------------------------------- 0.1/150.0 MB 581.0 kB/s eta 0:04:18
   ---------------------------------------- 0.1/150.0 MB 514.3 kB/s eta 0:04:52
   ---------------------------------------- 0.1/150.0 MB 514.3 kB/s eta 0:04:52
   ---------------------------------------- 0.1/150.0 MB 514.3 kB/s eta 0:04:52
   ---------------------------------------- 0.1/150.0 MB 514.3 kB/s eta 0:04:52
   ---------------------------------------- 0.2/150.0 MB 533.8 kB/s e

In [21]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import joblib
import warnings
warnings.filterwarnings('ignore')

In [23]:
train = pd.read_csv('Training Dataset.csv')
test = pd.read_csv('Test Dataset.csv')

## Data Preprocessing and cleaning

In [26]:
# Fill missing values
for col in ['Gender', 'Married', 'Dependents', 'Self_Employed', 'Credit_History']:
    train[col].fillna(train[col].mode()[0], inplace=True)
    test[col].fillna(test[col].mode()[0], inplace=True)

train['LoanAmount'].fillna(train['LoanAmount'].median(), inplace=True)
test['LoanAmount'].fillna(test['LoanAmount'].median(), inplace=True)

train['Loan_Amount_Term'].fillna(train['Loan_Amount_Term'].mode()[0], inplace=True)
test['Loan_Amount_Term'].fillna(test['Loan_Amount_Term'].mode()[0], inplace=True)

train.drop('Loan_ID', axis=1, inplace=True)
test_ids = test['Loan_ID']
test.drop('Loan_ID', axis=1, inplace=True)

## Encoding categorical features

In [29]:
le = LabelEncoder()
cols = ['Gender', 'Married', 'Education', 'Self_Employed', 'Property_Area', 'Dependents']
for col in cols:
    train[col] = le.fit_transform(train[col])
    test[col] = le.transform(test[col])

# Target encoding
train['Loan_Status'] = train['Loan_Status'].map({'Y': 1, 'N': 0})

X = train.drop('Loan_Status', axis=1)
y = train['Loan_Status']

## Creating Model and it's evaluation

In [40]:
# Train XGBoost
model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
model.fit(X, y)

train_preds = model.predict(X)
acc = accuracy_score(y, train_preds)
print(f"Training Accuracy: {acc * 100:.2f}")

Training Accuracy: 100.00


## Saving the model

In [43]:
joblib.dump(model, 'loan_eligibility_model.pkl')
joblib.dump(le, 'label_encoder.pkl')

['label_encoder.pkl']

## Sample Output creating on test set

In [46]:

test_preds = model.predict(test)
output = pd.DataFrame({'Loan_ID': test_ids, 'Loan_Status': ['Y' if p==1 else 'N' for p in test_preds]})
output.to_csv('test_submission.csv', index=False)