In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
# Read the loan-approval CSV into a DataFrame (relative path, so it is
# resolved against the notebook's working directory)
data_path = 'loan_approval_dataset.csv'
df = pd.read_csv(filepath_or_buffer=data_path)

In [4]:
# Show the top five rows of the freshly loaded frame as the cell output
print("Initial Data Preview:")
df.head(n=5)

Initial Data Preview:


Unnamed: 0,loan_id,no_of_dependents,education,self_employed,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value,loan_status
0,1,2,Graduate,No,9600000,29900000,12,778,2400000,17600000,22700000,8000000,Approved
1,2,0,Not Graduate,Yes,4100000,12200000,8,417,2700000,2200000,8800000,3300000,Rejected
2,3,3,Graduate,No,9100000,29700000,20,506,7100000,4500000,33300000,12800000,Rejected
3,4,3,Graduate,No,8200000,30700000,8,467,18200000,3300000,23300000,7900000,Rejected
4,5,5,Not Graduate,Yes,9800000,24200000,20,382,12400000,8200000,29400000,5000000,Rejected


In [5]:
# Collect the names of every object-dtype column; these are the ones that
# get label-encoded in the next step
categorical_cols = df.select_dtypes(include='object').columns

In [6]:
# Replace each categorical column with integer codes, keeping one fitted
# encoder per column so the codes can be mapped back to labels later.
# The frame is overwritten in place, so `df` holds the encoded values afterwards.
label_encoders = {col: LabelEncoder() for col in categorical_cols}
for col, le in label_encoders.items():
    df[col] = le.fit_transform(df[col])

In [8]:
# Show the top five rows again to confirm the categorical columns now hold integers
print("Data Preview After Encoding:")
df.head(n=5)

Data Preview After Encoding:


Unnamed: 0,loan_id,no_of_dependents,education,self_employed,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value,loan_status
0,1,2,0,0,9600000,29900000,12,778,2400000,17600000,22700000,8000000,0
1,2,0,1,1,4100000,12200000,8,417,2700000,2200000,8800000,3300000,1
2,3,3,0,0,9100000,29700000,20,506,7100000,4500000,33300000,12800000,1
3,4,3,0,0,8200000,30700000,8,467,18200000,3300000,23300000,7900000,1
4,5,5,1,1,9800000,24200000,20,382,12400000,8200000,29400000,5000000,1


In [9]:
# Separate the predictors (X) from the label column (y).
# The label is addressed as ' loan_status' -- with a leading space.
# NOTE(review): only the label is removed, so the `loan_id` column shown in the
# previews stays in X as a feature -- confirm that is intended.
X = df.drop(columns=' loan_status')
y = df[' loan_status']

In [10]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Wrap both splits in XGBoost's DMatrix container, which the native
# xgb.train / Booster.predict API operates on
dtrain = xgb.DMatrix(data=X_train, label=y_train)
dtest = xgb.DMatrix(data=X_test, label=y_test)

In [12]:
# Baseline XGBoost settings: tree depth, learning rate (eta), a binary
# logistic objective, and classification error as the evaluation metric
params = dict(
    max_depth=6,
    eta=0.1,
    objective='binary:logistic',
    eval_metric='error',
)

In [13]:
# Fit the baseline booster: 100 boosting rounds on the training matrix
print("Training the model...")
bst = xgb.train(params=params, dtrain=dtrain, num_boost_round=100)

Training the model...


In [14]:
# Score the held-out rows, then round each predicted value to a hard 0/1 label
print("Making predictions on the test set...")
preds = bst.predict(dtest)
predictions = list(map(round, preds))

Making predictions on the test set...


In [15]:
# Compare the hard predictions against the held-out labels.
# precision/recall/F1 are called without `pos_label`, so scikit-learn scores the
# class encoded as 1. NOTE(review): the encoded preview shows 'Rejected' rows
# carrying 1 -- confirm that "rejected" is the class these scores should describe.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
precision = precision_score(y_true=y_test, y_pred=predictions)
recall = recall_score(y_true=y_test, y_pred=predictions)
f1 = f1_score(y_true=y_test, y_pred=predictions)

In [16]:
# Report the four baseline metrics as percentages with two decimal places
print(f'Accuracy: {accuracy * 100:.2f}%')
print(f'Precision: {precision * 100:.2f}%')
print(f'Recall: {recall * 100:.2f}%')
print(f'F1 Score: {f1 * 100:.2f}%')

Accuracy: 98.01%
Precision: 98.08%
Recall: 96.54%
F1 Score: 97.31%


In [17]:
# Placeholder for four-vector optimization algorithm
def four_vector_optimization(params):
    """Return an adjusted copy of an XGBoost parameter dict.

    This is a placeholder: no search is performed, it only overrides two
    settings with fixed values.

    Args:
        params: Mapping of XGBoost training parameters. It is not modified.

    Returns:
        A new dict holding every entry of ``params``, with ``max_depth`` set
        to 8 and ``eta`` set to 0.05.
    """
    # Work on a copy: binding the argument directly would alias the caller's
    # dict, so the overrides below would also rewrite the baseline settings.
    optimized_params = dict(params)
    # Example modification
    optimized_params['max_depth'] = 8
    optimized_params['eta'] = 0.05
    return optimized_params

In [18]:
# Derive the tuned settings from the baseline parameter dict
optimized_params = four_vector_optimization(params=params)

In [19]:
# Fit a second booster on the same training matrix, this time with the tuned settings
print("Training the model with optimized parameters...")
bst_optimized = xgb.train(params=optimized_params, dtrain=dtrain, num_boost_round=100)

Training the model with optimized parameters...


In [20]:
# Score the held-out rows with the tuned booster and round to hard 0/1 labels
print("Making predictions on the test set with optimized model...")
preds_optimized = bst_optimized.predict(dtest)
predictions_optimized = list(map(round, preds_optimized))

Making predictions on the test set with optimized model...


In [21]:
# Same four metrics as for the baseline, computed on the tuned model's predictions.
# As before, precision/recall/F1 use scikit-learn's default positive class (label 1).
accuracy_optimized = accuracy_score(y_true=y_test, y_pred=predictions_optimized)
precision_optimized = precision_score(y_true=y_test, y_pred=predictions_optimized)
recall_optimized = recall_score(y_true=y_test, y_pred=predictions_optimized)
f1_optimized = f1_score(y_true=y_test, y_pred=predictions_optimized)

In [22]:
# Report the tuned model's metrics as percentages with two decimal places
print(f'Optimized Accuracy: {accuracy_optimized * 100:.2f}%')
print(f'Optimized Precision: {precision_optimized * 100:.2f}%')
print(f'Optimized Recall: {recall_optimized * 100:.2f}%')
print(f'Optimized F1 Score: {f1_optimized * 100:.2f}%')

Optimized Accuracy: 98.01%
Optimized Precision: 98.08%
Optimized Recall: 96.54%
Optimized F1 Score: 97.31%
