In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, classification_report
from sklearn.naive_bayes import GaussianNB

In [17]:
# Read the raw CSV into a DataFrame; the path is relative to the notebook's
# working directory.
DATASET_PATH = "./Datasets/loan_approval_dataset.csv"
df = pd.read_csv(DATASET_PATH)

# Data Preparation

In [18]:
# Integer-encode every object-dtype column of `df` in place.
# One fitted LabelEncoder per column is kept in `label_encoders` so the
# integer codes can be mapped back to the original values later.
# NOTE(review): this overwrites `df`, so re-running the cell after it has
# already run finds no object columns and resets `label_encoders` to {}.
label_encoders = {}
categorical_columns = df.select_dtypes(include=["object"]).columns
for col in categorical_columns:
    le = LabelEncoder().fit(df[col])
    label_encoders[col] = le
    df[col] = le.transform(df[col])

In [20]:
# Separate the target column (y) from the remaining feature columns (X).
# NOTE(review): the dataset file is named "loan_approval_dataset.csv" —
# confirm that "Employment_Status" is really the intended prediction target.
y = df["Employment_Status"]
X = df.drop(columns=["Employment_Status"])

In [21]:
# Standardize numerical features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [22]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply Machine Learning Algorithms

In [23]:
from sklearn.neighbors import KNeighborsClassifier

# Fit a 5-nearest-neighbours classifier on the training split
# (fit() returns the estimator itself, so construction and fitting are chained).
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Class predictions for the held-out test split
y_pred_knn = knn.predict(X_test)

In [24]:
from sklearn.naive_bayes import GaussianNB

# Fit a Gaussian Naive Bayes classifier on the training split
# (fit() returns the estimator itself, so construction and fitting are chained).
nb = GaussianNB().fit(X_train, y_train)

# Class predictions for the held-out test split
y_pred_nb = nb.predict(X_test)

In [25]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Decision Tree: fixed random_state for reproducible tie-breaking,
# fitted on the training split and scored on the test split.
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

# Random Forest: fixed random_state so the bootstrap samples and feature
# subsets are reproducible; fitted on the training split, scored on the test split.
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Build a simple feed-forward classifier.
# The target may hold more than two classes, which a single sigmoid unit with
# binary cross-entropy cannot represent (it can only ever predict 0 or 1).
# Use one softmax output per class instead.

# Map the labels to contiguous indices 0..n_classes-1, as required by
# sparse_categorical_crossentropy; `dl_classes[i]` is the original label of index i.
dl_classes, y_train_idx = np.unique(y_train, return_inverse=True)

model = Sequential([
    Dense(64, activation="relu", input_shape=(X_train.shape[1],)),
    Dense(32, activation="relu"),
    Dense(len(dl_classes), activation="softmax"),
])

# Compile the model (integer class indices -> sparse categorical cross-entropy)
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Train the model
model.fit(X_train, y_train_idx, epochs=10, batch_size=32, verbose=1)

# Predict on the test set: take the most probable class per row and map the
# index back to the original label so it is comparable with y_test.
y_pred_dl = dl_classes[model.predict(X_test).argmax(axis=1)]


KeyboardInterrupt



# Model Evaluation

In [29]:
def evaluate_model(y_test, y_pred, model_name):
    """Print accuracy, confusion matrix, precision and recall for one model.

    Args:
        y_test: True labels of the evaluation set.
        y_pred: Labels predicted by the model for the same samples.
        model_name: Human-readable name used in the report header.

    Precision and recall are averaged across classes with
    ``average='weighted'``. Nothing is returned; the report is printed.
    """
    print(f"Evaluation Metrics for {model_name}:")

    # Each metric is computed immediately before it is printed, so a failure
    # in a later metric still leaves the earlier lines in the output.
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.2f}")

    matrix = confusion_matrix(y_test, y_pred)
    print(f"Confusion Matrix:\n{matrix}")

    precision = precision_score(y_test, y_pred, average='weighted')
    print(f"Precision: {precision:.2f}")

    recall = recall_score(y_test, y_pred, average='weighted')
    print(f"Recall: {recall:.2f}")

    print("\n")

# Report the metrics of every classical model, in training order.
for model_name, y_pred in [
    ("K-Nearest Neighbors", y_pred_knn),
    ("Naive Bayes", y_pred_nb),
    ("Decision Tree", y_pred_dt),
    ("Random Forest", y_pred_rf),
]:
    evaluate_model(y_test, y_pred, model_name)

Evaluation Metrics for K-Nearest Neighbors:
Accuracy: 0.34
Confusion Matrix:
[[156 128  52]
 [148 125  56]
 [156 124  55]]
Precision: 0.34
Recall: 0.34


Evaluation Metrics for Naive Bayes:
Accuracy: 0.33
Confusion Matrix:
[[114 144  78]
 [118 149  62]
 [106 164  65]]
Precision: 0.33
Recall: 0.33


Evaluation Metrics for Decision Tree:
Accuracy: 0.33
Confusion Matrix:
[[103 116 117]
 [121 101 107]
 [ 87 122 126]]
Precision: 0.33
Recall: 0.33


Evaluation Metrics for Random Forest:
Accuracy: 0.33
Confusion Matrix:
[[ 98 149  89]
 [124 136  69]
 [117 121  97]]
Precision: 0.33
Recall: 0.33


