In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the raw loan-approval table.
dataset = pd.read_csv('loan_approval_dataset.csv')

# Two feature columns, selected by 0-based position (columns 4 and 7).
# NOTE(review): the plots further down label X[:, 0] as 'Credit Score' and
# X[:, 1] as 'Salary' -- confirm against dataset.columns that positions 4 and 7
# really hold those fields, in that order.
X = dataset.iloc[:, [4, 7]].values
y = dataset.iloc[:, -1].values  # target is the last column

# Encode the target labels as integers. LabelEncoder numbers the classes in
# sorted label order; inspect le.classes_ to see which label became 0 and which 1.
le = LabelEncoder()
y = le.fit_transform(y)

# Hold out 20% of the rows for testing. The unscaled splits are kept under the
# *_orig names so plots can use original units directly, instead of rebuilding
# them with an inverse_transform round-trip.
X_train_orig, X_test_orig, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Fit the scaler on the training split only, then apply it to both splits.
sc = StandardScaler()
X_train = sc.fit_transform(X_train_orig)
X_test = sc.transform(X_test_orig)

# Train on the standardized features and predict the held-out rows.
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)


In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Training-set predictions; the test-set predictions (y_pred) come from the
# model-fitting cell.
y_train_pred = classifier.predict(X_train)

# Confusion matrix and accuracy, computed one split at a time.
cm_train, train_accuracy = (
    confusion_matrix(y_train, y_train_pred),
    accuracy_score(y_train, y_train_pred),
)
cm_test, test_accuracy = (
    confusion_matrix(y_test, y_pred),
    accuracy_score(y_test, y_pred),
)

# Report both matrices first, then both accuracies.
report = (
    ("Training Confusion Matrix:\n", cm_train),
    ("Test Confusion Matrix:\n", cm_test),
    ("Training Accuracy:", train_accuracy),
    ("Test Accuracy:", test_accuracy),
)
for heading, value in report:
    print(heading, value)


In [None]:
from matplotlib.colors import ListedColormap

# The classifier was fitted on standardized features, so the evaluation grid is
# built (and predicted on) in standardized space: 1 unit of margin around the
# training data, sampled every 0.01.
X1, X2 = np.meshgrid(
    np.arange(start=X_train[:, 0].min() - 1, stop=X_train[:, 0].max() + 1, step=0.01),
    np.arange(start=X_train[:, 1].min() - 1, stop=X_train[:, 1].max() + 1, step=0.01)
)
grid = np.c_[X1.ravel(), X2.ravel()]
Z = classifier.predict(grid).reshape(X1.shape)

# Map the grid back to original units before drawing. The scatter below uses
# unscaled values, so the filled regions must share that coordinate system;
# drawing them on the standardized X1/X2 would leave points and regions
# misaligned.
grid_orig = sc.inverse_transform(grid)
X1_orig = grid_orig[:, 0].reshape(X1.shape)
X2_orig = grid_orig[:, 1].reshape(X2.shape)

plt.figure(figsize=(10, 6))
# Class 0 is drawn red and class 1 green (classes as encoded by the LabelEncoder).
plt.contourf(X1_orig, X2_orig, Z, alpha=0.75, cmap=ListedColormap(('red', 'green')))
# Clip the axes to the predicted region so no unclassified margin is shown.
plt.xlim(X1_orig.min(), X1_orig.max())
plt.ylim(X2_orig.min(), X2_orig.max())

# Training points in original units, coloured by their true class.
plt.scatter(X_train_orig[:, 0], X_train_orig[:, 1], c=y_train, cmap=ListedColormap(('red', 'green')))
plt.title('Logistic Regression (Training set)')
# NOTE(review): axis names assume feature 0 is the credit score and feature 1
# the salary -- verify against the columns selected when X was built.
plt.xlabel('Credit Score')
plt.ylabel('Salary')
plt.show()


In [None]:
# Evaluation grid in standardized space (the space the classifier was fitted
# in): 1 unit of margin around the test data, sampled every 0.01.
X1, X2 = np.meshgrid(
    np.arange(start=X_test[:, 0].min() - 1, stop=X_test[:, 0].max() + 1, step=0.01),
    np.arange(start=X_test[:, 1].min() - 1, stop=X_test[:, 1].max() + 1, step=0.01)
)
grid = np.c_[X1.ravel(), X2.ravel()]
Z = classifier.predict(grid).reshape(X1.shape)

# Map the grid back to original units before drawing. The scatter below uses
# unscaled values, so the filled regions must share that coordinate system;
# drawing them on the standardized X1/X2 would leave points and regions
# misaligned.
grid_orig = sc.inverse_transform(grid)
X1_orig = grid_orig[:, 0].reshape(X1.shape)
X2_orig = grid_orig[:, 1].reshape(X2.shape)

plt.figure(figsize=(10, 6))
# Class 0 is drawn red and class 1 green (classes as encoded by the LabelEncoder).
plt.contourf(X1_orig, X2_orig, Z, alpha=0.75, cmap=ListedColormap(('red', 'green')))
# Clip the axes to the predicted region so no unclassified margin is shown.
plt.xlim(X1_orig.min(), X1_orig.max())
plt.ylim(X2_orig.min(), X2_orig.max())

# Test points in original units, coloured by their true class.
plt.scatter(X_test_orig[:, 0], X_test_orig[:, 1], c=y_test, cmap=ListedColormap(('red', 'green')))
plt.title('Logistic Regression (Test set)')
# NOTE(review): axis names assume feature 0 is the credit score and feature 1
# the salary -- verify against the columns selected when X was built.
plt.xlabel('Credit Score')
plt.ylabel('Salary')
plt.show()
