In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split, validation_curve, learning_curve
from sklearn.preprocessing import StandardScaler, LabelEncoder
import shap
from sklearn.metrics import confusion_matrix, precision_recall_curve, roc_auc_score, classification_report, confusion_matrix, roc_curve, auc, RocCurveDisplay
import matplotlib.pyplot as plt

In [None]:
# Read the loan-approval dataset from the CSV file in the working directory
data = pd.read_csv(filepath_or_buffer='loan_approval_dataset.csv')

In [None]:
# Sanity check of the loaded frame: print the column summary, then show the
# first five rows (the bare `data.head()` is what the cell renders as output).
data.info()
data.head()

In [None]:
# Normalise the text-valued columns: trim surrounding whitespace and lower-case
# the labels so they can be matched against fixed category names afterwards.
for column in ('education', 'self_employed', 'loan_status'):
    data[column] = data[column].str.strip().str.lower()

In [None]:
# Encode the categorical columns as integers.
# The labels are expected to be stripped and lower-cased already.
data['education'] = data['education'].map({'graduate': 1, 'not graduate': 0})
data['self_employed'] = data['self_employed'].map({'yes': 1, 'no': 0})
data['loan_status'] = data['loan_status'].map({'approved': 1, 'rejected': 0})

# `Series.map` turns every label that is not a key of the dict into NaN without
# complaining (this also happens when this cell is executed a second time on
# the already-encoded columns). Fail loudly instead of training on NaN values.
unmapped_counts = data[['education', 'self_employed', 'loan_status']].isna().sum()
if unmapped_counts.any():
    raise ValueError(
        "Unmapped category labels produced NaN values "
        f"(reload the data before re-running this cell): {unmapped_counts.to_dict()}"
    )

# Separate the target from the features (the identifier column is not a feature)
X_raw = data.drop(['loan_id', 'loan_status'], axis=1)
y = data['loan_status']

# Split BEFORE scaling so that the test rows never influence the scaler
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_raw, y, test_size=0.2, random_state=42, stratify=y
)

# Standardisation: the mean / std are estimated on the training rows only and
# then applied unchanged to the test rows (and later to any new sample).
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train_raw), columns=X_raw.columns, index=X_train_raw.index)
X_test = pd.DataFrame(scaler.transform(X_test_raw), columns=X_raw.columns, index=X_test_raw.index)

# Full standardised feature frame, kept under the original name `X`
# (scaled with the training statistics).
X = pd.DataFrame(scaler.transform(X_raw), columns=X_raw.columns, index=X_raw.index)

# Convert to PyTorch tensors; targets get shape (n, 1) to match the model output
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

In [None]:
# Neural-network architecture of the loan-approval classifier
class CreditApprovalNet(nn.Module):
    """Small fully connected binary classifier.

    Layer sizes are ``n_features -> 8 -> 16 -> 1``. ReLU is applied after the
    first two linear layers and a sigmoid after the last one, so ``forward``
    returns one value in [0, 1] per input row.

    Args:
        n_features: Number of input features per sample. Defaults to 11, the
            width that was previously hard-coded, so ``CreditApprovalNet()``
            builds exactly the same network as before.
    """

    def __init__(self, n_features: int = 11):
        super().__init__()
        self.fc1 = nn.Linear(n_features, 8)  # input features -> 8 hidden units
        self.fc2 = nn.Linear(8, 16)          # 8 -> 16 hidden units
        self.fc3 = nn.Linear(16, 1)          # 16 -> single output unit
        self.relu = nn.ReLU()                # activation for the hidden layers
        self.sigmoid = nn.Sigmoid()          # squashes the output into [0, 1]

    def forward(self, x):
        """Map a ``(batch, n_features)`` float tensor to a ``(batch, 1)`` tensor of sigmoid outputs."""
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x

In [None]:
# Seed PyTorch's global RNG so that the weight initialisation and the shuffling
# order of the DataLoader are the same on every Restart & Run All.
torch.manual_seed(42)

# Model, loss function and optimizer
model = CreditApprovalNet()
criterion = nn.BCELoss()  # binary cross-entropy on the model's sigmoid outputs
optimizer = optim.Adam(model.parameters(), lr=0.001)

# === Training ===
num_epochs = 50
batch_size = 16
train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0  # sum of per-sample losses seen in this epoch

    for batch_X, batch_y in train_loader:
        # Reset the gradients accumulated by the previous step
        optimizer.zero_grad()

        # Forward pass
        outputs = model(batch_X)

        # Loss for this mini-batch (mean over the batch)
        loss = criterion(outputs, batch_y)

        # Backward pass
        loss.backward()

        # Parameter update
        optimizer.step()

        # Weight by the batch size: the last batch may be smaller than the others
        running_loss += loss.item() * batch_X.size(0)

    # Report the mean loss over the whole epoch rather than the loss of whichever
    # mini-batch happened to come last, which is a noisy single-batch value.
    epoch_loss = running_loss / len(train_dataset)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}")

In [None]:
# Score the held-out test set with the network in evaluation mode and with
# gradient tracking switched off.
model.eval()
with torch.no_grad():
    y_pred_prob = model(X_test_tensor).numpy()

# Hard class labels: outputs of at least 0.5 count as class 1
y_pred = (y_pred_prob >= 0.5).astype(int)

# Text metrics: heading and body of each report, one per line
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

In [None]:
# ROC curve of the test-set scores; the dashed red diagonal is the chance level
fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
roc_auc = auc(fpr, tpr)

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, label=f'ROC Curve (AUC = {roc_auc:.2f})')
ax.plot([0, 1], [0, 1], 'r--')
ax.set(title='ROC Curve', xlabel='False Positive Rate', ylabel='True Positive Rate')
ax.legend()
plt.show()


In [None]:
# Confusion matrix of the thresholded test predictions, drawn as an annotated heatmap
cm = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots(figsize=(6, 5))
ax.set_title("Confusion Matrix")
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap='Blues',
    xticklabels=['Rejected', 'Approved'],
    yticklabels=['Rejected', 'Approved'],
    ax=ax,
)
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
plt.show()

In [None]:
# Precision-recall curve of the test-set scores (one marker per threshold)
precision, recall, _ = precision_recall_curve(y_test, y_pred_prob)

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(recall, precision, marker='.')
ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
ax.set_title('Precision-Recall Curve')
ax.grid()
plt.show()

In [None]:
# Cumulative gains chart: rank the test samples by predicted score (highest
# first) and track which share of all positives is found in the top-k samples.
sorted_indices = np.argsort(-y_pred_prob.flatten())
sorted_y_test = y_test.values[sorted_indices]
cumulative_positive = np.cumsum(sorted_y_test)

# cumulative_positive[k - 1] is the number of positives among the top-k samples,
# so it belongs at x = k / n, not (k - 1) / n. Prepending the origin makes the
# curve run from (0, 0) to (1, 1), the same end points as the random baseline.
n_samples = len(cumulative_positive)
sample_fraction = np.arange(n_samples + 1) / n_samples
gain = np.concatenate(([0.0], cumulative_positive / cumulative_positive[-1]))

plt.figure(figsize=(8, 6))
plt.plot(sample_fraction, gain, label='Model', color='blue')
plt.plot([0, 1], [0, 1], linestyle='--', label='Random', color='red')
plt.xlabel('Percentage of Samples')
plt.ylabel('Percentage of Positives')
plt.title('Cumulative Gains Chart')
plt.legend()
plt.grid()
plt.show()

In [None]:
# Distribution of the predicted probabilities, split by true class.
# Both classes use the same 20 equal-width bins over [0, 1] (the range of the
# model's sigmoid output). With a separate `bins=20` per call, each histogram
# would get its own bin edges, so the overlaid bar heights of the two classes
# would not be comparable.
probabilities = y_pred_prob.flatten()
bin_edges = np.linspace(0.0, 1.0, 21)

plt.figure(figsize=(8, 6))
plt.hist(probabilities[y_test.values == 1], bins=bin_edges, alpha=0.7, label='Approved', color='green')
plt.hist(probabilities[y_test.values == 0], bins=bin_edges, alpha=0.7, label='Rejected', color='red')
plt.xlabel('Predicted Probability')
plt.ylabel('Frequency')
plt.title('Distribution of Predicted Probabilities')
plt.legend()
plt.show()

In [None]:
# Persist only the learned parameters (the state dict), not the whole module object
torch.save(obj=model.state_dict(), f='credit_approval_net.pth')

In [None]:
# Rebuild the network from the saved parameters and switch it to evaluation
# mode for prediction on new data.
saved_weights = torch.load('credit_approval_net.pth')
model = CreditApprovalNet()
model.load_state_dict(saved_weights)
model.eval()

In [None]:
# Example applicant: one row, each feature stored as a single-element list so
# that the dict converts directly into a one-row DataFrame.
custom_data = {
    feature: [value]
    for feature, value in (
        ('no_of_dependents', 3),
        ('education', 1),
        ('self_employed', 0),
        ('income_annum', 7000000),
        ('loan_amount', 20000000),
        ('loan_term', 15),
        ('cibil_score', 750),
        ('residential_assets_value', 5000000),
        ('commercial_assets_value', 3000000),
        ('luxury_assets_value', 15000000),
        ('bank_asset_value', 8000000),
    )
}

# Apply the already-fitted scaler, then hand the row to the network as float32
custom_data_df = pd.DataFrame(custom_data)
custom_data_scaled = scaler.transform(custom_data_df)
custom_data_tensor = torch.tensor(custom_data_scaled, dtype=torch.float32)

# Prediction (no gradients needed for inference)
with torch.no_grad():
    custom_prediction_prob = model(custom_data_tensor).item()

# Threshold at 0.5: 1 = approved, 0 = rejected
custom_prediction = int(custom_prediction_prob >= 0.5)

status = ('Отклонено', 'Одобрено')[custom_prediction]
print(f"Результат предсказания для пользовательских данных: {status}")
print(f"Вероятность: {custom_prediction_prob:.2f} для одобрения")