In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# RNN

In [1]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import roc_auc_score, classification_report

In [3]:
# Укажем путь к данным
DATA_PATH = r"/content/drive/MyDrive/home-credit-default-risk"

# Загружаем данные
app_train = pd.read_csv(os.path.join(DATA_PATH, "application_train.csv"))

# Выбираем только числовые признаки
num_features = app_train.select_dtypes(include=[np.number]).drop(columns=["TARGET", "SK_ID_CURR"])

# Заполняем пропущенные значения медианными
imputer = SimpleImputer(strategy='median')
num_features_imputed = pd.DataFrame(imputer.fit_transform(num_features), columns=num_features.columns)

# Добавляем новые признаки
num_features_imputed['CREDIT_INCOME_RATIO'] = num_features_imputed['AMT_CREDIT'] / (num_features_imputed['AMT_INCOME_TOTAL'] + 1)
num_features_imputed['ANNUITY_INCOME_RATIO'] = num_features_imputed['AMT_ANNUITY'] / (num_features_imputed['AMT_INCOME_TOTAL'] + 1)
num_features_imputed['DAYS_EMPLOYED_RATIO'] = num_features_imputed['DAYS_EMPLOYED'] / (num_features_imputed['DAYS_BIRTH'] + 1)

In [4]:
# Масштабируем данные
scaler = StandardScaler()
X = scaler.fit_transform(num_features_imputed)
y = app_train["TARGET"]

# Разделение на обучающую и тестовую выборки
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

In [5]:
# Создание модели нейронной сети
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.3),  # Dropout для борьбы с переобучением
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')  # Выходной слой для бинарной классификации
])

# Компиляция модели
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['AUC'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [6]:
# Обучение модели
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20, batch_size=512, verbose=1)

Epoch 1/20
[1m481/481[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 15ms/step - AUC: 0.6234 - loss: 0.3260 - val_AUC: 0.7228 - val_loss: 0.2568
Epoch 2/20
[1m481/481[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - AUC: 0.7025 - loss: 0.2652 - val_AUC: 0.7281 - val_loss: 0.2547
Epoch 3/20
[1m481/481[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - AUC: 0.7099 - loss: 0.2605 - val_AUC: 0.7330 - val_loss: 0.2534
Epoch 4/20
[1m481/481[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 14ms/step - AUC: 0.7208 - loss: 0.2586 - val_AUC: 0.7348 - val_loss: 0.2531
Epoch 5/20
[1m481/481[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 9ms/step - AUC: 0.7225 - loss: 0.2575 - val_AUC: 0.7366 - val_loss: 0.2524
Epoch 6/20
[1m481/481[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 14ms/step - AUC: 0.7259 - loss: 0.2546 - val_AUC: 0.7366 - val_loss: 0.2522
Epoch 7/20
[1m481/481[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/

In [7]:
# Предсказания
y_prob = model.predict(X_test).flatten()
y_pred = (y_prob > 0.5).astype(int)

[1m1922/1922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step


In [8]:
# Оценка качества модели
auc_score = roc_auc_score(y_test, y_prob)
print(f"AUC-ROC Score (Neural Network): {auc_score:.4f}")
print(classification_report(y_test, y_pred))

AUC-ROC Score (Neural Network): 0.7391
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     56538
           1       0.43      0.00      0.00      4965

    accuracy                           0.92     61503
   macro avg       0.68      0.50      0.48     61503
weighted avg       0.88      0.92      0.88     61503

