# Imports

In [12]:
%run Imports.ipynb
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

name = 'Kred'

key_featsubgroups = pd.read_pickle('../pickle/2_FS/' + name + '/key_featsubgroups.pkl')
df = pd.read_pickle('../pickle/2_FS/' + name + '/2_df_new_.pkl')

In [3]:
print(df[target].value_counts()/df.shape[0])
print('df_shape: ', df.shape)

arrears
1   0.646
0   0.354
Name: count, dtype: float64
df_shape:  (129457, 418)


# 2) Create Model prediction functions

## 2.1) Split dataset into train/testing while excluding demographic features

In [4]:
def split_data_4(df, key_featsubgroups=key_featsubgroups, target=target, test_size=0.2, random_state=42):
    """
    Splits the dataset into training and testing sets while excluding demographic features.

    Parameters:
    df (DataFrame): The dataset containing features and target variable.
    key_featsubgroups (DataFrame): A mapping of feature subgroups.
    target (str): The name of the target variable.
    test_size (float, optional): The proportion of the dataset to allocate for testing. Default is 0.2.
    random_state (int, optional): Random seed for reproducibility. Default is 42.

    Returns:
    tuple: X_train, X_test, y_train, y_test (training and testing datasets)
    """

    # Extract demographic features
    demo_feat = key_featsubgroups.loc[key_featsubgroups['subgroup'] == 'demo', 'list_features'].values[0]
    print("Demographic Features:", demo_feat)

    # Separate features (X) and target variable (y), excluding demographic features
    X = df.drop(columns=[target] + demo_feat)
    y = df[target]

    # Split the dataset into training (80%) and testing (20%) sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Print dataset shapes
    print(f"Training Features Shape: {X_train.shape}")
    print(f"Training Labels Shape: {y_train.shape}")
    print(f"Testing Features Shape: {X_test.shape}")
    print(f"Testing Labels Shape: {y_test.shape}")

    return X_train, X_test, y_train, y_test

## 2.2) Model training and prediction

In [5]:
x_train, x_test, y_train, y_test = split_data_4(df)

Demographic Features: ['clientdata.demo.gender', 'clientdata.demo.age_year', 'clientdata.demo.age_month', 'clientdata.demo.children', 'clientdata.demo.children_singleparent', 'clientdata.demo.maritalstatus_expand_SINGLE', 'clientdata.demo.maritalstatus_expand_MARRIED', 'clientdata.demo.maritalstatus_expand_DIVORCED', 'clientdata.demo.maritalstatus_expand_WIDOWED', 'clientdata.demo.maritalstatus_expand_newvalue', 'clientdata.demo.maritalstatus_woe']
Training Features Shape: (103565, 406)
Training Labels Shape: (103565,)
Testing Features Shape: (25892, 406)
Testing Labels Shape: (25892,)


# 7) Neural Network

In [None]:
def nn(x_train, n_layers: int = 2, units: int = 64, loss='binary_crossentropy', metrics=['accuracy', 'mse']):
    """
    Builds a neural network with a variable number of hidden Dense layers.

    Parameters:
    n_layers (int): Number of hidden Dense layers.
    units (int): Number of neurons in each hidden layer.

    Returns:
    model: The compiled Keras model.
    """
    layers = []

    # Add the first hidden layer with input shape
    layers.append(Dense(units, activation='relu', input_shape=(x_train.shape[1],)))

    # Add (n_layers - 1) more hidden layers
    for _ in range(n_layers - 1):
        layers.append(Dense(units, activation='relu'))

    # Add output layer (binary classification)
    layers.append(Dense(1, activation='sigmoid'))

    # Create and compile model
    model = Sequential(layers)
    model.compile(optimizer='adam', loss=loss, metrics=metrics, )

    return model

## 7.1) NN without tuning 

In [7]:
model = Sequential([
    Dense(1, input_shape=(406,))
])

model.summary()

E0000 00:00:1748435335.916288 1542535 cuda_executor.cc:1228] INTERNAL: CUDA Runtime error: Failed call to cudaGetRuntimeVersion: Error loading CUDA libraries. GPU will not be used.: Error loading CUDA libraries. GPU will not be used.
E0000 00:00:1748435335.917306 1542535 cuda_executor.cc:1228] INTERNAL: CUDA Runtime error: Failed call to cudaGetRuntimeVersion: Error loading CUDA libraries. GPU will not be used.: Error loading CUDA libraries. GPU will not be used.
E0000 00:00:1748435335.918241 1542535 cuda_executor.cc:1228] INTERNAL: CUDA Runtime error: Failed call to cudaGetRuntimeVersion: Error loading CUDA libraries. GPU will not be used.: Error loading CUDA libraries. GPU will not be used.
E0000 00:00:1748435335.919188 1542535 cuda_executor.cc:1228] INTERNAL: CUDA Runtime error: Failed call to cudaGetRuntimeVersion: Error loading CUDA libraries. GPU will not be used.: Error loading CUDA libraries. GPU will not be used.
W0000 00:00:1748435335.972762 1542535 gpu_device.cc:2341] Cannot

In [8]:
output = model(x_train)

## 7.2) NN with tuning

In [None]:
n = nn(x_train, n_layers=100, )

In [11]:
n.summary()
n.fit(x_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
[1m2590/2590[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 13ms/step - accuracy: 0.6440 - loss: 0.6531 - val_accuracy: 0.6419 - val_loss: 0.6523
Epoch 2/10
[1m2590/2590[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 12ms/step - accuracy: 0.6467 - loss: 0.6497 - val_accuracy: 0.6419 - val_loss: 0.6523
Epoch 3/10
[1m2590/2590[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 12ms/step - accuracy: 0.6472 - loss: 0.6493 - val_accuracy: 0.6419 - val_loss: 0.6528
Epoch 4/10
[1m2590/2590[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 12ms/step - accuracy: 0.6468 - loss: 0.6497 - val_accuracy: 0.6419 - val_loss: 0.6529
Epoch 5/10
[1m2590/2590[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 13ms/step - accuracy: 0.6477 - loss: 0.6492 - val_accuracy: 0.6419 - val_loss: 0.6529
Epoch 6/10
[1m2590/2590[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 12ms/step - accuracy: 0.6460 - loss: 0.6502 - val_accuracy: 0.6419 - val_loss: 0.6523
Epoc

<keras.src.callbacks.history.History at 0x7f760c333e10>