# Imports

In [6]:
%run Imports.ipynb
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Dataset identifier; it selects the sub-folder under ../pickle/2_FS/ that is read below.
name = 'Kred'

# Load the feature-subgroup mapping and the modelling frame for this dataset.
# NOTE(review): read_pickle executes arbitrary code on load — only use trusted files.
key_featsubgroups = pd.read_pickle(f'../pickle/2_FS/{name}/key_featsubgroups.pkl')
df = pd.read_pickle(f'../pickle/2_FS/{name}/2_df_new_.pkl')

In [2]:
# Share of rows per target label (class balance), followed by the frame dimensions.
print(df[target].value_counts().div(len(df)))
print('df_shape: ', df.shape)

arrears
1   0.646
0   0.354
Name: count, dtype: float64
df_shape:  (129457, 418)


# 2) Create Model prediction functions

## 2.1) Split dataset into train/testing while excluding demographic features

In [3]:
def split_data_4(df, key_featsubgroups=key_featsubgroups, target=target, test_size=0.2, random_state=42,
                 stratify=False):
    """
    Splits the dataset into training and testing sets while excluding demographic features.

    The demographic feature names are read from the row of `key_featsubgroups` whose
    'subgroup' column equals 'demo' (column 'list_features'); those columns and the
    target column are dropped from the feature matrix.

    Parameters:
    df (DataFrame): The dataset containing features and target variable.
    key_featsubgroups (DataFrame): A mapping of feature subgroups with the columns
        'subgroup' and 'list_features'.
    target (str): The name of the target variable.
    test_size (float, optional): The proportion of the dataset to allocate for testing. Default is 0.2.
    random_state (int, optional): Random seed for reproducibility. Default is 42.
    stratify (bool, optional): If True, the split preserves the class proportions of the
        target in both partitions. Default is False (plain shuffled split, as before).

    Returns:
    tuple: X_train, X_test, y_train, y_test (training and testing datasets)

    Raises:
    IndexError: If `key_featsubgroups` has no row for the 'demo' subgroup. The exception
        type matches what the bare `.values[0]` lookup used to raise; only the message
        is more explicit.
    """

    # Extract demographic features
    demo_rows = key_featsubgroups.loc[key_featsubgroups['subgroup'] == 'demo', 'list_features']
    if demo_rows.empty:
        raise IndexError("key_featsubgroups has no row with subgroup == 'demo'")
    demo_feat = demo_rows.values[0]

    # Normalise to a plain list so the concatenation below is always list + list
    # (a numpy array or tuple would otherwise fail or broadcast; a bare string is one feature).
    demo_feat = [demo_feat] if isinstance(demo_feat, str) else list(demo_feat)
    print("Demographic Features:", demo_feat)

    # Separate features (X) and target variable (y), excluding demographic features
    X = df.drop(columns=[target] + demo_feat)
    y = df[target]

    # Split into train/test partitions sized by `test_size`; stratify on the target only on request
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state,
        stratify=y if stratify else None,
    )

    # Print dataset shapes
    print(f"Training Features Shape: {X_train.shape}")
    print(f"Training Labels Shape: {y_train.shape}")
    print(f"Testing Features Shape: {X_test.shape}")
    print(f"Testing Labels Shape: {y_test.shape}")

    return X_train, X_test, y_train, y_test

## 2.2) Model training and prediction

In [4]:
# Split with the function defaults (test_size=0.2, random_state=42); the demographic
# columns are removed inside split_data_4, so x_* hold non-demographic features only.
x_train, x_test, y_train, y_test = split_data_4(df=df)

Demographic Features: ['clientdata.demo.gender', 'clientdata.demo.age_year', 'clientdata.demo.age_month', 'clientdata.demo.children', 'clientdata.demo.children_singleparent', 'clientdata.demo.maritalstatus_expand_SINGLE', 'clientdata.demo.maritalstatus_expand_MARRIED', 'clientdata.demo.maritalstatus_expand_DIVORCED', 'clientdata.demo.maritalstatus_expand_WIDOWED', 'clientdata.demo.maritalstatus_expand_newvalue', 'clientdata.demo.maritalstatus_woe']
Training Features Shape: (103565, 406)
Training Labels Shape: (103565,)
Testing Features Shape: (25892, 406)
Testing Labels Shape: (25892,)


# 7) Neural Network

In [11]:
def nn(x_train, n_layers: int = 2, units: int = 64, loss='binary_crossentropy', metrics=['accuracy']):
    """
    Builds and compiles a feed-forward binary classifier with a variable number of
    hidden Dense layers.

    Architecture: input of width `x_train.shape[1]` -> `n_layers` Dense(`units`, relu)
    layers -> Dense(1, sigmoid). The model is compiled with the Adam optimizer.

    Parameters:
    x_train: Training features; only `x_train.shape[1]` (the number of input features)
        is read, no data is consumed here.
    n_layers (int): Number of hidden Dense layers. 0 yields a model with no hidden
        layer (input connected directly to the sigmoid output).
    units (int): Number of neurons in each hidden layer.
    loss: Loss passed to `model.compile`. Default is 'binary_crossentropy'.
    metrics: Metrics passed to `model.compile`. Default is ['accuracy'].

    Returns:
    model: The compiled Keras model.

    Raises:
    ValueError: If `n_layers` is negative.
    """
    if n_layers < 0:
        raise ValueError(f"n_layers must be >= 0, got {n_layers}")

    # Hand Keras its own copy so the shared default list in the signature can never be
    # mutated or retained across calls.
    if isinstance(metrics, (list, tuple)):
        metrics = list(metrics)

    n_features = x_train.shape[1]

    # Declare the input explicitly instead of passing `input_shape` to the first Dense
    # layer (Keras 3 warns about the latter for Sequential models).
    layers = [tf.keras.Input(shape=(n_features,))]

    # Exactly `n_layers` hidden layers, so the count matches the documented meaning
    # (previously n_layers <= 0 still produced one hidden layer).
    layers.extend(Dense(units, activation='relu') for _ in range(n_layers))

    # Output layer (binary classification)
    layers.append(Dense(1, activation='sigmoid'))

    # Create and compile model
    model = Sequential(layers)
    model.compile(optimizer='adam', loss=loss, metrics=metrics)

    return model

## 7.1) NN without tuning (untrained single linear unit)

In [12]:
# Minimal baseline: one Dense unit with no activation; the model is neither compiled
# nor trained in this cell.
# The input width comes from the training frame rather than a hard-coded 406, so the
# cell keeps working when the set of features changes.
model = Sequential([
    Dense(1, input_shape=(x_train.shape[1],))
])

model.summary()

In [13]:
# Forward pass of the untrained baseline over the whole training frame in one direct
# call (no batching, no fitting); the single Dense(1) layer yields one value per row.
# NOTE(review): `output` is not used in the cells visible here — confirm it is still needed.
output = model(x_train)

## 7.2) NN with tuning

In [15]:
# Deeper variant built with nn(): 10 hidden layers at the default width (units=64).
n = nn(x_train=x_train, n_layers=10)

In [16]:
# Show the architecture, then train for 10 epochs with 20% of the rows passed in held
# out for validation. The fit call stays the last expression so its History is displayed.
# NOTE(review): the recorded loss starts in the thousands and only settles near 0.65
# after several epochs — this suggests the inputs are unscaled; consider standardising
# the features before fitting.
n.summary()
n.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
)

Epoch 1/10
[1m2590/2590[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - accuracy: 0.5587 - loss: 3471.3469 - val_accuracy: 0.6396 - val_loss: 673.3017
Epoch 2/10
[1m2590/2590[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.5973 - loss: 506.1803 - val_accuracy: 0.5248 - val_loss: 0.7627
Epoch 3/10
[1m2590/2590[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.6096 - loss: 479.8875 - val_accuracy: 0.6417 - val_loss: 0.6803
Epoch 4/10
[1m2590/2590[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.6333 - loss: 32.1241 - val_accuracy: 0.6419 - val_loss: 49.3318
Epoch 5/10
[1m2590/2590[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.6477 - loss: 1.6743 - val_accuracy: 0.6422 - val_loss: 0.6529
Epoch 6/10
[1m2590/2590[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.6446 - loss: 0.6538 - val_accuracy: 0.6420 - val_loss: 0.6522
Epoch

<keras.src.callbacks.history.History at 0x1f9cfebbb80>