In [4]:
#Code Source : DeepSeek 100%
#----------------------------

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif, mutual_info_classif
from sklearn.ensemble import RandomForestClassifier
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, callbacks

# Load your dataset (replace with your actual data loading)
df = pd.read_csv('loan_data.csv')

# One-hot encode the categorical 'purpose' column
df = pd.get_dummies(df, columns=['purpose'], drop_first=True)

X = df.drop('not.fully.paid', axis=1)
y = df['not.fully.paid']



# For demonstration, I'll assume X and y are loaded

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Feature selection methods
def select_features(X, y, method='all', k=10):
    if method == 'anova':
        selector = SelectKBest(f_classif, k=k)
    elif method == 'mutual_info':
        selector = SelectKBest(mutual_info_classif, k=k)
    elif method == 'random_forest':
        model = RandomForestClassifier(n_estimators=100)
        model.fit(X, y)
        importances = model.feature_importances_
        indices = np.argsort(importances)[-k:]
        return indices
    else:  # 'all' - combine methods
        # ANOVA
        selector_anova = SelectKBest(f_classif, k=k)
        selector_anova.fit(X, y)
        anova_idx = selector_anova.get_support(indices=True)

        # Mutual Info
        selector_mi = SelectKBest(mutual_info_classif, k=k)
        selector_mi.fit(X, y)
        mi_idx = selector_mi.get_support(indices=True)

        # Random Forest
        model = RandomForestClassifier(n_estimators=100)
        model.fit(X, y)
        importances = model.feature_importances_
        rf_idx = np.argsort(importances)[-k:]

        # Combine all selected features
        combined_idx = np.union1d(anova_idx, mi_idx)
        combined_idx = np.union1d(combined_idx, rf_idx)
        return combined_idx

    selector.fit(X, y)
    return selector.get_support(indices=True)

# Select top features (adjust k based on your dataset size)
selected_features = select_features(X_train_scaled, y_train, method='all', k=15)

X_train_selected = X_train_scaled[:, selected_features]
X_test_selected = X_test_scaled[:, selected_features]




In [6]:
#----------- CREATE MODEL
# model architecture

def create_model(input_shape, output_units=1):
    model = keras.Sequential([
        layers.Input(shape=(input_shape,)),

        # First hidden layer with batch normalization
        layers.Dense(128, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.3),

        # Second hidden layer
        layers.Dense(64, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.2),

        # Third hidden layer
        layers.Dense(32, activation='relu'),
        layers.BatchNormalization(),

        # Output layer (adjust activation based on problem type)
        layers.Dense(output_units, activation='sigmoid' if output_units == 1 else 'softmax')
    ])

    # Compile model (adjust loss based on problem type)
    loss_fn = 'binary_crossentropy' if output_units == 1 else 'sparse_categorical_crossentropy'
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss=loss_fn,
        metrics=['accuracy']
    )

    return model


In [7]:
# Create model
model = create_model(input_shape=X_train_selected.shape[1],
                    output_units=1 if len(np.unique(y_train)) == 2 else len(np.unique(y_train)))


In [8]:
# training
# Define callbacks
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=15,
    restore_best_weights=True,
    min_delta=0.001
)

reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_lr=1e-6
)


In [9]:
# Train the model
history = model.fit(
    X_train_selected, y_train,
    validation_data=(X_test_selected, y_test),
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping, reduce_lr],
    verbose=1
)


Epoch 1/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 16ms/step - accuracy: 0.6579 - loss: 0.6707 - val_accuracy: 0.8377 - val_loss: 0.4235 - learning_rate: 0.0010
Epoch 2/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.8396 - loss: 0.4346 - val_accuracy: 0.8419 - val_loss: 0.4162 - learning_rate: 0.0010
Epoch 3/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8386 - loss: 0.4247 - val_accuracy: 0.8387 - val_loss: 0.4082 - learning_rate: 0.0010
Epoch 4/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8411 - loss: 0.4096 - val_accuracy: 0.8377 - val_loss: 0.4059 - learning_rate: 0.0010
Epoch 5/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8358 - loss: 0.4235 - val_accuracy: 0.8366 - val_loss: 0.4075 - learning_rate: 0.0010
Epoch 6/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [10]:

# eVAluate
# Evaluate the model
loss, accuracy = model.evaluate(X_test_selected, y_test, verbose=0)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")


Test Loss: 0.4046
Test Accuracy: 0.8387


In [11]:

# Hyperparameter tuning (optional)
def build_model(hp):
    model = keras.Sequential()
    model.add(layers.Input(shape=(X_train_selected.shape[1],)))

    # Tune number of layers
    for i in range(hp.Int('num_layers', 1, 4)):
        model.add(layers.Dense(
            units=hp.Int(f'units_{i}', min_value=32, max_value=256, step=32),
            activation='relu'
        ))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(
            hp.Float(f'dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)
        ))

    model.add(layers.Dense(1 if len(np.unique(y_train)) == 2 else len(np.unique(y_train)),
              activation='sigmoid' if len(np.unique(y_train)) == 2 else 'softmax'))

    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Float('learning_rate', 1e-4, 1e-2, sampling='log')),
        loss='binary_crossentropy' if len(np.unique(y_train)) == 2 else 'sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    return model



In [13]:
!pip install keras_tuner

Collecting keras_tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras_tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras_tuner
Successfully installed keras_tuner-1.4.7 kt-legacy-1.0.5


In [14]:

# Uncomment to run hyperparameter tuning
import keras_tuner as kt
tuner = kt.BayesianOptimization(
    build_model,
    objective='val_loss',
    max_trials=20,
    directory='tuning',
    project_name='feature_selected_model'
)
tuner.search(X_train_selected, y_train, epochs=50, validation_data=(X_test_selected, y_test))
best_model = tuner.get_best_models(num_models=1)[0]


Trial 3 Complete [00h 01m 16s]
val_loss: 0.40037262439727783

Best val_loss So Far: 0.40037262439727783
Total elapsed time: 00h 03m 34s

Search: Running Trial #4

Value             |Best Value So Far |Hyperparameter
4                 |4                 |num_layers
32                |96                |units_0
0.5               |0.3               |dropout_0
0.00010027        |0.00099596        |learning_rate
128               |256               |units_1
0.2               |0.3               |dropout_1
32                |32                |units_2
0.1               |0.2               |dropout_2
96                |32                |units_3
0.2               |0.1               |dropout_3

Epoch 1/50
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 25ms/step - accuracy: 0.5100 - loss: 0.8634 - val_accuracy: 0.7249 - val_loss: 0.6104
Epoch 2/50
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6106 - loss: 0.7039 - val_accuracy: 0.7991

KeyboardInterrupt: 