# Imports

In [1]:
import tensorflow as tf
# from tensorflow.python.framework.ops import disable_eager_execution 
# disable_eager_execution()
# from tensorflow.python.framework.ops import enable_eager_execution
# enable_eager_execution()
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Import common tensorflow layers and activations
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Layer
from tensorflow.keras.layers import Lambda, Multiply, Add, Rescaling
from tensorflow.keras.activations import relu, sigmoid, softmax
from tensorflow.keras import Model
from tensorflow.keras import Sequential
from tensorflow.keras import Input

from tabnet import TabNet
from tabnet import TabNetClassifier

# Tabnet experiment Hyperparams

In [3]:
BATCH_SIZE = 16384
LAMBDA = 0.0001
N_A = 64
N_D = 64
VIRTUAL_BATCH_SIZE = 512
BATCH_MOMENTUM = 0.7
N_STEPS = 5
GAMMA = 1.5
LEARNING_RATE = tf.keras.optimizers.schedules.ExponentialDecay(
    0.02,
    decay_steps=500,
    decay_rate=0.95
    )
OPTIMIZER = tf.keras.optimizers.Adam(LEARNING_RATE)

# Data Defn

In [4]:
# Load forest_cov data
forest_cov = pd.read_csv("./covtype.data")
float_cols = ['Elevation', 'Aspect', 'Slope', 'Hydrology_X', 'Hydrology_Y',
    'Roadways_X', 'Hillshade_9am', 'Hillshade_noon', 'Hillshade_3pm',
    'Firepoints_X']
bool_cols = [
    *[f"Wilderness_Area_{i}" for i in range(4)],
    *[f"Soil_Type_{i}" for i in range(40)]
    ]
response_cols = [
    'Cover_Type'
]
forest_cov.columns = [
    *float_cols,
    *bool_cols,
    *response_cols
    ]
outputs = ['Cover_Type']
response = 'Cover_Type'
forest_cov.drop([x for x in outputs if x != response], axis=1, inplace=True)


# Split data into train and test
data_train, data_test = train_test_split(forest_cov, test_size=0.3, random_state=42)
data_val, data_test = train_test_split(data_test, test_size=0.33, random_state=42)

# Split into X and y
X_train = data_train.drop(response, axis=1, errors='ignore')
y_train = data_train[response] - 1
X_val = data_val.drop(response, axis=1, errors='ignore')
y_val = data_val[response] - 1
X_test = data_test.drop(response, axis=1, errors='ignore')
y_test = data_test[response] - 1

# Fillna
for col in X_train.columns:
    if X_train[col].isna().sum() > 0:
        print(f"Column {col} has {X_train[col].isna().sum()} missing values")
    X_train[col].fillna(X_train[col].mean(), inplace=True)
    X_val[col].fillna(X_train[col].mean(), inplace=True)
    X_test[col].fillna(X_train[col].mean(), inplace=True)

# Scale input data - save scaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Make tensorflow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train[..., np.newaxis]))
train_dataset = train_dataset.shuffle(buffer_size=4048).batch(BATCH_SIZE, drop_remainder=True)
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val[..., np.newaxis]))
val_dataset = val_dataset.batch(BATCH_SIZE, drop_remainder=True)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test[..., np.newaxis]))
test_dataset = test_dataset.batch(BATCH_SIZE, drop_remainder=True)

  train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train[..., np.newaxis]))
2023-05-11 13:09:04.685081: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2023-05-11 13:09:04.685112: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (al3615): /proc/driver/nvidia/version does not exist
2023-05-11 13:09:04.685487: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
  val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val[..., np.newaxis]))
  test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test[..., np.newaxis]))


In [7]:
forest_cov.shape, forest_cov.columns, forest_cov[[f"Wilderness_Area_{i}" for i in range(4)]].sum(axis=1).value_counts(), forest_cov[[f"Soil_Type_{i}" for i in range(40)]].sum(axis=1).value_counts()

((581011, 55),
 Index(['Elevation', 'Aspect', 'Slope', 'Hydrology_X', 'Hydrology_Y',
        'Roadways_X', 'Hillshade_9am', 'Hillshade_noon', 'Hillshade_3pm',
        'Firepoints_X', 'Wilderness_Area_0', 'Wilderness_Area_1',
        'Wilderness_Area_2', 'Wilderness_Area_3', 'Soil_Type_0', 'Soil_Type_1',
        'Soil_Type_2', 'Soil_Type_3', 'Soil_Type_4', 'Soil_Type_5',
        'Soil_Type_6', 'Soil_Type_7', 'Soil_Type_8', 'Soil_Type_9',
        'Soil_Type_10', 'Soil_Type_11', 'Soil_Type_12', 'Soil_Type_13',
        'Soil_Type_14', 'Soil_Type_15', 'Soil_Type_16', 'Soil_Type_17',
        'Soil_Type_18', 'Soil_Type_19', 'Soil_Type_20', 'Soil_Type_21',
        'Soil_Type_22', 'Soil_Type_23', 'Soil_Type_24', 'Soil_Type_25',
        'Soil_Type_26', 'Soil_Type_27', 'Soil_Type_28', 'Soil_Type_29',
        'Soil_Type_30', 'Soil_Type_31', 'Soil_Type_32', 'Soil_Type_33',
        'Soil_Type_34', 'Soil_Type_35', 'Soil_Type_36', 'Soil_Type_37',
        'Soil_Type_38', 'Soil_Type_39', 'Cover_Type'],


In [6]:
test_dataset.element_spec

(TensorSpec(shape=(16384, 54), dtype=tf.float64, name=None),
 TensorSpec(shape=(16384, 1), dtype=tf.int64, name=None))

# Model creation and training

In [7]:
tabnet = TabNet(
    dim_features=X_train.shape[1],
    dim_attention=128,
    dim_output=64,
    sparsity=0.0001,
    num_steps=5,
    gamma=1.5,
    feature_shared_layers=2,
    feature_transformer_layers=2,
    output_activation=None
    )

tabnet.compile( 
    optimizer=tf.keras.optimizers.Adam(
        learning_rate=tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=0.02,
            decay_steps=500,
            decay_rate=0.95
        ), 
    ), 
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[
        tf.keras.metrics.CategoricalAccuracy(name="accuracy"),
    ]
)
tabnet.build(X_train[:1].shape)

In [8]:
tabnet.summary()

Model: "tab_net"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
shared_feature_layer (Shared multiple                  90624     
_________________________________________________________________
feat_0 (FeatureTransformer)  multiple                  190720    
_________________________________________________________________
feat_1 (FeatureTransformer)  multiple                  190720    
_________________________________________________________________
feat_2 (FeatureTransformer)  multiple                  190720    
_________________________________________________________________
feat_3 (FeatureTransformer)  multiple                  190720    
_________________________________________________________________
feat_4 (FeatureTransformer)  multiple                  190720    
_________________________________________________________________
feat_5 (FeatureTransformer)  multiple                  1907

In [9]:
X_train.shape[1]

54

In [10]:
import tensorflow as tf
import tabnet
from importlib import reload
reload(tabnet)

from tabnet import TabNetClassifier

online_implementation = TabNetClassifier(
    feature_columns=None,
    num_classes=7,
    output_dim=64,
    feature_dim=128,
    num_features=X_train.shape[1],
    num_decision_steps=5,
    relaxation_factor=1.5,
    sparsity_coefficient=0.0001,
    virtual_batch_size=512,
    norm_type="batch",
    batch_momentum=0.7,
)

online_implementation.compile(
    optimizer=tf.keras.optimizers.Adam(
        learning_rate=tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=0.02,
            decay_steps=500,
            decay_rate=0.95
        ),
    ),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=[
        tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
    ]
)
online_implementation(next(iter(train_dataset))[0])

[TabNet]: 64 features will be used for decision steps.


2023-05-05 17:56:01.339649: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 175697424 exceeds 10% of free system memory.
2023-05-05 17:56:01.983108: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


<tf.Tensor: shape=(16384, 7), dtype=float32, numpy=
array([[0.12565674, 0.16860135, 0.14593603, ..., 0.1298019 , 0.11464456,
        0.14293875],
       [0.14519764, 0.14022234, 0.14879064, ..., 0.13947473, 0.14394005,
        0.1384698 ],
       [0.14177747, 0.1417857 , 0.14354606, ..., 0.14288153, 0.14341688,
        0.14415672],
       ...,
       [0.14239503, 0.14264832, 0.14400764, ..., 0.14283682, 0.14187367,
        0.14437875],
       [0.14314249, 0.14363249, 0.14279087, ..., 0.14159256, 0.14391036,
        0.14236593],
       [0.1424368 , 0.14261298, 0.14288384, ..., 0.14241643, 0.14342001,
        0.14298274]], dtype=float32)>

In [11]:

history_online = online_implementation.fit(
    train_dataset,
    epochs=130000,
    validation_data=val_dataset,
    # callbacks=[early_stopping, lr_on_plateau]
)


2023-05-05 17:56:02.710597: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 175697424 exceeds 10% of free system memory.
2023-05-05 17:56:02.790012: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 175697424 exceeds 10% of free system memory.


Epoch 1/130000

InvalidArgumentError:  Input to reshape is a tensor with 3453696 values, but the requested shape requires a multiple of 131072
	 [[node tab_net_classifier/tab_net_1/transform_block/transformblock_bn_f1/Reshape (defined at home/alexander/projects/deep_learning_vs_gbdt/venv/lib/python3.9/site-packages/tabnet/tabnet.py:34) ]] [Op:__inference_train_function_15214]

Errors may have originated from an input operation.
Input Source operations connected to node tab_net_classifier/tab_net_1/transform_block/transformblock_bn_f1/Reshape:
 tab_net_classifier/tab_net_1/transform_block/transformblock_dense_f1/MatMul (defined at home/alexander/projects/deep_learning_vs_gbdt/venv/lib/python3.9/site-packages/tabnet/tabnet.py:33)

Function call stack:
train_function


In [None]:
# early_stopping = tf.keras.callbacks.EarlyStopping(patience=30, restore_best_weights=True, monitor="val_auc")
# lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(patience=3, factor=0.5, min_lr=1e-5)
history = tabnet.fit(
    train_dataset, 
    epochs=130000, 
    validation_data=val_dataset, 
    # callbacks=[early_stopping, lr_on_plateau]
    )

In [None]:
tabnet.history.history.keys()

In [None]:
# Plot history loss and RMSE for training and validation set; train solid line, validation dashed line
fig, (top_ax, bottom_ax) = plt.subplots(2, 1, figsize=(10, 10), sharex=True)

hist = tabnet.history.history

top_ax.plot(hist['loss'], label='train_loss', c='b')
top_ax.plot(hist['val_loss'], label='val_loss', linestyle='--', c='b')
second_ax = top_ax.twinx()
# Plot precision and recall on second axis in orange and red respectively
second_ax.plot(hist['precision'], label='train_precision', c='orange')
second_ax.plot(hist['val_precision'], label='val_precision', linestyle='--', c='orange')
second_ax.plot(hist['recall'], label='train_recall', c='r')
second_ax.plot(hist['val_recall'], label='val_recall', linestyle='--', c='r')
top_ax.set_ylabel('loss')
second_ax.set_ylabel('Precision/Recall')
# Merge top ax legend entries
handles, labels = top_ax.get_legend_handles_labels()
handles2, labels2 = second_ax.get_legend_handles_labels()
top_ax.legend(handles + handles2, labels + labels2)


bottom_ax.plot(hist['lr'], label='lr', c='g')
bottom_ax.set_xlabel('Epoch')
bottom_ax.set_ylabel('Learning rate')

# New plot with auc and accuracy
fig, ax = plt.subplots()
ax.plot(hist['auc'], label='train_auc', c='orange')
ax.plot(hist['val_auc'], label='val_auc', linestyle='--', c='orange')
twinax = ax.twinx()
twinax.plot(hist['binary_accuracy'], label='train_accuracy', c='r')
twinax.plot(hist['val_binary_accuracy'], label='val_accuracy', linestyle='--', c='r')
ax.set_xlabel('Epoch')
ax.set_ylabel('AUC')
twinax.set_ylabel('Accuracy')
# Merge legend entries
handles, labels = ax.get_legend_handles_labels()
twinhandles, twinlabels = twinax.get_legend_handles_labels()
ax.legend(handles + twinhandles, labels + twinlabels)




In [None]:
# Evaluate model on test set
tabnet.evaluate(test_dataset)