In [1]:
from datetime import datetime

import numpy as np
import pandas as pd
import tensorflow
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score
from sklearn.model_selection import train_test_split

import tabnet_utils
from tabnet.autoencoder import TabNetAutoencoder
from tabnet.classifier import TabNetClassifier

In [11]:
# Load the UJIndoorLoc training CSV and shuffle its rows once.
# The shuffle is seeded so the row order is identical on every run.
# NOTE(review): the CSV location is an absolute, machine-specific path —
# consider reading it from a configurable data directory.
df = pd.read_csv('/home/karim/Research/DSAA2022/UJIndoorLoc/trainingData.csv')
data = df.sample(frac=1, random_state=42).reset_index(drop=True)

In [12]:
# Peek at the first five shuffled rows to sanity-check the load.
data.head(n=5)

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP520,LONGITUDE,LATITUDE,FLOOR,BUILDINGID,SPACEID,RELATIVEPOSITION,USERID,PHONEID,TIMESTAMP
0,100,100,100,100,100,100,100,100,100,100,...,100,-7510.613928,4864873.0,2,1,9,1,4,18,1371736047
1,100,100,100,100,100,100,100,100,100,100,...,100,-7339.153887,4864749.0,3,2,242,1,2,23,1371716926
2,100,100,100,100,100,100,100,100,100,100,...,100,-7310.70086,4864801.0,1,2,134,1,7,6,1371723203
3,100,100,100,100,100,100,100,100,100,100,...,100,-7560.377608,4864859.0,1,1,108,1,11,13,1371720906
4,100,100,100,100,100,100,100,100,100,100,...,100,-7321.027693,4864812.0,3,2,126,1,11,13,1371716554


In [13]:
# Count how many samples carry each building label.
building_labels = data['BUILDINGID']
building_labels.value_counts()

2    9492
0    5249
1    5196
Name: BUILDINGID, dtype: int64

In [14]:
# Drop the per-sample metadata columns that are not passed on to the model.
# `errors='ignore'` keeps the cell safe to re-run: on a second execution the
# columns are already gone and the drop becomes a no-op instead of a KeyError.
data = data.drop(
    columns=['SPACEID', 'RELATIVEPOSITION', 'USERID', 'PHONEID', 'TIMESTAMP'],
    errors='ignore',
)

In [15]:
# Infer a type for every column; only the second return value (looked up later
# by column name) is kept, the first one is discarded.
_, types = tabnet_utils.convert_data_types(data)

# Features are every column except the last; the label is the last column
# (BUILDINGID once the metadata columns have been dropped).
# NOTE(review): the feature matrix therefore still contains LONGITUDE, LATITUDE
# and FLOOR next to the WAP readings — confirm this is intended, since the
# coordinates may largely determine the building on their own.
# Both splits use a fixed seed so the same rows land in the same partition for a
# given row order. The first split uses scikit-learn's default 25% test share;
# the second carves 20% of the remaining rows out for validation.
X_train, X_test, y_train, y_test = train_test_split(
    data.iloc[:, :-1], data.iloc[:, -1], random_state=42
)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

In [16]:
# (rows, columns) of the train, validation and test feature matrices, in that order.
tuple(split.shape for split in (X_train, X_valid, X_test))

((11961, 523), (2991, 523), (4985, 523))

In [17]:
# Build one feature-column object per input column, passing the column's data
# together with the type recorded for that column name.
# `DataFrame.items()` is used instead of `iteritems()`, which was deprecated in
# pandas 1.5 and removed in pandas 2.0.
columns = [
    tabnet_utils.get_feature(series, types[name])
    for name, series in X_train.items()
]

In [18]:
# One-hot encode the three label vectors over 3 classes.
# NOTE(review): the encoded arrays replace the original label variables, so this
# cell should only be executed once per kernel session.
y_train, y_valid, y_test = (
    tensorflow.keras.utils.to_categorical(labels, 3)
    for labels in (y_train, y_valid, y_test)
)

In [19]:
# Convert each (features, labels) split with tabnet_utils.df_to_dataset; the
# resulting objects are what the model's fit/predict calls consume later on.
train = tabnet_utils.df_to_dataset(X_train, y_train)
valid = tabnet_utils.df_to_dataset(X_valid, y_valid)
test = tabnet_utils.df_to_dataset(X_test, y_test)

2022-05-10 18:19:39.766690: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-05-10 18:19:41.768375: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22840 MB memory:  -> device: 0, name: Quadro RTX 6000, pci bus id: 0000:17:00.0, compute capability: 7.5
2022-05-10 18:19:41.769258: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 22822 MB memory:  -> device: 1, name: Quadro RTX 6000, pci bus id: 0000:73:00.0, compute capability: 7.5
2022-05-10 18:19:41.769875: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:G

In [20]:
# Create a TF1-compat session whose GPU allocator grows on demand instead of
# reserving all device memory up front.
# NOTE(review): `session` is never referenced again in this cell — confirm that
# it actually influences the Keras training run below.
config = tensorflow.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
session = tensorflow.compat.v1.Session(config=config)

# TabNet classifier over the 523 input features with a 3-way softmax output.
m = TabNetClassifier(
    feature_columns=columns,
    num_features=523,
    num_outputs=3,
    feature_dim=64,
    output_dim=32,
    num_decision_steps=2,
    relaxation_factor=1.2,
    sparsity_coefficient=0.0001,
    batch_momentum=0.9,
    classifier_activation=tensorflow.keras.activations.softmax,
)

# One TensorBoard log directory per launch, named after the start timestamp.
logdir = "/home/karim/Research/DSAA2022/logs/" + "/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tensorflow.keras.callbacks.TensorBoard(log_dir=logdir)

# Adam with learning rate 0.01 and categorical cross-entropy (labels are one-hot).
m.compile(
    tensorflow.keras.optimizers.Adam(0.01),
    loss=tensorflow.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)
# Train for 30 epochs, evaluating on the validation set after each epoch.
m.fit(
    train,
    epochs=30,
    validation_data=valid,
    callbacks=[tensorboard_callback],
)

m.summary()

2022-05-10 18:19:52.774073: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22840 MB memory:  -> device: 0, name: Quadro RTX 6000, pci bus id: 0000:17:00.0, compute capability: 7.5
2022-05-10 18:19:52.774765: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 22822 MB memory:  -> device: 1, name: Quadro RTX 6000, pci bus id: 0000:73:00.0, compute capability: 7.5
2022-05-10 18:19:52.775394: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:2 with 22840 MB memory:  -> device: 2, name: Quadro RTX 6000, pci bus id: 0000:a6:00.0, compute capability: 7.5


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Model: "tab_net_classifier"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 tab_net_encoder (TabNetEnco  multiple                 201700    
 der)                                                            
                                                                 
Total params: 201,700
Trainable params: 196,514
Non-trainable params: 5,186
_________________________________________________________________
Model: "tab_net_encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Par

In [21]:
# Keep the raw softmax probabilities for the test set.
# They must not be rounded before taking the arg-max: rounding turns every row
# whose largest probability is at most 0.5 into all zeros, and the arg-max of an
# all-zero row is always class 0.
y_pred = m.predict(test)

In [22]:
# Collapse the one-hot test labels to integer class ids.
y_test_argmax = np.argmax(y_test, axis=1)
# Collapse the per-class prediction rows to integer class ids. The guard makes
# the cell safe to re-run: once `y_pred` is already a 1-D vector of class ids,
# a second arg-max along axis 1 would raise.
if np.ndim(y_pred) > 1:
    y_pred = np.argmax(y_pred, axis=1)

In [23]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_recall_fscore_support

# Support-weighted precision, recall and F1; the fourth tuple entry (support)
# is None when an average is requested.
print(precision_recall_fscore_support(y_test_argmax, y_pred, average='weighted'))

# Store the matrix under its own name so the imported `confusion_matrix`
# function is not overwritten by its own result.
conf_matrix = confusion_matrix(y_test_argmax, y_pred)
print(conf_matrix)

# Per-class precision / recall / F1 with supports.
print(classification_report(y_test_argmax, y_pred))

(0.9957925782121444, 0.9957873620862587, 0.995787902964338, None)
[[1290    1    3]
 [   6 1323    3]
 [   4    4 2351]]
              precision    recall  f1-score   support

           0       0.99      1.00      0.99      1294
           1       1.00      0.99      0.99      1332
           2       1.00      1.00      1.00      2359

    accuracy                           1.00      4985
   macro avg       1.00      1.00      1.00      4985
weighted avg       1.00      1.00      1.00      4985

