In [1]:
import pandas as pd
from tabnet.classifier import TabNetClassifier
from tabnet.autoencoder import TabNetAutoencoder
import tabnet_utils
from sklearn.model_selection import train_test_split
from datetime import datetime
import tensorflow
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score

In [3]:
df = pd.read_csv('Datasets/UJIndoorLoc/trainingData/trainingData.csv')
data = df.sample(frac=1).reset_index(drop=True)

In [4]:
data['BUILDINGID'].value_counts()
data = data.drop(['SPACEID', 'RELATIVEPOSITION', 'USERID', 'PHONEID', 'TIMESTAMP'], axis = 1)  

In [5]:
data.head()

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE,FLOOR,BUILDINGID
0,100,100,100,100,100,100,100,100,100,100,...,100,100,-60,100,100,100,-7352.3397,4864838.0,3,2
1,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7560.377608,4864859.0,1,1
2,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7616.6889,4864976.0,3,0
3,100,100,100,100,100,100,100,100,100,100,...,100,100,-64,100,100,100,-7325.2341,4864818.0,2,2
4,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7609.2504,4864978.0,1,0


In [6]:
# Create a Pandas dataframe with all the features
X = data.loc[:, data.columns != 'BUILDINGID']
y = data['BUILDINGID']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [8]:
X_train.shape, X_test.shape, y_train.unique()

((13955, 523), (5982, 523), array([0, 2, 1]))

In [None]:
import numpy as np
import xgboost as xgb

xg_train = xgb.DMatrix(X_train, label=y_train)
xg_test = xgb.DMatrix(X_test, label=y_test)
# setup parameters for xgboost
param = {}
# use softmax multi-class classification
param['objective'] = 'multi:softmax'
# scale weight of positive examples
param['eta'] = 0.01
param['max_depth'] = 10
param['nthread'] = 10
param['num_class'] = 3

watchlist = [(xg_train, 'train'), (xg_test, 'test')]
num_round = 100
bst = xgb.train(param, xg_train, num_round, watchlist)
# get prediction
pred = bst.predict(xg_test)
error_rate = np.sum(pred != y_test) / y_test.shape[0]
print('Test error using softmax = {}'.format(error_rate))

# do the same thing again, but output probabilities
param['objective'] = 'multi:softprob'
bst = xgb.train(param, xg_train, num_round, watchlist)
# Note: this convention has been changed since xgboost-unity
# get prediction, this is in 1D array, need reshape to (ndata, nclass)
pred_prob = bst.predict(xg_test).reshape(y_test.shape[0], 3)
pred_label = np.argmax(pred_prob, axis=1)
error_rate = np.sum(pred_label != y_test) / y_test.shape[0]
print('Test error using softprob = {}'.format(error_rate))

[0]	train-mlogloss:1.08368	test-mlogloss:1.08368




[1]	train-mlogloss:1.06902	test-mlogloss:1.06902
[2]	train-mlogloss:1.05464	test-mlogloss:1.05463
[3]	train-mlogloss:1.04051	test-mlogloss:1.04050
[4]	train-mlogloss:1.02664	test-mlogloss:1.02663
[5]	train-mlogloss:1.01302	test-mlogloss:1.01301
[6]	train-mlogloss:0.99964	test-mlogloss:0.99963
[7]	train-mlogloss:0.98649	test-mlogloss:0.98648
[8]	train-mlogloss:0.97357	test-mlogloss:0.97356
[9]	train-mlogloss:0.96088	test-mlogloss:0.96086
[10]	train-mlogloss:0.94840	test-mlogloss:0.94838
[11]	train-mlogloss:0.93613	test-mlogloss:0.93611
[12]	train-mlogloss:0.92407	test-mlogloss:0.92405
[13]	train-mlogloss:0.91221	test-mlogloss:0.91219
[14]	train-mlogloss:0.90054	test-mlogloss:0.90052
[15]	train-mlogloss:0.88907	test-mlogloss:0.88905
[16]	train-mlogloss:0.87778	test-mlogloss:0.87776
[17]	train-mlogloss:0.86668	test-mlogloss:0.86666


In [33]:
y_test.shape

(2585,)

In [35]:
from sklearn.metrics import precision_recall_fscore_support
print(precision_recall_fscore_support(y_test, pred_label, average='weighted'))

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
confusion_matrix = confusion_matrix(y_test, pred_label)
print(confusion_matrix)

print(classification_report(y_test, pred_label))

(0.9063193509758725, 0.9083172147001934, 0.9047232102840718, None)
[[ 25   0   0 ...   0   0   0]
 [  0  91   0 ...   1   0   0]
 [  0   0 286 ...   2   0   0]
 ...
 [  0   0   3 ... 117   1   0]
 [  0   0   0 ...   6   8   0]
 [  0   0   0 ...   0   1  23]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        25
           1       0.88      0.85      0.86       107
           2       0.97      0.98      0.97       292
           3       0.92      0.77      0.84        74
           4       0.67      0.44      0.53         9
           5       0.77      0.89      0.82       113
           6       0.92      1.00      0.96        12
           7       0.75      0.33      0.46        36
           8       0.92      0.98      0.95        45
           9       0.80      0.89      0.84       108
          10       0.84      0.84      0.84        19
          11       0.93      0.94      0.94       133
          12       0.93      0.89     