# Section Classification for WiFi Localization via Keras/Tensorflow
This notebook shows classification results of sections instead of zones. The classifier used for this task is the same as introduced in section '20180831_model_tuning/improved_classifier'.

## Prerequisites
- Python packages installed
    - pandas
    - numpy
    - tensorflow
    - keras
    - scikit-learn
- Data files 'fingerprints_train_rev5.csv' and 'fingerprints_test_rev5.csv' containing labelled WiFi fingerprints. (Unpack them into the directory '~/data_sections', if necessary.)

In [31]:
import ast

import pandas as pd
import numpy as np

from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.utils import np_utils
from keras.optimizers import Adam

from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import classification_report
from sklearn.preprocessing import OneHotEncoder

## Load data

In [63]:
# Merge the train and test exports into one frame; the notebook draws its own
# train/eval/test split further down.
# `DataFrame.append` was deprecated and then removed in pandas 2.0, so use
# `pd.concat`. `ignore_index=True` gives the combined frame unique row labels
# instead of repeating each file's own 0..n-1 index.
df = pd.concat(
    [
        pd.read_csv('~/data_sections/fingerprints_train_rev5.csv'),
        pd.read_csv('~/data_sections/fingerprints_test_rev5.csv'),
    ],
    ignore_index=True,
)

print(df.shape)
df.head(5)

(335663, 7)


Unnamed: 0.1,Unnamed: 0,fr_observation_time,fr_values,fr_mac_address_id,zo_name,se_name,prev_fp_idx
0,73426,2015-12-08 13:47:17,"{'236': '-81', '237': '-74'}",2002427,346,Anson's EG,73421.0
1,342941,2015-12-10 12:54:18,"{'59': '-81', '60': '-68', '62': '-78', '63': ...",2002427,507,Peter Polzer,342937.0
2,187205,2015-12-09 10:20:32,"{'74': '-70', '75': '-71', '76': '-77', '77': ...",2002427,303,Tesla,187198.0
3,215433,2015-12-09 12:07:59,"{'58': '-72', '65': '-66', '67': '-68', '172':...",2002427,262,ZARA EG,215427.0
4,4397,2015-12-08 10:15:45,"{'14': '-76', '16': '-79', '32': '-67', '33': ...",2002427,226,Vero Moda,4386.0


## Convert section name to IDs

In [64]:
# Map every distinct section name to an integer class ID.
le = preprocessing.LabelEncoder()
# Fit on the frame loaded above. The cell previously fitted on `df_train`, which
# is not defined in the visible notebook and fails on a fresh kernel.
le.fit(df['se_name'])
# Wrap in a Series with a fresh 0..n-1 index so rows are addressed positionally.
classes = pd.Series(le.transform(df['se_name']))
print(classes.shape)
classes.head(5)

(335663,)


0      2
1    177
2    212
3    236
4    226
dtype: int64

## Unpack features (RSSI dictionary)

In [65]:
# Each `fr_values` cell holds the string form of a dict literal (key -> RSSI value).
# Parse it with `ast.literal_eval`, which only accepts Python literals, rather
# than `eval`, which would execute arbitrary code embedded in the CSV.
features = df['fr_values'].apply(lambda x: dict(ast.literal_eval(x)))
# Expand the dicts into one column per key; keys missing from a row become NaN.
features = features.apply(pd.Series)
# Replace the NaNs with -100 (no in-place mutation, so the cell is safe to re-run).
features = features.fillna(-100)
features.head(4)

Unnamed: 0,1,10,101,102,103,104,105,107,109,11,...,89,9,90,91,92,93,94,97,98,99
0,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,...,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100
1,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,...,-100,-100,-100,-100,-100,-74,-100,-100,-100,-100
2,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,...,-100,-100,-100,-68,-77,-83,-100,-100,-100,-100
3,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,...,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100


## Normalize data
Normalize features and split data into training, evaluation and test sets.

In [66]:
# Fixed seed so the train/eval/test partition is identical after a kernel restart.
RANDOM_STATE = 42

X = features.astype('float32')
Y = classes.astype('int')

# split data for training (stratified on the class label, 80% train / 20% rest)
X_train, X_rest, y_train, y_rest = train_test_split(
    X, Y, stratify=Y, train_size=0.8, test_size=0.2, random_state=RANDOM_STATE)
y_train = y_train.to_frame()
y_rest = y_rest.to_frame()

# Fit the scaler on the training rows only. Fitting on the full matrix before
# splitting would leak the eval/test value ranges into training.
# Note: despite the name `std_scale`, this is a MinMaxScaler, not a standardizer.
std_scale = preprocessing.MinMaxScaler().fit(X_train)
X_train = std_scale.transform(X_train)
X_rest = std_scale.transform(X_rest)
# Keep `X` as the scaled full matrix, as it was before, for any later cell that reads it.
X = std_scale.transform(X)

# split rest data into eval and test sets (50/50, not stratified)
X_test, X_eval, y_test, y_eval = train_test_split(
    X_rest, y_rest, train_size=0.5, test_size=0.5, random_state=RANDOM_STATE)

print('Training data shape:', X_train.shape, y_train.shape)
print('Eval data shape:', X_eval.shape, y_eval.shape)
print('Test data shape:', X_test.shape, y_test.shape)

Training data shape: (268530, 261) (268530, 1)
Eval data shape: (33567, 261) (33567, 1)
Test data shape: (33566, 261) (33566, 1)


## Generate one-hot matrices of target sections

In [67]:
# convert 1-dimensional class arrays to one-hot class matrices
enc = OneHotEncoder(sparse=False)
enc.fit(Y.reshape(-1, 1))

Y_train = enc.transform(y_train.values.reshape(-1,1))
Y_eval = enc.transform(y_eval.values.reshape(-1,1))
Y_test = enc.transform(y_test.values.reshape(-1,1))
    
Y_train.shape, Y_eval.shape, Y_test.shape

  app.launch_new_instance()


((268530, 240), (33567, 240), (33566, 240))

## Classification

In [68]:
# Derive layer sizes from the data instead of hard-coding them, so the cell
# keeps working if the number of feature columns or classes changes.
n_features = X_train.shape[1]
n_classes = Y_train.shape[1]

# Fully connected classifier: 1000 -> 1000 -> 500 hidden units, each followed
# by 20% dropout, with a softmax output over the classes.
model = Sequential()
model.add(Dense(1000, input_dim=n_features, kernel_initializer='uniform', activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1000, kernel_initializer='uniform', activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(500, kernel_initializer='uniform', activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(n_classes, kernel_initializer='uniform', activation='softmax'))

adamOpt = Adam(lr=0.005, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.001, amsgrad=False)

model.compile(loss='categorical_crossentropy', optimizer=adamOpt, metrics=['categorical_accuracy'])
# Keep the History object so the training curves can be inspected later and its
# repr is not emitted as the cell's output.
history = model.fit(X_train, Y_train, validation_data=(X_eval, Y_eval), batch_size=512, epochs=30, verbose=2)

Train on 268530 samples, validate on 33567 samples
Epoch 1/30
 - 72s - loss: 1.1052 - categorical_accuracy: 0.6454 - val_loss: 0.6487 - val_categorical_accuracy: 0.7705
Epoch 2/30
 - 71s - loss: 0.6360 - categorical_accuracy: 0.7795 - val_loss: 0.5352 - val_categorical_accuracy: 0.8134
Epoch 3/30
 - 71s - loss: 0.5324 - categorical_accuracy: 0.8138 - val_loss: 0.4755 - val_categorical_accuracy: 0.8334
Epoch 4/30
 - 70s - loss: 0.4700 - categorical_accuracy: 0.8346 - val_loss: 0.4462 - val_categorical_accuracy: 0.8428
Epoch 5/30
 - 71s - loss: 0.4258 - categorical_accuracy: 0.8506 - val_loss: 0.4214 - val_categorical_accuracy: 0.8538
Epoch 6/30
 - 71s - loss: 0.3919 - categorical_accuracy: 0.8615 - val_loss: 0.4111 - val_categorical_accuracy: 0.8573
Epoch 7/30
 - 71s - loss: 0.3644 - categorical_accuracy: 0.8709 - val_loss: 0.4003 - val_categorical_accuracy: 0.8616
Epoch 8/30
 - 71s - loss: 0.3413 - categorical_accuracy: 0.8778 - val_loss: 0.3937 - val_categorical_accuracy: 0.8639
Epoch

<keras.callbacks.History at 0x7feedb246be0>

## Results
### Accuracy 88%

In [69]:
# Print the metric labels first so the values returned by `evaluate` below
# can be read against them.
metric_labels = model.metrics_names
print(metric_labels)
# Score the trained model on the test split; the result is the cell's output.
model.evaluate(X_test, Y_test, verbose=1)

['loss', 'categorical_accuracy']


[0.39417095220160697, 0.8805040814932954]

### F1 score 0.88
This relatively high F1 score of 0.88 is achieved also with good precision of 0.88.

Note: The score drops significantly for classes with few supporting observations.

In [70]:
# Model output for every test row (one column per class).
pred = model.predict(X_test, batch_size=32, verbose=2)

# Reduce both the predictions and the one-hot ground truth to class indices
# by taking the position of the largest entry in each row.
Y_predicted = pred.argmax(axis=1)
Y_truth = Y_test.argmax(axis=1)

# Per-class precision / recall / F1 / support table.
report = classification_report(Y_truth, Y_predicted)
print(report)

             precision    recall  f1-score   support

          0       0.91      0.87      0.89       102
          1       0.86      0.87      0.87       111
          2       0.90      0.94      0.92       256
          3       0.95      0.88      0.92       135
          4       0.94      0.89      0.92        75
          5       0.85      0.89      0.87        99
          6       0.90      0.93      0.92       173
          7       0.90      0.93      0.91       139
          8       0.82      0.85      0.83       112
          9       0.95      0.85      0.90       103
         10       0.82      0.84      0.83        67
         11       0.80      0.76      0.78        59
         12       0.95      0.85      0.90       124
         13       0.84      0.81      0.82        57
         14       0.94      0.92      0.93        37
         15       0.81      0.60      0.69       106
         16       0.89      0.91      0.90       105
         17       0.82      0.70      0.75   