## Imports

In [1]:
# Pytorch
import torch
from torch.nn import CrossEntropyLoss, Conv2d, Linear, ReLU, Sequential, Sigmoid, Tanh, Softmax, MSELoss
from torch.optim import SGD, Adam

# helper
from os.path import join
import numpy as np
from math import sqrt

# Visualization
import pandas as pd

## Data acquisition

In [2]:
DATA_FOLDER = 'datasets/'
BC_NEW_DENSE_FOLDER = join(DATA_FOLDER, 'bc_new_dense')
ZIG_ZAG2_PATH = join(BC_NEW_DENSE_FOLDER, 'zig_zag2.csv')
ANCHORS_PATH = join(BC_NEW_DENSE_FOLDER, 'anchors_bc_atrium_dense.csv')

In [3]:
data_raw = pd.read_csv(ZIG_ZAG2_PATH)
data_raw

Unnamed: 0,timestamp,device_id,system_id,anchor_id,px,py,pz,theta_x,theta_y,theta_z,...,dist_var,txpower,rssi,ble_channel,calib_param_tx,calib_param_n,acc_x,acc_y,acc_z,is_step_detected
0,1557062000727,14954135790684542069,7586,,,,,0.947485,0.055430,1.289884,...,,,,,,,-0.275427,8.064031,5.166058,0.0
1,1557062000731,16616973326163504182,7592,dc:8b:28:54:d7:8d,,,,,,,...,0.178929,,-71.0,,,,,,,
2,1557062000741,14954135790684542069,7586,,,,,0.947991,0.051225,1.293103,...,,,,,,,-0.519719,8.078402,5.486990,0.0
3,1557062000748,16616973326163504182,7592,b8:08:cf:a0:bf:1d,,,,,,,...,0.822649,,-61.0,,,,,,,
4,1557062000748,16616973326163504182,7582,1-250,,,,,,,...,,-77.0,-83.0,,-77.0,2.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22874,1557062212254,28486,115200,0x6F3F,3.75000,7.111000,1.300000,,,,...,,,-80.0,,,,,,,
22875,1557062210445,16616973326163504182,7592,dc:8b:28:54:d7:f1,,,,,,,...,5.866084,,-65.0,,,,,,,
22876,1557062210447,14954135790684542069,7586,,,,,1.068343,-0.014032,-1.169306,...,,,,,,,-1.175955,9.929751,4.260740,0.0
22877,1557062212306,28486,115200,0x6F2B,3.87000,6.940000,1.300000,,,,...,,,-86.0,,,,,,,


In [4]:
data_anchors = pd.read_csv(ANCHORS_PATH)
data_anchors

Unnamed: 0,system_id,anchor_id,px,py,pz,theta_x,theta_y,theta_z,scale_x,scale_y
0,115200,0x6F2B,15.107,25.247,1.812,,,,,
1,115200,0x6F29,2.055,45.65,1.878,,,,,
2,115200,0x6F13,13.827,7.928,1.905,,,,,
3,115200,0x6F3F,1.935,3.726,1.997,,,,,
4,7581,u1uwhdqdy4vx2q0,0.0,10.49,1.61,1.5708,0.0,1.5708,1.05,0.63
5,7581,9iaj4ym9u2shxdh,6.539,0.96,1.555,1.5708,0.0,3.14159,0.575,0.42
6,7581,xehph6l77u7smwo,0.0,18.708,1.435,1.5708,0.0,1.5708,0.42,0.59
7,7581,53saut0wvhrpeyk,13.848,7.367,1.456,1.5708,0.0,-1.5708,0.35,0.54
8,7592,dc:8b:28:54:da:bc,0.225,30.847,0.936,,,,,
9,7592,dc:8b:28:54:d8:05,2.725,7.314,5.16,,,,,


In [7]:
a = data_anchors.fillna(0)

In [12]:
b = a.iloc[:,3:]

In [15]:
torch.FloatTensor(b.values).shape

torch.Size([22, 7])

## Data exploration and cleaning

In [39]:
data_raw.columns

Index(['timestamp', 'device_id', 'system_id', 'anchor_id', 'px', 'py', 'pz',
       'theta_x', 'theta_y', 'theta_z', 'dx', 'dy', 'dz', 'distance',
       'dist_error', 'dist_var', 'txpower', 'rssi', 'ble_channel',
       'calib_param_tx', 'calib_param_n', 'acc_x', 'acc_y', 'acc_z',
       'is_step_detected'],
      dtype='object')

In [40]:
columns_of_interest = ['timestamp', 'system_id', 'px', 'py', 'pz', 'distance', 'rssi', 'anchor_id']

In [41]:
data_of_interest = data_raw[columns_of_interest]
data_of_interest.head()

Unnamed: 0,timestamp,system_id,px,py,pz,distance,rssi,anchor_id
0,1557062000727,7586,,,,,,
1,1557062000731,7592,,,,43.299,-71.0,dc:8b:28:54:d7:8d
2,1557062000741,7586,,,,,,
3,1557062000748,7592,,,,10.181,-61.0,b8:08:cf:a0:bf:1d
4,1557062000748,7582,,,,1.995262,-83.0,1-250


In [42]:
data_of_interest.describe()

Unnamed: 0,timestamp,system_id,px,py,pz,distance,rssi
count,22879.0,22879.0,9543.0,9543.0,9543.0,10075.0,10075.0
mean,1557062000000.0,41433.665326,6.686781,13.26327,1.260599,17.860901,-82.573995
std,60588.79,49968.774968,3.190699,8.130622,0.09649,10.951266,14.081788
min,1557062000000.0,7581.0,0.0,0.0,0.30112,0.281838,-109.0
25%,1557062000000.0,7586.0,4.090456,7.307947,1.3,9.785,-93.0
50%,1557062000000.0,7586.0,6.574,11.994,1.3,16.0,-87.0
75%,1557062000000.0,115200.0,9.119,19.859157,1.3,23.6235,-72.0
max,1557062000000.0,115200.0,24.328,30.862,1.571681,64.387,0.0


In [43]:
data_anchors.anchor_id.unique()

array(['0x6F2B', '0x6F29', '0x6F13', '0x6F3F', 'u1uwhdqdy4vx2q0',
       '9iaj4ym9u2shxdh', 'xehph6l77u7smwo', '53saut0wvhrpeyk',
       'dc:8b:28:54:da:bc', 'dc:8b:28:54:d8:05', 'dc:8b:28:54:d7:f1',
       'dc:8b:28:54:d7:d8', 'dc:8b:28:54:e2:ff', 'dc:8b:28:54:d7:8d',
       'b8:08:cf:a0:bf:1d', '1-250', '2-235', '2-231', '1-249', '1-248',
       '17eba1f6-bd0c-2b8b-8559-75e8b171730e',
       '17eba1fb-bd0c-2b8b-8530-fcfec019262c'], dtype=object)

## Data processing

In [6]:
N = len(data)
split_ratio = 0.8
split_index = int(split_ratio * N)

In [7]:
data_torch = torch.tensor(data)
data_torch

tensor([[-64, -56, -61,  ..., -82, -81,   1],
        [-68, -57, -61,  ..., -85, -85,   1],
        [-63, -60, -60,  ..., -85, -84,   1],
        ...,
        [-62, -59, -46,  ..., -87, -88,   4],
        [-62, -58, -52,  ..., -90, -85,   4],
        [-59, -50, -45,  ..., -88, -87,   4]])

In [8]:
# generate random indices to split the data
random_indices = np.random.permutation(2000)
train_indices = random_indices[:split_index]
test_indices = random_indices[split_index:]

# split the data into train and test samples
train_data = data_torch[train_indices]
test_data = data_torch[test_indices]

# separate the input columns from the target column
train_input = train_data[:, :7].float()
train_target = train_data[:, 7] - 1
test_input = test_data[:, :7].float()
test_target = test_data[:, 7] - 1

## Train and evaluation functions

In [9]:
criterion = CrossEntropyLoss()
batch_size = 10
nb_epochs = 25

In [10]:
def train_model(model, train_input, train_target, nb_epochs=nb_epochs, batch_size=batch_size):
    """Train a model"""
    
    optimizer = Adam(model.parameters())

    for _ in range(nb_epochs):
        for b in range(0, train_input.size(0), batch_size):
            output = model(train_input.narrow(0, b, batch_size))
            loss = criterion(output, train_target.narrow(0, b, batch_size))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

In [11]:
def compute_nb_errors(model, data_input, data_target):
    """Compute the number of mismatched predictions"""
    
    nb_errors = 0
    for b in range(0, data_input.size(0), batch_size):
        output = model(data_input.narrow(0, b, batch_size))
        _, predictions = output.max(1)
        nb_errors += (predictions != data_target.narrow(0, b, batch_size)).sum()
        
    return int(nb_errors)

In [12]:
def weight_reset(m):
    """Reinitilize every linear and convolutional layer parameters in the model"""
    if isinstance(m, Conv2d) or isinstance(m, Linear):
        m.reset_parameters()

In [13]:
def train_and_evaluate(model, nb_trials=10):
    p_errs = torch.zeros(nb_trials)
    
    for i in range(nb_trials):
        model.apply(weight_reset)
        
        # train
        train_model(model, train_input, train_target)

        # evaluate
        n_err = compute_nb_errors(model, test_input, test_target)
        p_err = n_err / N
        p_errs[i] = p_err
    
    p_err_mean = p_errs.mean().item()
    std = p_errs.std().item()
    ci_95 = 1.96 * std / sqrt(nb_trials)
    
    return {'error_rate': p_err_mean,
            'std': std,
            'confidence_interval_95': ci_95
           }

In [14]:
model = Sequential(
    Linear(7, 4)
)

In [15]:
train_and_evaluate(model)

{'error_rate': 0.011749999597668648,
 'std': 0.0016372401732951403,
 'confidence_interval_95': 0.0010147719727709198}