## Imports

In [1]:
# Pytorch
import torch
from torch.nn import CrossEntropyLoss, Conv2d, Linear, ReLU, Sequential, Sigmoid, Tanh, Softmax, MSELoss
from torch.optim import SGD, Adam

# helper
from os.path import join
import numpy as np
from math import sqrt

# Visualization
import pandas as pd

## Data acquisition

In [2]:
DATA_FOLDER = 'datasets/'
WIFI_DATA_PATH = join(DATA_FOLDER, 'wifi_localization.txt')

In [3]:
with open(WIFI_DATA_PATH) as file:
    lines = file.readlines()
    data = [list(map(int, line.rstrip('\n').split('\t'))) for line in lines]

## Data exploration

In [4]:
data_pd = pd.DataFrame(data, columns=['wifi_1', 'wifi_2', 'wifi_3', 'wifi_4',
                            'wifi_5', 'wifi_6', 'wifi_7', 'room'])
data_pd.head()

Unnamed: 0,wifi_1,wifi_2,wifi_3,wifi_4,wifi_5,wifi_6,wifi_7,room
0,-64,-56,-61,-66,-71,-82,-81,1
1,-68,-57,-61,-65,-71,-85,-85,1
2,-63,-60,-60,-67,-76,-85,-84,1
3,-61,-60,-68,-62,-77,-90,-80,1
4,-63,-65,-60,-63,-77,-81,-87,1


In [5]:
data_pd.describe()

Unnamed: 0,wifi_1,wifi_2,wifi_3,wifi_4,wifi_5,wifi_6,wifi_7,room
count,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0
mean,-52.3305,-55.6235,-54.964,-53.5665,-62.6405,-80.985,-81.7265,2.5
std,11.321677,3.417688,5.316186,11.471982,9.105093,6.516672,6.519812,1.118314
min,-74.0,-74.0,-73.0,-77.0,-89.0,-97.0,-98.0,1.0
25%,-61.0,-58.0,-58.0,-63.0,-69.0,-86.0,-87.0,1.75
50%,-55.0,-56.0,-55.0,-56.0,-64.0,-82.0,-83.0,2.5
75%,-46.0,-53.0,-51.0,-46.0,-56.0,-77.0,-78.0,3.25
max,-10.0,-45.0,-40.0,-11.0,-36.0,-61.0,-63.0,4.0


## Data processing

In [6]:
N = len(data)
split_ratio = 0.8
split_index = int(split_ratio * N)

In [7]:
data_torch = torch.tensor(data)
data_torch

tensor([[-64, -56, -61,  ..., -82, -81,   1],
        [-68, -57, -61,  ..., -85, -85,   1],
        [-63, -60, -60,  ..., -85, -84,   1],
        ...,
        [-62, -59, -46,  ..., -87, -88,   4],
        [-62, -58, -52,  ..., -90, -85,   4],
        [-59, -50, -45,  ..., -88, -87,   4]])

In [8]:
# generate random indices to split the data
random_indices = np.random.permutation(2000)
train_indices = random_indices[:split_index]
test_indices = random_indices[split_index:]

# split the data into train and test samples
train_data = data_torch[train_indices]
test_data = data_torch[test_indices]

# separate the input columns from the target column
train_input = train_data[:, :7].float()
train_target = train_data[:, 7] - 1
test_input = test_data[:, :7].float()
test_target = test_data[:, 7] - 1

## Train and evaluation functions

In [9]:
criterion = CrossEntropyLoss()
batch_size = 10
nb_epochs = 25

In [10]:
def train_model(model, train_input, train_target, nb_epochs=nb_epochs, batch_size=batch_size):
    """Train a model"""
    
    optimizer = Adam(model.parameters())

    for _ in range(nb_epochs):
        for b in range(0, train_input.size(0), batch_size):
            output = model(train_input.narrow(0, b, batch_size))
            loss = criterion(output, train_target.narrow(0, b, batch_size))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

In [11]:
def compute_nb_errors(model, data_input, data_target):
    """Compute the number of mismatched predictions"""
    
    nb_errors = 0
    for b in range(0, data_input.size(0), batch_size):
        output = model(data_input.narrow(0, b, batch_size))
        _, predictions = output.max(1)
        nb_errors += (predictions != data_target.narrow(0, b, batch_size)).sum()
        
    return int(nb_errors)

In [12]:
def weight_reset(m):
    """Reinitilize every linear and convolutional layer parameters in the model"""
    if isinstance(m, Conv2d) or isinstance(m, Linear):
        m.reset_parameters()

In [13]:
def train_and_evaluate(model, nb_trials=10):
    p_errs = torch.zeros(nb_trials)
    
    for i in range(nb_trials):
        model.apply(weight_reset)
        
        # train
        train_model(model, train_input, train_target)

        # evaluate
        n_err = compute_nb_errors(model, test_input, test_target)
        p_err = n_err / N
        p_errs[i] = p_err
    
    p_err_mean = p_errs.mean().item()
    std = p_errs.std().item()
    ci_95 = 1.96 * std / sqrt(nb_trials)
    
    return {'error_rate': p_err_mean,
            'std': std,
            'confidence_interval_95': ci_95
           }

In [14]:
model = Sequential(
    Linear(7, 4)
)

In [15]:
train_and_evaluate(model)

{'error_rate': 0.011749999597668648,
 'std': 0.0016372401732951403,
 'confidence_interval_95': 0.0010147719727709198}