### Load libraries and external data

In [28]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [29]:
import datetime
import json
import os

import colored
from colored import stylize
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix

from data import dataloader
from neuralnet import nn
#import pixiedust_node #v≥0.2.5

#### Load data from Firebase.
Requires [Firebase service account credentials](https://console.firebase.google.com/project/tingle-pilot-collected-data/settings/serviceaccounts/adminsdk) in JSON format saved in `./firebase-credentials`.

In [3]:
# Local CSV caches of the Firebase export, plus an optional corrections file.
notepath = "data/notes.csv"
datapath = "data/pilot_data.csv"
corrections_path = "data/corrections.json"

if os.path.exists(notepath) and os.path.exists(datapath):
    # Both caches are present: read them instead of querying Firebase again.
    notes = pd.read_csv(notepath)
    pilot_data = pd.read_csv(datapath)
else:
    # At least one cache is missing: download everything and rewrite both files.
    pilot_data, notes = dataloader.load_from_firebase(
        notes=True,
        start=datetime.datetime(2018, 3, 7),
        combine=True,
        marked=False
    )
    pilot_data.to_csv(datapath, index=False)
    notes.to_csv(notepath, index=False)

# Open the same path that was checked for existence, so the check and the read
# cannot drift apart if the path is ever changed.
# NOTE(review): json.load always yields string keys for JSON objects; confirm
# that dataloader.correct_corrections copes with string participant/step keys.
if os.path.exists(corrections_path):
    with open(corrections_path, "r") as c:
        corrections = json.load(c)
else:
    corrections = {}

In [133]:
# Display the corrections mapping that will be applied to the pilot data.
corrections

{12: {'hand': {'R': {'value': 'L'}}},
 15: {'step': {30: {'column': 'step_c2'}}}}

In [4]:
# Run the project's dataloader helpers: first index_participants, then
# correct_corrections with the `corrections` mapping. The corrected frame gets
# its own name; `pilot_data` itself is rebound to the indexed frame.
# NOTE(review): because pilot_data is rebound, re-running this cell passes the
# already-indexed frame back into index_participants — confirm that is harmless.
pilot_data = dataloader.index_participants(pilot_data)
pilot_data_corrected = dataloader.correct_corrections(pilot_data, corrections)

In [5]:
# Tally rows per participant and step:
#   key 2 counts rows where both coordinator1 and coordinator2 are True,
#   key 1 counts rows where exactly one of them is True.
data_points = {}
for _, row in pilot_data_corrected.iterrows():
    if row.participant not in data_points:
        # First row seen for this participant: empty tally for steps 1..47.
        data_points[row.participant] = {step: {} for step in range(1, 48)}

    # Explicit `== True` comparisons are kept as in the original.
    # NOTE(review): presumably the columns can hold non-boolean values — confirm.
    first_flag = row.coordinator1 == True
    second_flag = row.coordinator2 == True
    if first_flag and second_flag:
        n_coordinators = 2
    elif first_flag or second_flag:
        n_coordinators = 1
    else:
        # Neither coordinator flagged this row; nothing to count.
        continue

    step_tally = data_points[row.participant][row.step]
    step_tally[n_coordinators] = step_tally.get(n_coordinators, 0) + 1

In [33]:
# Print, for each participant, a tab-separated "step: total" list in which the
# total adds together every count stored for that step.
for participant, step_tallies in data_points.items():
    print(str(participant))
    for step, tally in step_tallies.items():
        total = sum(tally.values())
        print("{0}: {1}".format(str(step), str(total)), end="\t")
    print("\n")

1
1: 8	2: 12	3: 12	4: 17	5: 15	6: 16	7: 16	8: 16	9: 14	10: 15	11: 18	12: 15	13: 79	14: 15	15: 17	16: 15	17: 80	18: 14	19: 15	20: 17	21: 78	22: 11	23: 18	24: 19	25: 82	26: 14	27: 16	28: 15	29: 78	30: 13	31: 16	32: 15	33: 76	34: 82	35: 80	36: 79	37: 82	38: 86	39: 78	40: 78	41: 77	42: 29	43: 20	44: 21	45: 39	46: 36	47: 52	

2
1: 15	2: 16	3: 15	4: 16	5: 16	6: 16	7: 16	8: 17	9: 21	10: 18	11: 19	12: 18	13: 79	14: 16	15: 16	16: 16	17: 78	18: 18	19: 17	20: 16	21: 80	22: 13	23: 15	24: 17	25: 78	26: 14	27: 16	28: 16	29: 76	30: 16	31: 16	32: 18	33: 78	34: 82	35: 78	36: 77	37: 72	38: 75	39: 72	40: 77	41: 58	42: 29	43: 17	44: 19	45: 43	46: 37	47: 45	

3
1: 18	2: 23	3: 27	4: 20	5: 16	6: 17	7: 23	8: 17	9: 14	10: 31	11: 16	12: 18	13: 79	14: 18	15: 20	16: 18	17: 81	18: 12	19: 15	20: 18	21: 79	22: 15	23: 15	24: 13	25: 81	26: 16	27: 16	28: 14	29: 79	30: 16	31: 18	32: 16	33: 79	34: 77	35: 60	36: 81	37: 80	38: 79	39: 79	40: 75	41: 54	42: 27	43: 17	44: 17	45: 50	46: 41	47: 67	

4
1: 18	2: 17	3: 18	4: 15	5: 

#### Load [Synaptic](http://caza.la/synaptic/)
If the next cell fails with `Error: Cannot find module 'synaptic'`, create and run these two cells:

1. ```
cd neuralnet
```

2. ```sh
!npm init -y
!npm install -s synaptic
```

In [None]:
%%node
// Load the project's lstm.js module into the Node session as `lstm`;
// its train_lstm function is used by the training cell further down.
var lstm = require('../../tingle-pilot-study/neuralnet/lstm.js');

---
### See all targets, number of available samples and iteration blocks

In [None]:
# Read the target definitions from the project's JSON file into `targets`.
with open("neuralnet/targets.json", mode='r') as targets_file:
    targets = json.load(targets_file)

Structure of `targets`: `{target: training, [all offtarget], [all onbody offtarget], [all nontraining rotation], [all offbody]}`

In [None]:
# For each distinct target, print the number of on-target rows and the highest
# non-missing step value recorded for that target.
for target in pilot_data.target.unique():
    is_target = pilot_data.target == target
    last_step = max(pilot_data.loc[is_target]["step"].dropna())
    n_on_target = len(pilot_data.loc[is_target & (pilot_data.ontarget)])
    summary = "{0} on-target samples in step {1}".format(
        str(n_on_target),
        "%.0f" % last_step
    )
    print(": ".join([target, summary]))

---
### Extract training and testing data
Define targets of interest and corresponding offtargets

with open(
    'data/targets.json',
    'r'
) as fp:
    targets = json.load(
        fp
    )[0]

Set parameters for nn:

In [None]:
# Signal names passed to nn.define_trainer_data: one distance channel followed
# by the four numbered thermopile channels.
input_signals = ["distance"] + [
    "thermopile{0}".format(channel) for channel in range(1, 5)
]
# Sample-count settings from 300 down to 50 in steps of 50.
n_samples = list(range(300, 0, -50))
# Step numbers 1 through 47 inclusive.
steps = [*range(1, 48)]

Get training inputs and outputs, inputs that should evaluate ~true and inputs that should evaluate ~false

In [None]:
# Build the trainer data for a single target of interest, using the largest
# sample-count setting (the first entry of n_samples).
targ = "eyebrow"
# NOTE(review): "target" receives the whole targets[targ] entry wrapped in a
# list, while "offtarget" receives only its element at index 1 — confirm this
# asymmetry is what nn.define_trainer_data expects.
target_spec = {
    "target": [targets[targ]],
    "offtarget": targets[targ][1]
}
data = nn.define_trainer_data(
    pilot_data, target_spec, input_signals, steps, n_samples=n_samples[0]
)

Preview all inputs and training outputs

In [None]:
# Display the structure returned by nn.define_trainer_data above.
data

**Note**: These data take some time to copy across environments. Give the notebook some time between running cells across Python and JavaScript.

---
### Train and test

In [None]:
%%node
var networks = {};
for n_sample in n_samples:
    for (var target in data) {
      networks[target] = lstm.train_lstm([5,5,2,1], data[target]["train"], 0.06, 0.06, 3000);
    }

In [None]:
%%node
var test_outputs = {};
for (var target in data) {
    test_outputs[target] = {"true":[],"false":[]};
    for(var iteration=0; iteration < data[target]["test_true"].length; iteration++){
      test_outputs[target]["true"].push(networks[target].activate(data[target]["test_true"][iteration]));
      }
    for(var iteration=0; iteration < data[target]["test_false"].length; iteration++){
      test_outputs[target]["false"].push(networks[target].activate(data[target]["test_false"][iteration]));
    }
}

In [None]:
# Display the test outputs collected by the preceding %%node cell.
test_outputs

---
### See outputs

In [None]:
def calc_confusion(negative, positive):
    """
    Function to calculate a confusion matrix

    Each output is rounded to the nearest integer to obtain the predicted
    class, and compared against the known class of the input that produced it
    (0 for ``negative``, 1 for ``positive``).

    Parameters
    ----------
    negative: list of floats
        outputs of neural nets with true negative inputs

    positive: list of floats
        outputs of neural nets with true positive inputs

    Returns
    -------
    confusion: 2 x 2 matrix of counts
        [[tn, fp], [fn, tp]]
        see http://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html
    """
    # Materialise once so one-shot iterables are not exhausted between the
    # label pass and the prediction pass.
    negative = list(negative)
    positive = list(positive)
    ytrue = [0] * len(negative) + [1] * len(positive)
    ypredicted = [int(round(output)) for output in negative + positive]
    # Fix the label set explicitly: otherwise the matrix collapses to 1 x 1
    # when only one class occurs in both the true and predicted labels.
    return confusion_matrix(ytrue, ypredicted, labels=[0, 1])

If the training is adequate x ≈ 0 ∀ x in the following:

In [None]:
# Outputs for inputs that should evaluate ~false, keyed by target.
# Only targets that actually have test outputs are included: `targets` lists
# every known target, while `test_outputs` only holds the trained ones, so
# indexing it with an untrained target would raise a KeyError.
f = {
    target: list(test_outputs[target]['false'])
    for target in targets
    if target in test_outputs
}
f

If the training is adequate x ≈ 1 ∀ x in the following:

In [None]:
# Outputs for inputs that should evaluate ~true, keyed by target.
# Only targets that actually have test outputs are included: `targets` lists
# every known target, while `test_outputs` only holds the trained ones, so
# indexing it with an untrained target would raise a KeyError.
t = {
    target: list(test_outputs[target]['true'])
    for target in targets
    if target in test_outputs
}
t

Finally, if training is adequate, f ≪ t:

In [None]:
# Mean output per target for the ~false (f) and ~true (t) test inputs. Targets
# with no outputs are skipped so np.mean is never called on an empty list.
f_mean = {
    target: np.mean(f[target])
    for target in targets
    if target in f and len(f[target])
}
t_mean = {
    target: np.mean(t[target])
    for target in targets
    if target in t and len(t[target])
}

# Compare only targets that have both means; a target with ~true outputs but
# no ~false outputs would otherwise raise a KeyError on f_mean[target].
comparable = [target for target in t_mean if target in f_mean]

for target in comparable:
    f_text = str(f_mean[target])
    t_text = str(t_mean[target])
    print(target)
    if f_mean[target] < t_mean[target]:
        print("f = {0}\nt = {1}\n{0} ≪ {1} ?\n".format(f_text, t_text))
    else:
        print(
            "f = {0}\nt = {1}\n{2}".format(
                f_text,
                t_text,
                stylize(
                    "Nope. f > t\n",
                    colored.fg("red")
                )
            )
        )

# One-line summary per target. The relation symbol reflects the actual
# comparison instead of always printing "<".
for target in comparable:
    relation = "<" if f_mean[target] < t_mean[target] else "≥"
    print("{0}: f = {1:.4f} {3} t = {2:.4f}".format(
        target,
        f_mean[target],
        t_mean[target],
        relation
    ))