### Load libraries and external data

In [1]:
# Reload imported modules automatically before each cell runs, so edits to
# the local `data` and `neuralnet` packages are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [2]:
import datetime
import json
import os

import colored
from colored import stylize
import numpy as np
import pandas as pd
import pixiedust_node #v≥0.2.5
from sklearn.metrics import confusion_matrix

from data import dataloader
from neuralnet import nn

Pixiedust database opened successfully


#### Load data from Firebase.
Requires [Firebase service account credentials](https://console.firebase.google.com/project/tingle-pilot-collected-data/settings/serviceaccounts/adminsdk) in JSON format saved in `./firebase-credentials`.

In [3]:
notepath = "data/notes.csv"
datapath = "data/pilot_data.csv"
corrections_path = "data/corrections.json"

# Reuse the cached CSV exports when both are present; otherwise download from
# Firebase and write the cache so later runs can skip the download.
if os.path.exists(notepath) and os.path.exists(datapath):
    notes = pd.read_csv(notepath)
    pilot_data = pd.read_csv(datapath)
else:
    pilot_data, notes = dataloader.load_from_firebase(
        notes=True,
        start=datetime.datetime(2018, 3, 7),
        combine=True,
        marked=False
    )
    pilot_data.to_csv(datapath, index=False)
    notes.to_csv(notepath, index=False)

# Corrections are optional. Open the same path whose existence was just
# checked, so changing `corrections_path` cannot check one file and read another.
if os.path.exists(corrections_path):
    with open(corrections_path, "r") as c:
        corrections = json.load(c)
else:
    corrections = {}

pixiedust_node 0.2.5 started. Cells starting '%%node' may contain Node.js code.


In [4]:
# Show the corrections loaded from `corrections_path` ({} when the file is absent)
corrections

{'12': {'hand': {'R': {'value': 'L'}}},
 '15': {'step': {'30': {'column': 'step_c2'}}}}

In [5]:
# NOTE(review): `pilot_data` is rebound to the helper's result, so re-running
# this cell feeds already-indexed data back into `index_participants` — confirm
# the helper is safe to apply twice.
pilot_data = dataloader.index_participants(pilot_data)
# Presumably applies the entries of `corrections` to the data — verify in
# `dataloader.correct_corrections`, including whether `pilot_data` itself is modified.
pilot_data_corrected = dataloader.correct_corrections(pilot_data, corrections)

In [None]:
# Tally rows per participant and step:
#   key 2 counts rows flagged by both coordinators,
#   key 1 counts rows flagged by at least one (but not both).
data_points = {}
for index, row in pilot_data_corrected.iterrows():
    if row.participant not in data_points:
        # every participant starts with an empty tally for steps 1 through 47
        data_points[row.participant] = {step: {} for step in list(range(1, 48))}
    if row.coordinator1 == True and row.coordinator2 == True:
        agreement = 2
    elif row.coordinator1 == True or row.coordinator2 == True:
        agreement = 1
    else:
        # rows flagged by neither coordinator are not tallied
        continue
    step_tally = data_points[row.participant][row.step]
    step_tally[agreement] = step_tally.get(agreement, 0) + 1

In [None]:
# Print, for each participant, the total number of tallied rows per step as
# tab-separated "step: total" pairs. Every step key visited here comes from
# the participant's own dict, so no fallback for a missing step is needed.
for participant, step_tallies in data_points.items():
    print(str(participant))
    for step, tally in step_tallies.items():
        print("{0}: {1}".format(
            str(step),
            str(sum(tally.values()))
        ), end="\t")
    print("\n")

#### Load [Synaptic](http://caza.la/synaptic/)
If the cell below fails with `Error: Cannot find module 'synaptic'`, create and run these two cells:

1. ```
cd neuralnet
```

2. ```sh
!npm init -y
!npm install -s synaptic
```

In [None]:
%%node
// Load the project's LSTM helper module into the Node.js side of the notebook;
// later Node cells call `lstm.train_lstm` on it.
var lstm = require('../../tingle-pilot-study/neuralnet/lstm.js');

---
### See all targets, number of available samples and iteration blocks

In [6]:
# Read the target definitions used throughout the rest of the notebook
with open("neuralnet/targets.json", 'r') as targets_file:
    targets = json.load(targets_file)

In [7]:
pd.options.display.max_columns = 100

# Keep only on-target rows that belong to a real target
on_target_rows = pilot_data_corrected[
    (pilot_data_corrected["ontarget"] == True)
    & (pilot_data_corrected.target != "none")
]

# Count on-target samples per (step, target), one column per participant;
# combinations with no samples are shown as 0.
on_target_counts = (
    on_target_rows[["step", "target", "participant", "ontarget"]]
    .groupby(["step", "target", "participant"])
    .count()
    .unstack(fill_value=0)
)
on_target_counts

Unnamed: 0_level_0,Unnamed: 1_level_0,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget
Unnamed: 0_level_1,participant,1,2,3,4,5,6,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52
step,target,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2
1,food,0,4,12,9,0,12,11,11,3,2,2,0,1,1,0,0,13,4,2,1,1,0,1,6,0,0,3,2,1,0,0,6,2,1,5,2,3,0,2,2,3,7,0,3,3,3,3,6,3,8
2,food,8,9,11,11,12,13,13,12,4,8,0,6,0,2,0,4,9,2,6,3,9,13,4,2,3,4,3,4,2,0,0,2,4,1,4,2,4,0,6,7,6,4,3,2,4,3,5,1,6,8
3,food,5,11,14,11,9,11,15,19,3,2,2,4,1,4,2,3,3,5,1,3,2,4,3,1,1,3,2,4,0,2,1,2,4,2,3,4,4,5,6,5,5,4,3,6,4,4,5,6,3,4
4,thumb,13,11,10,12,8,12,10,10,10,2,9,2,0,3,0,1,2,5,2,1,2,12,2,5,1,3,1,2,3,1,1,1,3,4,3,2,1,3,3,5,3,3,2,2,3,3,10,4,3,11
5,thumb,9,11,11,12,11,13,12,12,8,0,2,5,4,1,0,1,4,1,3,5,2,3,1,2,2,3,2,2,3,0,5,1,2,2,0,2,2,2,7,6,3,1,3,4,4,3,12,3,6,4
6,thumb,11,11,11,12,12,13,12,11,4,0,1,4,0,0,0,3,3,3,2,5,2,3,2,4,1,6,3,1,5,0,5,1,1,2,2,1,2,6,4,3,4,3,3,4,5,4,9,7,4,6
7,nails,9,10,16,10,13,12,11,11,2,0,1,6,1,1,0,2,6,2,2,0,1,1,1,0,0,6,3,2,2,0,1,3,2,0,2,2,1,8,4,6,3,3,1,6,7,7,10,3,4,10
8,nails,10,12,11,12,13,11,12,13,4,0,0,1,0,0,0,2,3,1,3,1,3,3,2,3,2,4,3,2,2,1,0,3,2,3,2,2,2,7,4,3,5,3,4,1,4,4,6,1,3,4
9,nails,9,15,11,11,13,12,12,13,3,0,0,4,0,0,0,3,4,3,3,1,2,4,3,6,1,2,3,0,3,2,2,2,2,3,2,3,2,6,4,1,4,3,2,3,3,3,10,3,2,4
10,smoke,10,13,11,15,11,14,11,13,9,0,0,3,3,3,1,11,2,9,2,0,1,1,2,2,0,4,5,2,1,0,2,0,1,1,1,2,2,3,6,3,3,2,3,4,11,3,4,9,2,5


{target: training, [all offtarget], [all onbody offtarget], [all nontraining rotation], [all offbody]}

In [None]:
# For every target, report how many on-target samples exist and which steps
# (iteration blocks) the target appears in.
for target in list(pilot_data.target.unique()):
    target_rows = pilot_data.loc[pilot_data.target == target]
    iteration_blocks = set(list(target_rows["step"].dropna()))

    on_target_total = len(pilot_data.loc[
        (pilot_data.target == target) &
        (pilot_data.ontarget)
    ])
    step_listing = ", ".join(["%.0f" % step for step in iteration_blocks])
    step_word = "step" if len(iteration_blocks) == 1 else "steps"

    print(": ".join([
        target,
        "{0} on-target samples in {2} {1}".format(
            str(on_target_total),
            step_listing,
            step_word
        )
    ]))

---
### Extract training and testing data
Define targets of interest and corresponding offtargets

# The targets of interest are the first element of the JSON document
with open('data/targets.json', 'r') as targets_file:
    targets = json.load(targets_file)[0]

Set parameters for nn:

In [None]:
# Sensor channels fed to the network: one distance reading plus thermopiles 1-4
input_signals = ["distance"] + [
    "thermopile{0}".format(channel) for channel in range(1, 5)
]
# Candidate sample counts, largest first: 300, 250, ..., 50
n_samples = list(range(300, 0, -50))
# Step numbers 1 through 47
steps = [*range(1, 48)]

Get training inputs and outputs, inputs that should evaluate ~true and inputs that should evaluate ~false

In [None]:
targ = "eyebrow"

# NOTE(review): "target" wraps the whole `targets[targ]` entry in a list while
# "offtarget" takes its element 1 — confirm this matches what
# `nn.define_trainer_data` expects.
trainer_targets = {
    "target": [targets[targ]],
    "offtarget": targets[targ][1]
}

# NOTE(review): this reads `pilot_data`, not `pilot_data_corrected` — confirm
# that is intended.
data = nn.define_trainer_data(
    pilot_data,
    trainer_targets,
    input_signals,
    steps,
    n_samples=n_samples[0]  # first sample count in the list
)

Preview all inputs and training outputs

In [None]:
# Display the structure returned by `nn.define_trainer_data`
data

**Note**: These data take some time to copy across environments. Give the notebook some time between running cells across Python and JavaScript.

---
### Train and test

In [None]:
%%node
var networks = {};
for n_sample in n_samples:
    for (var target in data) {
      networks[target] = lstm.train_lstm([5,5,2,1], data[target]["train"], 0.06, 0.06, 3000);
    }

In [None]:
%%node
// Run every test input through its target's trained network and collect the
// raw activations, grouped by whether the input should evaluate true or false.
var test_outputs = {};
for (var target in data) {
    test_outputs[target] = {"true":[],"false":[]};
    // inputs that should evaluate ~true
    for(var iteration=0; iteration < data[target]["test_true"].length; iteration++){
      test_outputs[target]["true"].push(networks[target].activate(data[target]["test_true"][iteration]));
      }
    // inputs that should evaluate ~false
    for(var iteration=0; iteration < data[target]["test_false"].length; iteration++){
      test_outputs[target]["false"].push(networks[target].activate(data[target]["test_false"][iteration]));
    }
}

In [None]:
# Python-side view of the outputs computed in the Node cell above.
# NOTE(review): relies on the variable having been copied over from Node.js;
# allow time for the copy before running this cell.
test_outputs

---
### See outputs

In [None]:
def calc_confusion(negative, positive):
    """
    Function to calculate a 2×2 confusion matrix

    Each network output is rounded to the nearest integer to obtain the
    predicted class, which is compared with the class implied by the list
    the output came from (0 for `negative`, 1 for `positive`).

    Parameters
    ----------
    negative: list of floats (or of single-element sequences)
        outputs of neural nets with true negative inputs

    positive: list of floats (or of single-element sequences)
        outputs of neural nets with true positive inputs

    Returns
    -------
    confusion: 2×2 matrix of ints
        [[tn, fp], [fn, tp]]
        see http://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html

    Raises
    ------
    ValueError
        if an output holds more than one value
    """
    def _predicted_class(output):
        # A network with a single output unit may hand back `[value]` instead
        # of a bare float; `.item()` accepts both and rejects longer sequences.
        return int(round(np.asarray(output, dtype=float).item()))

    ytrue = [0 for _ in negative] + [1 for _ in positive]
    ypredicted = [_predicted_class(output) for output in [*negative, *positive]]
    # Pin the label set so the result is always 2×2, even when only one class
    # shows up in the inputs and predictions.
    return confusion_matrix(ytrue, ypredicted, labels=[0, 1])

If the training is adequate x ≈ 0 ∀ x in the following:

In [None]:
# Outputs for inputs that should evaluate ~false, per target. Targets that
# have no entry in `test_outputs` are skipped rather than raising KeyError.
f = {
    target: list(test_outputs[target]['false'])
    for target in targets
    if target in test_outputs
}
f

If the training is adequate x ≈ 1 ∀ x in the following:

In [None]:
# Outputs for inputs that should evaluate ~true, per target. Targets that
# have no entry in `test_outputs` are skipped rather than raising KeyError.
t = {
    target: list(test_outputs[target]['true'])
    for target in targets
    if target in test_outputs
}
t

Finally, if training is adequate, f ≪ t:

In [None]:
# Mean network output per target; targets with no outputs are left out
f_mean = {
    target: np.mean(f[target]) for target in targets if target in f and len(f[target])
}
t_mean = {
    target: np.mean(t[target]) for target in targets if target in t and len(t[target])
}

# A target can only be compared when both of its means exist
compared = [target for target in t_mean if target in f_mean]

# Detailed report: flag in red every target whose false-mean is not below its true-mean
for target in compared:
    f_value, t_value = f_mean[target], t_mean[target]
    print(target)
    if f_value < t_value:
        print("f = {0}\nt = {1}\n{0} ≪ {1} ?\n".format(
            str(f_value),
            str(t_value)
        ))
    else:
        print("f = {0}\nt = {1}\n{2}".format(
            str(f_value),
            str(t_value),
            stylize(
                "Nope. f > t\n",
                colored.fg("red")
            )
        ))

# One-line summary per target, showing the relation that actually holds
for target in compared:
    relation = "<" if f_mean[target] < t_mean[target] else ">="
    print("{0}: f = {1:.4f} {3} t = {2:.4f}".format(
        target,
        f_mean[target],
        t_mean[target],
        relation
    ))