### Load libraries and external data

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all modules before
# each cell runs, so edits to the local helper modules are picked up.
%load_ext autoreload
%autoreload 2

In [2]:
import datetime
import json
import os

import colored
from colored import stylize
import numpy as np
import pandas as pd
import requests
from sklearn.metrics import confusion_matrix
import pixiedust_node #v≥0.2.5

from data import dataloader
from neuralnet import nn

Pixiedust database opened successfully


#### Load data from Firebase.
Requires [Firebase service account credentials](https://console.firebase.google.com/project/tingle-pilot-collected-data/settings/serviceaccounts/adminsdk) in JSON format saved in `./firebase-credentials`.

In [21]:
# Local cache of the Firebase export, plus an optional corrections file.
notepath = "data/notes.csv"
datapath = "data/pilot_data.csv"
corrections_path = "data/corrections.json"

# Reuse the cached CSVs when both exist; otherwise load from Firebase for the
# given start/stop window and write the cache for the next run.
if os.path.exists(notepath) and os.path.exists(datapath):
    notes = pd.read_csv(notepath)
    pilot_data = pd.read_csv(datapath)
else:
    pilot_data, notes = dataloader.load_from_firebase(
        notes=True,
        start=datetime.datetime(2018, 3, 6, 9),
        stop=datetime.datetime(2018, 3, 7, 18, 32, 47),
        combine=True,
        marked=False
    )
    pilot_data.to_csv(datapath, index=False)
    notes.to_csv(notepath, index=False)

# Corrections are optional: fall back to an empty mapping when the file is
# absent. The same path variable is used for the existence check and the read
# so the two cannot drift apart.
if os.path.exists(corrections_path):
    with open(corrections_path, "r") as c:
        corrections = json.load(c)
else:
    corrections = {}

In [22]:
# Re-index participants with the dataloader helper.
# NOTE(review): -4 is presumably the first participant index (the counts
# table shown below starts at -4) — confirm in dataloader.index_participants.
pilot_data = dataloader.index_participants(pilot_data, -4)

In [23]:
# Drop every row that belongs to participant -2.
pilot_data = pilot_data.loc[pilot_data["participant"].ne(-2)]

In [24]:
# Pass the data through dataloader.correct_targets together with the URL of
# the pilot app script.
# NOTE(review): presumably the script is the reference for the intended
# target of each step — confirm in dataloader.correct_targets.
pilot_data = dataloader.correct_targets(
    pilot_data, 'http://matter.childmind.org/js/tinglePilotAppScript.json'
)

In [25]:
# Let pandas render up to 100 columns so the per-participant table is not
# abridged, then display the on-target sample counts.
pd.set_option("display.max_columns", 100)
dataloader.count_ontarget_samples(pilot_data, True)

Unnamed: 0_level_0,Unnamed: 1_level_0,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget
Unnamed: 0_level_1,participant,-4,-3,-1,0,1,2,3,4,5,6,8,9,10,11
step,target,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
1,food,4,4,10,7,0,4,12,9,0,12,11,11,3,2
2,food,3,5,7,11,8,9,11,11,12,13,13,12,4,8
3,food,5,4,7,6,5,11,14,11,9,11,15,19,3,2
4,thumb,12,5,9,9,13,11,10,12,8,12,10,10,10,2
5,thumb,11,5,10,8,9,11,11,12,11,13,12,12,8,0
6,thumb,12,5,11,9,11,11,11,12,12,13,12,11,4,0
7,nails,11,6,8,10,9,10,16,10,13,12,11,11,2,0
8,nails,15,4,15,10,10,12,11,12,13,11,12,13,4,0
9,nails,13,8,12,6,9,15,11,11,13,12,12,13,3,0
10,smoke,14,19,12,11,10,13,11,15,11,14,11,13,9,0


In [27]:
# Display the on-target sample counts again (same call as the previous cell).
dataloader.count_ontarget_samples(pilot_data, True)

Unnamed: 0_level_0,Unnamed: 1_level_0,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget
Unnamed: 0_level_1,participant,-4,-3,-1,0,1,2,3,4,5,6,8,9,10,11
step,target,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
1,food,4,4,10,7,0,4,12,9,0,12,11,11,3,2
2,food,3,5,7,11,8,9,11,11,12,13,13,12,4,8
3,food,5,4,7,6,5,11,14,11,9,11,15,19,3,2
4,thumb,12,5,9,9,13,11,10,12,8,12,10,10,10,2
5,thumb,11,5,10,8,9,11,11,12,11,13,12,12,8,0
6,thumb,12,5,11,9,11,11,11,12,12,13,12,11,4,0
7,nails,11,6,8,10,9,10,16,10,13,12,11,11,2,0
8,nails,15,4,15,10,10,12,11,12,13,11,12,13,4,0
9,nails,13,8,12,6,9,15,11,11,13,12,12,13,3,0
10,smoke,14,19,12,11,10,13,11,15,11,14,11,13,9,0


In [10]:
# Apply the `corrections` mapping (loaded from the corrections JSON file, or
# empty when that file is absent) through the dataloader helper.
pilot_data = dataloader.correct_corrections(pilot_data, corrections)

In [11]:
# Display the on-target sample counts after applying the corrections.
dataloader.count_ontarget_samples(pilot_data, True)

Unnamed: 0_level_0,Unnamed: 1_level_0,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget
Unnamed: 0_level_1,participant,-4,-3,-1,0,1,2,3,4,5,6,8,9,10,11
step,target,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
1,food,4,4,10,7,0,4,12,9,0,12,11,11,3,2
2,food,3,5,7,11,8,9,11,11,12,13,13,12,4,8
3,food,5,4,7,6,5,11,14,11,9,11,15,19,3,2
4,thumb,12,5,9,9,13,11,10,12,8,12,10,10,10,2
5,thumb,11,5,10,8,9,11,11,12,11,13,12,12,8,0
6,thumb,12,5,11,9,11,11,11,12,12,13,12,11,4,0
7,nails,11,6,8,10,9,10,16,10,13,12,11,11,2,0
8,nails,15,4,15,10,10,12,11,12,13,11,12,13,4,0
9,nails,13,8,12,6,9,15,11,11,13,12,12,13,3,0
10,smoke,14,19,12,11,10,13,11,15,11,14,11,13,9,0


In [12]:
# Run dataloader.update_too_few with the condition string "< 5".
# NOTE(review): presumably this targets step/participant groups with fewer
# than 5 on-target samples — confirm in dataloader.update_too_few.
pilot_data = dataloader.update_too_few(pilot_data, "< 5")

In [13]:
# Display the on-target sample counts after update_too_few.
dataloader.count_ontarget_samples(pilot_data, True)

Unnamed: 0_level_0,Unnamed: 1_level_0,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget
Unnamed: 0_level_1,participant,-4,-3,-1,0,1,2,3,4,5,6,8,9,10,11
step,target,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
1,food,5,6,10,7,8,15,12,9,18,12,11,11,18,19
2,food,3,5,7,11,8,9,11,11,12,13,13,12,21,8
3,food,5,6,7,6,5,11,14,11,9,11,15,19,21,23
4,thumb,12,5,9,9,13,11,10,12,8,12,10,10,10,19
5,thumb,11,5,10,8,9,11,11,12,11,13,12,12,8,7
6,thumb,12,5,11,9,11,11,11,12,12,13,12,11,20,17
7,nails,11,6,8,10,9,10,16,10,13,12,11,11,16,13
8,nails,15,7,15,10,10,12,11,12,13,11,12,13,23,12
9,nails,13,8,12,6,9,15,11,11,13,12,12,13,16,15
10,smoke,14,19,12,11,10,13,11,15,11,14,11,13,9,17


In [20]:
# List every distinct target label in sorted order.
sorted(pilot_data.target.unique())

['back-head',
 'cheek',
 'eyebrow',
 'food',
 'nails',
 'nose',
 'offbody-+',
 'offbody-X',
 'offbody-ceiling',
 'offbody-floor',
 'offbody-spiral',
 'paint-back-neck',
 'paint-ear',
 'paint-front-body',
 'paint-front-neck',
 'paint-opposite-cheek',
 'paint-top-head',
 'rotate-above-ear',
 'rotate-back-head',
 'rotate-cheek',
 'rotate-chin',
 'rotate-eyebrow',
 'rotate-mouth',
 'rotate-nose',
 'rotate-opposite-cheek',
 'rotate-top-head',
 'smoke',
 'thumb',
 'top-head']

#### Load [Synaptic](http://caza.la/synaptic/)
If the cell below fails with `Error: Cannot find module 'synaptic'`, create and run these two cells first:

1. ```
cd neuralnet
```

2. ```sh
!npm init -y
!npm install -s synaptic
```

In [16]:
%%node
// Load the project's LSTM helper module into the Node session as `lstm`.
// The path is relative, so it depends on the directory Node is running from.
var lstm = require('../../tingle_pilot_study/neuralnet/lstm.js');

---
### See all targets, number of available samples and iteration blocks

In [None]:
# Read the target definitions from the neuralnet folder and parse the JSON.
with open("neuralnet/targets.json", 'r') as fp:
    targets = json.loads(fp.read())

In [None]:
# Unabridged view: let pandas render up to 100 columns, then display the
# on-target sample counts.
pd.set_option("display.max_columns", 100)
dataloader.count_ontarget_samples(pilot_data, True)

Each entry of `targets` has the form `{target: [training, [all offtarget], [all onbody offtarget], [all nontraining rotation], [all offbody]]}`.

In [None]:
# For every target, report its number of on-target samples and the step(s)
# in which that target occurs.
for target in pilot_data.target.unique():
    # Distinct, non-missing steps for this target. Sorted because set
    # iteration order is arbitrary and the steps are printed below.
    target_steps = sorted(
        set(pilot_data.loc[pilot_data.target == target, "step"].dropna())
    )
    n_ontarget = len(pilot_data.loc[
        (pilot_data.target == target) &
        (pilot_data.ontarget)
    ])
    print(": ".join([
        target,
        "{0} on-target samples in {2} {1}".format(
            n_ontarget,
            ", ".join("%.0f" % step for step in target_steps),
            "step" if len(target_steps) == 1 else "steps"
        )
    ]))

---
### Extract training and testing data
Define targets of interest and corresponding offtargets

# Load the targets of interest; only the first element of the JSON document
# is kept.
with open('data/targets.json', 'r') as fp:
    targets = json.load(fp)[0]

Set parameters for nn:

In [None]:
# Names of the signals handed to the nn helpers in the cells below.
input_signals = ["distance", "thermopile1", "thermopile2", "thermopile3", "thermopile4"]
# Candidate sample counts, largest first: 300 down to 50 in decrements of 50.
n_samples = list(range(300, 0, -50))
# Steps 1 through 47 inclusive.
steps = [*range(1, 48)]

Get training inputs and outputs, inputs that should evaluate ~true and inputs that should evaluate ~false

In [None]:
# Target to train on. `targets[targ][0]` is used as the training target and
# `targets[targ][1]` as its list of off-targets; only rows from participants
# with index below 8 are passed in, with the first (largest) sample count.
targ = "eyebrow"
train = nn.define_trainer_data(
    pilot_data[pilot_data["participant"] < 8],
    {"target": [targets[targ][0]], "offtarget": targets[targ][1]},
    input_signals,
    steps,
    n_samples=n_samples[0]
)

In [None]:
# Display the training data returned by nn.define_trainer_data.
train

In [None]:
# Inputs for the chosen target itself, i.e. inputs that should evaluate ~true.
# NOTE(review): this uses the same participant filter (< 8) as the training
# cell and passes no `exclude`, so these inputs may overlap the training
# samples — confirm in nn.define_activation.
test = nn.define_activation(
    pilot_data[pilot_data["participant"] < 8],
    [targ],
    input_signals,
    steps,
    n_samples=n_samples[0]
)

In [None]:
# Inputs for the off-targets of the chosen target, i.e. inputs that should
# evaluate ~false. Same participant filter (< 8) as the training cell.
# NOTE(review): `exclude=n_samples[0]` presumably skips the samples already
# used for training — confirm in nn.define_activation.
test_off = nn.define_activation(
    pilot_data[pilot_data["participant"] < 8],
    targets[targ][1],
    input_signals,
    steps,
    n_samples=n_samples[0],
    exclude=n_samples[0]
)

Preview all inputs and training outputs

**Note**: These data take some time to copy across environments. Give the notebook some time between running cells across Python and JavaScript.

---
### Train and test

In [None]:
%%node
// Call lstm.train_lstm on `train` and store the result in `networks`,
// keyed by `target`.
// NOTE(review): `target` and `train` are not declared in any %%node cell;
// presumably they are the Python variables of the same names shared into
// Node by pixiedust_node. The Python training cell sets `targ`, not
// `target` — confirm that `target` holds the intended label here.
// NOTE(review): [5,5,5,1], 0.06, 0.06 and 3000 look like layer sizes, rate,
// error threshold and iteration count — confirm against lstm.js.
var networks = {};
networks[target] = lstm.train_lstm([5,5,5,1], train, 0.06, 0.06, 3000);

In [None]:
%%node
// Activate the network stored under `target` on every entry of `test` and
// `test_off`, collecting the results under the "true" and "false" keys.
// NOTE(review): `target`, `test` and `test_off` are not declared in any
// %%node cell; presumably they are shared from Python by pixiedust_node.
var test_outputs = {};
test_outputs[target] = {"true":[],"false":[]};
// Inputs expected to evaluate ~true.
for(var iteration=0; iteration < test.length; iteration++){
  test_outputs[target]["true"].push(networks[target].activate(test[iteration]));
  }
// Inputs expected to evaluate ~false.
for(var iteration=0; iteration < test_off.length; iteration++){
  test_outputs[target]["false"].push(networks[target].activate(test_off[iteration]));
}

---
### See outputs

In [None]:
def calc_confusion(negative, positive):
    """
    Function to calculate a binary confusion matrix

    Every network output is rounded to the nearest integer to obtain its
    predicted class, which is compared with the known class of its input
    (0 for `negative`, 1 for `positive`).

    Parameters
    ----------
    negative: list of floats
        outputs of neural nets with true negative inputs

    positive: list of floats
        outputs of neural nets with true positive inputs

    Returns
    -------
    confusion: 2 x 2 matrix of ints
        [[tn, fp], [fn, tp]]
        see http://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html
    """
    # Materialize once so any iterable is accepted and can be measured.
    negative, positive = list(negative), list(positive)
    ytrue = [0] * len(negative) + [1] * len(positive)
    ypredicted = [int(round(o)) for o in negative + positive]
    # Pin the label set: without it the matrix shrinks to 1 x 1 whenever only
    # one class occurs, which breaks the documented tn/fp/fn/tp layout.
    return confusion_matrix(ytrue, ypredicted, labels=[0, 1])

If the training is adequate x ≈ 0 ∀ x in the following:

In [None]:
# Per target, the network outputs for inputs that should evaluate ~false.
f = {
    target: list(test_outputs[target]['false'])
    for target in test_outputs
}
f

If the training is adequate x ≈ 1 ∀ x in the following:

In [None]:
# Per target, the network outputs for inputs that should evaluate ~true.
t = {
    target: list(test_outputs[target]['true'])
    for target in test_outputs
}
t

Finally, if training is adequate, f ≪ t:

In [None]:
# Mean network output per target, for targets that have at least one output.
f_mean = {
    target: np.mean(f[target])
    for target in targets
    if target in f and len(f[target])
}
t_mean = {
    target: np.mean(t[target])
    for target in targets
    if target in t and len(t[target])
}
# Only targets with both means can be compared; skipping the others avoids a
# KeyError when a target has true outputs but no false outputs.
compared = [target for target in t_mean if target in f_mean]

# Detailed report: flag in red every target whose false mean is not below
# its true mean.
for target in compared:
    f_str, t_str = str(f_mean[target]), str(t_mean[target])
    print(target)
    if f_mean[target] < t_mean[target]:
        print("f = {0}\nt = {1}\n{0} ≪ {1} ?\n".format(f_str, t_str))
    else:
        print("f = {0}\nt = {1}\n{2}".format(
            f_str,
            t_str,
            stylize(
                "Nope. f ≥ t\n",
                colored.fg("red")
            )
        ))

# One-line summary per target; the relation shown reflects the actual values
# instead of always claiming f < t.
for target in compared:
    print("{0}: f = {1:.4f} {3} t = {2:.4f}".format(
        target,
        f_mean[target],
        t_mean[target],
        "<" if f_mean[target] < t_mean[target] else "≥"
    ))