### Load libraries and external data

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import colored
from colored import stylize
from data import dataloader
import datetime
import json
from neuralnet import nn
import numpy as np
import pandas as pd
import requests
from sklearn.metrics import confusion_matrix
import pixiedust_node #v≥0.2.5

Pixiedust database opened successfully


#### Load data from Firebase.
Requires [Firebase service account credentials](https://console.firebase.google.com/project/tingle-pilot-collected-data/settings/serviceaccounts/adminsdk) in JSON format saved in `./firebase-credentials`.

In [3]:
notepath = "data/notes.csv"
datapath = "data/pilot_data.csv"
corrections_path = "data/corrections.json"

if(
    os.path.exists(notepath) and
    os.path.exists(datapath)
):
    notes = pd.read_csv(notepath)
    pilot_data = pd.read_csv(datapath)
    print(stylize(
        "Data loaded from local file!",
        colored.fg(
            "green"
        )
    ))
else:
    pilot_data, notes = dataloader.load_from_firebase(
        notes=True,
        start=datetime.datetime(2018,3,6,9),
        stop=datetime.datetime(2018,3,7,18,32,47),
        combine=True,
        marked=False
    )
    pilot_data.to_csv(
        datapath,
        index=False
    )
    notes.to_csv(
        notepath,
        index=False
    )

if os.path.exists(corrections_path):
    with open("data/corrections.json", "r") as c:
        corrections = json.load(c)
else:
    corrections = {}

pixiedust_node 0.2.5 started. Cells starting '%%node' may contain Node.js code.
[38;5;2mData loaded from local file![0m


In [4]:
pilot_data = dataloader.index_participants(
    pilot_data,
    -4
)

In [5]:
pilot_data = pilot_data[pilot_data.participant != -2]

In [6]:
pilot_data = dataloader.correct_targets(
    pilot_data,
    'http://matter.childmind.org/js/tinglePilotAppScript.json'
)

In [7]:
pd.options.display.max_columns = 100
dataloader.count_ontarget_samples(pilot_data, True)

Unnamed: 0_level_0,Unnamed: 1_level_0,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget
Unnamed: 0_level_1,participant,-4,-3,-1,0,1,2,3,4,5,6,8,9,10,11
step,target,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
1,food,4,4,10,7,0,4,12,9,0,12,11,11,3,2
2,food,3,5,7,11,8,9,11,11,12,13,13,12,4,8
3,food,5,4,7,6,5,11,14,11,9,11,15,19,3,2
4,thumb,12,5,9,9,13,11,10,12,8,12,10,10,10,2
5,thumb,11,5,10,8,9,11,11,12,11,13,12,12,8,0
6,thumb,12,5,11,9,11,11,11,12,12,13,12,11,4,0
7,nails,11,6,8,10,9,10,16,10,13,12,11,11,2,0
8,nails,15,4,15,10,10,12,11,12,13,11,12,13,4,0
9,nails,13,8,12,6,9,15,11,11,13,12,12,13,3,0
10,smoke,14,19,12,11,10,13,11,15,11,14,11,13,9,0


In [8]:
pilot_data = dataloader.correct_corrections(
    pilot_data,
    corrections
)

In [9]:
dataloader.count_ontarget_samples(pilot_data, True)

Unnamed: 0_level_0,Unnamed: 1_level_0,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget
Unnamed: 0_level_1,participant,-4,-3,-1,0,1,2,3,4,5,6,8,9,10,11
step,target,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
1,food,4,4,10,7,0,4,12,9,0,12,11,11,3,2
2,food,3,5,7,11,8,9,11,11,12,13,13,12,4,8
3,food,5,4,7,6,5,11,14,11,9,11,15,19,3,2
4,thumb,12,5,9,9,13,11,10,12,8,12,10,10,10,2
5,thumb,11,5,10,8,9,11,11,12,11,13,12,12,8,0
6,thumb,12,5,11,9,11,11,11,12,12,13,12,11,4,0
7,nails,11,6,8,10,9,10,16,10,13,12,11,11,2,0
8,nails,15,4,15,10,10,12,11,12,13,11,12,13,4,0
9,nails,13,8,12,6,9,15,11,11,13,12,12,13,3,0
10,smoke,14,19,12,11,10,13,11,15,11,14,11,13,9,0


In [10]:
pilot_data = dataloader.update_too_few(
    pilot_data,
    "< 5"
)

In [11]:
dataloader.count_ontarget_samples(pilot_data, True)

Unnamed: 0_level_0,Unnamed: 1_level_0,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget
Unnamed: 0_level_1,participant,-4,-3,-1,0,1,2,3,4,5,6,8,9,10,11
step,target,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
1,food,5,6,10,7,8,15,12,9,18,12,11,11,18,19
2,food,3,5,7,11,8,9,11,11,12,13,13,12,21,8
3,food,5,6,7,6,5,11,14,11,9,11,15,19,21,23
4,thumb,12,5,9,9,13,11,10,12,8,12,10,10,10,19
5,thumb,11,5,10,8,9,11,11,12,11,13,12,12,8,7
6,thumb,12,5,11,9,11,11,11,12,12,13,12,11,20,17
7,nails,11,6,8,10,9,10,16,10,13,12,11,11,16,13
8,nails,15,7,15,10,10,12,11,12,13,11,12,13,23,12
9,nails,13,8,12,6,9,15,11,11,13,12,12,13,16,15
10,smoke,14,19,12,11,10,13,11,15,11,14,11,13,9,17


#### Load [Synaptic](http://caza.la/synaptic/)
If "Error: Cannot find module 'synaptic'", create and run these two cells:

1. ```
cd neuralnet
```

2. ```sh
!npm init -y
!npm install -s synaptic
```

In [12]:
%%node
var lstm = require('../../tingle_pilot_study/neuralnet/lstm.js');

---
### See all targets and number of available samples

In [13]:
with open("neuralnet/targets.json", 'r') as fp:
    targets = json.load(fp)

In [14]:
pd.options.display.max_columns = 100 # unabridged
dataloader.count_ontarget_samples(pilot_data, True)

Unnamed: 0_level_0,Unnamed: 1_level_0,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget,ontarget
Unnamed: 0_level_1,participant,-4,-3,-1,0,1,2,3,4,5,6,8,9,10,11
step,target,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
1,food,5,6,10,7,8,15,12,9,18,12,11,11,18,19
2,food,3,5,7,11,8,9,11,11,12,13,13,12,21,8
3,food,5,6,7,6,5,11,14,11,9,11,15,19,21,23
4,thumb,12,5,9,9,13,11,10,12,8,12,10,10,10,19
5,thumb,11,5,10,8,9,11,11,12,11,13,12,12,8,7
6,thumb,12,5,11,9,11,11,11,12,12,13,12,11,20,17
7,nails,11,6,8,10,9,10,16,10,13,12,11,11,16,13
8,nails,15,7,15,10,10,12,11,12,13,11,12,13,23,12
9,nails,13,8,12,6,9,15,11,11,13,12,12,13,16,15
10,smoke,14,19,12,11,10,13,11,15,11,14,11,13,9,17


In [15]:
targets

{'onbody': [{'target': ['food', 'thumb', 'nails', 'smoke'],
   'train-ontarget': ['rotate-mouth'],
   'train-onbody-offtarget': {'paint': ['paint-back-neck',
     'paint-ear',
     'paint-front-body',
     'paint-opposite-cheek',
     'paint-top-head'],
    'rotate': ['rotate-above-ear',
     'rotate-back-head',
     'rotate-cheek',
     'rotate-eyebrow',
     'rotate-nose',
     'rotate-opposite-cheek',
     'rotate-top-head']}},
  {'target': ['nose'],
   'train-ontarget': ['rotate-nose'],
   'train-onbody-offtarget': {'paint': ['paint-back-neck',
     'paint-ear',
     'paint-front-body',
     'paint-front-neck',
     'paint-opposite-cheek',
     'paint-top-head'],
    'rotate': ['rotate-above-ear',
     'rotate-back-head',
     'rotate-cheek',
     'rotate-chin',
     'rotate-eyebrow',
     'rotate-mouth',
     'rotate-opposite-cheek',
     'rotate-top-head']}},
  {'target': ['cheek'],
   'train-ontarget': ['rotate-cheek'],
   'train-onbody-offtarget': {'paint': ['paint-back-neck',


---
### Extract training and testing data
Define targets of interest and corresponding offtargets

with open(
    'data/targets.json',
    'r'
) as fp:
    targets = json.load(
        fp
    )[0]

Set parameters for nn:

In [16]:
input_signals = [
    "distance",
    "thermopile1",
    "thermopile2",
    "thermopile3",
    "thermopile4"
]
n_samples = [300, 250, 200, 150, 100, 50]

In [17]:
six = pilot_data[pilot_data.participant == 6]

Get training inputs and outputs, inputs that should evaluate ~true and inputs that should evaluate ~false

In [18]:
train = {}
for participant in list(
    dataloader.count_ontarget_samples(
        six,
        True
    ).columns.levels[1]
):
    participant = int(participant)
    train[
        participant
    ] = {}
    for obt in targets['onbody']:
        for targ in ["nose"]:
            train[
                participant
            ][
                targ
            ] = {}
            train[
                participant
            ][
                targ
            ][
                "train"
            ] = {}
            for method in [
                "paint",
                "rotate"
            ]:
                train[
                    participant
                ][
                    targ
                ][
                    method
                ] = nn.define_trainer_data(
                    six[
                        six.participant==participant
                    ].dropna(
                        how="any",
                        subset=[
                            *input_signals,
                            "ontarget"
                        ]
                    ),
                    {
                        "target": obt[
                            "train-ontarget"
                        ],
                        "offtarget": obt[
                            "train-onbody-offtarget"
                        ][
                            method
                        ]
                    },
                    input_signals
                )
            train[
                participant
            ][
                targ
            ][
                'offbody'
            ] = nn.define_trainer_data(
                six[
                    six.participant==participant
                ].dropna(
                    how="any",
                    subset=[
                        *input_signals,
                        "ontarget"
                    ]
                ),
                {
                    "target": obt[
                        "train-ontarget"
                    ],
                    "offtarget": targets[
                        "train-offbody"
                    ]
                },
                input_signals
            )
            train[
                participant
            ][
                targ
            ][
                "test"
            ] = nn.define_trainer_data(
                    six[
                        six.participant==participant
                    ].dropna(
                        how="any",
                        subset=[
                            *input_signals,
                            "ontarget"
                        ]
                    ),
                    {
                        "target": obt[
                            "train-ontarget"
                        ],
                        "offtarget": []
                    },
                    input_signals
                )

In [19]:
six_unsplit = []
for a in train['6']['nose']:
    six_unsplit = [
        *six_unsplit,
        *train['6']['nose'][a]
    ]

Train / test split

In [20]:
from sklearn.model_selection import train_test_split

In [21]:
X_train, X_test = train_test_split(six_unsplit, test_size=0.25, random_state=42)

Preview all inputs and training outputs

**Note**: These data take some time to copy across environments. Give the notebook some time between running cells across Python and JavaScript.

---
### Train and test

In [22]:
%%node
var networks = {};
networks["nose"] = lstm.train_lstm([5,5,5,1], X_train, 0.06, 0.06, 3000);

Error in callback <bound method VarWatcher.post_execute of <pixiedust_node.node.VarWatcher object at 0x7fe0444d5358>> (for post_execute):


RuntimeError: dictionary changed size during iteration

TRAINING 🏋 interations:3000 🏋 minimum error:0.06 🏋 rate:0.06
iterations 5 error 0.34728576179429527 rate 0.06
iterations 10 error 0.33053205287658116 rate 0.06
iterations 15 error 0.2617278680987736 rate 0.06
iterations 20 error 0.21123736613409394 rate 0.06
iterations 25 error 0.19334410795620383 rate 0.06
iterations 30 error 0.1837228167735956 rate 0.06
iterations 35 error 0.17272546505914022 rate 0.06
iterations 40 error 0.16190952585890456 rate 0.06
iterations 45 error 0.15326236790945477 rate 0.06
iterations 50 error 0.144127554056922 rate 0.06
iterations 55 error 0.1337614876616833 rate 0.06
iterations 60 error 0.12333615201978793 rate 0.06
iterations 65 error 0.11540487780517283 rate 0.06
iterations 70 error 0.10952297853326534 rate 0.06
iterations 75 error 0.10291492528986974 rate 0.06
iterations 80 error 0.0939643911806368 rate 0.06
iterations 85 error 0.09202006315102408 rate 0.06
iterations 90 error 0.09256844080421074 rate 0.06
iterations 95 error 0.08456265698765379 rate 0

In [23]:
%%node
test_out = lstm.test_lstms(X_test, networks["nose"]);

In [39]:
for t in test_out:
    print(pd.Series([o for u in test_out[t] for o in u]).describe())

count    2.750000e+02
mean     6.251199e-02
std      1.857540e-01
min      4.248764e-11
25%      7.342378e-06
50%      1.638272e-04
75%      8.349585e-03
max      9.940326e-01
dtype: float64
count    78.000000
mean      0.822893
std       0.302086
min       0.022121
25%       0.862004
50%       0.974631
75%       0.996599
max       0.999968
dtype: float64


In [None]:
%%node
var test_outputs = {};
test_outputs["nose"] = {"true":[],"false":[]};
for(var iteration=0; iteration < X_test.length; iteration++){
  if (X_test[iteration]["output"][0] == 1) {
    test_outputs["nose"]["true"].push(networks["nose"].activate(X_test[iteration]["input"]));
  } else {
    test_outputs["nose"]["false"].push(networks["nose"].activate(X_test[iteration]["input"]));
  }
}

---
### See outputs

In [None]:
test_outputs

In [None]:
def calc_confusion(negative, positive):
    """
    Function to calculate a confusion matrix
    
    Parameters
    ----------
    negative: list of floats
        outputs of neural nets with true negative inputs
        
    positive: list of floats
        outputs of neural nets with true positive inputs
        
    Returns
    -------
    confusion: matrix of floats
        tn, fp, fn, tp
        see http://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html
    """
    ytrue = [
        *[
            0 for output in negative
        ],
        *[
            1 for output in positive
        ]
    ]
    ypredicted = [
        *[
            int(round(o)) for o in negative
        ],
        *[
            int(round(o)) for o in positive
        ]
    ]
    return(confusion_matrix(ytrue, ypredicted))

If the training is adequate x ≈ 0 ∀ x in the following:

In [None]:
f = {target: [
        outputs for outputs in test_outputs[target]['false']
] for target in test_outputs}
f

If the training is adequate x ≈ 1 ∀ x in the following:

In [None]:
t = {target: [
        outputs for outputs in test_outputs[target]['true']
] for target in test_outputs}
t

Finally, if training is adequate, f ≪ t:

In [None]:
f_mean = {
    target: np.mean(f[target]) for target in [
        t1 for t in targets['onbody'] for t1 in t['target']
    ] if target in f and len(f[target])
}
t_mean = {
    target: np.mean(t[target]) for target in [
        t1 for t in targets['onbody'] for t1 in t['target']
    ] if target in t and len(t[target])
}
for target in t_mean:
    print(target)
    print(
        "f = {0}\nt = {1}\n{0} ≪ {1} ?\n".format(
            str(f_mean[target]),
            str(t_mean[target])
        ) if f_mean[target] < t_mean[target] else "f = {0}\nt = {1}\n{2}".format(
            str(f_mean[target]),
            str(t_mean[target]),
            stylize(
                "Nope. f > t\n",
                colored.fg("red")
            )
        )
    )
for target in t_mean:
    print("{0}: f = {1:.4f} < t = {2:.4f}".format(
        target,
        f_mean[target],
        t_mean[target]
    ))

In [None]:
calc_confusion(
    [
        output for o in test_outputs[
            "nose"
        ][
            "false"
        ] for output in o
    ],
    [
        output for o in test_outputs[
            "nose"
        ][
            "true"
        ] for output in o
    ]
)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
y_test = [
    *[0 for i in range(len(test_outputs["nose"]["false"]))],
    *[1 for i in range(len(test_outputs["nose"]["true"]))]
]
y_score = [
    *[
        round(output) for o in test_outputs[
            "nose"
        ][
            "false"
        ] for output in o
    ],
    *[
        round(output) for o in test_outputs[
            "nose"
        ][
            "true"
        ] for output in o
    ]
]
fpr = [0]
tpr = [0]
tp = 0
fp = 0
cumulative = []
total = max(
    len([y for y in y_score if y == 1]),
    len([y for y in y_test if y==1])
)
for i in range(len(y_test)):
    if y_score[i] == 1:
        if y_test[i] == 1:
            tp = tp + 1
            tpr.append(tp/total)
            fpr.append(fpr[-1])
        else:
            fp = fp + 1
            fpr.append(fp/total)
            tpr.append(tpr[-1])
        cumulative.append((tp + fp)/total)
for r in [fpr, tpr]:
    if r[-1] < 1:
        r.append(1)

In [None]:
plt.figure()
lw = 2
plt.plot(fpr, tpr, color='darkorange',
         lw=lw, label='ROC curve (area = %0.2f)' % auc(fpr, tpr))
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()