# Selective Mutism response paradigm analysis: voice

Authors:
    - Jon Clucas, 2017  <jon.clucas@childmind.org>
Copyright ©2017, Apache v2.0 License

Imports & function definitions:

In [1]:
import json
from sklearn.preprocessing import LabelEncoder
import numpy as np
import os
import pandas as pd
import sys
import urllib
sm_eeg = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sm_eeg not in sys.path: 
    sys.path.append(sm_eeg)
from SM_openSMILE.openSMILE_analysis import openSMILE_csv
from utilities import fetch_data
from voice_functions import *
with open(os.path.join('../config/config.json')) as cfgf:
    osf = json.load(cfgf)['OSF_urls']

Load data from OSF:

In [None]:
emobase = pd.read_csv(osf['emobase']['features'])
ComParE_2016 = pd.read_csv(osf['ComParE_2016']['features'])
conditions = pd.read_csv(osf['conditions'])
dx = pd.read_csv(osf['dx'])

Harmonize data formats:

In [None]:
emobase, ComParE_2016, conditions, dx = update_encoding(
                                             [emobase, ComParE_2016, conditions, dx],
                                             [{"M00494594":"M00494954"},
                                              {"_":False, np.nan:False, "SM":True}
                                             ],
                                             ["URSI", "Dx?"],
                                             [None, bool]
                                         )

In [None]:
emobase.drop("Unnamed: 0", axis=1)

Merge datatables as necessary and integerize categorical data:

In [None]:
emobase = int_categorize(combine_data(emobase, conditions, dx))
ComParE_2016 = int_categorize(combine_data(ComParE_2016, conditions, dx))

## Random Forests

Import and initialize:

In [None]:
from sklearn.ensemble import RandomForestClassifier
RandomForestClassifier()

We can only have 1-D Y:

In [None]:
eX, eY = make_forest(emobase)

Try with 100 estimators:

In [None]:
clf = RandomForestClassifier(n_estimators=10)
clf = clf.fit(eX, eY)

In [None]:
clf.feature_importances_

In [None]:
features = pd.DataFrame.from_dict(dict(zip(
               emobase.columns.difference(["Dx?"]), clf.feature_importances_
               )), orient='index'
           ).rename(columns={0:"importance"}
           ).sort_values("importance", ascending=False)
print(features)

---
Collect all combinations of config files, experimental conditions, and noise replacement methods into a (48-item) list of 3-tuples:

In [13]:
config_exp_nr = [
    (
        cf,
        exp,
        nr
    ) for cf in [
        "emobase",
        "ComParE_2016"
    ] for exp in {
        k for k in osf[
            'emobase'
        ] if k != "features"
    } for nr in {
        k for k in osf[
            'emobase'
        ][
            list(
                experimental_conditions
            )[0]
        ]
    }
]

In [11]:
print(len(config_exp_nr))

48


In [35]:
c = {}
for cen in config_exp_nr:
    c[cen[0]] = {} if cen[0] not in c else c[cen[0]]
    c[cen[0]][cen[1]] = {} if cen[1] not in c[cen[0]] else c[cen[0]][cen[1]]
    c[cen[0]][cen[1]][cen[2]] = 0
pd.DataFrame(
    c,
    index=pd.MultiIndex.from_tuples(
        [(c[1], c[2]) for c in config_exp_nr],
        names=[
            "experimental condition",
            "noise replacement"
        ]
    )
)

Unnamed: 0_level_0,Unnamed: 1_level_0,ComParE_2016,emobase
experimental condition,noise replacement,Unnamed: 2_level_1,Unnamed: 3_level_1
"vocal, no stranger",adults replaced: pink noise,,
"vocal, no stranger",adults only,,
"vocal, no stranger",adults replaced: clone,,
"vocal, no stranger",adults removed,,
"vocal, no stranger",adults timeshifted,,
"vocal, no stranger",original,,
"button, no stranger",adults replaced: pink noise,,
"button, no stranger",adults only,,
"button, no stranger",adults replaced: clone,,
"button, no stranger",adults removed,,


[('vocal, no stranger', 'adults replaced: pink noise'),
 ('vocal, no stranger', 'adults only'),
 ('vocal, no stranger', 'adults replaced: clone'),
 ('vocal, no stranger', 'adults removed'),
 ('vocal, no stranger', 'adults timeshifted'),
 ('vocal, no stranger', 'original'),
 ('button, no stranger', 'adults replaced: pink noise'),
 ('button, no stranger', 'adults only'),
 ('button, no stranger', 'adults replaced: clone'),
 ('button, no stranger', 'adults removed'),
 ('button, no stranger', 'adults timeshifted'),
 ('button, no stranger', 'original'),
 ('button, with stranger', 'adults replaced: pink noise'),
 ('button, with stranger', 'adults only'),
 ('button, with stranger', 'adults replaced: clone'),
 ('button, with stranger', 'adults removed'),
 ('button, with stranger', 'adults timeshifted'),
 ('button, with stranger', 'original'),
 ('vocal, with stranger', 'adults replaced: pink noise'),
 ('vocal, with stranger', 'adults only'),
 ('vocal, with stranger', 'adults replaced: clone'),
 