# Selective Mutism response paradigm analysis: voice

Authors:
    - Jon Clucas, 2017  <jon.clucas@childmind.org>
Copyright ©2017, Apache v2.0 License

Imports & function definitions:

In [1]:
import json
from sklearn.preprocessing import LabelEncoder
import numpy as np
import os
import pandas as pd
import sys
import urllib
sm_eeg = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sm_eeg not in sys.path: 
    sys.path.append(sm_eeg)
from SM_openSMILE.openSMILE_analysis import openSMILE_csv
from utilities import fetch_data
from voice_functions import *
with open(os.path.join('../config/config.json')) as cfgf:
    osf = json.load(cfgf)['OSF_urls']

Load data from OSF:

In [None]:
emobase = pd.read_csv(osf['emobase']['features'])
ComParE_2016 = pd.read_csv(osf['ComParE_2016']['features'])
conditions = pd.read_csv(osf['conditions'])
dx = pd.read_csv(osf['dx'])

Harmonize data formats:

In [None]:
emobase, ComParE_2016, conditions, dx = update_encoding(
                                             [emobase, ComParE_2016, conditions, dx],
                                             [{"M00494594":"M00494954"},
                                              {"_":False, np.nan:False, "SM":True}
                                             ],
                                             ["URSI", "Dx?"],
                                             [None, bool]
                                         )

In [None]:
emobase.drop("Unnamed: 0", axis=1)

Merge datatables as necessary and integerize categorical data:

In [None]:
emobase = int_categorize(combine_data(emobase, conditions, dx))
ComParE_2016 = int_categorize(combine_data(ComParE_2016, conditions, dx))

## Random Forests

Import and initialize:

In [None]:
from sklearn.ensemble import RandomForestClassifier
RandomForestClassifier()

We can only have 1-D Y:

In [None]:
eX, eY = make_forest(emobase)

Try with 100 estimators:

In [None]:
clf = RandomForestClassifier(n_estimators=10)
clf = clf.fit(eX, eY)

In [None]:
clf.feature_importances_

In [None]:
features = pd.DataFrame.from_dict(dict(zip(
               emobase.columns.difference(["Dx?"]), clf.feature_importances_
               )), orient='index'
           ).rename(columns={0:"importance"}
           ).sort_values("importance", ascending=False)
print(features)

---

In [2]:
experimental_conditions = {
    k for k in osf[
        'emobase'
    ] if k != "features"
}
noise_replacement_conditions = {
    k for k in osf[
        'emobase'
    ][
        list(
            experimental_conditions
        )[0]
    ]
}

In [8]:
[openSMILE_csv.get_features(
    osf[cf][exp][nr],
    cf
) for cf in [
    "emobase",
    "ComParE_2016"
] for exp in experimental_conditions for nr in noise_replacement_conditions
]

/home/jclucas/selective-mutism-eeg/SM_response_paradigm_analysis/emobase/emobase_features.csv
/home/jclucas/selective-mutism-eeg/SM_response_paradigm_analysis/emobase/emobase_features.csv
/home/jclucas/selective-mutism-eeg/SM_response_paradigm_analysis/emobase/emobase_features.csv
/home/jclucas/selective-mutism-eeg/SM_response_paradigm_analysis/emobase/emobase_features.csv
/home/jclucas/selective-mutism-eeg/SM_response_paradigm_analysis/emobase/emobase_features.csv
/home/jclucas/selective-mutism-eeg/SM_response_paradigm_analysis/emobase/emobase_features.csv
/home/jclucas/selective-mutism-eeg/SM_response_paradigm_analysis/emobase/emobase_features.csv
/home/jclucas/selective-mutism-eeg/SM_response_paradigm_analysis/emobase/emobase_features.csv
/home/jclucas/selective-mutism-eeg/SM_response_paradigm_analysis/emobase/emobase_features.csv
/home/jclucas/selective-mutism-eeg/SM_response_paradigm_analysis/emobase/emobase_features.csv
/home/jclucas/selective-mutism-eeg/SM_response_paradigm_anal

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.
