In [2]:
# default_exp domains

# Domains

Once the data is loaded, it is important to visualize all of the tests in their relative contexts.
No single test will hold all of the answers for a domain.

A useful interactive visualization system has the following properties:
 - Handles creating and arranging multiple figures
   - Handle passing raw data to transforms for normalization
   - Handle generating axes
 - Maintains a common data source for linked brushing
 - Can export selections (future)

Let's load our large dataset and start playing with the full data to work out bugs.

In [3]:
# Load IPython's autoreload extension and reload imported modules before each
# cell runs, so edits to the project's source files take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2


In [4]:
#export
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sbn
import yaml

from autoneuro.calculators import TestCalculator


In [19]:
# Numeric codes stored in the workbook, per column, and the labels that
# replace them.
code_labels = {
    'Race': {2: 'AA', 1: 'white', 3: 'asian'},
    'Sex': {1: 'male', 2: 'female'},
}

# 'na', '-' and 'nd' are read as missing values.
raw_visits = pd.read_excel('data/neuro_data.xlsx', na_values=['na', '-', 'nd'])

# One row per (VisitDate, PatientID) pair, then decode the categorical codes.
all_neuro = (
    raw_visits
    .groupby(['VisitDate', 'PatientID'])
    .first()
    .replace(code_labels)
)

# Workbook column name -> field name passed as `mapping` to the domains.
COLUMN_MAPPING = {
    'BVMTtrial1': 'trial1',
    'BVMTtrial2': 'trial2',
    'BVMTtrial3': 'trial3',
    'BVMTdelay': 'delay',
    'BVMThits': 'hits',
    'BVMTfalsepos': 'false_pos',
    'Stroop.word': 'stroop_word',
    'Stroop.color': 'stroop_color',
    'Stroop.colorword': 'stroop_color_word',
    'Sex': 'gender',
    'Race': 'race',
    'Education': 'education',
    'Age': 'age',
}

all_neuro.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,Visit,Age,Sex,Race,Race_specify,Education,EngFluency,SpanFluency,MMSE,DigitBackwardSENAS,...,Professional,Homeowner,Householdsize,Earnings.Indiv,Income.Indiv,Earnings.House,Income.House,Income.House.Weighted1,Income.House.Weighted2,Effort
VisitDate,PatientID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2013-09-24,A0091,1.0,36,male,AA,,16,3.0,0.0,27.0,6.0,...,,,,,,,,,,1.0
2013-09-24,A0107,1.0,49,female,AA,,12,3.0,0.0,27.0,3.0,...,,,,,,,,,,
2013-09-24,A0138,1.0,53,female,AA,,11,3.0,0.0,,,...,,,,,,,,,,1.0
2013-10-03,A0165,1.0,62,male,AA,,12,3.0,0.0,29.0,5.0,...,,,,,,,,,,
2013-10-08,A0127,1.0,62,male,AA,,16,3.0,0.0,28.0,2.0,...,,,,,,,,,,


This system will be built from visualizations that correspond to specific neurological tests.
These tests will be the base of this system.
In the abstract, a test defines a set of measurement fields and how they relate.
It can then generate visualization modules for both the raw and normalized fields as needed.
A collection of these `TestCalculator` objects will be combined into a `Domain`.

In [6]:
import yaml

def _calculator_from_yaml(path):
    """Build a ``TestCalculator`` from the YAML definition file at ``path``."""
    # Read inside a context manager so the file handle is closed as soon as
    # the YAML is parsed; a bare open() call leaves it open until it is
    # garbage collected.
    with open(path) as handle:
        return TestCalculator.from_config(yaml.full_load(handle))


bvmt_test_definition = 'data/test_calculators/BVMT.yaml'
bvmt_calc = _calculator_from_yaml(bvmt_test_definition)

heaton_norm_definition = 'data/norms/from_kate/heaton_bvmt.yaml'
heaton_bvmt_calc = _calculator_from_yaml(heaton_norm_definition)

norman_scaling_definition = 'data/norms/norman/norman_scaling.yaml'
norman_scale = _calculator_from_yaml(norman_scaling_definition)

norman_regression_definition = 'data/norms/norman/norman_bvmt_regnorm.yaml'
reg_calc = _calculator_from_yaml(norman_regression_definition)

# Calculators are combined with `+` into a single calculator.
full_bvmt_calc = bvmt_calc + heaton_bvmt_calc + norman_scale + reg_calc




In [7]:
#export
from bokeh.transform import factor_cmap, factor_mark
from bokeh.models import BooleanFilter, CDSView, BoxAnnotation, Band, IndexFilter, BooleanFilter, FactorRange
from bokeh.models import Legend, LegendItem

from bokeh.models import ColumnDataSource, HoverTool, Range1d
from bokeh.plotting import figure
from bokeh.layouts import gridplot, layout
from bokeh.io import show

In [8]:
# Direct Bokeh output to the notebook so that show() renders figures inline.
from bokeh.io import output_notebook, show
output_notebook()

Those tests will likely become more complicated and less "abstract" as we go.
But they're enough to represent each test and contain the information we need.

Now that we have a way of representing tests, let's merge them into domains and start visualizing.


In [9]:
#export

import os
from itertools import chain

class AbstractDomain(object):
    """Couple a ``TestCalculator`` with scatter figures that share data and axis ranges.

    Every figure built by one domain draws from the same ``ColumnDataSource``
    and reuses the range object registered for a field name, so figures that
    plot the same field share that axis range.
    """

    source = None
    data = None
    # Class-level fallback only; ``__init__`` gives every instance its own
    # registry so that separate domains never share range objects.
    ranges = {}
    tools = "pan,wheel_zoom,box_zoom,reset,box_select,lasso_select"

    def __init__(self, calculator):
        """

        Parameters
        ----------
        calculator : TestCalculator
            Calculator whose ``process_dataframe`` is applied in ``load_data``.
        """

        self.calculator = calculator
        # A fresh dict per instance: mutating the class-level ``ranges`` dict
        # would leak registered ranges into every other domain object.
        self.ranges = {}

    def _load_data(self):
        """Hook for subclasses; runs after ``self.data`` is set and before ``self.source`` is built."""
        pass

    def load_data(self, data, mapping=None):
        """Process ``data`` with the calculator and wrap the result for plotting.

        Parameters
        ----------
        data : object
            Passed unchanged to ``calculator.process_dataframe``.
        mapping : optional
            Passed unchanged as the ``mapping`` keyword of
            ``calculator.process_dataframe``.
        """
        self.data = self.calculator.process_dataframe(data, mapping=mapping)
        self._load_data()
        self.source = ColumnDataSource(self.data)

    def _range_for(self, field):
        """Return the range registered for ``field``, or a new ``Range1d(-10, 10)``."""
        if field in self.ranges:
            return self.ranges[field]
        return Range1d(-10, 10)

    def build_scatter_fig(self, x = None, y = None,
                          fig = None, scatter_kwargs = None):
        """Draw a scatter of ``x`` against ``y`` from the domain's data source.

        Parameters
        ----------
        x, y : str
            Field names handed to ``fig.scatter`` and used as the axis labels.
        fig : None, dict or figure
            ``None`` creates a figure with the domain defaults. A dict is
            treated as extra ``figure`` keyword arguments; its entries take
            precedence over the defaults. Any other object is used as the
            figure to draw on.
        scatter_kwargs : dict, optional
            Extra keyword arguments forwarded to ``fig.scatter``.

        Returns
        -------
        The figure that was drawn on.
        """
        default = {'x_range': self._range_for(x),
                   'y_range': self._range_for(y),
                   'tools': self.tools}
        if fig is None:
            fig = figure(**default)
        elif isinstance(fig, dict):
            # Merge instead of passing both dicts as **kwargs: a key present
            # in both (e.g. 'tools') would otherwise raise a TypeError for a
            # duplicated keyword argument.
            fig = figure(**{**default, **fig})

        scatter_kwargs = {} if scatter_kwargs is None else scatter_kwargs
        fig.scatter(x = x, y = y, source = self.source,
                    **scatter_kwargs)
        fig.xaxis.axis_label = x
        fig.yaxis.axis_label = y

        # Register the ranges so later figures plotting the same field reuse them.
        self.ranges[x] = fig.x_range
        self.ranges[y] = fig.y_range

        return fig



class MemoryDomain(AbstractDomain):
    """Domain whose default calculator is assembled from the BVMT definition files."""

    @staticmethod
    def _calculator_from_yaml(path):
        """Build a ``TestCalculator`` from the YAML definition file at ``path``."""
        # Read inside a context manager so the handle is closed once parsing
        # is done; a bare open() call leaves it open until garbage collection.
        # NOTE(review): yaml.full_load is kept as-is; only point this at
        # trusted definition files.
        with open(path) as handle:
            return TestCalculator.from_config(yaml.full_load(handle))

    @staticmethod
    def from_defaults(root_data = 'data/'):
        """Create a ``MemoryDomain`` from the definition files under ``root_data``.

        Parameters
        ----------
        root_data : str
            Directory containing ``test_calculators/BVMT.yaml``,
            ``norms/from_kate/heaton_bvmt.yaml`` and
            ``norms/norman/norman_bvmt_regnorm.yaml``.

        Returns
        -------
        MemoryDomain
            Domain wrapping the three calculators combined with ``+``.
        """

        bvmt_test_definition = os.path.join(root_data, 'test_calculators/BVMT.yaml')
        bvmt_calc = MemoryDomain._calculator_from_yaml(bvmt_test_definition)

        heaton_norm_definition = os.path.join(root_data, 'norms/from_kate/heaton_bvmt.yaml')
        heaton_bvmt_calc = MemoryDomain._calculator_from_yaml(heaton_norm_definition)

        norman_regression_definition = os.path.join(root_data, 'norms/norman/norman_bvmt_regnorm.yaml')
        reg_calc = MemoryDomain._calculator_from_yaml(norman_regression_definition)

        full_bvmt_calc = bvmt_calc + heaton_bvmt_calc + reg_calc

        return MemoryDomain(full_bvmt_calc)

In [16]:
# Build the memory domain from its default definition files, then process the
# full dataset using the workbook-to-field column mapping.
mem_domain = MemoryDomain.from_defaults()
mem_domain.load_data(all_neuro, mapping=COLUMN_MAPPING)


In [17]:
# Keyword arguments shared by every figure in this notebook.
FIG_SIZE = dict(plot_height=300, plot_width=300)

# Figures are built in this order on purpose: the first figure to plot a field
# registers the range object that later figures reuse for the same field.
delay_immed_fig = mem_domain.build_scatter_fig(
    x='heaton_immediate', y='heaton_delay', fig=FIG_SIZE)

heaton_norman_immediate = mem_domain.build_scatter_fig(
    x='heaton_immediate', y='norman_immediate', fig=FIG_SIZE)

heaton_norman_delay = mem_domain.build_scatter_fig(
    x='heaton_delay', y='norman_delay', fig=FIG_SIZE)

# One figure on the top row, two on the bottom row.
memory_rows = [
    [delay_immed_fig],
    [heaton_norman_immediate, heaton_norman_delay],
]
show(layout(memory_rows))


In [18]:
# Columns written to the processed-memory workbook.
cols = [
    'heaton_immediate',
    'heaton_retention',
    'heaton_delay',
    'heaton_recognition',
    'norman_immediate',
    'norman_delay',
    'age',
    'gender',
    'race',
    'education',
]

# reset_index() turns the index levels into ordinary columns so they are
# written even though index=False is passed.
memory_export = mem_domain.data[cols].reset_index()
memory_export.to_excel('processed_memory.xlsx', index=False)

Beyond the memory domain there is also the Executive Function domain.
This seems to rely mainly on the Trail Making Test and the Stroop Word Test.

Currently, only the Stroop test is implemented, using the Norman regression-based norms.

In [21]:
#export

class ExecutiveFunctionDomain(AbstractDomain):
    """Domain whose default calculator comes from the Norman Stroop regression-norm file."""

    @staticmethod
    def from_defaults(root_data = 'data/'):
        """Create an ``ExecutiveFunctionDomain`` from the definition file under ``root_data``.

        Parameters
        ----------
        root_data : str
            Directory containing ``norms/norman/norman_stroop_regnorm.yaml``.

        Returns
        -------
        ExecutiveFunctionDomain
            Domain wrapping the calculator built from that file.
        """

        norman_regression_definition = os.path.join(root_data, 'norms/norman/norman_stroop_regnorm.yaml')
        # Read inside a context manager so the handle is closed once parsing
        # is done; a bare open() call leaves it open until garbage collection.
        # NOTE(review): yaml.full_load is kept as-is; only point this at
        # trusted definition files.
        with open(norman_regression_definition) as handle:
            reg_calc = TestCalculator.from_config(yaml.full_load(handle))

        full_exec_calc = reg_calc

        return ExecutiveFunctionDomain(full_exec_calc)

In [26]:
# Build the executive-function domain from its default definition file, then
# process the full dataset using the workbook-to-field column mapping.
ex_fun_domain = ExecutiveFunctionDomain.from_defaults()
ex_fun_domain.load_data(all_neuro, mapping=COLUMN_MAPPING)

# Display the last rows of the processed frame.
ex_fun_domain.data.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,age,education,gender,race,stroop_color,stroop_color_word,stroop_word,norman_gender,norman_race,stroop_color_scaled,stroop_color_word_scaled,stroop_word_scaled,norman_stroop_color,norman_stroop_color_word,norman_stroop_word
VisitDate,PatientID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2020-02-27,B0551,58,20,male,AA,0.0,0.0,74.0,0.0,1.0,2.0,3.0,5.0,-2.405817,-1.663188,-1.418256
2020-02-27,B0742,41,11,male,AA,48.0,35.0,69.0,0.0,1.0,4.0,7.0,4.0,-1.24919,0.126202,-1.159039
2020-03-03,B0639,65,12,female,AA,60.0,36.0,99.0,1.0,1.0,6.0,8.0,9.0,-0.268558,0.894761,0.793249
2020-03-04,B0388,67,11,female,AA,68.0,35.0,80.0,1.0,1.0,8.0,7.0,6.0,,,
2020-03-05,B0303,58,11,male,AA,37.0,11.0,72.0,0.0,1.0,2.0,3.0,5.0,-1.357962,-0.724579,-0.280097


In [30]:

# Pairwise scatter plots of the three Norman Stroop fields. Build order is
# kept: the first figure to plot a field registers the range object that
# later figures reuse for the same field.
norman_color_color_word_fig = ex_fun_domain.build_scatter_fig(
    x='norman_stroop_color', y='norman_stroop_color_word', fig=FIG_SIZE)

norman_color__word_fig = ex_fun_domain.build_scatter_fig(
    x='norman_stroop_color', y='norman_stroop_word', fig=FIG_SIZE)

norman_color_word_word_fig = ex_fun_domain.build_scatter_fig(
    x='norman_stroop_color_word', y='norman_stroop_word', fig=FIG_SIZE)

# One figure on the top row, two on the bottom row.
stroop_rows = [
    [norman_color_color_word_fig],
    [norman_color__word_fig, norman_color_word_word_fig],
]
show(layout(stroop_rows))