In [1]:
import numpy as np
from bokeh.plotting import output_notebook, Figure, show
from bokeh.resources import INLINE
from bokeh.models import Column, ColumnDataSource, Range1d, Row, Circle, HoverTool, Div, Spacer
from time import time


np.set_printoptions(suppress=True, precision=3)
output_notebook(resources=INLINE)


In [2]:

def _pair_colors(colors, i, j, sgrey):
    """Derive the fill colour and companion outline colour for the factor pair (i, j).

    :param colors: array with one row of three colour weights per factor
    :param i: int, row index of the first factor
    :param j: int, row index of the second factor
    :param sgrey: array, the grey reference colour scaled to [0, 1]
    :return: (color, light_color), both tuples of integer RGB components
    """
    color = (colors[i, :] * colors[j, :] * 128).astype(int)
    scolor = color / 255
    # Summed offset from the grey reference; it drives the exponent used below.
    md = (scolor - sgrey).sum()
    light_color = tuple(((scolor ** 4 ** md) * 255).astype(int))
    return tuple(color), light_color


def _apply_selection_glyphs(glyph, color, light_color, size, alpha):
    """Set the glyphs a renderer uses for selected and non-selected points."""
    glyph.selection_glyph = Circle(fill_alpha=np.sqrt(alpha), fill_color=color, line_color=None)
    glyph.nonselection_glyph = Circle(fill_alpha=1, fill_color='#EAEAEA', line_color=light_color,
                                      line_alpha=1, line_width=size/4)


def scatter_matrix(design, k_factors=None, width=None, height=None, margin=2, size=8,
                   alpha=1, all_range=None, source=None, full=False):
    """Plot pairwise scatter plots of the factors (columns) of a design table.

    :param design: 2-D numpy.ndarray with one column per factor; when exactly two factors
        are plotted and ``source`` is given, a pair of column names in ``source`` instead
    :param k_factors: int, number of factors to plot; inferred from ``design`` and capped
        at 10 when None
    :param width: int, total width in pixels shared by all columns (1000 when None)
    :param height: int, total height in pixels shared by all rows (800 when None)
    :param margin: int, pixels subtracted from each plot's share of width and height
    :param size: marker size passed to the circle glyphs
    :param alpha: marker alpha passed to the circle glyphs
    :param all_range: (start, end) applied to both axes of every plot, or None to leave
        the ranges unset
    :param source: existing ColumnDataSource, only used when exactly two factors are plotted
    :param full: bool, plot every (row, column) cell instead of only cells with row <= column
    :return: a single Figure when two factors are plotted, otherwise a Column of Rows

    Colours are drawn with ``np.random.choice`` and therefore differ between calls unless
    the caller seeds numpy's global random state.
    """
    if k_factors is None:
        k_factors = design.shape[1] if isinstance(design, np.ndarray) else len(design)
        k_factors = min(k_factors, 10)

    # Every plot receives an equal share of the requested size, less the margin.
    width = (1000 if width is None else width) // k_factors - margin
    height = (800 if height is None else height) // k_factors - margin

    if source is None or k_factors > 2:
        # Column i of the design is stored under the string key str(i).
        fsource = ColumnDataSource(data={str(i): design[:, i] for i in range(k_factors)})

    # One random triple of colour weights per factor; a pair's colour is their product.
    colors = np.arange(k_factors, k_factors * 14 // 5)
    colors = np.vstack([np.random.choice(colors, 3) for i in range(k_factors)]) / k_factors / 2

    sgrey = np.array((234, 234, 234)) / 255

    if k_factors == 2:
        # Two factors: a single plot at twice the per-cell size.
        color, light_color = _pair_colors(colors, 0, 1, sgrey)
        g = Figure(width=width * 2, height=height * 2, tools='box_select,wheel_zoom', toolbar_location=None)
        if source is not None:
            # Here ``design`` holds the two column names to read from the caller's source.
            glyph = g.circle(x=design[0], y=design[1], source=source, color=color, line_color=None, size=size, alpha=alpha)
            _apply_selection_glyphs(glyph, color, light_color, size, alpha)
        else:
            g.circle(x=str(0), y=str(1), source=fsource, color=color, line_color=None, size=size, alpha=alpha)

        if all_range is not None:
            g.y_range = Range1d(*all_range)
            g.x_range = Range1d(*all_range)
        return g

    rows = []
    for i in range(k_factors):
        row = []
        for j in range(k_factors):
            if i <= j or full:
                color, light_color = _pair_colors(colors, i, j, sgrey)
                g = Figure(width=width, height=height, tools='box_select,wheel_zoom', toolbar_location=None)
                glyph = g.circle(x=str(j), y=str(i), source=fsource, color=color, line_color=None, size=size, alpha=alpha)
                _apply_selection_glyphs(glyph, color, light_color, size, alpha)

                g.xaxis.visible = False
                g.yaxis.visible = False
                if all_range is not None:
                    g.y_range = Range1d(*all_range)
                    g.x_range = Range1d(*all_range)
                row.append(g)
            else:
                # Cells with row > column are left empty unless ``full`` is set.
                row.append(Spacer(width=width))
        rows.append(Row(*row))

    return Column(*rows)

def cc_v_bb_comparison(n=4, width=800, alpha=0.6, size=10, face='ccf'):
    """Lay out a central-composite and a Box-Behnken scatter matrix side by side.

    ``ccdesign`` and ``bbdesign`` are imported in a later cell and must be in scope
    before this function is called.

    :param n: int, passed to ``ccdesign`` and ``bbdesign`` and shown as the factor count
    :param width: int, total layout width in pixels, including the 40 px gap
    :param alpha: marker alpha passed to ``scatter_matrix``
    :param size: marker size passed to ``scatter_matrix``
    :param face: str, passed to ``ccdesign`` as its ``face`` argument
    :return: Column holding a row of captions above a row with the two scatter matrices
    """
    w = (width - 40) // 2
    h = width // 2
    cc = ccdesign(n, face=face)
    # Compare by value. The previous `face is 'ccf'` tested object identity, which only
    # succeeds when both strings happen to be interned and warns on Python 3.8+.
    all_range = (-1.5, 1.5) if face == 'ccf' else None
    cc_g = scatter_matrix(cc, size=size, height=h, width=w, alpha=alpha, all_range=all_range)

    bb = bbdesign(n)
    bb_g = scatter_matrix(bb, size=size, height=h, width=w, alpha=alpha,  all_range=(-1.5, 1.5))

    # Caption template: title, then factor count and experiment count.
    inner_text = ('<h1 style="text-align:center;">{}</h1>'
                  + '<h3 style="text-align:center;margin-top:5px;color:#7C7C7C;">{:d} Factors : {:d} Experiments</h3>')

    return Column(Row(Div(width=w, height=40, text=inner_text.format('Central-Composite', n, len(cc))),
                      Spacer(width=40),
                      Div(width=w, height=40, text=inner_text.format('Box-Behnken', n, len(bb)))),
                  Row(cc_g, Spacer(width=40), bb_g))

# Experimental Design
## Situational Awareness
#### Josh McCrary

<h2 style="margin:3px;color:#444444;">Design of Experiments</h2>
<p style="margin:3px;margin-left:15px;">Abbreviated as <strong style="color:#444444;">DOE</strong>, refers to the concept of controlled and prescribed experimentation or a specific design created for a study. The concept is also referred to as <strong style="color:#444444;">Experimental Design</strong>.</p>

<h1 style="margin:3px;color:#003CD6;">$\mathbb{Contents}$</h1>
<ul>
    <li><a href="#overview" style="text-decoration-line:none;"><h4 style="margin:0px;">Overview</h4></a></li>
    <li><h4 style="margin:0px">Glossary</h4></li>
    <li><h4 style="margin:0px">Designs</h4>
    <ul>
        <li><h4 style="margin:0px">Classic</h4></li>
        <li><h4 style="margin:0px">Quadratic</h4></li>
        <li><h4 style="margin:0px">Modern</h4></li>
        </ul></li>
</ul>

<h1 style="margin:3px;color:#444444;">Overview</h1><a id='overview'></a>

<h3 style="margin:3px;color:#000000;">Analysis</h3>

Core goal of analysis is to be able to accurately describe how some system, procedure, or policy behaves so that the knowledge is shareable. One must understand system behavior to be capable of accurately describing it. Need to experience it, collect information about it and learn how it works.


<h4 style="margin:3px;color:#003CD6;"> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $ \boxed{\mbox{statuses} \cup \mbox{results}} + \boxed{\mbox{learning}} \Rightarrow \mbox{Rules} $ </h4>

A generic representation of how we learn from analysis. Often analysis is performed with observational data, and useful models can be constructed which provide adequate accuracy and utility for understanding the system. However, observational studies are insufficient to prove causation since they do not isolate differences by identifying, tracking, and controlling the myriad of variables.

<h3 style="margin:3px;color:#000000;">Experimentation</h3>

Testing the system to obtain more information. Typically performed with a disciplined approach to isolate variation between experiments while tracking which states produce which results.

<h4 style="margin:3px;color:#003CD6;">  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $\mbox{status}_i \rightarrow \boxed{\mbox{Rules}} \rightarrow \mbox{result}_i$, for $i = 1, 2, 3, \ldots n$ </h4>

Rudimentary experimentation existed for many centuries, performing only a single experiment or only changing a single aspect of successive experiment statuses. Technology and scientific knowledge were certainly a major limitation, as many systems are affected by a variety of variables which had not yet been discovered, let alone could be measured or controlled with the methods that existed at the time. The primary factor which allowed a drastic increase in experiment complexity was an advance in statistics: being able to describe the statistical relationships between events.

<h1 style="margin:3px;color:#444444;">Overview</h1>

<h3 style="margin:3px;color:#000000">Statistics</h3>
<h4 style="margin:2px;color:#003CD6;">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Understand and quantify the relationships between joint variables and their interactions</h4>

Ronald A Fisher developed Design of Experiments techniques during the 1920s and 1930s at an agricultural research station north of London. With his expertise in statistics he was able to accurately draw conclusions from experiments with natural variation such as temperature and amount of precipitation. Now experiments involving several variables could be designed and the effect of each variable and interactions could be measured. Complexity of the experiments was still low as each additional variable in the design space would multiply the number of experiments.

<h3 style="margin:3px;color:#000000">Computers</h3>
<h4 style="margin:2px;color:#003CD6;">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Virtual experimentation environment and computational power expands the size of the design space</h4>

# Glossary

# Glossary
<h2 style="margin:3px">Parameter Terms</h2>

<h3 style="margin:3px;margin-top:9px;color:#444444;">Factor</h3>
<p style="margin:3px;margin-left:15px;">Aspects of an experiment which an experimenter controls or measures. Similar terms for machine learning, data engineering, engineering, and statistics: <strong style="color:#444444;">Feature</strong>, <strong style="color:#444444;">Field</strong>, <strong style="color:#444444;">input parameters</strong>, or <strong style="color:#444444;">independent variables</strong></p>

<h3 style="margin:3px;margin-top:9px;color:#444444;">Level</h3>
<p style="margin:3px;margin-left:15px;">Value or state that a <strong style="color:#444444;">Factor</strong> can take</p>

<h3 style="margin:3px;margin-top:9px;color:#444444;">Experiment</h3>
<p style="margin:3px;margin-left:15px;">A specific collection of <strong style="color:#444444;">Levels</strong> for the <strong style="color:#444444;">Factors</strong> in a <strong style="color:#444444;">Design</strong>, i.e. $\mbox{status}_i$</p>

<h3 style="margin:3px;margin-top:9px;color:#444444;">Replication</h3>
<p style="margin:3px;margin-left:15px;">Repetition of an experiment $\mbox{status}_i$ to understand/mitigate process variability</p>

<h3 style="margin:3px;margin-top:9px;color:#444444;">Design</h3>
<p style="margin:3px;margin-left:15px;">A collection of <strong style="color:#444444;">Experiments</strong> that achieve some desired property or efficiency, i.e. $\{\mbox{status}_1, \mbox{status}_2, \mbox{status}_3, \ldots, \mbox{status}_n\}$ </p>


# Glossary
<h2 style="margin:3px">Variable Terms</h2>

<h3 style="margin:3px;margin-top:9px;color:#444444;">Nuisance Variable</h3>
<p style="margin:3px;margin-left:15px;">Some uncontrollable component of the experiment which affects the outcome, i.e. things that cause different results when repeating a controlled experiment $\mbox{status}_i$</p>

<h3 style="margin:3px;margin-top:9px;color:#444444;">Known Nuisance Variable</h3>
<p style="margin:3px;margin-left:15px;">An uncontrollable component of the experiment which is measured and accounted for, recorded in $\mbox{status}_i$, in an experiment, i.e. outdoor temperature</p>

<h3 style="margin:3px;margin-top:9px;color:#444444;">Unknown Nuisance Variable</h3>
<p style="margin:3px;margin-left:15px;">An uncontrollable component of the experiment which cannot be measured for an experiment, i.e. randomness, measurement error</p>

# Glossary
<h2 style="margin:3px">Result Terms</h2>

<h3 style="margin:3px;margin-top:9px;color:#444444;">Response</h3>
<p style="margin:3px;margin-left:15px;">The output or <strong style="color:#444444;">dependent variable</strong>, considered a key metric used to measure some aspect of performance for a system</p>

<h3 style="margin:3px;margin-top:9px;color:#444444;">Effect</h3>
<p style="margin:3px;margin-left:15px;">Refers to the impact a <strong style="color:#444444;">level</strong> change of a <strong style="color:#444444;">factor</strong> or combination of <strong style="color:#444444;">factors</strong> has on the <strong style="color:#444444;">response</strong></p>

<h3 style="margin:3px;margin-top:9px;color:#444444;">Main Effect</h3>
<p style="margin:3px;margin-left:15px;">The impact a <strong style="color:#444444;">level</strong> change of one <strong style="color:#444444;">factor</strong> has on the <strong style="color:#444444;">response</strong></p>

<h3 style="margin:3px;margin-top:9px;color:#444444;">($n^{th}$ Order) Interaction (Effect)</h3>
<p style="margin:3px;margin-left:15px;">The impact that <strong style="color:#444444;">level</strong> changes within the combination of $n$ <strong style="color:#444444;">factors</strong> has on the <strong style="color:#444444;">response</strong></p>



# Designs

# Designs - Classic
<h4 style="margin:2px;color:#003CD6;">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Virtual experimentation environment and computational power expands the size of the design space</h4>

# Designs - Classic
<h3 style="margin:3px;margin-top:9px;color:#444444;">Factorial</h3>

In [4]:
from experimentspydesign import ff2n, fullfact, lhs, ccdesign, bbdesign

In [5]:
# Two-level factorial built from the int 6 (presumably six factors — confirm against ff2n),
# drawn as a scatter matrix with both axes of every plot fixed to (-1.5, 1.5).
des = ff2n(6)
g = scatter_matrix(des, width=600, height=400, alpha=0.5, size=8, all_range=(-1.5, 1.5))

In [6]:
show(g)

In [18]:
# Build two full-factorial designs (four factors at 2 levels, four factors at 3 levels)
# and a captioned scatter matrix for each; the next cell shows them side by side.
w = 400
f2 = [2, 2, 2, 2]
d2 = fullfact(f2)
f3 = [3, 3, 3, 3]
d3 = fullfact(f3)
g2 = scatter_matrix(d2, width=w, height=350, alpha=0.8, size=10, all_range=(-1.5, 1.5))
g3 = scatter_matrix(d3, width=w, height=350, alpha=0.8, size=10, all_range=(-1.5, 1.5))

# Caption template: title, then factor count and experiment count.
inner_text = ('<h1 style="text-align:center;">{}</h1>'
              + '<h3 style="text-align:center;margin-top:5px;color:#7C7C7C;">{:d} Factors : {:d} Experiments</h3>')

c2 = Column(Div(width=w, height=40, text=inner_text.format('2^{:d} Full Factorial'.format(len(f2)), len(f2), len(d2))), g2)
c3 = Column(Div(width=w, height=40, text=inner_text.format('3^{:d} Full Factorial'.format(len(f3)), len(f3), len(d3))), g3)

In [19]:
show(Row(c2, c3))

# Designs - Classic
<h3 style="margin:3px;margin-top:9px;color:#444444;">Latin Square</h3>

In [30]:
# Contrast an lhs design of 5 samples with the full factorial of five 5-level factors.
w = 400
n = 5
# NOTE(review): n_samples below and the factor count in the first caption are the
# literal 5 rather than n, so they only agree with the title while n == 5.
l5 = lhs(n, n_samples=5)
f5 = [5, 5, 5, 5, 5]
d5 = fullfact(f5)
gl5 = scatter_matrix(l5, width=w, height=350, alpha=0.9, size=8, all_range=(-1.5, 1.5))
gf5 = scatter_matrix(d5, width=w, height=350, alpha=0.5, size=8, all_range=(-1.5, 1.5))

# Caption template: title, then factor count and experiment count.
inner_text = ('<h1 style="text-align:center;">{}</h1>'
              + '<h3 style="text-align:center;margin-top:5px;color:#7C7C7C;">{:d} Factors : {:d} Experiments</h3>')

cl5 = Column(Div(width=w, height=40, text=inner_text.format('{:d} x {:d} Latin Square'.format(n, n), 5, len(l5))), gl5)
cf5 = Column(Div(width=w, height=40, text=inner_text.format('5^{:d} Full Factorial'.format(len(f5)), len(f5), len(d5))), gf5)

In [31]:
show(Row(cl5, cf5))

In [20]:
help(lhs)

Help on function lhs in module experimentspydesign.designs:

lhs(*design, n_samples=None, def_scale='traditional')
    Randomly combine 'n_samples' evenly spaced values for each factor with
    other factor values to create a space filling design
    
    :param design: definitions of factors' levels in design, factors defined with an int will be
        scaled per the method selected with the def_scale argument
    :param n_samples: int, number of sample values to create
    :param def_scale: str, paradigm for scaling factors that don't have values defined
            'traditional' : scale output to [-1, 1]
            'standard' : scale output to [0, 1]
            'level_n' : scale output to [1, n]
    :return: numpy.ndarray, a table with experiment values for each factor



# Designs - Quadratic
<h4 style="margin:2px;color:#003CD6;">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Virtual experimentation environment and computational power expands the size of the design space</h4>

# Designs - Quadratic
<h3 style="margin:3px;margin-top:9px;color:#444444;">Central Composite</h3>

# Designs - Quadratic
<h3 style="margin:3px;margin-top:9px;color:#444444;">Box-Behnken</h3>

# Designs - Modern
<h4 style="margin:2px;color:#003CD6;">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Virtual experimentation environment and computational power expands the size of the design space</h4>

# Designs - Modern
<h3 style="margin:3px;margin-top:9px;color:#444444;">Latin Hyper Cube</h3>

# Designs - Modern
<h3 style="margin:3px;margin-top:9px;color:#444444;">Space-Filling</h3>

# End
$$ \boxed{{\mbox{actions}}\over{\mbox{results}}} \rightarrow \boxed{\mbox{learning}} \Rightarrow \mbox{Rules} $$

In [3]:
from experimentspydesign import ff2n, fullfact, bbdesign, ccdesign, lhs

In [4]:
# Time Box-Behnken and central-composite design creation for 3 through 21 factors,
# recording the number of experiments and the elapsed seconds for each.
n_factors = np.arange(19) + 3
bb_n, cc_n, bb_t, cc_t = [], [], [], []
for k in n_factors:
    t0 = time()
    bb = bbdesign(int(k))
    t1 = time()
    ccc = ccdesign(int(k))
    t2 = time()

    # experiment counts
    bb_n.append(len(bb))
    cc_n.append(len(ccc))

    # elapsed seconds per design
    bb_t.append(t1 - t0)
    cc_t.append(t2 - t1)

    print('created both designs for {:d} factors in {:8.5f} seconds'.format(k, t2-t0))

# Arrays allow the element-wise ratio computed in the plotting cell.
bb_n, cc_n = np.array(bb_n), np.array(cc_n)

created both designs for 3 factors in  0.00200 seconds
created both designs for 4 factors in  0.00100 seconds
created both designs for 5 factors in  0.00201 seconds
created both designs for 6 factors in  0.00201 seconds
created both designs for 7 factors in  0.00201 seconds
created both designs for 8 factors in  0.00301 seconds
created both designs for 9 factors in  0.00301 seconds
created both designs for 10 factors in  0.00401 seconds
created both designs for 11 factors in  0.00401 seconds
created both designs for 12 factors in  0.00902 seconds
created both designs for 13 factors in  0.01407 seconds
created both designs for 14 factors in  0.03106 seconds
created both designs for 15 factors in  0.04780 seconds
created both designs for 16 factors in  0.16803 seconds
created both designs for 17 factors in  0.31927 seconds
created both designs for 18 factors in  0.69759 seconds
created both designs for 19 factors in  1.48408 seconds
created both designs for 20 factors in  4.44838 seconds

In [5]:
# Experiment-count comparison: Box-Behnken vs central composite per number of factors.
# n_factors, bb_n and cc_n come from the timing cell above. n_factors has 19 entries,
# so the [:22] slice keeps all of them.
source = ColumnDataSource(data={'k':n_factors[:22], 'bb':bb_n, 'cc':cc_n, 'ratio':np.round(bb_n / cc_n, decimals=3)})
f = Figure(width=700, height=180, tools='box_select', toolbar_location=None)
f.circle('k', 'bb', color='orange', size=6, source=source, legend='Box-Behnken', alpha=0.7)
f.circle('k', 'cc', size=6, source=source, legend='Central-Composite', alpha=0.7)
f.add_tools(HoverTool(tooltips=[('k factors', '@k'), ('bb', '@bb'), ('cc', '@cc'), ('ratio', '@ratio')]))
f.xaxis.axis_label = 'Number of factors'
f.yaxis.axis_label = 'Number of experiments'
f.legend.location = 'top_left'

# Shared sizing for the six pairwise plots below.
width = 220
height = 180
size = 12

# Rebinds `bb`, which the timing loop also assigned.
bb = bbdesign(3, 3, 3, n_centers=4)

# The three Box-Behnken projections share one data source.
bbsource = ColumnDataSource(data={'1':bb[:, 0], '2':bb[:, 1], '3':bb[:, 2]})
# The first plot of each row is given 40 px more width than the other two.
bb_12 = scatter_matrix(('1', '2'), width=width + 40, height=height, size=size, alpha=0.5, source=bbsource)
bb_12.title.text = 'Box-Behnken_12'
bb_13 = scatter_matrix(('1', '3'), width=width, height=height, size=size, alpha=0.5, source=bbsource)
bb_13.title.text = 'Box-Behnken_13'
bb_23 = scatter_matrix(('2', '3'), width=width, height=height, size=size, alpha=0.5, source=bbsource)
bb_23.title.text = 'Box-Behnken_23'

# One central-composite design per `face` option.
ccc = ccdesign(3, 3, 3, face='ccc') # default face paradigm
cci = ccdesign(3, 3, 3, face='cci')
ccf = ccdesign(3, 3, 3, face='ccf')

# Only the first two columns of each central-composite design are plotted.
ccsource = ColumnDataSource(data={'ccc_x':ccc[:, 0],
                                  'ccc_y':ccc[:, 1],
                                  'cci_x':cci[:, 0],
                                  'cci_y':cci[:, 1],
                                  'ccf_x':ccf[:, 0],
                                  'ccf_y':ccf[:, 1]})
ccc_g = scatter_matrix(('ccc_x', 'ccc_y'), width=width + 40, height=height, size=size, alpha=0.5, source=ccsource)
ccc_g.title.text = 'CC-Circumscribed_12'
cci_g = scatter_matrix(('cci_x', 'cci_y'), width=width, height=height, size=size, alpha=0.5, source=ccsource)
cci_g.title.text = 'CC-Inscribed_12'
ccf_g = scatter_matrix(('ccf_x', 'ccf_y'), width=width, height=height, size=size, alpha=0.5, source=ccsource)
ccf_g.title.text = 'CC-Face_12'

# Give all six plots the same axis ranges so the designs can be compared directly.
for p in [bb_12, bb_13, bb_23, ccc_g, cci_g, ccf_g]:
    p.x_range = Range1d(-1.8, 1.8)
    p.y_range = Range1d(-1.8, 1.8)

# ff2n
#### Only 2 levels for each factor
A design with every combination of each factor's high and low values, $ 2 ^ k $ experiments where $k$ is the number of factors.

In [11]:
# `switch` picks between explicit level lists and a bare int (3).
switch = True
factors = ([3, 4], [.001, .01, .1, 1.0], [42, 1138]) if switch else 3
# In the recorded output only the first and last value of each list appear in the design.
des = ff2n(factors, def_scale='level_n')
print(len(des), 'experiments:')
print(des)

8 experiments:
[[    3.        0.001    42.   ]
 [    4.        0.001    42.   ]
 [    3.        1.       42.   ]
 [    4.        1.       42.   ]
 [    3.        0.001  1138.   ]
 [    4.        0.001  1138.   ]
 [    3.        1.     1138.   ]
 [    4.        1.     1138.   ]]


# fullfact
#### Impossible to define a full factorial design with just the number of factors
To make a full factorial design, the minimum information needed is the number of levels each factor will take.
#### Number of levels for each factor is the length of the iterable or the integer value passed for the factor
Every combination of each factor's defined levels, $\prod _{i=1}^{k} n_i $ experiments, where $ n_i $ is the number of levels for factor $ i $.

In [7]:
# Full factorial from level counts only: 3 * 2 * 4 = 24 experiments.
f_n_levels = [3, 2, 4]
# NOTE(review): `factors` is assigned here but is not passed to fullfact below.
factors = ([3, 42, 1138], [.001, .01, .1, 1.0])
des = fullfact(f_n_levels, def_scale='level_n')
print(len(des), 'experiments:')
print(des)

24 experiments:
[[1 1 1]
 [2 1 1]
 [3 1 1]
 [1 2 1]
 [2 2 1]
 [3 2 1]
 [1 1 2]
 [2 1 2]
 [3 1 2]
 [1 2 2]
 [2 2 2]
 [3 2 2]
 [1 1 3]
 [2 1 3]
 [3 1 3]
 [1 2 3]
 [2 2 3]
 [3 2 3]
 [1 1 4]
 [2 1 4]
 [3 1 4]
 [1 2 4]
 [2 2 4]
 [3 2 4]]


# ccdesign
## Central Composite

Design used for quadratic models of a response. Expands 2-level full factorial designs by adding low and high *star points for each factor in the 'center' of each face on the $k$ dimensional hypercube of the design space.

Central Composite designs have $ 2^k + 2k + n_c $ experiments, where $n_c \geq 1$ is a number of additional points added at the origin for the purpose of balancing variance.

## Star point methods
#### Circumscribed
The *star points which are added to the ff2n design are scaled outside of the factor's level range to maintain rotatability or some amount of orthogonality in the design. 

#### Inscribed
The *star points are placed on the center of the face of each side of the k-dimensional cube and the corners are scaled down to maintain rotatability or some amount of orthogonality in the design.

#### Face
The *star points are placed on the center of the face of each side of the k-dimensional cube and the corners retain their low and high values, this design is not rotatable nor orthogonal

In [8]:
# Build one central-composite design per `face` option; only `ccc` is printed here.
ccc = ccdesign(3, 3, 3, face='ccc', def_scale='traditional') # default face paradigm
cci = ccdesign(3, 3, 3, face='cci')
ccf = ccdesign(3, 3, 3, face='ccf')
print(len(ccc), 'experiments:')
print(ccc)

19 experiments:
[[-1.    -1.    -1.   ]
 [ 1.    -1.    -1.   ]
 [-1.     1.    -1.   ]
 [ 1.     1.    -1.   ]
 [-1.    -1.     1.   ]
 [ 1.    -1.     1.   ]
 [-1.     1.     1.   ]
 [ 1.     1.     1.   ]
 [ 1.682  0.     0.   ]
 [ 0.     1.682  0.   ]
 [ 0.     0.     1.682]
 [-1.682  0.     0.   ]
 [ 0.    -1.682  0.   ]
 [ 0.     0.    -1.682]
 [ 0.     0.     0.   ]
 [ 0.     0.     0.   ]
 [ 0.     0.     0.   ]
 [ 0.     0.     0.   ]
 [ 0.     0.     0.   ]]


# bbdesign
## Box-Behnken

Similar to central composite design. Used for quadratic models of response variables, but is more efficient for larger number of factors and maintains good rotatability.

In [9]:
# Box-Behnken design for three factors with n_centers=4.
bb = bbdesign(3, 3, 9, n_centers=4)
print(len(bb), 'experiments:')
# Only the first 15 rows are printed, although the reported length covers every row.
print(bb[:15, :])

16 experiments:
[[-1 -1  0]
 [ 1 -1  0]
 [-1  1  0]
 [ 1  1  0]
 [-1  0 -1]
 [ 1  0 -1]
 [-1  0  1]
 [ 1  0  1]
 [ 0 -1 -1]
 [ 0  1 -1]
 [ 0 -1  1]
 [ 0  1  1]
 [ 0  0  0]
 [ 0  0  0]
 [ 0  0  0]]


# 3 Factors: bbdesign vs ccdesign

In [12]:
show(Column(Row(bb_12, bb_13, bb_23), Row(ccc_g, cci_g, ccf_g), f))

In [12]:
# Side-by-side comparison for n=7; `face` keeps its default of 'ccf'.
cc_v_bb = cc_v_bb_comparison(n=7, width=900, size=9, alpha=0.5)
show(cc_v_bb)

# lhs 
## Latin Hypercube Sampling
Choose the number of experiments in the design $ n $, each factor will have $ n $ levels evenly spaced from the low to high levels. 

If a factor has the same number of levels, $ n $, explicitly defined it will have those values in the final design rather than a linear spacing across the low to high values.

In [13]:
n = 14
# Three factor definitions when n is even: an int, a low/high pair, and an explicit list
# of n values (signed squares of i - 6). When n is odd the bare int 3 is used instead.
factors = (1, [3.2, 8.7], [(i - 6) * abs(i - 6) for i in range(n)]) if np.mod(n, 2) == 0 else 3
des = lhs(factors, n_samples=n, def_scale='standard')
print(len(des), 'experiments:')
print(des)

14 experiments:
[[  0.308   8.7     4.   ]
 [  0.154   5.315  49.   ]
 [  0.231   4.046  -9.   ]
 [  0.923   7.008  -1.   ]
 [  0.615   8.277 -36.   ]
 [  0.692   4.892  25.   ]
 [  1.      5.738  -4.   ]
 [  0.538   7.431  16.   ]
 [  0.      3.2    36.   ]
 [  0.462   7.854   1.   ]
 [  0.769   6.585   9.   ]
 [  0.077   6.162   0.   ]
 [  0.846   4.469 -16.   ]
 [  0.385   3.623 -25.   ]]


In [14]:
# 149-sample lhs design over seven factor definitions, shown as a scatter matrix.
# No all_range is passed, so the axis ranges are left unset.
des = lhs([2] * 7, n_samples=149)
des_g = scatter_matrix(des, size=6, height=600, width=800)
show(des_g)

In [19]:
# The constant `False` selects (4, 4, 4, 4); change it to True to use (2, 3, 4, 5).
f_n_levels = (2, 3, 4, 5) if False else (4, 4, 4, 4)
des = fullfact(f_n_levels)
g = scatter_matrix(des, width=600, height=400, alpha=0.5, size=12, all_range=(-1.5, 1.5))
print('{} full factorial {:d} experiments'.format(str(f_n_levels), len(des)))
show(g)

(4, 4, 4, 4) full factorial 256 experiments


# Factor Classes and a Design

In [20]:
from experimentspydesign import Design, FactorDiscrete, FactorContinuous

## Creating a Factor

In [21]:
# f0 lists its levels explicitly; f1 and f2 give a low/high pair plus a level count.
# f0 is unnamed and prints under a generated name (see the recorded output).
f0 = FactorDiscrete(3, 14, 42, 51)
print(f0)
f1 = FactorContinuous(3.2, 8.7, n_levels=3, name='richter')
print(f1)
# f2 rebinds the name used for a plain list in the full-factorial cell above.
f2 = FactorDiscrete(40, 60, n_levels=4, name='speed')

   factor_01
0  3 
1  14
2  42
3  51
        richter      
0    [3.2, 5.03333]  
1  [5.03333, 6.86667]
2    [6.86667, 8.7]  


In [22]:
# Read `.value` m times from the first level of each factor. In the recorded output the
# single-valued level repeats, while the range-type levels give a new value per read.
m = 5
print(f0[0], 'a', type(f0[0]).__name__, 'assumes the values:', [f0[0].value for i in range(m)], '{:d} times'.format(m))
print(f1[0], 'a', type(f1[0]).__name__, 'assumes the values:', [f1[0].value for i in range(m)], '{:d} times'.format(m))
print(f2[0], 'a', type(f2[0]).__name__, 'assumes the values:', [f2[0].value for i in range(m)], '{:d} times'.format(m))

3 a LevelDiscrete assumes the values: [3, 3, 3, 3, 3] 5 times
[3.2, 5.03333] a LevelContinuous assumes the values: [4.287226480492742, 5.01752030396413, 3.3579797819104154, 3.7798461500609504, 3.6268818600234174] 5 times
[40, 44] a LevelDiscrete assumes the values: [40, 41, 41, 43, 41] 5 times


In [31]:
print(type(f2.levels).__name__)
# Printed twice on purpose: in the recorded output the two tables differ, so each
# access to `.levels` yields fresh values.
print(f2.levels)
print(f2.levels)

FormattedDict
   speed
0  41
1  45
2  51
3  59
   speed
0  40
1  46
2  53
3  56


## Creating a Design

The `Design` class is a simple dictionary subclass to collect factor definitions and has a helper interface to automatically pass key names to the name attribute of the Factor instance:

In [34]:
design = Design()
f0 = FactorDiscrete(3, 14, 42, 51)
# Generated name before the factor is added to the design.
print(f0.name)
design['distance'] = f0 # variable previously assigned without being named
# After the assignment the factor prints under the key name 'distance'.
print(f0)
design['stealth'] = FactorContinuous(5, 10, n_levels=5)
print(design['stealth'])

factor_05
   distance
0  3 
1  14
2  42
3  51
     stealth  
0  [5.0, 6.0] 
1  [6.0, 7.0] 
2  [7.0, 8.0] 
3  [8.0, 9.0] 
4  [9.0, 10.0]


# Explicit Designs

When the number of levels in a Factor definition matches the number of levels in the design table then the values in the table will inherit the Factor's level values.

This is most apparent on full factorial designs since by definition the Factors take on each of their defined levels.

Latin hypercubes are a bit trickier: to get specific level values, define each Factor with the same number of levels as there are experiments in the design.

Without the explicit definitions the values in the raw design table are evenly (linearly) spaced between low and high values

In [35]:
# Mix int level counts with two factors that carry explicit level values
# (four values from squares, five values from cubes).
f_n_levels = (2, 3, 4, 
              FactorContinuous((np.arange(1, 5) ** 2 / 8 - 1).tolist()),
              FactorContinuous((np.arange(-2, 3) ** 3 / 8).tolist()))
des = fullfact(f_n_levels)
g = scatter_matrix(des, width=600, height=400, alpha=0.5, size=8, all_range=(-1.5, 1.5))
# NOTE(review): the level counts in this message are hard-coded, not derived from f_n_levels.
print('(2, 3, 4, 4, 5) full factorial {:d} experiments'.format(len(des)))
show(g)

(2, 3, 4, 4, 5) full factorial 480 experiments


In [36]:
# Register f2 under its own name; this expects f2 to still be the 'speed' factor
# created in the "Creating a Factor" cell.
design[f2.name] = f2
print(design)


distance                 distance
0  3 
1  14
2  42
3  51            
stealth       stealth  
0  [5.0, 6.0] 
1  [6.0, 7.0] 
2  [7.0, 8.0] 
3  [8.0, 9.0] 
4  [9.0, 10.0]
speed      speed  
0  [40, 44]
1  [45, 49]
2  [50, 54]
3  [55, 60]


In [37]:
# Full factorial built directly from the Design object.
ff = fullfact(design)
ffg = scatter_matrix(ff, width=400, height=400, alpha=0.5)
# NOTE(review): the level counts in this message are hard-coded, not read from `design`.
print('(4, 5, 4) full factorial {:d} experiments'.format(len(ff)))
show(ffg)

(4, 5, 4) full factorial 80 experiments


In [38]:
# Four continuous factors with n levels each: 'step*' list their values explicitly
# (linspace), 'rand*' give only a low/high pair and a level count.
n = 11
rdesign = Design()
rdesign['step0'] = FactorContinuous(np.linspace(-1, 1, n).tolist()) 
rdesign['step1'] = FactorContinuous(np.linspace(0, 1, n).tolist())
rdesign['rand0'] = FactorContinuous(-1, 1, n_levels=n)
rdesign['rand1'] = FactorContinuous(0, 1, n_levels=n)

In [39]:
# NOTE(review): n_samples is the literal 11, matching n from the previous cell only by value.
h = lhs(rdesign, n_samples=11)
hg = scatter_matrix(h, width=500, height=400, all_range=(-1.3, 1.3))
show(hg)

In [121]:
# Combine two factors with `*`. The recorded output of the next cell lists every pairing
# of an f1 level with an f2 level. f1 and f2 rebind names used by earlier cells.
f1 = FactorDiscrete(1, 10, 100)
f2 = FactorContinuous(4, 4.5, n_levels=5)
fc = f1 * f2

In [122]:
# Display the combined factor. The cell previously showed `f`, which in this notebook is
# the bokeh Figure from the comparison-plot cell; the recorded table belongs to `fc`.
fc

    factor_05__factor_06
 0   [1, [4.0, 4.1]] 
 1   [1, [4.1, 4.2]] 
 2   [1, [4.2, 4.3]] 
 3   [1, [4.3, 4.4]] 
 4   [1, [4.4, 4.5]] 
 5  [10, [4.0, 4.1]] 
 6  [10, [4.1, 4.2]] 
 7  [10, [4.2, 4.3]] 
 8  [10, [4.3, 4.4]] 
 9  [10, [4.4, 4.5]] 
10  [100, [4.0, 4.1]]
11  [100, [4.1, 4.2]]
12  [100, [4.2, 4.3]]
13  [100, [4.3, 4.4]]
14  [100, [4.4, 4.5]]

In [125]:
fc.levels

      factor_05__factor_06   
 0   [1, 4.088829427080821]  
 1   [1, 4.174518646693514]  
 2   [1, 4.268858671675487]  
 3   [1, 4.345211225302373]  
 4   [1, 4.413538003091516]  
 5   [10, 4.073430361751926] 
 6  [10, 4.1988110622017745] 
 7  [10, 4.2459001024504985] 
 8   [10, 4.399021208200122] 
 9   [10, 4.45732096367716]  
10  [100, 4.017267396653594] 
11  [100, 4.163947751643164] 
12  [100, 4.259446064747849] 
13  [100, 4.306620463504838] 
14  [100, 4.4587390095399755]

In [40]:
# Combine two explicit-level factors with `<`. In the recorded output only pairs whose
# 'lower' value is below the 'upper' value remain.
flower = FactorDiscrete([i * 10 + 30 for i in range(6)], name='lower') # 30 to 80 by 10
fupper = FactorDiscrete([i * 10 + 40 for i in range(6)], name='upper') # 40 to 90 by 10
finterval = flower < fupper
finterval

    lower__upper
 0  [30, 40]
 1  [30, 50]
 2  [30, 60]
 3  [30, 70]
 4  [30, 80]
 5  [30, 90]
 6  [40, 50]
 7  [40, 60]
 8  [40, 70]
 9  [40, 80]
10  [40, 90]
11  [50, 60]
12  [50, 70]
13  [50, 80]
14  [50, 90]
15  [60, 70]
16  [60, 80]
17  [60, 90]
18  [70, 80]
19  [70, 90]
20  [80, 90]

In [41]:
# Same `<` combination, but with factors defined by a low/high pair and a level count,
# so each level prints as a range rather than a single value.
rlower = FactorDiscrete(30, 79, n_levels=5, name='lower')
rupper = FactorDiscrete(40, 89, n_levels=5, name='upper')
rinterval = rlower < rupper
rinterval

        lower__upper    
 0  [[30, 39], [40, 49]]
 1  [[30, 39], [50, 59]]
 2  [[30, 39], [60, 69]]
 3  [[30, 39], [70, 79]]
 4  [[30, 39], [80, 89]]
 5  [[40, 49], [50, 59]]
 6  [[40, 49], [60, 69]]
 7  [[40, 49], [70, 79]]
 8  [[40, 49], [80, 89]]
 9  [[50, 59], [60, 69]]
10  [[50, 59], [70, 79]]
11  [[50, 59], [80, 89]]
12  [[60, 69], [70, 79]]
13  [[60, 69], [80, 89]]
14  [[70, 79], [80, 89]]

In [46]:
rinterval.levels

    lower__upper
 0  [39, 43]
 1  [35, 54]
 2  [39, 61]
 3  [36, 72]
 4  [33, 85]
 5  [43, 57]
 6  [46, 60]
 7  [44, 77]
 8  [47, 88]
 9  [52, 61]
10  [56, 73]
11  [54, 89]
12  [64, 76]
13  [69, 86]
14  [74, 87]

# Indices for half of a matrix

In [47]:
# (row, column) pairs on or above the diagonal of a 5x5 grid. scatter_matrix uses the
# same `i <= j` condition to decide which cells get a plot.
[(i, j) for i in range(5) for j in range(5) if j >= i]

[(0, 0),
 (0, 1),
 (0, 2),
 (0, 3),
 (0, 4),
 (1, 1),
 (1, 2),
 (1, 3),
 (1, 4),
 (2, 2),
 (2, 3),
 (2, 4),
 (3, 3),
 (3, 4),
 (4, 4)]

In [24]:
# Three factors defined only by a low/high pair; the next cell samples them with lhs.
mydesign = Design()
mydesign['hardness'] = FactorDiscrete(4, 9)
mydesign['richter'] = FactorContinuous(3.2, 8.7)
mydesign['pressure'] = FactorContinuous(2800, 3300)
mydesign


hardness        hardness
0  4
1  9    
 richter       richter
0  3.2
1  8.7  
pressure     pressure
0  2800
1  3300 

In [25]:
lhs(mydesign, n_samples=20)

array([[    9.   ,     8.7  ,  3221.053],
       [    7.   ,     3.489,  3115.789],
       [    5.   ,     8.411,  3063.158],
       [    7.   ,     3.2  ,  3010.526],
       [    6.   ,     7.253,  3142.105],
       [    8.   ,     4.647,  2957.895],
       [    6.   ,     7.832,  3168.421],
       [    5.   ,     4.937,  3247.368],
       [    8.   ,     5.516,  2878.947],
       [    4.   ,     5.805,  3036.842],
       [    8.   ,     6.384,  2984.211],
       [    9.   ,     4.358,  3089.474],
       [    6.   ,     6.674,  2931.579],
       [    5.   ,     5.226,  2826.316],
       [    8.   ,     4.068,  3273.684],
       [    5.   ,     7.542,  3194.737],
       [    7.   ,     8.121,  2852.632],
       [    4.   ,     6.095,  3300.   ],
       [    7.   ,     6.963,  2800.   ],
       [    6.   ,     3.779,  2905.263]])

In [3]:
from nltk.corpus import wordnet as wn

In [4]:
# Look up every WordNet synset for the word 'wolf' and keep the first one
# as the working example for the cells that follow.
res = wn.synsets('wolf')
wolf = res[0]

In [8]:
# Hypernyms that the wolf synset has in common with dog.n.01.
wolf.common_hypernyms(wn.synset('dog.n.01'))

[Synset('carnivore.n.01'),
 Synset('chordate.n.01'),
 Synset('vertebrate.n.01'),
 Synset('canine.n.02'),
 Synset('whole.n.02'),
 Synset('entity.n.01'),
 Synset('animal.n.01'),
 Synset('organism.n.01'),
 Synset('mammal.n.01'),
 Synset('object.n.01'),
 Synset('living_thing.n.01'),
 Synset('physical_entity.n.01'),
 Synset('placental.n.01')]

In [10]:
# All synsets WordNet lists for the word 'policy'.
wn.synsets('policy')

[Synset('policy.n.01'), Synset('policy.n.02'), Synset('policy.n.03')]

In [12]:
# Definition text attached to the selected wolf synset.
wolf.definition()

'any of various predatory carnivorous canine mammals of North America and Eurasia that usually hunt in packs'

In [13]:
# Collect the synsets of 'policy' and print each definition followed by a
# blank line.
policies = wn.synsets('policy')
for sense in policies:
    print(f"{sense.definition()} \n")

a plan of action adopted by an individual or social group 

a line of argument rationalizing the course of action of a government 

written contract or certificate of insurance 



In [15]:
# Collect the synsets of 'procedure' and print each definition followed by a
# blank line.
procedures = wn.synsets('procedure')
for gloss in (sense.definition() for sense in procedures):
    print(gloss, '\n')

a particular course of action intended to achieve a result 

a process or series of acts especially of a practical or mechanical nature involved in a particular form of work 

a set sequence of steps, part of larger computer program 

a mode of conducting legal and parliamentary proceedings 



In [22]:
# For every 'procedure' synset, print its name on one line and its
# definition on the next, followed by a blank line.
for synset in procedures:
    label, gloss = synset.name(), synset.definition()
    print(label)
    print(gloss, '\n')

procedure.n.01
a particular course of action intended to achieve a result 

operation.n.07
a process or series of acts especially of a practical or mechanical nature involved in a particular form of work 

routine.n.03
a set sequence of steps, part of larger computer program 

procedure.n.04
a mode of conducting legal and parliamentary proceedings 



In [27]:
# Lemmas of the synset addressed as 'procedure.n.03'.
# NOTE(review): the recorded output for this cell is an AttributeError about
# a missing 'lemma' attribute, which does not match the .lemmas() call shown
# here -- the cell looks like it was edited without being re-run. Re-execute
# it to refresh the output.
wn.synset('procedure.n.03').lemmas()

AttributeError: 'Synset' object has no attribute 'lemma'

In [21]:
# Compare every 'policy' sense with every 'procedure' sense: print a
# "<policy> v <procedure>" header, then one shared hypernym name per line,
# then a blank separator.
sense_pairs = [(left, right) for left in policies for right in procedures]
for policy_sense, procedure_sense in sense_pairs:
    print(policy_sense.name(), 'v', procedure_sense.name())
    shared = policy_sense.common_hypernyms(procedure_sense)
    for ancestor in shared:
        print(ancestor.name())
    print('\n')

policy.n.01 v procedure.n.01
psychological_feature.n.01
entity.n.01
abstraction.n.06


policy.n.01 v operation.n.07
psychological_feature.n.01
entity.n.01
abstraction.n.06


policy.n.01 v routine.n.03
entity.n.01
abstraction.n.06


policy.n.01 v procedure.n.04
psychological_feature.n.01
entity.n.01
abstraction.n.06


policy.n.02 v procedure.n.01
psychological_feature.n.01
entity.n.01
abstraction.n.06


policy.n.02 v operation.n.07
psychological_feature.n.01
entity.n.01
abstraction.n.06


policy.n.02 v routine.n.03
entity.n.01
abstraction.n.06


policy.n.02 v procedure.n.04
psychological_feature.n.01
entity.n.01
abstraction.n.06


policy.n.03 v procedure.n.01
entity.n.01
abstraction.n.06


policy.n.03 v operation.n.07
entity.n.01
abstraction.n.06


policy.n.03 v routine.n.03
written_communication.n.01
entity.n.01
communication.n.02
abstraction.n.06


policy.n.03 v procedure.n.04
entity.n.01
abstraction.n.06




In [31]:
# NOTE(review): wolf.tree is referenced here without being called, so this
# prints the attribute itself rather than a computed tree. The recorded
# output below does not look like it came from this exact statement --
# re-run the cell to confirm.
print(wolf.tree)

[Synset('entity.n.01')]


In [32]:
# Show the signature and docstring of Synset.tree to see which arguments it
# expects (exploratory; can be dropped from a finished notebook).
help(wolf.tree)

Help on method tree in module nltk.corpus.reader.wordnet:

tree(rel, depth=-1, cut_mark=None) method of nltk.corpus.reader.wordnet.Synset instance
    >>> from nltk.corpus import wordnet as wn
    >>> dog = wn.synset('dog.n.01')
    >>> hyp = lambda s:s.hypernyms()
    >>> from pprint import pprint
    >>> pprint(dog.tree(hyp))
    [Synset('dog.n.01'),
     [Synset('canine.n.02'),
      [Synset('carnivore.n.01'),
       [Synset('placental.n.01'),
        [Synset('mammal.n.01'),
         [Synset('vertebrate.n.01'),
          [Synset('chordate.n.01'),
           [Synset('animal.n.01'),
            [Synset('organism.n.01'),
             [Synset('living_thing.n.01'),
              [Synset('whole.n.02'),
               [Synset('object.n.01'),
                [Synset('physical_entity.n.01'),
                 [Synset('entity.n.01')]]]]]]]]]]]]],
     [Synset('domestic_animal.n.01'),
      [Synset('animal.n.01'),
       [Synset('organism.n.01'),
        [Synset('living_thing.n.01'),
         [

In [33]:
# Direct hypernyms of the wolf synset.
wolf.hypernyms()

[Synset('canine.n.02')]

In [34]:
# Build the nested hypernym tree for wolf: tree() takes a relation function
# (here, "hypernyms of a synset") and returns nested lists rooted at wolf.
wolf.tree(lambda h: h.hypernyms())

[Synset('wolf.n.01'),
 [Synset('canine.n.02'),
  [Synset('carnivore.n.01'),
   [Synset('placental.n.01'),
    [Synset('mammal.n.01'),
     [Synset('vertebrate.n.01'),
      [Synset('chordate.n.01'),
       [Synset('animal.n.01'),
        [Synset('organism.n.01'),
         [Synset('living_thing.n.01'),
          [Synset('whole.n.02'),
           [Synset('object.n.01'),
            [Synset('physical_entity.n.01'),
             [Synset('entity.n.01')]]]]]]]]]]]]]]

In [40]:
def hyp(h):
    """Return the direct hypernyms of synset ``h``.

    Intended as the relation callback passed to ``Synset.tree``. Written as a
    ``def`` rather than a lambda bound to a name so the function has a proper
    ``__name__`` in tracebacks and reprs.

    Parameters
    ----------
    h : object exposing a ``hypernyms()`` method (e.g. a WordNet Synset).

    Returns
    -------
    Whatever ``h.hypernyms()`` returns (a list of synsets for WordNet).
    """
    return h.hypernyms()

In [41]:
# Print the hypernym tree of every 'procedure' synset, each followed by a
# blank line.
for synset in procedures:
    hypernym_tree = synset.tree(hyp)
    print(hypernym_tree, '\n')

[Synset('procedure.n.01'), [Synset('activity.n.01'), [Synset('act.n.02'), [Synset('event.n.01'), [Synset('psychological_feature.n.01'), [Synset('abstraction.n.06'), [Synset('entity.n.01')]]]]]]] 

[Synset('operation.n.07'), [Synset('work.n.01'), [Synset('activity.n.01'), [Synset('act.n.02'), [Synset('event.n.01'), [Synset('psychological_feature.n.01'), [Synset('abstraction.n.06'), [Synset('entity.n.01')]]]]]]]] 

[Synset('routine.n.03'), [Synset('software.n.01'), [Synset('code.n.03'), [Synset('coding_system.n.01'), [Synset('writing.n.04'), [Synset('written_communication.n.01'), [Synset('communication.n.02'), [Synset('abstraction.n.06'), [Synset('entity.n.01')]]]]]]]]] 

[Synset('procedure.n.04'), [Synset('proceeding.n.01'), [Synset('due_process.n.01'), [Synset('group_action.n.01'), [Synset('act.n.02'), [Synset('event.n.01'), [Synset('psychological_feature.n.01'), [Synset('abstraction.n.06'), [Synset('entity.n.01')]]]]], [Synset('event.n.01'), [Synset('psychological_feature.n.01'), [Syn

In [37]:
# Print the hypernym tree of every 'policy' synset, each followed by a
# blank line.
for synset in policies:
    print(synset.tree(hyp), end=' \n\n')

[Synset('policy.n.01'), [Synset('plan_of_action.n.01'), [Synset('plan.n.01'), [Synset('idea.n.01'), [Synset('content.n.05'), [Synset('cognition.n.01'), [Synset('psychological_feature.n.01'), [Synset('abstraction.n.06'), [Synset('entity.n.01')]]]]]]]]] 

[Synset('policy.n.02'), [Synset('argumentation.n.02'), [Synset('reasoning.n.01'), [Synset('thinking.n.01'), [Synset('higher_cognitive_process.n.01'), [Synset('process.n.02'), [Synset('cognition.n.01'), [Synset('psychological_feature.n.01'), [Synset('abstraction.n.06'), [Synset('entity.n.01')]]]]]]]]]] 

[Synset('policy.n.03'), [Synset('contract.n.01'), [Synset('written_agreement.n.01'), [Synset('agreement.n.01'), [Synset('statement.n.01'), [Synset('message.n.02'), [Synset('communication.n.02'), [Synset('abstraction.n.06'), [Synset('entity.n.01')]]]]]], [Synset('legal_document.n.01'), [Synset('document.n.01'), [Synset('writing.n.02'), [Synset('written_communication.n.01'), [Synset('communication.n.02'), [Synset('abstraction.n.06'), [Syns