Tests:
- Amplitudes 
- Connection probability
- no reciprocal
- distance dependent
- down larger than up

Connectome types:

1. ER control with same connection probability
2. ER control with same connection probability and more reciprocal connections
3. Added reciprocal connections
4. Symmetrized
5. Amplitude boosted
6. Added connections




In [None]:
import numpy
import pandas
import conntility
import h5py
import scipy

# Tell numpy to stay silent on divide-by-zero and invalid-value floating
# point operations instead of emitting warnings.
numpy.seterr(divide="ignore", invalid="ignore")

# HDF5 file holding the connectivity matrices that are loaded below.
fn = "../../../artefacts/peng_et_al_human_multi_patch.h5"

def load_all(fn, prefixes):
    """Load every connectivity matrix stored under the given HDF5 groups.

    Args:
        fn: Path to the HDF5 file to read.
        prefixes: Names of the groups in the file to load. Each name is also
            used as the "matrix_type" label of the matrices found below it.

    Returns:
        A ``conntility.ConnectivityGroup`` wrapping a ``pandas.Series`` of
        ``ConnectivityMatrix`` objects. The outer index level is named
        "matrix_type"; the inner level "matrix_number" counts the matrices
        of one type starting from 0.
    """
    def load_type(fn, prefix):
        # Only the member names are needed from this handle. Close it right
        # away so no file handle is leaked; conntility opens the file itself.
        with h5py.File(fn, "r") as h5:
            mat_names = list(h5[prefix].keys())
        ret = [conntility.ConnectivityMatrix.from_h5(fn, _m, prefix)
               for _m in mat_names]
        idx = pandas.DataFrame({"matrix_number": numpy.arange(len(ret))})
        return pandas.Series(ret, index=pandas.MultiIndex.from_frame(idx))

    mats = pandas.concat([load_type(fn, _p) for _p in prefixes], axis=0,
                         names=["matrix_type"], keys=prefixes)
    return conntility.ConnectivityGroup(mats)

# Load only the matrices stored under the "er" prefix.
# NOTE(review): "tr" looks like a second prefix that could be added to the
# list -- confirm against the contents of the file.
grp = load_all(fn, ["er"]) # "tr"


In [3]:
def avg_amps(mat, node_props, **kwargs):
    """Return the stored entries of ``mat`` as a pandas Series.

    Despite the name, nothing is averaged here: every value in ``mat.data``
    is returned as is.

    Args:
        mat: Matrix object exposing its stored values as ``mat.data``.
        node_props: Unused; accepted to match the analysis call signature.
        **kwargs: Unused.

    Returns:
        ``pandas.Series`` holding the values of ``mat.data``.
    """
    edge_values = mat.data
    return pandas.Series(edge_values)

# Analysis configuration with a single analysis, "amplitudes", that runs
# avg_amps on the matrices of the group.
# NOTE(review): "output": "Series" presumably tells conntility that the
# analysis returns a pandas.Series per matrix -- confirm in conntility docs.
cfg_amps = {
    "analyses":{
        "amplitudes": {
            "source": avg_amps,
            "output": "Series"
        }
    }
}
# Flat array of the values of the "amplitudes" result; the generators
# defined further down draw random edge values from it.
amps = grp.analyze(cfg_amps)["amplitudes"].values

In [4]:
import numpy
import conntility
from scipy import sparse
import scipy.stats

# Node property columns that are copied into every generated matrix.
props_to_keep = [
    "piadistance",
    "coordinate_1",
    "coordinate_2",
    "coordinate_3",
]

# Width of the uniform jitter that add_random_offset applies per column:
# a value v shifts the column by an offset drawn from [-v/2, v/2).
noise = dict(
    piadistance=10.0,
    coordinate_1=25.0,
    coordinate_2=25.0,
    coordinate_3=25.0,
)

def permutate(con, also_matrix):
    """Build a new ConnectivityMatrix with the nodes of ``con`` shuffled.

    Args:
        con: ConnectivityMatrix whose node order is randomized.
        also_matrix: If true, rows and columns of the matrix are reordered
            with the same permutation as the node properties. If false,
            only the node properties are shuffled and the matrix of ``con``
            is reused unchanged.

    Returns:
        A new ``conntility.ConnectivityMatrix`` whose vertex properties are
        re-indexed with a fresh ``RangeIndex`` named "index".
    """
    n_nodes = len(con)
    order = numpy.random.permutation(n_nodes)
    shuffled_props = con._vertex_properties.iloc[order].set_index(
        pandas.RangeIndex(n_nodes, name="index"), drop=True
    )
    if also_matrix:
        # Apply the permutation to the columns first, then to the rows.
        reordered = con.array[:, order][order]
        return conntility.ConnectivityMatrix(sparse.coo_matrix(reordered),
                                             vertex_properties=shuffled_props)
    return conntility.ConnectivityMatrix(con.matrix, vertex_properties=shuffled_props)


def add_random_offset(mat, props_dict):
    """Jitter selected vertex property columns of ``mat`` in place.

    Args:
        mat: Object with a ``_vertex_properties`` DataFrame and a length
            equal to its number of nodes.
        props_dict: Maps a column name to the jitter width ``w``; each node
            gets an independent uniform offset from ``[-w/2, w/2)``.

    Returns:
        The same ``mat`` object, after its columns were modified.
    """
    n_nodes = len(mat)
    vertex_props = mat._vertex_properties
    for column in props_dict:
        width = props_dict[column]
        jitter = width * numpy.random.rand(n_nodes) - width/2
        vertex_props[column] += jitter
    return mat

def original(mat, node_props, **kwargs):
    """Wrap the unmodified matrix, keeping only the selected node properties.

    Args:
        mat: Matrix that is passed on unchanged.
        node_props: DataFrame of node properties with an "index" column.
        **kwargs: Unused.

    Returns:
        ``conntility.ConnectivityMatrix`` built from ``mat`` with the
        ``props_to_keep`` columns as vertex properties.
    """
    indexed = node_props.set_index("index", drop=True)
    kept = indexed[props_to_keep]
    return conntility.ConnectivityMatrix(mat, vertex_properties=kept)

def er_control_with_same_size_fac(p, amps):
    """Create a generator of Erdos-Renyi style control matrices.

    Args:
        p: Probability that any ordered pair of distinct nodes is connected.
        amps: Pool of values from which edge values are drawn (with
            replacement).

    Returns:
        A function ``er_mdl(mat, node_props, **kwargs)`` that ignores the
        entries of ``mat`` and returns a random ConnectivityMatrix of the
        same shape, with shuffled and jittered node properties.
    """
    def er_mdl(mat, node_props, **kwargs):
        adjacency = numpy.random.rand(*mat.shape) <= p
        # No self-connections.
        numpy.fill_diagonal(adjacency, False)
        src, tgt = numpy.nonzero(adjacency)
        weights = numpy.random.choice(amps, len(src))
        random_graph = sparse.coo_matrix((weights, (src, tgt)),
                                         shape=adjacency.shape)
        kept_props = node_props.set_index("index", drop=True)[props_to_keep]
        control = conntility.ConnectivityMatrix(random_graph,
                                                vertex_properties=kept_props)
        # The matrix is random already, so only the node properties are shuffled.
        shuffled = permutate(control, False)
        return add_random_offset(shuffled, noise)
    return er_mdl

def boost_amplitudes(f, a):
    """Create a generator that rescales every edge value of a matrix.

    Args:
        f: Central scaling factor.
        a: Half-width of the uniform spread around ``f``; each edge gets its
            own factor drawn from ``[f - a, f + a)``.

    Returns:
        A function ``boost_mdl(mat, node_props, **kwargs)`` returning a
        ConnectivityMatrix with rescaled values, permuted nodes and jittered
        node properties. The input ``mat`` is copied, not modified.
    """
    def boost_mdl(mat, node_props, **kwargs):
        boosted = mat.copy()
        spread = (numpy.random.rand(boosted.nnz) - 0.5) * 2 * a
        boosted.data *= (spread + f)
        kept_props = node_props.set_index("index", drop=True)[props_to_keep]
        result = conntility.ConnectivityMatrix(boosted,
                                               vertex_properties=kept_props)
        shuffled = permutate(result, True)
        return add_random_offset(shuffled, noise)
    return boost_mdl

def add_remove_cons(interv_added, amps):
    """Create a generator that randomly adds or removes a few edges.

    Args:
        interv_added: ``(low, high)`` passed to ``numpy.random.randint``; the
            drawn integer is the number of edges to add (positive) or to
            remove (negative).
        amps: Pool of values from which the values of added edges are drawn.

    Returns:
        A function ``added_mdl(mat, node_props, **kwargs)`` returning the
        modified ConnectivityMatrix with permuted nodes and jittered node
        properties.
    """
    def added_mdl(mat, node_props, **kwargs):
        delta = numpy.random.randint(*interv_added)
        coo = mat.tocoo()
        edges = list(zip(coo.row, coo.col))
        weights = list(coo.data)

        if delta < 0:
            # Remove up to -delta randomly chosen edges.
            for _ in range(-delta):
                if not edges:
                    break  # nothing left to remove
                victim = numpy.random.randint(len(edges))
                edges.pop(victim)
                weights.pop(victim)
        elif delta > 0:
            # Can be listed exhaustively because matrices are tiny
            n_nodes = len(node_props)
            candidates = []
            for src in range(n_nodes):
                for tgt in range(n_nodes):
                    if src == tgt or (src, tgt) in edges:
                        continue
                    candidates.append((src, tgt))
            n_new = numpy.minimum(delta, len(candidates))
            chosen = numpy.random.choice(len(candidates), n_new, replace=False)
            edges = edges + [candidates[_i] for _i in chosen]
            weights = weights + list(numpy.random.choice(amps, len(chosen)))

        if edges:
            rows, cols = zip(*edges)
            m = sparse.coo_matrix((weights, (rows, cols)), shape=coo.shape)
        else:
            m = sparse.coo_matrix(([], ([], [])), shape=coo.shape)

        kept_props = node_props.set_index("index", drop=True)[props_to_keep]
        result = conntility.ConnectivityMatrix(m, vertex_properties=kept_props)
        return add_random_offset(permutate(result, True), noise)
    return added_mdl

def er_with_rec_boost(p, p_boost, amps):
    """Create a generator of random matrices with extra reciprocal edges.

    Args:
        p: Connection probability used to draw the total number of edges.
        p_boost: Factor applied to ``p`` when drawing how many node pairs
            are connected in both directions.
        amps: Pool of values from which edge values are drawn.

    Returns:
        A function ``er_boost_mdl(mat, node_props, **kwargs)`` that ignores
        the entries of ``mat`` (only its shape is used) and returns a random
        ConnectivityMatrix with shuffled and jittered node properties.
    """
    def er_boost_mdl(mat, node_props, **kwargs):
        n = len(node_props)
        # Can be listed exhaustively because matrices are tiny
        pairs = [(hi, lo) for lo in range(n) for hi in range(lo + 1, n)]

        n_total = scipy.stats.binom(len(pairs) * 2, p).rvs()
        n_rec = scipy.stats.binom(int(n_total / 2), p * p_boost).rvs()
        n_uni = n_total - (2*n_rec)

        # Reciprocal pairs contribute one edge in each direction.
        edges = []
        for k in numpy.random.choice(len(pairs), n_rec, replace=False):
            forward = pairs[k]
            edges.extend([forward, forward[::-1]])

        # The remaining edges are drawn from all still unused ordered pairs.
        if n_uni > 0:
            free = [(src, tgt) for src in range(n) for tgt in range(n)
                    if src != tgt and (src, tgt) not in edges]
            assert len(free) >= n_uni
            picks = numpy.random.choice(len(free), n_uni, replace=False)
            edges.extend(free[k] for k in picks)

        if edges:
            rows, cols = zip(*edges)
            weights = numpy.random.choice(amps, len(rows))
            m = sparse.coo_matrix((weights, (rows, cols)), shape=mat.shape)
        else:
            m = sparse.coo_matrix(([], ([], [])), shape=mat.shape)

        kept_props = node_props.set_index("index", drop=True)[props_to_keep]
        result = conntility.ConnectivityMatrix(m, vertex_properties=kept_props)
        return add_random_offset(permutate(result, False), noise)
    return er_boost_mdl

def swap_based_on_direction(prop, p):
    """Create a generator that reverses some edges based on a node property.

    An edge is a candidate for reversal when it has no reverse counterpart
    and the value of ``prop`` at its source node is smaller than at its
    target node. Each candidate is reversed with probability ``p``.

    Args:
        prop: Name of the node property column that is compared.
        p: Probability of reversing a candidate edge.

    Returns:
        A function ``swapped_mdl(mat, node_props, **kwargs)`` returning the
        modified ConnectivityMatrix with permuted nodes and jittered node
        properties. Edge values stay attached to their (possibly reversed)
        edge.
    """
    def swapped_mdl(mat, node_props, **kwargs):
        coo = mat.tocoo()
        edges = list(zip(coo.row, coo.col))
        if not edges:
            m = sparse.coo_matrix(([], ([], [])), shape=coo.shape)
        else:
            values = node_props[prop]
            unidirectional = numpy.array(
                [(tgt, src) not in edges for src, tgt in edges]
            )
            source_is_lower = numpy.array(
                [values.iloc[src] < values.iloc[tgt] for src, tgt in edges]
            )
            coin = numpy.random.rand(len(edges)) < p
            flip = unidirectional & source_is_lower & coin
            redirected = [(tgt, src) if _flip else (src, tgt)
                          for (src, tgt), _flip in zip(edges, flip)]
            rows, cols = zip(*redirected)
            m = sparse.coo_matrix((coo.data, (rows, cols)), shape=coo.shape)
        kept_props = node_props.set_index("index", drop=True)[props_to_keep]
        result = conntility.ConnectivityMatrix(m, vertex_properties=kept_props)
        return add_random_offset(permutate(result, True), noise)
    return swapped_mdl

def add_reciprocal(p):
    """Create a generator that turns unidirectional edges into reciprocal ones.

    The number of edges is kept constant: for every unidirectional edge that
    receives a reverse partner, one other unidirectional edge is removed.

    Args:
        p: Success probability of the binomial draw that decides how many
            edges are made reciprocal. The number of trials is half the
            number of unidirectional edges.

    Returns:
        A function ``rec_mdl(mat, node_props, **kwargs)`` returning the
        modified ConnectivityMatrix with permuted nodes and jittered node
        properties. Surviving edges keep their own value; each newly created
        reverse edge takes over the value of one removed edge, so the set of
        edge values is unchanged.
    """
    def rec_mdl(mat, node_props, **kwargs):
        mat = mat.tocoo()
        edge_tpl = list(zip(mat.row, mat.col))
        if len(edge_tpl) == 0:
            m = sparse.coo_matrix(([], ([], [])), shape=mat.shape)
        else:
            edge_set = set(edge_tpl)  # constant-time reverse-edge lookups
            is_unidir = numpy.array([_tpl[::-1] not in edge_set for _tpl in edge_tpl])
            n_swap = scipy.stats.binom(int(numpy.sum(is_unidir) / 2), p).rvs()
            if n_swap == 0:
                edge_tpl_out = edge_tpl
                edge_data = mat.data
            else:
                rnd = numpy.random.permutation(numpy.nonzero(is_unidir)[0])
                to_swap = rnd[:n_swap]    # edges that receive a reverse partner
                to_drop = rnd[-n_swap:]   # edges removed to keep the edge count
                to_keep = numpy.hstack([rnd[n_swap:-n_swap], numpy.nonzero(~is_unidir)[0]])
                edge_tpl_out = [edge_tpl[i] for i in to_swap] +\
                            [edge_tpl[i][::-1] for i in to_swap] +\
                            [edge_tpl[i] for i in to_keep]
                # Reorder the values exactly like the edges. Using mat.data
                # in its original order here would attach values to the
                # wrong edges.
                edge_data = numpy.hstack([mat.data[to_swap],
                                          mat.data[to_drop],
                                          mat.data[to_keep]])
            assert len(edge_tpl) == len(edge_tpl_out)
            row, col = zip(*edge_tpl_out)
            m = sparse.coo_matrix((edge_data, (row, col)), shape=mat.shape)
        props = node_props.set_index("index", drop=True)
        ret = conntility.ConnectivityMatrix(m, vertex_properties=props[props_to_keep])
        return add_random_offset(permutate(ret, True), noise)
    return rec_mdl
        

In [None]:
# Scratch cell: shows one random ordering of the labels "mystery_1" to
# "mystery_7". The result is not stored.
numpy.random.permutation(["mystery_{0}".format(i + 1)
                          for i in range(7)])

In [None]:

# One analysis entry per matrix generator; every generator returns an object.
analyses = [
    {"source": generator, "output": "Object"}
    for generator in (
        original,
        er_control_with_same_size_fac(0.158, amps),
        boost_amplitudes(1.25, 0.15),
        add_remove_cons([-1, 3], amps),
        er_with_rec_boost(0.158, 3, amps),
        swap_based_on_direction("piadistance", 0.28),
        add_reciprocal(0.3),
    )
]

# Assign the labels "mystery_1" ... "mystery_7" to the generators in random
# order, so that a label does not reveal which generator produced a matrix.
rnd = numpy.random.permutation(["mystery_{0}".format(i + 1) for i in range(7)])
cfg = {"analyses": dict(zip(rnd, analyses))}

M = grp.analyze(cfg)

# Overwrite the "matrix_type" index level of each result with its mystery
# label, then merge all results into a single group.
for k, generated in M.items():
    midx = generated.index.to_frame()
    midx["matrix_type"] = k
    generated.index = pandas.MultiIndex.from_frame(midx)
M = conntility.ConnectivityGroup(pandas.concat(list(M.values()), axis=0))


In [None]:
# Configuration of the test analyses that are run on the mystery matrices.
# Every analysis calls a function ("method") from ./my_tests.py.
# NOTE(review): the contents of ./my_tests.py are not visible here, so the
# meaning of the positional "args" below is inferred -- confirm there.
cfg_tests = {
    "analyses":{
        "connection_strengths": {
            "source": "./my_tests.py",
            "method": "edge_values",
            "output": "Series",
        },
        "connection_probability":{ 
            "source": "./my_tests.py",
            "method": "connection_probability",
            "output": "Value",
        },
        "reciprocal_probability":{ 
            "source": "./my_tests.py",
            "method": "reciprocal_probability",
            "output": "Value",
        },
        "extra_and_missing_rec":
        { 
            "source": "./my_tests.py",
            "method": "extra_and_missing_reciprocals",
            # 0.158 is the same connection probability that is passed to the
            # random generators above.
            "args": [0.158],
            "output": "Value",
        },
        "down_minus_up":
        { 
            "source": "./my_tests.py",
            "method": "count_downwards_minus_upwards",
            "args": ["piadistance"],
            "output": "Value",
        }, 
        # p_prox and p_dist call the same method with different intervals.
        # NOTE(review): presumably the intervals are distance ranges computed
        # from the three coordinate columns -- confirm in my_tests.py.
        "p_prox": {
            "source": "./my_tests.py",
            "method": "connection_probability_within",
            "output": "Series",
            "args": [["coordinate_1", "coordinate_2", "coordinate_3"],
                     [0.0, 100.0]
            ]
        },
        "p_dist": {
            "source": "./my_tests.py",
            "method": "connection_probability_within",
            "output": "Series",
            "args": [["coordinate_1", "coordinate_2", "coordinate_3"],
                     [100.0, 5000.0]
            ]
        }
    }
}

res = M.analyze(cfg_tests)


def strength_matches(_x):
    """True if the mean of ``_x`` does not differ significantly from 0.64."""
    return scipy.stats.ttest_1samp(_x, 0.64).pvalue > 0.05


def con_prob_matches(_x):
    """True if the mean of ``_x`` does not differ significantly from 0.158."""
    return scipy.stats.ttest_1samp(_x, 0.158).pvalue > 0.05


def no_rec_overexpr(_x):
    """True if the mean of ``_x`` does not differ significantly from zero."""
    return scipy.stats.ttest_1samp(_x, 0.0).pvalue > 0.05


def is_dist_dep(_x):
    """True if columns 0 and 1 of ``_x`` differ significantly (paired t-test)."""
    return scipy.stats.ttest_rel(_x[0], _x[1]).pvalue <= 0.05


def is_directional(_x):
    """True if the mean of ``_x`` differs significantly from zero."""
    return scipy.stats.ttest_1samp(_x, 0.0).pvalue <= 0.05


# Apply one check per column to every matrix type; all checks use a
# significance threshold of 0.05.
evaluated = pandas.concat(
    [
        values.groupby("matrix_type").apply(check)
        for values, check in (
            (res["connection_strengths"], strength_matches),
            (res["connection_probability"].dropna(), con_prob_matches),
            (res["extra_and_missing_rec"].dropna(), no_rec_overexpr),
            (pandas.concat([res["p_prox"], res["p_dist"]], axis=1).dropna(), is_dist_dep),
            (res["down_minus_up"].dropna(), is_directional),
        )
    ],
    axis=1,
)
evaluated

In [8]:
import os

# Create the output directory if it is missing. exist_ok=True replaces the
# separate existence check, which could race with another process creating
# the directory in between.
os.makedirs("../datasets", exist_ok=True)
# Write all generated matrices into a single HDF5 file.
M.to_h5("../datasets/mystery_con_mats.h5")