# Basis for iterating on the ASMC API

This notebook demonstrates the current state of the new ASMC API under development.

In [1]:
import pathlib
import numpy as np

# Import all from ASMC
from asmc import *

# ASMC submodule for preparing decoding quantities
from asmc.preparedecoding import *

In [2]:
# Construct the ASMC object. At a minimum it needs the input files root and
# the decoding quantities file.

# The example data is looked up in a FILES directory that sits one level above
# the current working directory.
files_dir = pathlib.Path('.').resolve().parent.joinpath('FILES')

input_files_root = files_dir.joinpath('EXAMPLE', 'exampleFile.n300.array')
decoding_quantities_file = files_dir.joinpath(
    'DECODING_QUANTITIES', '30-100-2000.decodingQuantities.gz'
)

# Both locations are handed over as plain strings rather than Path objects.
asmc = ASMC(str(input_files_root), str(decoding_quantities_file))

In [3]:
# Decode specific pairs, and choose which outputs should be calculated and stored.
# The two lists are matched element-wise (1 with 4, 2 with 5, 3 with 6), as the
# per-pair indices shown further down confirm. All four output flags are enabled
# here so that each kind of result can be inspected in the following cells.

return_vals = asmc.decodePairs(
    individuals_a=[1, 2, 3],
    individuals_b=[4, 5, 6],
    per_pair_posteriors=True,
    sum_of_posteriors=True,
    per_pair_posterior_means=True,
    per_pair_MAPs=True,
)

In [4]:
# per_pair_indices() is always available, whichever output flags were passed to decodePairs.
# Each row identifies one decoded pair as [iInd, iHap, jInd, jHap]
# (presumably the individual index and haplotype number for each side of the pair -- TODO confirm).

return_vals.per_pair_indices()

array([[1, 1, 4, 1],
       [1, 1, 4, 2],
       [1, 2, 4, 1],
       [1, 2, 4, 2],
       [2, 1, 5, 1],
       [2, 1, 5, 2],
       [2, 2, 5, 1],
       [2, 2, 5, 2],
       [3, 1, 6, 1],
       [3, 1, 6, 2],
       [3, 2, 6, 1],
       [3, 2, 6, 2]], dtype=int32)

In [5]:
# The `per_pair_posteriors` option gives the largest amount of information: a list of 2D numpy arrays.
# The list has length numPairs (one entry per decoded pair), and each 2D array has size (numStates x numSites).
# NOTE: this displays the whole list; the repr is abbreviated by numpy but is still long.

return_vals.per_pair_posteriors()

[array([[5.0328155e-05, 4.8653153e-05, 1.8588711e-05, ..., 1.3817567e-04,
         1.3910205e-04, 1.3988082e-04],
        [5.8559928e-04, 5.6635996e-04, 2.1812640e-04, ..., 1.5791617e-03,
         1.5893405e-03, 1.5978598e-03],
        [2.2119428e-03, 2.1402165e-03, 8.3090545e-04, ..., 5.8486331e-03,
         5.8849230e-03, 5.9151589e-03],
        ...,
        [5.2487299e+02, 5.2656293e+02, 4.3085327e+02, ..., 4.6913040e+01,
         6.4050049e+01, 9.2578720e+01],
        [5.4038147e+02, 5.4220166e+02, 4.5117395e+02, ..., 4.1582565e+01,
         6.0002148e+01, 9.2350250e+01],
        [5.7183435e+02, 5.7293945e+02, 4.9948923e+02, ..., 3.3636688e+01,
         5.4310802e+01, 9.3977165e+01]], dtype=float32),
 array([[5.0033963e-05, 4.8349310e-05, 1.8266384e-05, ..., 1.3191681e-04,
         1.3285187e-04, 1.3363738e-04],
        [5.8220583e-04, 5.6285120e-04, 2.1434795e-04, ..., 1.5099356e-03,
         1.5202459e-03, 1.5288701e-03],
        [2.1992354e-03, 2.1270621e-03, 8.1652612e-04, ...,

In [6]:
# The `sum_of_posteriors` option gives a single 2D numpy array of size (numStates x numSites)
# (presumably the per-pair posteriors summed over all decoded pairs -- TODO confirm).

return_vals.sum_of_posteriors()

array([[8.5068529e-04, 8.3441625e-04, 5.4106547e-04, ..., 9.6203364e-04,
        9.7131415e-04, 9.7933377e-04],
       [9.7976290e-03, 9.6191363e-03, 6.3549569e-03, ..., 1.1016103e-02,
        1.1120295e-02, 1.1210141e-02],
       [3.6641557e-02, 3.6006615e-02, 2.4221251e-02, ..., 4.0879734e-02,
        4.1259639e-02, 4.1586574e-02],
       ...,
       [6.0983535e+03, 5.9642104e+03, 6.6230146e+03, ..., 7.2316230e+03,
        7.2303198e+03, 7.2272881e+03],
       [6.3727632e+03, 6.1780967e+03, 7.6444102e+03, ..., 8.8509473e+03,
        8.7864287e+03, 8.6914922e+03],
       [7.0040562e+03, 6.5945859e+03, 1.0479755e+04, ..., 1.3321224e+04,
        1.3020476e+04, 1.2586901e+04]], dtype=float32)

In [7]:
# Turning on the per_pair_posterior_means flag gives you the following:
# (NOTE(review): the original comment named the per_pair_posteriors flag; by analogy with
# the per_pair_MAPs cell this looks like it should be per_pair_posterior_means -- confirm.)

# A 2D numpy array with posterior means, of size (numPairs x numSites)
print(return_vals.per_pair_posterior_means())

# Two 1D numpy arrays with the column-wise min and argmin of this array,
# i.e. for each site, the smallest posterior mean and the row (pair) where it occurs:
print(return_vals.min_posterior_means())
print(return_vals.argmin_posterior_means())

[[11582.295  11621.537  11591.007  ...  4564.138   4751.7817  5019.9873]
 [11691.353  11729.577  11485.069  ...  4602.1494  4790.702   5060.468 ]
 [ 7473.013   7338.504   3611.9812 ...  5874.7397  6050.4727  6340.3086]
 ...
 [ 9156.706   9069.157   5968.9883 ... 35135.785  34483.758  33441.316 ]
 [11642.028  11685.2295 11668.593  ...  4463.173   4648.9307  4914.073 ]
 [12991.703  13152.506  22723.69   ...  6173.692   6353.275   6652.0234]]
[7473.013  7338.504  3272.1086 ... 4352.291  4535.672  4796.5107]
[2 2 4 ... 4 4 4]


In [8]:
# Turning on the per_pair_MAPs flag gives you the following:

# A 2D numpy array with posterior MAPs, of size (numPairs x numSites)
# (the entries are integers; presumably the index of the most probable state -- TODO confirm)
print(return_vals.per_pair_MAPs())

# Two 1D numpy arrays with the column-wise min and argmin of this array,
# i.e. for each site, the smallest MAP value and the row (pair) where it occurs:
print(return_vals.min_MAPs())
print(return_vals.argmin_MAPs())

[[29 29 41 ... 29 29 29]
 [29 29 41 ... 29 29 29]
 [29 29 29 ... 29 29 29]
 ...
 [29 29 29 ... 68 68 68]
 [29 29 41 ... 29 29 29]
 [29 29 68 ... 29 29 29]]
[24 24 24 ... 29 29 29]
[4 4 4 ... 0 0 0]
