# ASMC Python API

This notebook demonstrates how to decode pairs using ASMC from Python.

In [None]:
# Import all from ASMC
from asmc.asmc import *

import pathlib
import numpy as np

# The example data is looked up in an `ASMC_data` directory that is a sibling of the
# current working directory (i.e. both share the same parent directory).
data_dir = pathlib.Path('.').resolve().parent.joinpath('ASMC_data')

In [None]:
# Build the ASMC object. At a minimum it needs two paths, passed as plain strings:
#   1. the input files root
#   2. the decoding quantities file

input_files_root = str(data_dir.joinpath('examples', 'asmc', 'exampleFile.n300.array'))
dq_file = str(data_dir.joinpath('decoding_quantities', '30-100-2000_CEU.decodingQuantities.gz'))

asmc = ASMC(input_files_root, dq_file)

## Specify what outputs to calculate
All outputs default to `False` to avoid unnecessary computation, so enable only the ones you need.

In [None]:
# Outputs that can be stored in matrices: per pair posterior mean, per pair MAP,
# full per pair posteriors, and the sum of posteriors. All four are switched on here.
for enable_store in (
    asmc.set_store_per_pair_posterior_mean,
    asmc.set_store_per_pair_map,
    asmc.set_store_per_pair_posterior,
    asmc.set_store_sum_of_posterior,
):
    enable_store(True)

# Outputs that can be written to file: per pair posterior mean and per pair MAP.
# Writing is typically slow, so both are left switched off.
for enable_write in (
    asmc.set_write_per_pair_posterior_mean,
    asmc.set_write_per_pair_map,
):
    enable_write(False)

## You can specify lists of pairs to decode by their haploid index

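Haplotypes can also be referred to by name, using strings of the form `<id>_1` and `<id>_2`. In the example below the `<id>` part is `1_<x>`, giving names such as `1_5_1` and `1_5_2`.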

In [None]:
# Option 1: identify haplotypes by integer haploid index.
# NOTE(review): presumably pairs are formed element-wise (a[i] with b[i]) — confirm
# against the ASMC documentation.
a = [1, 2, 3]
b = [4, 5, 6]

# Option 2: identify haplotypes by name, here `1_<x>_1` paired with `1_<x>_2`.
sample_numbers = range(1, 149)
a_str = [f"1_{x}_1" for x in sample_numbers]
b_str = [f"1_{x}_2" for x in sample_numbers]

# Both forms are decoded in turn. As explained in the next section, results are
# overwritten on subsequent calls, so only the second call's results remain afterwards.
asmc.decode_pairs(a, b)
asmc.decode_pairs(a_str, b_str)
# asmc.decode_pairs()  # <-- decode all pairs in the dataset by omitting a list of pairs

## Get return values either by copy or reference

If you plan to call `decode_pairs` multiple times you should get a copy of the results: they will be overwritten on subsequent calls to `decode_pairs`.

However, if you are only decoding a single set of pairs, or if you are performing calculations batch-by-batch and do not require the results to be stored, it is safe to omit the copy and get a reference to the underlying data structure.

In [None]:
# Take a copy so that these results survive any later call to `decode_pairs`.
return_vals = asmc.get_copy_of_results()
# return_vals_ref = asmc.get_ref_of_results()  # <-- safe, if only calling decode_pairs a single time

## Get various information from the return structure

In [None]:
# Display the indices of the decoded pairs.
# NOTE(review): presumably one entry per decoded pair, in the same order as the
# per-pair arrays shown below — confirm against the ASMC documentation.
return_vals.per_pair_indices

In [None]:
# The `per_pair_posteriors` option (switched on above with `asmc.set_store_per_pair_posterior(True)`)
# gives the largest amount of information: a list of 2D numpy arrays.
# The list has length numPairs, and each 2D array has size (numStates x numSites).

return_vals.per_pair_posteriors

In [None]:
# The sum of posteriors (switched on above with `asmc.set_store_sum_of_posterior(True)`)
# is a single 2D numpy array of size (numStates x numSites).

return_vals.sum_of_posteriors

In [None]:
# Turning on the per pair posterior mean flag (`asmc.set_store_per_pair_posterior_mean(True)`)
# gives you the following:

# A 2D numpy array with posterior means, of size (numPairs x numSites)
print(return_vals.per_pair_posterior_means)

# Two 1D numpy arrays with the column-wise min and argmin of this array:
print(return_vals.min_posterior_means)
print(return_vals.argmin_posterior_means)

In [None]:
# Turning on the per pair MAP flag (`asmc.set_store_per_pair_map(True)`)
# gives you the following:

# A 2D numpy array with posterior MAPs, of size (numPairs x numSites)
print(return_vals.per_pair_MAPs)

# Two 1D numpy arrays with the column-wise min and argmin of this array:
print(return_vals.min_MAPs)
print(return_vals.argmin_MAPs)