In [1]:
import pathlib
import numpy as np
np.set_printoptions(threshold=10)

from asmc_data_module import HapsMatrixType

## Create a HapsMatrixType object from haps/samples/map files

In [2]:
# Example dataset shipped under ../test/data/haps_plus_samples, resolved to an
# absolute path from the current working directory.
data_dir = pathlib.Path("..").resolve().joinpath('test', 'data', 'haps_plus_samples')

# The three input files describing the example dataset.
haps_file = data_dir.joinpath('real_example.haps.gz')
sample_file = data_dir.joinpath('real_example.sample.gz')
map_file = data_dir.joinpath('real_example.map.gz')

# The factory is given plain strings, so the Path objects are converted first.
haps_mat = HapsMatrixType.createFromHapsPlusSamples(
    str(haps_file),
    str(sample_file),
    str(map_file),
)

## Query this object

The following query methods are available:

- getNumIndividuals()
- getNumSites()
- `getPhysicalPositions()`: returns a Python `list[int]`, copied from a C++ `std::vector<unsigned long>`
- `getGeneticPositions()`: returns a Python `list[float]`, copied from a C++ `std::vector<double>`
- `getData()`: returns a NumPy matrix by reference from an Eigen matrix of type `uint8_t`
- `getSite(ind)`: returns a NumPy array by reference from an Eigen row vector of type `uint8_t`, where `ind` is a site index
- `getHap(ind)`: returns a NumPy array by reference from an Eigen column vector of type `uint8_t`, where `ind` is a haplotype index
- `getIndividual(ind)`: returns a NumPy matrix by reference from an Eigen matrix of two adjacent vectors, of type `uint8_t`, where `ind` is an individual index


In [3]:
# Number of individuals in the loaded dataset.
haps_mat.getNumIndividuals()

50

In [4]:
# Number of sites in the loaded dataset.
haps_mat.getNumSites()

102

In [5]:
# Only the length is shown; it is expected to equal getNumSites()
# (presumably one physical position per site).
len(haps_mat.getPhysicalPositions())

102

In [6]:
# Only the length is shown; it is expected to equal getNumSites()
# (presumably one genetic position per site).
len(haps_mat.getGeneticPositions())

102

In [7]:
# Full uint8 data matrix. The display is abbreviated because of the
# np.set_printoptions(threshold=10) call in the first cell.
# NOTE(review): layout appears to be (sites, haplotypes) -- confirm via .shape.
haps_mat.getData()

array([[0, 0, 0, ..., 0, 0, 0],
       [1, 1, 1, ..., 1, 1, 1],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [1, 1, 1, ..., 1, 1, 0],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 1, 1, ..., 1, 0, 0]], dtype=uint8)

In [8]:
# Fetch site 3 once, then report its element type and shape.
site_3 = haps_mat.getSite(3)
print(site_3.dtype)
print(site_3.shape)

uint8
(100,)


In [9]:
# uint8 array for haplotype index 5 (presumably one entry per site).
haps_mat.getHap(5)

array([0, 1, 1, ..., 1, 0, 1], dtype=uint8)

## Counts and frequencies

You can get minor and derived allele counts and frequencies, either for a specific site or for all sites at once.

For instance:

In [10]:
# Minor allele count for the single site at index 5.
haps_mat.getMinorAlleleCount(5)

3

In [11]:
# Minor allele counts for all sites at once, returned as a NumPy array.
haps_mat.getMinorAlleleCounts()

array([ 1, 10,  2, ..., 26, 27, 41], dtype=uint64)

In [12]:
# Derived allele frequency for the single site at index 7.
haps_mat.getDerivedAlleleFrequency(7)

0.06

In [13]:
# Minor allele frequencies for all sites at once, returned as a NumPy array.
# NOTE(review): values look like the minor allele counts divided by the
# number of haplotypes -- confirm against the library documentation.
haps_mat.getMinorAlleleFrequencies()

array([0.01, 0.1 , 0.02, ..., 0.26, 0.27, 0.41])