Skip to content

Commit

Permalink
add additive encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
jrm5100 committed Oct 14, 2020
1 parent 262b9e5 commit 3b7ea99
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 0 deletions.
24 changes: 24 additions & 0 deletions pandas_genomics/arrays/genotype_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -644,3 +644,27 @@ def __ge__(self, other):
a2_gt = self._data['allele2'] > allele2
a2_eq = self._data['allele2'] == allele2
return a1_gt | (a1_eq & a2_gt) | (a1_eq & a2_eq)

######################
# Encoding Functions #
######################

def encode_additive(self) -> pd.arrays.IntegerArray:
    """
    Encode each genotype as the sum of its two allele indices.

    Returns
    -------
    pd.arrays.IntegerArray
        0 for Homozygous Reference
        1 for Heterozygous
        2 for Homozygous Alternate
        pd.NA for missing (either allele missing)

    Raises
    ------
    ValueError
        If the variant has more than one alternate allele.
    """
    # TODO: Return multiple arrays for multiple alternate alleles?
    if len(self.variant.alleles) > 2:
        raise ValueError(
            "Additive encoding can only be used with one alternate allele"
        )

    allele1 = self._data['allele1']
    allele2 = self._data['allele2']
    # With at most two alleles the only valid indices are 0 and 1, so any
    # larger value is the missing marker (255).  Test each allele on its own
    # instead of testing the sum: if the fields are 8-bit unsigned (as the
    # 255 marker suggests), 255 + 1 wraps around to 0 and a half-missing
    # genotype would otherwise be reported as homozygous reference.
    missing = (allele1 > 1) | (allele2 > 1)
    # The sum may still wrap for missing entries, but those are masked out.
    allele_sum = allele1 + allele2
    return pd.arrays.IntegerArray(values=allele_sum, mask=missing)
5 changes: 5 additions & 0 deletions tests/genotype_array/test_genotypearray.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,3 +191,8 @@ class TestReshaping(base.BaseReshapingTests):

class TestSetitems(base.BaseSetitemTests):
    """Inherit every test from ``base.BaseSetitemTests``; none are added here."""


# Custom Tests
def test_encoding_additive(data):
    """Check that ``encode_additive`` returns a well-formed additive encoding.

    ``data`` is supplied by a pytest fixture defined outside this excerpt
    (presumably a genotype array with a single alternate allele -- confirm
    against the fixture definition).
    """
    # Imported locally so the test does not depend on the module's import block.
    import pandas as pd

    result = data.encode_additive()

    # The encoding is a nullable integer array with one entry per genotype.
    assert isinstance(result, pd.arrays.IntegerArray)
    assert len(result) == len(data)

    # Every non-missing entry is an allele count of 0, 1 or 2.
    observed = result[~result.isna()]
    assert all(0 <= value <= 2 for value in observed)

0 comments on commit 3b7ea99

Please sign in to comment.