Skip to content

Commit

Permalink
Merge pull request #24 from jrm5100/master
Browse files Browse the repository at this point in the history
Version 0.6.1
  • Loading branch information
jrm5100 committed Jun 25, 2021
2 parents d63e63d + 6d4ab5a commit d8c1416
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 12 deletions.
5 changes: 5 additions & 0 deletions docs/release-history.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
Release History
===============

v0.6.1 (2021-06-26)
-------------------

Specify only alternate allele frequencies when generating random genotypes, to avoid float rounding problems

v0.6.0 (2021-06-25)
-------------------

Expand Down
31 changes: 22 additions & 9 deletions pandas_genomics/sim/random_gt.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List
from typing import List, Union

import numpy as np

Expand All @@ -7,16 +7,19 @@


def generate_random_gt(
variant: Variant, allele_freq: List[float], n: int = 1000, random_seed: int = 1855
variant: Variant,
alt_allele_freq: Union[List[float], float],
n: int = 1000,
random_seed: int = 1855,
) -> GenotypeArray:
"""
Simulate random genotypes according to the provided allele frequencies
Parameters
----------
variant: Variant
allele_freq: List[float]
Allele frequencies for each allele in the variant
alt_allele_freq: float or List[float]
Allele frequencies for each alternate allele in the variant (Biallelic variants may specify a single float value)
n: int, default 1000
How many genotypes to simulate
random_seed: int, default 1855
Expand All @@ -27,16 +30,26 @@ def generate_random_gt(
"""
# Validate frequencies
if len(allele_freq) != len(variant.alleles):
if isinstance(alt_allele_freq, float):
# Convert it into a list
alt_allele_freq = [
alt_allele_freq,
]
if len(alt_allele_freq) != len(variant.alleles) - 1:
raise ValueError(
f"The number of provided frequencies ({len(allele_freq)}) doesn't match"
f" the number of alleles in the variant ({len(variant.alleles)})."
f"The number of provided frequencies ({len(alt_allele_freq)}) doesn't match"
f" the number of alternate alleles in the variant ({len(variant.alleles)-1})."
)
if sum(allele_freq) != 1.0:
if sum(alt_allele_freq) > 1.0:
raise ValueError(
f"The provided frequencies must add up to 1.0 (sum was {sum(allele_freq):.3f})"
f"The provided frequencies must not sum to > 1.0 (sum was {sum(alt_allele_freq):.3e})"
)

# Set remaining odds to the reference allele
allele_freq = [
1 - sum(alt_allele_freq),
] + alt_allele_freq

# Choose gts
np.random.seed(random_seed)
genotypes = np.random.choice(
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pandas-genomics"
version = "0.6.0"
version = "v0.6.1"
description = "Pandas ExtensionDtypes and ExtensionArray for working with genomics data"
license = "BSD-3-Clause"
authors = ["John McGuigan <jrm5100@psu.edu>"]
Expand Down
4 changes: 2 additions & 2 deletions tests/simulation/test_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@

def test():
var = Variant(chromosome="1", position=123456, ref="T", alt=["A"])
gta = sim.generate_random_gt(var, allele_freq=[0.7, 0.3])
gta = sim.generate_random_gt(var, alt_allele_freq=0.3)
var2 = Variant(chromosome="1", position=223456, ref="T", alt=["A", "C"])
gta_2 = sim.generate_random_gt(var2, allele_freq=[0.7, 0.25, 0.05])
gta_2 = sim.generate_random_gt(var2, alt_allele_freq=[0.25, 0.05])

0 comments on commit d8c1416

Please sign in to comment.