-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a random genotype array simulator, and reorganize simulation into a 'sim' module
- Loading branch information
Showing
7 changed files
with
76 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
from .BAMS import BAMS, SNPEffectEncodings, PenetranceTables | ||
from .random_gt import generate_random_gt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
from typing import List | ||
|
||
import numpy as np | ||
|
||
from pandas_genomics.arrays import GenotypeArray, GenotypeDtype | ||
from pandas_genomics.scalars import Variant | ||
|
||
|
||
def generate_random_gt(
    variant: Variant, allele_freq: List[float], n: int = 1000, random_seed: int = 1855
) -> GenotypeArray:
    """
    Simulate random genotypes according to the provided allele frequencies

    Parameters
    ----------
    variant: Variant
        Variant to simulate. ``len(variant.alleles)`` fixes how many
        frequencies are expected and ``variant.ploidy`` how many allele
        indices are drawn per genotype.
    allele_freq: List[float]
        Allele frequencies for each allele in the variant; the frequency at
        position ``i`` is the probability of drawing allele index ``i``.
        Must add up to 1.0 (within floating-point tolerance).
    n: int, default 1000
        How many genotypes to simulate
    random_seed: int, default 1855
        Seed for the random number generator; the same seed and arguments
        give the same genotypes.

    Returns
    -------
    GenotypeArray
        Array of ``n`` simulated genotypes whose scores are all NaN.

    Raises
    ------
    ValueError
        If the number of frequencies differs from the number of alleles, or
        if the frequencies do not add up to 1.0.
    """
    # Validate frequencies
    n_alleles = len(variant.alleles)
    if len(allele_freq) != n_alleles:
        raise ValueError(
            f"The number of provided frequencies ({len(allele_freq)}) doesn't match"
            f" the number of alleles in the variant ({n_alleles})."
        )
    total = sum(allele_freq)
    # Compare with a tolerance: exact float equality rejects valid inputs such
    # as [0.1] * 10, whose sum is 0.9999999999999999.  The tolerance is kept
    # tight so anything accepted here also passes the sum check in `choice`.
    if not np.isclose(total, 1.0, rtol=0.0, atol=1e-8):
        raise ValueError(
            f"The provided frequencies must add up to 1.0 (sum was {total:.3f})"
        )

    # Choose gts
    # A private RandomState gives a reproducible draw for the seed without
    # reseeding NumPy's global random state as a side effect.
    rng = np.random.RandomState(random_seed)
    genotypes = rng.choice(
        range(n_alleles), p=allele_freq, size=(n, variant.ploidy)
    )

    # Create GenotypeArray representation of the data
    dtype = GenotypeDtype(variant)
    # Simulated genotypes carry no score, so every score is NaN
    scores = np.full(n, np.nan)
    data = np.array(list(zip(genotypes, scores)), dtype=dtype._record_type)
    gt_array = GenotypeArray(values=data, dtype=dtype)

    return gt_array
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from pandas_genomics.scalars import Variant | ||
from pandas_genomics import sim | ||
|
||
|
||
def test():
    """Smoke-test generate_random_gt on a biallelic and a triallelic variant."""
    # Biallelic variant: one frequency per allele (ref + one alt)
    var = Variant(chromosome="1", position=123456, ref="T", alt=["A"])
    gta = sim.generate_random_gt(var, allele_freq=[0.7, 0.3])
    assert len(gta) == 1000  # default n

    # Triallelic variant
    var2 = Variant(chromosome="1", position=223456, ref="T", alt=["A", "C"])
    gta_2 = sim.generate_random_gt(var2, allele_freq=[0.7, 0.25, 0.05])
    assert len(gta_2) == 1000  # default n

    # A frequency count that doesn't match the allele count must be rejected
    try:
        sim.generate_random_gt(var, allele_freq=[0.5, 0.3, 0.2])
    except ValueError:
        pass
    else:
        raise AssertionError("Expected ValueError for mismatched frequency count")