# APOL1 Classifier Development

This notebook shows how to develop a classifier with embedded tests in Jupyter.

In [1]:
from bioscript.classifier import DiploidResult, GenotypeClassifier, GenotypeEnum
from bioscript.types import Alleles, VariantCall

In [2]:
# APOL1 variant calls consumed by the classifier and the tests below.
# NOTE(review): Alleles.NOT_A / Alleles.NOT_T presumably mean "any allele other
# than the reference" -- confirm against bioscript.types.Alleles.
rs73885319 = VariantCall(
    rsid="rs73885319",
    ref=Alleles.A,
    alt=Alleles.NOT_A,
)
rs60910145 = VariantCall(
    rsid="rs60910145",
    ref=Alleles.T,
    alt=Alleles.NOT_T,
)
# This call is given a list of three rsIDs (presumably aliases for the same
# site -- confirm); ref/alt are Alleles.I / Alleles.D (presumably
# insertion/deletion -- confirm).
rs71785313 = VariantCall(
    rsid=["rs71785313", "rs1317778148", "rs143830837"],
    ref=Alleles.I,
    alt=Alleles.D,
)

In [3]:
class APOL1Genotypes(GenotypeEnum):
    """Labels the APOL1 classifier assigns to each side of a diploid result."""
    G2 = "G2"
    G1 = "G1"
    G0 = "G0"

# Placeholder label used for both sides of the result when none of the APOL1
# variant calls has a match (see APOL1Classifier.classify).
MISSING = "G-"

In [4]:
class APOL1Classifier(GenotypeClassifier):
    """Assign an APOL1 diploid genotype (G0/G1/G2 labels) from matched variant calls."""

    def classify(self, matches) -> DiploidResult:
        """Map the matches for the three APOL1 variant calls to a diploid result.

        Args:
            matches: Lookup whose ``get(call)`` returns the match for a variant
                call, or ``None`` when there is none. Each match exposes
                ``alt_count`` (presumably the number of alternate-allele
                copies -- confirm against bioscript).

        Returns:
            A ``DiploidResult`` holding two labels:

            * ``MISSING``/``MISSING`` when none of the three calls has a match.
            * G2/G2 when the rs71785313 alt count is 2.
            * G2/G1 or G2/G0 when that count is 1, depending on whether G1
              is present.
            * G1/G1, G1/G0 or G0/G0 for any other count, depending on the
              combined G1 alt count.

        G1 counts as present only when both rs73885319 and rs60910145 have at
        least one alt copy; otherwise the combined G1 count is treated as 0.
        """
        deletion_match = matches.get(rs71785313)
        first_site_match = matches.get(rs73885319)
        second_site_match = matches.get(rs60910145)

        # Nothing matched at all: report the placeholder on both sides.
        if deletion_match is None and first_site_match is None and second_site_match is None:
            return DiploidResult(MISSING, MISSING)

        # A missing (or falsy) match contributes zero alternate copies.
        deletion_copies = deletion_match.alt_count if deletion_match else 0
        first_site_alts = first_site_match.alt_count if first_site_match else 0
        second_site_alts = second_site_match.alt_count if second_site_match else 0

        # Combined G1 count is only non-zero when both sites carry an alt copy.
        g1_copies = 0
        if first_site_alts > 0 and second_site_alts > 0:
            g1_copies = first_site_alts + second_site_alts

        if deletion_copies == 2:
            return DiploidResult(APOL1Genotypes.G2, APOL1Genotypes.G2)

        if deletion_copies == 1:
            partner = APOL1Genotypes.G1 if g1_copies >= 2 else APOL1Genotypes.G0
            return DiploidResult(APOL1Genotypes.G2, partner)

        # No single or double G2 copy: decide purely on the G1 count.
        if g1_copies == 4:
            return DiploidResult(APOL1Genotypes.G1, APOL1Genotypes.G1)
        if g1_copies >= 2:
            return DiploidResult(APOL1Genotypes.G1, APOL1Genotypes.G0)
        return DiploidResult(APOL1Genotypes.G0, APOL1Genotypes.G0)

In [5]:
# Module-level descriptor tying together the classifier class, its display
# name, and the variant calls it looks up.
# NOTE(review): presumably read by bioscript tooling (exporter / runner) --
# confirm which keys are required.
__bioscript__ = {
    "variant_calls": [rs73885319, rs60910145, rs71785313],
    "classifier": APOL1Classifier,
    "name": "APOL1",
}

## Tests

Write each test as a plain function whose name starts with `test_`, so that pytest (which `bioscript test` runs) can discover it:

In [6]:
from bioscript import VariantFixture
from bioscript.types import MatchList

# Shared test fixture: one entry per APOL1 variant, all on chromosome 22 with
# GRCh38 positions. The tests call it with a list of genotype strings and pass
# the result to MatchList.match_rows.
# NOTE(review): genotype strings presumably map to the entries below in
# order -- confirm against VariantFixture.
fixture = VariantFixture(
    [
        {"rsid": "rs73885319", "chromosome": "22", "position": 36265860},
        {"rsid": "rs60910145", "chromosome": "22", "position": 36265988},
        {"rsid": "rs71785313", "chromosome": "22", "position": 36266000},
    ],
    assembly="GRCh38",
)

In [7]:
def test_g0_homozygous():
    """Genotypes AA / TT / II (no alternate allele at any call) classify as G0/G0."""
    calls = [rs73885319, rs60910145, rs71785313]
    rows = fixture(["AA", "TT", "II"])
    matched = MatchList(calls).match_rows(rows)
    assert APOL1Classifier()(matched) == "G0/G0"

In [8]:
def test_g1_homozygous():
    """Genotypes GG / CC / II (non-reference at both G1 calls, II at the third) classify as G1/G1."""
    calls = [rs73885319, rs60910145, rs71785313]
    rows = fixture(["GG", "CC", "II"])
    matched = MatchList(calls).match_rows(rows)
    assert APOL1Classifier()(matched) == "G1/G1"

## Run Tests in Jupyter

You can run tests directly in the notebook:

In [9]:
# Run tests
# Each test raises AssertionError on failure, which stops the cell, so the
# success line below is only printed when both tests pass.
test_g0_homozygous()
test_g1_homozygous()
print("✓ All tests passed!")

✓ All tests passed!


In [10]:
# Export the notebook to a standalone script so the same tests can be run
# outside Jupyter (see the `bioscript test` cell that follows).
# NOTE(review): the notebook filename is hard-coded and is presumably this
# notebook's own name -- it must be kept in sync if the file is renamed.
from bioscript import export_from_notebook
export_from_notebook("apol1_dev.ipynb", "classify_apol1.py")

PosixPath('classify_apol1.py')

In [11]:
# Run the exported script's tests from the shell; per the captured output,
# `bioscript test` runs them with pytest.
!bioscript test classify_apol1.py


Testing: classify_apol1.py
Running tests with pytest: classify_apol1.py
platform darwin -- Python 3.12.7, pytest-8.4.2, pluggy-1.6.0 -- /Users/madhavajay/dev/bioscript/workspace1/.venv/bin/python3
cachedir: .pytest_cache
rootdir: /Users/madhavajay/dev/bioscript/workspace1/examples/apol1
plugins: anyio-4.11.0
collected 2 items

classify_apol1.py::test_g0_homozygous PASSED                             [ 50%]
classify_apol1.py::test_g1_homozygous PASSED                             [100%]

