In [1]:
from bioscript.classifier import GenotypeClassifier
from bioscript.types import Alleles, MatchList, Nucleotide, VariantCall

In [2]:
# rs12913832: the single variant this notebook classifies.
# Coordinates noted in "38/37" order (presumably GRCh38 first, then GRCh37 — confirm):
#   15:28120472-28120472
#   15:28365618-28365618
# dbSNP references:
#   https://www.ncbi.nlm.nih.gov/snp/?term=rs12913832
#   https://www.ncbi.nlm.nih.gov/snp/rs12913832
# NOTE(review): the second rsID (rs60078917) is presumably an alias/merged ID for the
# same variant — verify against dbSNP.
rs12913832 = VariantCall(rsid=["rs12913832", "rs60078917"], ref=Alleles.A, alt=Alleles.NOT_A)
# Genotype → eye-colour associations that the classifier in this notebook encodes:
#   (A;A) yields brown eye color ~80% of the time.
#   (A;G) also tends toward brown.
#   (G;G) gives blue eye color ~99% of the time.

In [3]:
class HERC2Classifier(GenotypeClassifier):
    """Translate the rs12913832 genotype into an eye-colour label.

    Every result is a dict with the keys ``result``, ``genotype_sorted`` and
    ``raw_line``, in that order.
    """

    def classify(self, matches):
        """Classify the rs12913832 entry found in ``matches``.

        Args:
            matches: Object exposing ``get(variant_call)``; the value it
                returns is expected to provide ``raw_line``, ``has_missing``
                and ``genotype_sorted``.

        Returns:
            dict: ``result`` is ``"Brown"`` for ``AA``/``AG``, ``"Blue"`` for
            ``GG``, ``"Unknown"`` for any other genotype, and ``"No call"``
            when there is no match or the match has missing alleles.
            ``genotype_sorted`` is ``None`` for a no-call. ``raw_line`` is
            the match's raw line, or ``None`` when nothing matched.
        """
        hit = matches.get(rs12913832)

        # No match at all: there is no source row to echo back.
        if not hit:
            return {"result": "No call", "genotype_sorted": None, "raw_line": None}

        source_line = hit.raw_line

        # A match with missing alleles is still a no-call, but keeps its raw line.
        if hit.has_missing:
            return {"result": "No call", "genotype_sorted": None, "raw_line": source_line}

        colour_by_genotype = {"AA": "Brown", "AG": "Brown", "GG": "Blue"}
        genotype = hit.genotype_sorted
        return {
            "result": colour_by_genotype.get(genotype, "Unknown"),
            "genotype_sorted": genotype,
            "raw_line": source_line,
        }

In [4]:
# Module-level descriptor bundling the variant call, a classifier instance and a name.
# NOTE(review): presumably read by the bioscript tooling when the exported script is
# run — confirm. The CLI output recorded later in this notebook prefixes each result
# key with this name (HERC2_result, HERC2_genotype_sorted, HERC2_raw_line).
__bioscript__ = {
    "variant_calls": [rs12913832],
    "classifier": HERC2Classifier(),
    "name": "HERC2",
}

In [5]:
from bioscript import VariantFixture

# Synthetic-row factory for the tests below: one rs12913832 entry on chromosome 15
# at position 28120472, declared against the GRCh38 assembly.
# The tests call it as fixture([genotype]) and assert on the tab-separated raw_line
# that reaches the classifier.
fixture = VariantFixture(
    [
        {"rsid": "rs12913832", "chromosome": "15", "position": 28120472}
    ],
    assembly="GRCh38",
)

In [6]:
def classify_fixture(genotype):
    """Run one synthetic genotype through matching and classification.

    Args:
        genotype: Genotype string handed to ``fixture`` (e.g. ``"AA"``).

    Returns:
        Whatever a fresh ``HERC2Classifier`` returns when called with the
        matches built from the fixture rows.
    """
    rows = fixture([genotype])
    matched = MatchList([rs12913832]).match_rows(rows)
    return HERC2Classifier()(matched)

def test_brown_homozygous():
    """An AA genotype classifies as Brown and keeps its raw line."""
    outcome = classify_fixture("AA")
    expected = {
        "result": "Brown",
        "genotype_sorted": "AA",
        "raw_line": "rs12913832\t15\t28120472\tAA",
    }
    for field, wanted in expected.items():
        assert outcome[field] == wanted

def test_brown_heterozygous_unsorted():
    """An unsorted GA genotype is reported as AG/Brown; the raw line keeps GA."""
    outcome = classify_fixture("GA")
    expected = {
        "result": "Brown",
        "genotype_sorted": "AG",
        "raw_line": "rs12913832\t15\t28120472\tGA",
    }
    for field, wanted in expected.items():
        assert outcome[field] == wanted

def test_blue_homozygous():
    """A GG genotype classifies as Blue and keeps its raw line."""
    outcome = classify_fixture("GG")
    expected = {
        "result": "Blue",
        "genotype_sorted": "GG",
        "raw_line": "rs12913832\t15\t28120472\tGG",
    }
    for field, wanted in expected.items():
        assert outcome[field] == wanted

def test_no_call():
    """A '..' genotype is a no-call: no sorted genotype, raw line still reported."""
    outcome = classify_fixture("..")
    label = outcome["result"]
    assert label == "No call"
    genotype = outcome["genotype_sorted"]
    assert genotype is None
    source_line = outcome["raw_line"]
    assert source_line == "rs12913832\t15\t28120472\t.."

In [7]:
# Run every test inline. A failing assert raises and stops the cell, so the
# summary line below prints only when all four tests pass.
test_brown_homozygous()
test_brown_heterozygous_unsorted()
test_blue_homozygous()
test_no_call()
print("✓ All tests passed!")


✓ All tests passed!


In [8]:
from bioscript import export_from_notebook
# Export this notebook to a plain Python script, then run that script's tests
# through the bioscript CLI.
# NOTE(review): presumably reads herc2_dev.ipynb from disk, so the notebook must be
# saved before this cell runs — confirm.
export_from_notebook("herc2_dev.ipynb", "classify_herc2_exported.py")

!bioscript test classify_herc2_exported.py


Testing: classify_herc2_exported.py
Running tests with pytest: classify_herc2_exported.py
platform darwin -- Python 3.12.7, pytest-8.4.2, pluggy-1.6.0 -- /Users/madhavajay/dev/bioscript/workspace1/.venv/bin/python3
cachedir: .pytest_cache
rootdir: /Users/madhavajay/dev/bioscript/workspace1/examples/herc2
plugins: anyio-4.11.0
collected 4 items

classify_herc2_exported.py::test_brown_homozygous PASSED                 [ 25%]
classify_herc2_exported.py::test_brown_heterozygous_unsorted PASSED      [ 50%]
classify_herc2_exported.py::test_blue_homozygous PASSED                  [ 75%]
classify_herc2_exported.py::test_no_call PASSED                          [100%]



In [9]:
# Write a raw genotype line to a TSV file so the next cell can classify it with the `bioscript classify` CLI.
from pathlib import Path

def test_brown_heterozygous_file_based():
    """Write a one-row GA genotype TSV to the working directory and return its path.

    The file holds a commented header line followed by a single tab-separated
    rs12913832 row, with no trailing newline. The path and the file's contents
    are printed to stdout. No assertions are made here.

    Returns:
        pathlib.Path: relative path ``herc2_test_ga.tsv``.
    """
    header = "# rsid\tchromosome\tposition\tgenotype\n"
    row = "rs12913832\t15\t28120472\tGA"

    target = Path("herc2_test_ga.tsv")
    target.write_text(header + row)

    print(f"Test file created: {target}")
    print(f"Content:\n{target.read_text()}")
    return target

# Create the TSV input consumed by the `bioscript classify` cells that follow.
# NOTE(review): the helper only writes the file and makes no assertions, so the
# "Test passed" message below means only that the file was written.
test_file = test_brown_heterozygous_file_based()
print("✓ Test passed and file created!")

Test file created: herc2_test_ga.tsv
Content:
# rsid	chromosome	position	genotype
rs12913832	15	28120472	GA
✓ Test passed and file created!


In [10]:
# Classify the generated TSV with the exported script for participant "X",
# redirect the TSV output to herc2_result.tsv, then display that file.
!bioscript classify classify_herc2_exported.py --file herc2_test_ga.tsv --out tsv --participant_id="X" > herc2_result.tsv
!cat herc2_result.tsv

participant_id	HERC2_result	HERC2_genotype_sorted	HERC2_raw_line
X	Brown	AG	"rs12913832	15	28120472	GA"


In [11]:
# Same classification with JSON output. Each key returned by the classifier is
# prefixed with the classifier name: HERC2_result, HERC2_genotype_sorted, HERC2_raw_line.
!bioscript classify classify_herc2_exported.py --file herc2_test_ga.tsv --out json --participant_id="test_user"

{
  "participant_id": "test_user",
  "HERC2_result": "Brown",
  "HERC2_genotype_sorted": "AG",
  "HERC2_raw_line": "rs12913832\t15\t28120472\tGA"
}
