# BRCA1 Classifier Development

This notebook shows how to develop a classifier with embedded tests in Jupyter.

In [None]:
# Install pytest through uv's pip interface (shell command run from the notebook).
!uv pip install pytest

In [None]:
# Editable (-e) install of the package located at ../../python, relative to
# the directory the notebook is run from.
# NOTE(review): presumably this is the bioscript package imported below — confirm.
!uv pip install -e ../../python

In [None]:
from bioscript import AlleleCounter
from bioscript.classifier import DiploidResult, GenotypeClassifier, GenotypeEnum
from bioscript.types import Alleles, VariantCall

In [None]:
import pandas as pd

In [None]:
def filter_snvs(df: pd.DataFrame) -> pd.DataFrame:
    """
    Keep only the single-nucleotide-variant rows of ``df``.

    A row is kept when its ``clnvc`` value, lower-cased, equals
    ``"single_nucleotide_variant"``.

    Args:
        df: Frame with a string-valued ``clnvc`` column.

    Returns:
        The matching rows, re-indexed from 0 (the old index is dropped).
    """
    variant_class = df["clnvc"].str.lower()
    is_snv = variant_class.eq("single_nucleotide_variant")
    return df[is_snv].reset_index(drop=True)

In [None]:
def generate_variant_calls(df: pd.DataFrame) -> "list[VariantCall]":
    """
    Build one ``VariantCall`` per row of ``df``.

    For each row the ``rsid`` is stripped of surrounding whitespace and the
    ``ref`` allele is stripped and upper-cased. The call is then created with
    ``ref=Alleles.from_letter(ref)`` and ``alt=Alleles.from_not_letter(ref)``.

    NOTE(review): the alternate allele is derived from ``ref`` only; any
    ``alt`` column in ``df`` is not consulted. Presumably "anything other than
    the reference" is the intended alt — confirm.

    Args:
        df: Frame with ``rsid`` and ``ref`` columns.

    Returns:
        A list of ``VariantCall`` objects, in row order.

    Raises:
        KeyError: If ``df`` has no ``rsid`` or ``ref`` column.
    """
    variant_calls = []
    # Walk the two needed columns directly instead of materialising a Series
    # per row with iterrows().
    for raw_rsid, raw_ref in zip(df["rsid"], df["ref"]):
        rsid = str(raw_rsid).strip()
        ref = str(raw_ref).strip().upper()
        variant_calls.append(
            VariantCall(
                rsid=rsid,
                ref=Alleles.from_letter(ref),
                alt=Alleles.from_not_letter(ref),
            )
        )
    return variant_calls

In [None]:
def get_vcs(tsv_path: str = "brca1_clinvar.tsv") -> "list[VariantCall]":
    """
    Read a tab-separated variant table and build variant calls for its SNV rows.

    The table is loaded with pandas, narrowed with ``filter_snvs`` and handed
    to ``generate_variant_calls``.

    Args:
        tsv_path: Path of the TSV file to read. Defaults to
            ``"brca1_clinvar.tsv"``; a relative path is resolved against the
            current working directory.

    Returns:
        Whatever ``generate_variant_calls`` returns for the filtered rows.

    Raises:
        FileNotFoundError: If ``tsv_path`` does not exist (raised by
            ``pandas.read_csv``).
    """
    # Pin the dtypes so text columns (including chromosome) are read as
    # strings rather than inferred, and position uses pandas' nullable Int64.
    column_dtypes = {
        "rsid": "string",
        "gene": "string",
        "chromosome": "string",
        "position": "Int64",
        "ref": "string",
        "alt": "string",
        "clnrevstat": "string",
        "clnsig": "string",
        "clnvc": "string",
    }
    df = pd.read_csv(tsv_path, sep="\t", dtype=column_dtypes)

    df_snvs = filter_snvs(df)
    return generate_variant_calls(df_snvs)

In [None]:
class BRCA1Classifier(GenotypeClassifier):
    """Work-in-progress BRCA1 classifier; for now it only inspects its input."""

    def classify(self, matches) -> DiploidResult:
        """
        Print a short summary of ``matches``.

        Printed, in order: the type of ``matches``, the length of
        ``matches.all_matches`` and the length of ``matches.variant_matches``.

        NOTE(review): there is no return statement, so this yields ``None``
        even though it is annotated as returning ``DiploidResult``; the actual
        classification logic still has to be written.
        """
        print(type(matches))
        all_count = len(matches.all_matches)
        print(all_count)
        variant_count = len(matches.variant_matches)
        print(variant_count)

In [None]:
# Module-level descriptor for this notebook: the variant calls, a classifier
# instance and a name, bundled in one dict.
# NOTE(review): presumably this is the entry the bioscript export/classify
# tooling looks for — confirm against the bioscript documentation.
# get_vcs() is called right here, so running this cell reads the TSV file.
__bioscript__ = {
    "variant_calls": get_vcs(),
    "classifier": BRCA1Classifier(),
    "name": "BRCA1",
}

## Tests

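Write tests using the `test_*` function convention. The example cells below are commented out and still reference APOL1 (`APOL1Classifier` and its rsIDs); adapt them to `BRCA1Classifier` before enabling them: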

In [None]:
# from bioscript import VariantFixture
# from bioscript.types import MatchList

# fixture = VariantFixture(
#     [
#         {"rsid": "rs73885319", "chromosome": "22", "position": 36265860},
#         {"rsid": "rs60910145", "chromosome": "22", "position": 36265988},
#         {"rsid": "rs71785313", "chromosome": "22", "position": 36266000},
#     ],
#     assembly="GRCh38",
# )

In [None]:
# def test_g0_homozygous():
#     variants = fixture(["AA", "TT", "II"])
#     matches = MatchList([rs73885319, rs60910145, rs71785313]).match_rows(variants)
#     classifier = APOL1Classifier()
#     result = classifier(matches)
#     assert result == "G0/G0"

In [None]:
# def test_g1_homozygous():
#     variants = fixture(["GG", "CC", "II"])
#     matches = MatchList([rs73885319, rs60910145, rs71785313]).match_rows(variants)
#     classifier = APOL1Classifier()
#     result = classifier(matches)
#     assert result == "G1/G1"

## Run Tests in Jupyter

You can run tests directly in the notebook:

In [None]:
# # Run tests
# test_g0_homozygous()
# test_g1_homozygous()
# print("✓ All tests passed!")

## Export to Python Module

Export this notebook to a Python file:

```bash
bioscript export brca1_dev.ipynb -o brca1_classifier.py
```

Or in Python:

```python
from bioscript import export_from_notebook
export_from_notebook("brca1_dev.ipynb", "brca1_classifier.py")
```

In [None]:
# Export the notebook brca1_dev.ipynb to the Python file brca1_classifier.py.
# NOTE(review): both paths are relative — presumably brca1_dev.ipynb is this
# notebook and the cell is run from the notebook's own directory; confirm.
from bioscript import export_from_notebook
export_from_notebook("brca1_dev.ipynb", "brca1_classifier.py")

In [None]:
# !bioscript test brca1_classifier.py

In [None]:
# !bioscript classify --participant_id="X" --file="test.txt" brca1_classifier.py --out=tsv