Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
1 change: 1 addition & 0 deletions tasks/cyvcf2_count_alterations/data/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Add large files here that should not be committed to the repository
343 changes: 343 additions & 0 deletions tasks/cyvcf2_count_alterations/data/SRR2058984_zc.vcf

Large diffs are not rendered by default.

224 changes: 224 additions & 0 deletions tasks/cyvcf2_count_alterations/data/SRR2058985_zc.vcf

Large diffs are not rendered by default.

255 changes: 255 additions & 0 deletions tasks/cyvcf2_count_alterations/data/SRR2058987_zc.vcf

Large diffs are not rendered by default.

227 changes: 227 additions & 0 deletions tasks/cyvcf2_count_alterations/data/SRR2058988_zc.vcf

Large diffs are not rendered by default.

246 changes: 246 additions & 0 deletions tasks/cyvcf2_count_alterations/data/SRR2058989_zc.vcf

Large diffs are not rendered by default.

37 changes: 37 additions & 0 deletions tasks/cyvcf2_count_alterations/implementation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
def cyvcf2_count_alterations(
    input_vcf: str = "/mount/input/SRR2058984_zc.vcf",
    reference_nucleotide: str = "A",
    alternate_nucleotide: str = "C",
) -> dict:
    """
    Count the SNPs in a VCF file that change one given nucleotide into another.

    The file is parsed with cyvcf2. A record is counted when cyvcf2 flags it as
    a SNP (`variant.is_snp`), its REF equals `reference_nucleotide`, and its
    first ALT allele equals `alternate_nucleotide`. Comparisons are exact
    (case-sensitive) string matches.

    NOTE(review): only the first ALT allele is inspected, so a multi-allelic
    record whose second or later allele matches is not counted. Confirm that
    this is the intended definition before changing it.

    Args:
        input_vcf: Path to the input VCF file
        reference_nucleotide: The reference nucleotide to compare against ("A", "C", "G", or "T")
        alternate_nucleotide: The alternate nucleotide to compare against ("A", "C", "G", or "T")

    Returns:
        dict with the following structure:
        {
          'num_snps': int  # The number of SNPs that are altered from reference `reference_nucleotide` to
                           # `alternate_nucleotide`.
        }
    """
    # Imported lazily so the module can be loaded where cyvcf2 is not installed.
    from cyvcf2 import VCF

    num_snps = 0

    vcf = VCF(input_vcf)
    try:
        for variant in vcf:
            if (
                variant.is_snp
                and variant.REF == reference_nucleotide
                and variant.ALT  # guard against an empty ALT list before indexing
                and variant.ALT[0] == alternate_nucleotide
            ):
                num_snps += 1
    finally:
        # Release the underlying file handle even if parsing raises part-way.
        vcf.close()

    return {"num_snps": num_snps}
9 changes: 9 additions & 0 deletions tasks/cyvcf2_count_alterations/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#! /bin/bash
# Environment setup for the cyvcf2_count_alterations task.
# Abort on the first failing command.
set -e

# Fetch the cyvcf2 sources and pin the working tree to commit 541ab16.
# Each step is its own command: inside an `&&` chain, `set -e` only reacts to
# the last command, so a failing `cd` or `git checkout main` would otherwise
# be silently ignored.
git clone https://github.com/brentp/cyvcf2 /workspace/cyvcf2
cd /workspace/cyvcf2
git checkout main
git checkout 541ab16

# Install the Python packages the task imports.
# NOTE(review): `pip install cyvcf2` names the package, not the checkout above,
# so it presumably installs a published release rather than commit 541ab16.
# Confirm that this is intended.
pip install cyvcf2
pip install numpy
81 changes: 81 additions & 0 deletions tasks/cyvcf2_count_alterations/task.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
name: cyvcf2_count_alterations
repo:
name: cyvcf2
url: "https://github.com/brentp/cyvcf2"
commit: 541ab16
branch: main
env: []
papers: [pedersen2017cyvcf2]
category: genomics_proteomics
requires: cpu
description: Use cyvcf2 to parse a VCF file containing detected sequence variants and count the single nucleotide polymorphisms (SNPs) that change a specific reference nucleotide into a specific alternate nucleotide.
arguments:
- name: input_vcf
description: Path to the input VCF file
type: str
- name: reference_nucleotide
description: The reference nucleotide to compare against ("A", "C", "G", or "T")
type: str
- name: alternate_nucleotide
description: The alternate nucleotide to compare against ("A", "C", "G", or "T")
type: str
returns:
- name: num_snps
description: The number of SNPs that are altered from reference `reference_nucleotide` to `alternate_nucleotide`.
type: int
example:
arguments:
- name: input_vcf
value: /mount/input/SRR2058984_zc.vcf
- name: reference_nucleotide
value: "A"
- name: alternate_nucleotide
value: "C"
mount:
- source: SRR2058984_zc.vcf
target: SRR2058984_zc.vcf
test_invocations:
- name: SRR2058985
arguments:
- name: input_vcf
value: /mount/input/SRR2058985_zc.vcf
- name: reference_nucleotide
value: "A"
- name: alternate_nucleotide
value: "T"
mount:
- source: SRR2058985_zc.vcf
target: SRR2058985_zc.vcf
- name: SRR2058987
arguments:
- name: input_vcf
value: /mount/input/SRR2058987_zc.vcf
- name: reference_nucleotide
value: "T"
- name: alternate_nucleotide
value: "C"
mount:
- source: SRR2058987_zc.vcf
target: SRR2058987_zc.vcf
- name: SRR2058988
arguments:
- name: input_vcf
value: /mount/input/SRR2058988_zc.vcf
- name: reference_nucleotide
value: "T"
- name: alternate_nucleotide
value: "A"
mount:
- source: SRR2058988_zc.vcf
target: SRR2058988_zc.vcf
- name: SRR2058989
arguments:
- name: input_vcf
value: /mount/input/SRR2058989_zc.vcf
- name: reference_nucleotide
value: "T"
- name: alternate_nucleotide
value: "G"
mount:
- source: SRR2058989_zc.vcf
target: SRR2058989_zc.vcf
24 changes: 24 additions & 0 deletions tasks/cyvcf2_count_alterations/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import pytest
from pytest_lazy_fixtures import lf
from tasks.utils import initialize, parametrize_invocation
from toolarena.run import ToolRunResult

# Project test bootstrap imported from tasks.utils; presumably sets up the
# invocation fixtures used by the tests below — TODO confirm against tasks.utils.
initialize()


@parametrize_invocation("SRR2058985", "SRR2058987", "SRR2058988", "SRR2058989")
def test_status(invocation: ToolRunResult):
    """Check that each listed invocation reports the status "success"."""
    observed_status = invocation.status
    assert observed_status == "success"


@pytest.mark.parametrize(
    ("invocation", "expected_num_snps"),
    [
        pytest.param(lf("SRR2058985"), 0),
        pytest.param(lf("SRR2058987"), 13),
        pytest.param(lf("SRR2058988"), 1),
        pytest.param(lf("SRR2058989"), 4),
    ],
)
def test_num_snps(invocation: ToolRunResult, expected_num_snps: int):
    """Check that the `num_snps` value of each invocation result equals the expected count."""
    reported_num_snps = invocation.result["num_snps"]
    assert reported_num_snps == expected_num_snps
12 changes: 12 additions & 0 deletions tasks/papers.bib
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
@article{pedersen2017cyvcf2,
author = {Pedersen, Brent S and Quinlan, Aaron R},
title = {cyvcf2: fast, flexible variant analysis with Python},
year = {2017},
month = {02},
journal = {Bioinformatics},
volume = {33},
number = {12},
pages = {1867-1869},
issn = {1367-4803},
}

@article{isensee2020nnunet,
author = {Isensee, Fabian and Jaeger, Paul F. and Kohl, Simon A. A. and Petersen, Jens and Maier-Hein, Klaus H.},
title = {nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation},
Expand Down
1 change: 1 addition & 0 deletions tasks/papers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@ yuksekgonul2024textgrad: "https://arxiv.org/abs/2406.07496"
neidlinger2025eagle: "https://arxiv.org/abs/2502.13027"
wasserthal2023totalsegmentator: "https://pubs.rsna.org/doi/10.1148/ryai.230024"
zigutyte2024mopadi: "https://www.biorxiv.org/content/10.1101/2024.10.29.620913v2"
pedersen2017cyvcf2: "https://academic.oup.com/bioinformatics/article/33/12/1867/2971439"