Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Migrate cache models to pydantic v2 #1274

Merged
merged 7 commits into from Oct 4, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 4 additions & 1 deletion BALSAMIC/commands/init/base.py
Expand Up @@ -128,7 +128,10 @@ def initialize(
references=references,
references_date=datetime.now().strftime("%Y-%m-%d %H:%M"),
)
write_json(json.loads(cache_config.json(exclude_none=True)), config_path.as_posix())
write_json(
json_obj=json.loads(cache_config.model_dump_json(exclude_none=True)),
path=config_path.as_posix(),
)
LOG.info(f"Reference workflow configured successfully ({config_path.as_posix()})")

snakefile: Path = (
Expand Down
25 changes: 16 additions & 9 deletions BALSAMIC/models/cache.py
@@ -1,9 +1,16 @@
"""Balsamic reference cache models."""
import logging
from pathlib import Path
from typing import Dict, Optional, List, Any, Union
from typing import Dict, Optional, List, Union

from pydantic.v1 import BaseModel, AnyUrl, DirectoryPath, validator, FilePath
from pydantic import (
BaseModel,
AnyUrl,
DirectoryPath,
FilePath,
field_validator,
ValidationInfo,
)

from BALSAMIC.constants.cache import GenomeVersion, GRCHVersion
from BALSAMIC.constants.constants import FileType, BwaIndexFileType
Expand Down Expand Up @@ -31,8 +38,8 @@ class ReferenceUrl(BaseModel):
gzip: bool
file_name: str
dir_name: str
file_path: Optional[str]
secret: Optional[str]
file_path: Optional[str] = None
secret: Optional[str] = None


class References(BaseModel):
Expand Down Expand Up @@ -277,15 +284,15 @@ class CacheConfig(BaseModel):
variants_dir: Path
vep_dir: Path
genome_version: GenomeVersion
cosmic_key: Optional[str]
cosmic_key: Optional[str] = None
bioinfo_tools: dict
containers: Dict[str, str]
references: Union[ReferencesHg, ReferencesCanFam]
references_date: str

@validator("references")
@field_validator("references")
def validate_references(
cls, references: References, values: Dict[str, Any]
cls, references: References, info: ValidationInfo
) -> References:
"""Validate the reference output paths."""
for model in references:
Expand All @@ -294,15 +301,15 @@ def validate_references(
reference_key, reference = model[0], model[1]
reference.file_path = (
Path(
values.get("references_dir"),
info.data.get("references_dir"),
reference.dir_name,
reference.file_name,
).as_posix()
if reference
else None
)
reference.secret = (
values.get("cosmic_key") if "cosmic" in reference_key else None
info.data.get("cosmic_key") if "cosmic" in reference_key else None
)
return references

Expand Down
4 changes: 2 additions & 2 deletions BALSAMIC/workflows/reference.smk
Expand Up @@ -17,7 +17,7 @@ from BALSAMIC.utils.utils import get_relative_paths_dict
LOG = logging.getLogger(__name__)

# Balsamic cache configuration model
cache_config: CacheConfig = CacheConfig.parse_obj(config)
cache_config: CacheConfig = CacheConfig.model_validate(config)

# Temporary directory and shell options
os.environ["TMPDIR"] = cache_config.references_dir.as_posix()
Expand Down Expand Up @@ -46,7 +46,7 @@ rule all:
run:
analysis_references: Dict[str, str] = get_relative_paths_dict(
base_path=cache_config.references_dir,
data=cache_config.get_analysis_references().dict(),
data=cache_config.get_analysis_references().model_dump(),
)
write_json(
json_obj=analysis_references,
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.rst
Expand Up @@ -49,6 +49,7 @@ Changed:
* Update `reference.json` file to use relative paths https://github.com/Clinical-Genomics/BALSAMIC/pull/1251
* Update pydantic to v2 while maintaining support for v1 models https://github.com/Clinical-Genomics/BALSAMIC/pull/1253
* `PCT_PF_READS_IMPROPER_PAIRS` QC threshold lowered to 5% https://github.com/Clinical-Genomics/BALSAMIC/issues/1265
* Migrate cache models to pydantic v2 https://github.com/Clinical-Genomics/BALSAMIC/pull/1274

Fixed:
^^^^^^
Expand Down
8 changes: 4 additions & 4 deletions tests/conftest.py
@@ -1,5 +1,4 @@
import copy
import json
import os
import shutil
from datetime import datetime
Expand All @@ -11,6 +10,7 @@
import pytest
from _pytest.tmpdir import TempPathFactory
from click.testing import CliRunner
from pydantic_core import Url

from BALSAMIC import __version__ as balsamic_version
from BALSAMIC.commands.base import cli
Expand Down Expand Up @@ -2043,9 +2043,9 @@ def fixture_analysis_references_hg(


@pytest.fixture(scope="session", name="reference_url")
def fixture_reference_url() -> str:
def fixture_reference_url() -> Url:
"""Return dummy reference url."""
return "gs://gatk-legacy-bundles/b37/reference.vcf.gz"
return Url("gs://gatk-legacy-bundles/b37/reference.vcf.gz")


@pytest.fixture(scope="session", name="reference_file")
Expand All @@ -2058,7 +2058,7 @@ def fixture_reference_file(session_tmp_path: Path) -> Path:

@pytest.fixture(scope="session", name="reference_url_data")
def fixture_reference_url_data(
reference_url: str, reference_file: Path, cosmic_key: str
reference_url: Url, reference_file: Path, cosmic_key: str
) -> Dict[str, Any]:
"""return reference url model data."""
return {
Expand Down
31 changes: 17 additions & 14 deletions tests/models/test_cache_models.py
Expand Up @@ -4,7 +4,7 @@

import pytest
from _pytest.logging import LogCaptureFixture
from pydantic.v1 import ValidationError
from pydantic import ValidationError

from BALSAMIC.constants.cache import (
GRCHVersion,
Expand Down Expand Up @@ -35,7 +35,7 @@ def test_analysis_references(analysis_references_data: Dict[str, Path]):
model: AnalysisReferences = AnalysisReferences(**analysis_references_data)

# THEN the model should have been correctly built
assert model.dict() == analysis_references_data
assert model.model_dump() == analysis_references_data


def test_analysis_references_empty():
Expand All @@ -60,7 +60,7 @@ def test_analysis_references_canfam(analysis_references_data: Dict[str, Path]):
)

# THEN the model should have been correctly built
assert model.dict() == analysis_references_data
assert model.model_dump() == analysis_references_data


def test_analysis_references_canfam_empty():
Expand All @@ -83,7 +83,7 @@ def test_analysis_references_hg(analysis_references_hg_data: Dict[str, Path]):
model: AnalysisReferencesHg = AnalysisReferencesHg(**analysis_references_hg_data)

# THEN the model should have been correctly built
assert model.dict() == analysis_references_hg_data
assert model.model_dump() == analysis_references_hg_data


def test_analysis_references_hg_empty():
Expand All @@ -106,7 +106,7 @@ def test_reference_url(reference_url_data: Dict[str, Any]):
model: ReferenceUrl = ReferenceUrl(**reference_url_data)

# THEN the model should have been correctly built
assert model.dict() == reference_url_data
assert model.model_dump() == reference_url_data


def test_reference_url_empty():
Expand All @@ -120,7 +120,7 @@ def test_reference_url_empty():
ReferenceUrl()


def test_references(references_data: Dict[str, dict]):
def test_references(references_data: Dict[str, dict], references: References):
"""Test references model."""

# GIVEN an input for the reference model
Expand All @@ -129,7 +129,7 @@ def test_references(references_data: Dict[str, dict]):
model: References = References(**references_data)

# THEN the model should have been correctly built
assert model.dict() == references_data
assert model == references


def test_references_empty():
Expand Down Expand Up @@ -219,7 +219,7 @@ def test_get_refgene_bed_file_path(references: References, refgene_bed_file: Pat
assert refgene_output_file == refgene_bed_file.as_posix()


def test_references_canfam(references_data: Dict[str, dict]):
def test_references_canfam(references_data: Dict[str, dict], references: References):
"""Test canine references model."""

# GIVEN an input for the canine reference model
Expand All @@ -228,7 +228,7 @@ def test_references_canfam(references_data: Dict[str, dict]):
model: ReferencesCanFam = ReferencesCanFam(**references_data)

# THEN the model should have been correctly built
assert model.dict() == references_data
assert model.model_dump() == references.model_dump()


def test_references_canfam_empty():
Expand All @@ -242,7 +242,9 @@ def test_references_canfam_empty():
ReferencesCanFam()


def test_references_hg(references_hg_data: Dict[str, dict]):
def test_references_hg(
references_hg_data: Dict[str, dict], references_hg: ReferencesHg
):
"""Test human genome references model."""

# GIVEN an input for the human genome reference model
Expand All @@ -251,7 +253,7 @@ def test_references_hg(references_hg_data: Dict[str, dict]):
model: ReferencesHg = ReferencesHg(**references_hg_data)

# THEN the model should have been correctly built
assert model.dict() == references_hg_data
assert model == references_hg


def test_references_hg_empty():
Expand Down Expand Up @@ -353,7 +355,7 @@ def test_cache_analysis(cache_analysis_data: Dict[str, str]):
model: CacheAnalysis = CacheAnalysis(**cache_analysis_data)

# THEN the model should have been correctly built
assert model.dict() == cache_analysis_data
assert model.model_dump() == cache_analysis_data


def test_cache_analysis_empty():
Expand Down Expand Up @@ -591,6 +593,7 @@ def test_get_reference_output_paths(cache_config: CacheConfig):
def test_get_analysis_references_hg(
cache_config: CacheConfig,
analysis_references_hg_data: Dict[str, Path],
analysis_references_hg: AnalysisReferences,
):
"""Test analysis references retrieval to be used for Balsamic human genome analyses."""

Expand All @@ -602,7 +605,7 @@ def test_get_analysis_references_hg(

# THEN the retrieved analysis references should match the mocked one
assert type(analysis_references) is AnalysisReferencesHg
assert analysis_references.dict() == analysis_references_hg_data
assert analysis_references == analysis_references_hg


def test_get_analysis_references_canfam(
Expand All @@ -620,4 +623,4 @@ def test_get_analysis_references_canfam(

# THEN the retrieved analysis references should match the mocked one
assert type(analysis_references) is AnalysisReferencesCanFam
assert analysis_references.dict() == analysis_references_data
assert analysis_references.model_dump() == analysis_references_data