From c8fb3ed560945b3c81c71a35bda63f586e576dfa Mon Sep 17 00:00:00 2001 From: Vadym Date: Mon, 2 Oct 2023 16:09:26 +0200 Subject: [PATCH 1/4] Migrate cache model to pydantic v2 --- BALSAMIC/commands/init/base.py | 5 ++++- BALSAMIC/models/cache.py | 19 ++++++++++--------- BALSAMIC/workflows/reference.smk | 4 ++-- tests/conftest.py | 8 ++++---- tests/models/test_cache_models.py | 31 +++++++++++++++++-------------- 5 files changed, 37 insertions(+), 30 deletions(-) diff --git a/BALSAMIC/commands/init/base.py b/BALSAMIC/commands/init/base.py index e7dd83d5b..af1286ec9 100644 --- a/BALSAMIC/commands/init/base.py +++ b/BALSAMIC/commands/init/base.py @@ -128,7 +128,10 @@ def initialize( references=references, references_date=datetime.now().strftime("%Y-%m-%d %H:%M"), ) - write_json(json.loads(cache_config.json(exclude_none=True)), config_path.as_posix()) + write_json( + json_obj=json.loads(cache_config.model_dump_json(exclude_none=True)), + path=config_path.as_posix(), + ) LOG.info(f"Reference workflow configured successfully ({config_path.as_posix()})") snakefile: Path = ( diff --git a/BALSAMIC/models/cache.py b/BALSAMIC/models/cache.py index 26f324547..549b011d2 100644 --- a/BALSAMIC/models/cache.py +++ b/BALSAMIC/models/cache.py @@ -1,9 +1,10 @@ """Balsamic reference cache models.""" import logging from pathlib import Path -from typing import Dict, Optional, List, Any, Union +from typing import Dict, Optional, List, Union -from pydantic.v1 import BaseModel, AnyUrl, DirectoryPath, validator, FilePath +from pydantic import BaseModel, AnyUrl, DirectoryPath, FilePath, field_validator +from pydantic_core.core_schema import FieldValidationInfo from BALSAMIC.constants.cache import GenomeVersion, GRCHVersion from BALSAMIC.constants.constants import FileType, BwaIndexFileType @@ -31,8 +32,8 @@ class ReferenceUrl(BaseModel): gzip: bool file_name: str dir_name: str - file_path: Optional[str] - secret: Optional[str] + file_path: Optional[str] = None + secret: Optional[str] = None class References(BaseModel): @@ -277,15 +278,15 @@ class CacheConfig(BaseModel): variants_dir: Path vep_dir: Path genome_version: GenomeVersion - cosmic_key: Optional[str] + cosmic_key: Optional[str] = None bioinfo_tools: dict containers: Dict[str, str] references: Union[ReferencesHg, ReferencesCanFam] references_date: str - @validator("references") + @field_validator("references") def validate_references( - cls, references: References, values: Dict[str, Any] + cls, references: References, info: FieldValidationInfo ) -> References: """Validate the reference output paths.""" for model in references: @@ -294,7 +295,7 @@ def validate_references( reference_key, reference = model[0], model[1] reference.file_path = ( Path( - values.get("references_dir"), + info.data.get("references_dir"), reference.dir_name, reference.file_name, ).as_posix() @@ -302,7 +303,7 @@ def validate_references( else None ) reference.secret = ( - values.get("cosmic_key") if "cosmic" in reference_key else None + info.data.get("cosmic_key") if "cosmic" in reference_key else None ) return references diff --git a/BALSAMIC/workflows/reference.smk b/BALSAMIC/workflows/reference.smk index 0ab5b5d96..eb137b169 100644 --- a/BALSAMIC/workflows/reference.smk +++ b/BALSAMIC/workflows/reference.smk @@ -17,7 +17,7 @@ from BALSAMIC.utils.utils import get_relative_paths_dict LOG = logging.getLogger(__name__) # Balsamic cache configuration model -cache_config: CacheConfig = CacheConfig.parse_obj(config) +cache_config: CacheConfig = CacheConfig.model_validate(config) # Temporary directory and shell options os.environ["TMPDIR"] = cache_config.references_dir.as_posix() @@ -46,7 +46,7 @@ rule all: run: analysis_references: Dict[str, str] = get_relative_paths_dict( base_path=cache_config.references_dir, - data=cache_config.get_analysis_references().dict(), + data=cache_config.get_analysis_references().model_dump(), ) write_json( json_obj=analysis_references, diff --git a/tests/conftest.py b/tests/conftest.py index 657f8d3fe..83e44f196 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,4 @@ import copy -import json import os import shutil from datetime import datetime @@ -11,6 +10,7 @@ import pytest from _pytest.tmpdir import TempPathFactory from click.testing import CliRunner +from pydantic_core import Url from BALSAMIC import __version__ as balsamic_version from BALSAMIC.commands.base import cli @@ -2029,9 +2029,9 @@ def fixture_analysis_references_hg( @pytest.fixture(scope="session", name="reference_url") -def fixture_reference_url() -> str: +def fixture_reference_url() -> Url: """Return dummy reference url.""" - return "gs://gatk-legacy-bundles/b37/reference.vcf.gz" + return Url("gs://gatk-legacy-bundles/b37/reference.vcf.gz") @pytest.fixture(scope="session", name="reference_file") @@ -2044,7 +2044,7 @@ def fixture_reference_file(session_tmp_path: Path) -> Path: @pytest.fixture(scope="session", name="reference_url_data") def fixture_reference_url_data( - reference_url: str, reference_file: Path, cosmic_key: str + reference_url: Url, reference_file: Path, cosmic_key: str ) -> Dict[str, Any]: """return reference url model data.""" return { diff --git a/tests/models/test_cache_models.py b/tests/models/test_cache_models.py index 28873af55..9b1cde717 100644 --- a/tests/models/test_cache_models.py +++ b/tests/models/test_cache_models.py @@ -4,7 +4,7 @@ import pytest from _pytest.logging import LogCaptureFixture -from pydantic.v1 import ValidationError +from pydantic import ValidationError from BALSAMIC.constants.cache import ( GRCHVersion, @@ -35,7 +35,7 @@ def test_analysis_references(analysis_references_data: Dict[str, Path]): model: AnalysisReferences = AnalysisReferences(**analysis_references_data) # THEN the model should have been correctly built - assert model.dict() == analysis_references_data + assert model.model_dump() == analysis_references_data def test_analysis_references_empty(): @@ -60,7 +60,7 @@ def test_analysis_references_canfam(analysis_references_data: Dict[str, Path]): ) # THEN the model should have been correctly built - assert model.dict() == analysis_references_data + assert model.model_dump() == analysis_references_data def test_analysis_references_canfam_empty(): @@ -83,7 +83,7 @@ def test_analysis_references_hg(analysis_references_hg_data: Dict[str, Path]): model: AnalysisReferencesHg = AnalysisReferencesHg(**analysis_references_hg_data) # THEN the model should have been correctly built - assert model.dict() == analysis_references_hg_data + assert model.model_dump() == analysis_references_hg_data def test_analysis_references_hg_empty(): @@ -106,7 +106,7 @@ def test_reference_url(reference_url_data: Dict[str, Any]): model: ReferenceUrl = ReferenceUrl(**reference_url_data) # THEN the model should have been correctly built - assert model.dict() == reference_url_data + assert model.model_dump() == reference_url_data def test_reference_url_empty(): @@ -120,7 +120,7 @@ def test_reference_url_empty(): ReferenceUrl() -def test_references(references_data: Dict[str, dict]): +def test_references(references_data: Dict[str, dict], references: References): """Test references model.""" # GIVEN an input for the reference model @@ -129,7 +129,7 @@ def test_references(references_data: Dict[str, dict]): model: References = References(**references_data) # THEN the model should have been correctly built - assert model.dict() == references_data + assert model == references def test_references_empty(): @@ -219,7 +219,7 @@ def test_get_refgene_bed_file_path(references: References, refgene_bed_file: Pat assert refgene_output_file == refgene_bed_file.as_posix() -def test_references_canfam(references_data: Dict[str, dict]): +def test_references_canfam(references_data: Dict[str, dict], references: References): """Test canine references model.""" # GIVEN an input for the canine reference model @@ -228,7 +228,7 @@ def test_references_canfam(references_data: Dict[str, dict]): model: ReferencesCanFam = ReferencesCanFam(**references_data) # THEN the model should have been correctly built - assert model.dict() == references_data + assert model.model_dump() == references.model_dump() def test_references_canfam_empty(): @@ -242,7 +242,9 @@ def test_references_canfam_empty(): ReferencesCanFam() -def test_references_hg(references_hg_data: Dict[str, dict]): +def test_references_hg( + references_hg_data: Dict[str, dict], references_hg: ReferencesHg +): """Test human genome references model.""" # GIVEN an input for the human genome reference model @@ -251,7 +253,7 @@ def test_references_hg(references_hg_data: Dict[str, dict]): model: ReferencesHg = ReferencesHg(**references_hg_data) # THEN the model should have been correctly built - assert model.dict() == references_hg_data + assert model == references_hg def test_references_hg_empty(): @@ -353,7 +355,7 @@ def test_cache_analysis(cache_analysis_data: Dict[str, str]): model: CacheAnalysis = CacheAnalysis(**cache_analysis_data) # THEN the model should have been correctly built - assert model.dict() == cache_analysis_data + assert model.model_dump() == cache_analysis_data def test_cache_analysis_empty(): @@ -591,6 +593,7 @@ def test_get_reference_output_paths(cache_config: CacheConfig): def test_get_analysis_references_hg( cache_config: CacheConfig, analysis_references_hg_data: Dict[str, Path], + analysis_references_hg: AnalysisReferences, ): """Test analysis references retrieval to be used for Balsamic human genome analyses.""" @@ -602,7 +605,7 @@ def test_get_analysis_references_hg( # THEN the retrieved analysis references should match the mocked one assert type(analysis_references) is AnalysisReferencesHg - assert analysis_references.dict() == analysis_references_hg_data + assert analysis_references == analysis_references_hg def test_get_analysis_references_canfam( @@ -620,4 +623,4 @@ def test_get_analysis_references_canfam( # THEN the retrieved analysis references should match the mocked one assert type(analysis_references) is AnalysisReferencesCanFam - assert analysis_references.dict() == analysis_references_data + assert analysis_references.model_dump() == analysis_references_data From 479fd2336704f52c0dc2fec029bb6514a288b106 Mon Sep 17 00:00:00 2001 From: Vadym Date: Mon, 2 Oct 2023 16:16:03 +0200 Subject: [PATCH 2/4] CHANGELOG --- CHANGELOG.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d26fd7712..ddf507a30 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -48,6 +48,7 @@ Changed: * Update `reference.json` file to use relative paths https://github.com/Clinical-Genomics/BALSAMIC/pull/1251 * Update pydantic to v2 while maintaining support for v1 models https://github.com/Clinical-Genomics/BALSAMIC/pull/1253 * `PCT_PF_READS_IMPROPER_PAIRS` QC threshold lowered to 5% https://github.com/Clinical-Genomics/BALSAMIC/issues/1265 +* Migrate cache models to pydantic v2 https://github.com/Clinical-Genomics/BALSAMIC/pull/1277 Fixed: ^^^^^^ From 5531aa4411dd7e4f97fb1aad7706695e149dd2de Mon Sep 17 00:00:00 2001 From: Vadym Date: Wed, 4 Oct 2023 15:18:31 +0200 Subject: [PATCH 3/4] Remove deprecated method --- BALSAMIC/models/cache.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/BALSAMIC/models/cache.py b/BALSAMIC/models/cache.py index 549b011d2..5379c5904 100644 --- a/BALSAMIC/models/cache.py +++ b/BALSAMIC/models/cache.py @@ -3,8 +3,14 @@ from pathlib import Path from typing import Dict, Optional, List, Union -from pydantic import BaseModel, AnyUrl, DirectoryPath, FilePath, field_validator -from pydantic_core.core_schema import FieldValidationInfo +from pydantic import ( + BaseModel, + AnyUrl, + DirectoryPath, + FilePath, + field_validator, + ValidationInfo, +) from BALSAMIC.constants.cache import GenomeVersion, GRCHVersion from BALSAMIC.constants.constants import FileType, BwaIndexFileType @@ -286,7 +292,7 @@ class CacheConfig(BaseModel): @field_validator("references") def validate_references( - cls, references: References, info: FieldValidationInfo + cls, references: References, info: ValidationInfo ) -> References: """Validate the reference output paths.""" for model in references: From b1399ddf4b0890d9d5b5a442e4893152ac5bf10f Mon Sep 17 00:00:00 2001 From: Vadym Date: Wed, 4 Oct 2023 16:33:37 +0200 Subject: [PATCH 4/4] Update CHANGELOG.rst --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f37ce7498..7d26405bf 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -51,7 +51,7 @@ Changed: * `PCT_PF_READS_IMPROPER_PAIRS` QC threshold lowered to 5% https://github.com/Clinical-Genomics/BALSAMIC/issues/1265 * Migrate Metrics models to pydantic v2 https://github.com/Clinical-Genomics/BALSAMIC/pull/1270 * Migrate Snakemake models to pydantic v2 https://github.com/Clinical-Genomics/BALSAMIC/pull/1268 -* Migrate Cache models to pydantic v2 https://github.com/Clinical-Genomics/BALSAMIC/pull/1277 +* Migrate Cache models to pydantic v2 https://github.com/Clinical-Genomics/BALSAMIC/pull/1274 Fixed: ^^^^^^