Skip to content

Commit

Permalink
Merge branch 'master' into add-illumina-run-model
Browse files Browse the repository at this point in the history
  • Loading branch information
ChrOertlin committed May 16, 2024
2 parents b74ea93 + d7a9eb2 commit a775921
Show file tree
Hide file tree
Showing 9 changed files with 143 additions and 66 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 60.7.19
current_version = 60.7.21
commit = True
tag = True
tag_name = v{new_version}
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/tests_and_coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ on:

jobs:
tests-coverage:
runs-on: ubuntu-latest
runs-on: Beefy_Linux

steps:
- name: Checkout Repository
Expand Down Expand Up @@ -37,7 +37,7 @@ jobs:

- name: Test with pytest & Coveralls
run: |
pytest -n auto --cov=cg/
pytest -n logical --cov=cg/
coveralls
env:
COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }}
Expand Down
2 changes: 1 addition & 1 deletion cg/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__title__ = "cg"
__version__ = "60.7.19"
__version__ = "60.7.21"
3 changes: 3 additions & 0 deletions cg/constants/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@
NO_FIELD: str = "Nej"
PRECISION: int = 2

RIN_MAX_THRESHOLD: int = 10
RIN_MIN_THRESHOLD: int = 1

REPORT_GENDER: dict[str, str] = {
"unknown": "Okänd",
"female": "Kvinna",
Expand Down
76 changes: 42 additions & 34 deletions cg/models/report/metadata.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from pydantic import BaseModel, BeforeValidator
from pydantic import BaseModel, BeforeValidator, field_validator
from typing_extensions import Annotated

from cg.constants import NA_FIELD
from cg.constants import NA_FIELD, RIN_MIN_THRESHOLD, RIN_MAX_THRESHOLD
from cg.models.report.validators import (
get_float_as_percentage,
get_float_as_string,
get_gender_as_string,
get_report_string,
get_number_as_string,
)


Expand All @@ -19,8 +19,8 @@ class SampleMetadataModel(BaseModel):
duplicates: fraction of mapped sequence that is marked as duplicate; source: workflow
"""

million_read_pairs: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
duplicates: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
million_read_pairs: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
duplicates: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD


class MipDNASampleMetadataModel(SampleMetadataModel):
Expand All @@ -36,9 +36,9 @@ class MipDNASampleMetadataModel(SampleMetadataModel):

bait_set: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD
gender: Annotated[str, BeforeValidator(get_gender_as_string)] = NA_FIELD
mapped_reads: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
mean_target_coverage: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
pct_10x: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
mapped_reads: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
mean_target_coverage: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
pct_10x: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD


class BalsamicSampleMetadataModel(SampleMetadataModel):
Expand All @@ -49,8 +49,8 @@ class BalsamicSampleMetadataModel(SampleMetadataModel):
fold_80: fold 80 base penalty; source: workflow
"""

mean_insert_size: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
fold_80: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
mean_insert_size: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
fold_80: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD


class BalsamicTargetedSampleMetadataModel(BalsamicSampleMetadataModel):
Expand All @@ -66,10 +66,10 @@ class BalsamicTargetedSampleMetadataModel(BalsamicSampleMetadataModel):

bait_set: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD
bait_set_version: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD
median_target_coverage: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
pct_250x: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
pct_500x: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
gc_dropout: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
median_target_coverage: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
pct_250x: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
pct_500x: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
gc_dropout: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD


class BalsamicWGSSampleMetadataModel(BalsamicSampleMetadataModel):
Expand All @@ -81,10 +81,10 @@ class BalsamicWGSSampleMetadataModel(BalsamicSampleMetadataModel):
pct_60x: fraction of bases that attained at least 60X sequence coverage; source: workflow
"""

median_coverage: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
pct_15x: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
pct_60x: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
pct_reads_improper_pairs: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
median_coverage: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
pct_15x: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
pct_60x: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
pct_reads_improper_pairs: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD


class SequencingSampleMetadataModel(SampleMetadataModel):
Expand All @@ -96,7 +96,7 @@ class SequencingSampleMetadataModel(SampleMetadataModel):
"""

gc_content: Annotated[str, BeforeValidator(get_float_as_percentage)] = NA_FIELD
mean_length_r1: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
mean_length_r1: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD


class WTSSampleMetadataModel(SequencingSampleMetadataModel):
Expand All @@ -115,16 +115,24 @@ class WTSSampleMetadataModel(SequencingSampleMetadataModel):
uniquely_mapped_reads: percentage of mapped reads; source: workflow
"""

bias_5_3: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
input_amount: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
mrna_bases: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
pct_adapter: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
pct_surviving: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
bias_5_3: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
input_amount: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
mrna_bases: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
pct_adapter: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
pct_surviving: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
q20_rate: Annotated[str, BeforeValidator(get_float_as_percentage)] = NA_FIELD
q30_rate: Annotated[str, BeforeValidator(get_float_as_percentage)] = NA_FIELD
ribosomal_bases: Annotated[str, BeforeValidator(get_float_as_percentage)] = NA_FIELD
rin: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
uniquely_mapped_reads: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
rin: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
uniquely_mapped_reads: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD

@field_validator("rin")
@classmethod
def ensure_rin_thresholds(cls, rin: str) -> str:
    """
    Return the RIN value as a string if it lies within the accepted thresholds.

    The incoming value is either NA_FIELD or a string that can be parsed as a float.
    Values outside the inclusive range [RIN_MIN_THRESHOLD, RIN_MAX_THRESHOLD] are
    replaced with NA_FIELD instead of raising, so an implausible RIN never fails
    the whole model validation.
    """
    # Nothing to check when no RIN value is available
    if rin == NA_FIELD:
        return NA_FIELD
    rin_number = float(rin)
    if RIN_MIN_THRESHOLD <= rin_number <= RIN_MAX_THRESHOLD:
        return str(rin_number)
    # Out-of-range values are reported as not available
    return NA_FIELD


class RnafusionSampleMetadataModel(WTSSampleMetadataModel):
Expand All @@ -136,8 +144,8 @@ class RnafusionSampleMetadataModel(WTSSampleMetadataModel):
mapped_reads: percentage of reads aligned to the reference sequence; source: workflow
"""

insert_size: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
insert_size_peak: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
insert_size: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
insert_size_peak: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
mapped_reads: Annotated[str, BeforeValidator(get_float_as_percentage)] = NA_FIELD


Expand All @@ -151,10 +159,10 @@ class TaxprofilerSampleMetadataModel(SequencingSampleMetadataModel):
million_read_pairs_after_filtering: number of reads after filtering; source: workflow
"""

average_read_length: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
mapped_reads: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
mean_length_r2: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
million_read_pairs_after_filtering: Annotated[str, BeforeValidator(get_float_as_string)] = (
average_read_length: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
mapped_reads: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
mean_length_r2: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
million_read_pairs_after_filtering: Annotated[str, BeforeValidator(get_number_as_string)] = (
NA_FIELD
)

Expand All @@ -167,5 +175,5 @@ class TomteSampleMetadataModel(WTSSampleMetadataModel):
pct_intronic_bases: proportion of genomic bases within intronic regions; source: workflow
"""

pct_intergenic_bases: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
pct_intronic_bases: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
pct_intergenic_bases: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
pct_intronic_bases: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
20 changes: 16 additions & 4 deletions cg/models/report/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,26 @@ def get_boolean_as_string(value: bool | None) -> str:
return NA_FIELD


def get_float_as_string(value: float | None) -> str:
"""Return string representation of a float value."""
return str(round(float(value), PRECISION)) if value or isinstance(value, float) else NA_FIELD
def get_number_as_string(value: Any) -> str:
    """
    Format a number as a string rounded to PRECISION decimals.

    Input that float() rejects with a TypeError (for example None) yields NA_FIELD.

    Raises:
        ValueError: If the input value cannot be converted to a float. The error is
            logged before it is re-raised.
    """
    try:
        # Only the conversion can fail, so it is the only statement guarded
        number: float = float(value)
    except TypeError:
        return NA_FIELD
    except ValueError:
        LOG.error(f"Value {value} cannot be converted to float")
        raise
    return str(round(number, PRECISION))


def get_float_as_percentage(value: int | float | None) -> str:
    """
    Return string percentage representation of a number (the value multiplied by 100).

    A numeric zero, whether given as a float or as an integer, is a valid measurement
    and is formatted like any other number. Other falsy input (None, False, empty
    values) yields NA_FIELD.
    """
    # bool is a subclass of int; exclude it so that False keeps mapping to NA_FIELD
    is_number: bool = isinstance(value, (int, float)) and not isinstance(value, bool)
    if not value and not is_number:
        return NA_FIELD
    return get_number_as_string(value * 100)


def get_date_as_string(date: datetime | None) -> str:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "cg"
version = "60.7.19"
version = "60.7.21"
description = "Clinical Genomics command center"
authors = ["Clinical Genomics <support@clinicalgenomics.se>"]
readme = "README.md"
Expand Down
50 changes: 49 additions & 1 deletion tests/meta/report/test_rnafusion_api.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
"""Test module for the Rnafusion delivery report API."""

import pytest
from pytest_mock import MockFixture

from cg.constants import NA_FIELD, RIN_MAX_THRESHOLD, RIN_MIN_THRESHOLD
from cg.meta.report.rnafusion import RnafusionReportAPI
from cg.models.analysis import NextflowAnalysis
from cg.models.report.metadata import RnafusionSampleMetadataModel
from cg.store.models import Case, Sample
from tests.mocks.limsmock import MockLimsAPI


def test_get_sample_metadata(
report_api_rnafusion: RnafusionReportAPI,
sample_id: str,
rnafusion_case_id: str,
rnafusion_validated_metrics: dict[str, str],
rnafusion_mock_analysis_finish,
rnafusion_mock_analysis_finish: None,
):
"""Test Rnafusion sample metadata extraction."""

Expand All @@ -33,3 +38,46 @@ def test_get_sample_metadata(

# THEN the sample metadata should be correctly retrieved and match the expected validated metrics
assert sample_metadata.model_dump() == rnafusion_validated_metrics


@pytest.mark.parametrize(
    "input_rin, expected_rin",
    [
        (RIN_MAX_THRESHOLD, str(float(RIN_MAX_THRESHOLD))),  # Test for a valid integer input
        (RIN_MAX_THRESHOLD + 1, NA_FIELD),  # Test for an integer above the allowed threshold
        (RIN_MIN_THRESHOLD - 1, NA_FIELD),  # Test for an integer below the allowed threshold
        (None, NA_FIELD),  # Test for a None input
    ],
)
def test_ensure_rin_thresholds(
    rnafusion_case_id: str,
    sample_id: str,
    input_rin: int | float | None,
    expected_rin: str,
    report_api_rnafusion: RnafusionReportAPI,
    rnafusion_mock_analysis_finish: None,
    mocker: MockFixture,
):
    """
    Test Rnafusion RIN value validation.

    The RIN returned by the mocked LIMS API is patched to each parametrized input, and the
    resulting sample metadata is expected to expose either the float-formatted value (input
    at the maximum threshold) or NA_FIELD (input outside the thresholds, or None).
    """

    # GIVEN a Rnafusion case and associated sample
    case: Case = report_api_rnafusion.status_db.get_case_by_internal_id(
        internal_id=rnafusion_case_id
    )
    sample: Sample = report_api_rnafusion.status_db.get_sample_by_internal_id(internal_id=sample_id)

    # GIVEN an analysis metadata object
    latest_metadata: NextflowAnalysis = report_api_rnafusion.analysis_api.get_latest_metadata(
        case_id=rnafusion_case_id
    )

    # GIVEN a specific RIN value
    mocker.patch.object(MockLimsAPI, "get_sample_rin", return_value=input_rin)

    # WHEN getting the sample metadata
    sample_metadata: RnafusionSampleMetadataModel = report_api_rnafusion.get_sample_metadata(
        case=case, sample=sample, analysis_metadata=latest_metadata
    )

    # THEN the sample RIN value should match the expected RIN value
    assert sample_metadata.rin == expected_rin
50 changes: 28 additions & 22 deletions tests/models/report/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@
get_boolean_as_string,
get_date_as_string,
get_delivered_files_as_file_names,
get_float_as_percentage,
get_float_as_string,
get_gender_as_string,
get_list_as_string,
get_path_as_string,
get_prep_category_as_string,
get_report_string,
get_number_as_string,
get_float_as_percentage,
)


Expand Down Expand Up @@ -63,30 +63,36 @@ def test_get_boolean_as_string():
assert validated_not_bool_field == NA_FIELD


def test_get_float_as_string():
"""Test the validation of a float value."""

# GIVEN a valid float input
float_value: float = 12.3456789

# WHEN performing the validation
validated_float_value: str = get_float_as_string(float_value)

# THEN check if the input value was formatted correctly
assert validated_float_value == "12.35"
@pytest.mark.parametrize(
"input_value, expected_output",
[
(12.3456789, "12.35"), # Test for a valid float input
(0.0, "0.0"), # Test for float zero input
(5, "5.0"), # Test for a valid integer input
(0, "0.0"), # Test for integer zero input
(None, NA_FIELD), # Test for None input
("1.2", "1.2"), # Test for valid string input
("invalid", ValueError), # Test for an invalid string input
],
)
def test_get_number_as_string(input_value: Any, expected_output: str, caplog: LogCaptureFixture):
"""Test the validation and formatting of numbers."""

# GIVEN a list of number inputs and their expected values

def test_get_float_as_string_zero_input():
"""Tests the validation of a float value when input is zero."""
if expected_output == ValueError:
# WHEN getting a string representation of a number
with pytest.raises(ValueError):
get_number_as_string(input_value)

# GIVEN a valid float input
float_value: float = 0.0
# THEN a ValueError should have been raised for an invalid number input
assert f"Value {input_value} cannot be converted to float" in caplog.text
else:
# WHEN getting a string representation of a number
validated_float_value = get_number_as_string(input_value)

# WHEN performing the validation
validated_float_value: str = get_float_as_string(float_value)

# THEN check if the input value was formatted correctly
assert validated_float_value == "0.0"
# THEN the expected output should be correctly formatted
assert validated_float_value == expected_output


def test_get_float_as_percentage():
Expand Down

0 comments on commit a775921

Please sign in to comment.