Skip to content

Commit

Permalink
Merge 8b0d34c into ce89f90
Browse files Browse the repository at this point in the history
  • Loading branch information
northwestwitch committed Dec 29, 2020
2 parents ce89f90 + 8b0d34c commit e44c835
Show file tree
Hide file tree
Showing 8 changed files with 120 additions and 10 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -1,7 +1,9 @@
## [] -
### Added
- Code for performing coordinate liftover using Ensembl REST API
- Variant liftover when comparing genotype features
### Changed
- Using coloredlogs for app logs
### Fixed
- Removed unused docker folder

Expand Down
15 changes: 12 additions & 3 deletions patientMatcher/match/genotype_matcher.py
@@ -1,8 +1,11 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging

from patientMatcher.parse.patient import gtfeatures_to_genes_symbols, gtfeatures_to_variants
from patientMatcher.parse.patient import (
gtfeatures_to_genes_symbols,
gtfeatures_to_variants,
lift_variant,
)

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -42,6 +45,7 @@ def match(database, gt_features, max_score):
if symbols:
query_fields.append({"genomicFeatures.gene._geneName": {"$in": symbols}})

# Obtain variants and the corresponding variants in the other genome build from the genotype features
variants = gtfeatures_to_variants(gt_features)
if variants:
query_fields.append({"genomicFeatures.variant": {"$in": variants}})
Expand Down Expand Up @@ -100,7 +104,10 @@ def evaluate_GT_similarity(query_features, db_patient_features, max_feature_simi
matched_features.append(0) # score for matching of every feature is initially 0
q_gene_id = feature["gene"]["id"] # query feature's gene id
q_gene_symbol = feature["gene"].get("_geneName") # query feature's gene symbol

# Do liftover for query variant in order to maximize perfect matching chances
q_variant = feature.get("variant") # query feature's variant. Not mandatory.
lifted_q_variant = lift_variant(q_variant) if q_variant else []

# loop over the database patient's features:
for matching_feature in db_patient_features:
Expand All @@ -112,7 +119,9 @@ def evaluate_GT_similarity(query_features, db_patient_features, max_feature_simi
"variant"
) # matching feature's variant. Not mandatory.

if q_variant == m_variant: # variants are matching -> Assign max score
# if variants are matching or lifted query variant matches with matched patients variant
# ->assign max matching score
if q_variant == m_variant or m_variant in lifted_q_variant:
matched_features[n_feature] = max_feature_similarity

elif q_gene_id == m_gene_id: # matching genes
Expand Down
5 changes: 0 additions & 5 deletions patientMatcher/match/phenotype_matcher.py
Expand Up @@ -129,11 +129,6 @@ def evaluate_pheno_similariy(
omim_score = evaluate_subcategories(disorders, matching_omim_terms, max_omim_score)

patient_similarity = hpo_score + omim_score
LOG.info(
"patient phenotype score: {0} (OMIM:{1}, HPO:{2})".format(
patient_similarity, omim_score, hpo_score
)
)
return patient_similarity


Expand Down
49 changes: 49 additions & 0 deletions patientMatcher/parse/patient.py
@@ -1,8 +1,10 @@
# -*- coding: utf-8 -*-

import json
from copy import deepcopy
from jsonschema import validate, RefResolver, FormatChecker
from patientMatcher.utils.gene import symbol_to_ensembl, entrez_to_symbol, ensembl_to_symbol
from patientMatcher.utils.variant import liftover
from pkgutil import get_data
import logging

Expand Down Expand Up @@ -175,6 +177,43 @@ def gtfeatures_to_genes_symbols(gtfeatures):
return gene_set, symbol_set


def lift_variant(variant):
    """Perform a variant liftover using Ensembl REST API and return eventual variant in the other genome build

    Args:
        variant(dict): example:
            {'assembly': 'GRCh38', 'referenceName': '12', 'start': 14641142, 'end': 14641142, 'referenceBases': 'C', 'alternateBases': 'T'}
            A variant without "start" cannot be lifted. A variant without "end" is lifted
            using its start position only.

    Returns:
        lifted_variants(list of dict): example:
            [{'assembly': 'GRCh37', 'referenceName': '12', 'start': 14794076, 'end': 14794076, 'referenceBases': 'C', 'alternateBases': 'T'}]
            Empty list when the variant has no start coordinate or when liftover returns None.
    """
    start = variant.get("start")
    if start is None:
        # Without a start coordinate there is nothing to map (and None + 1 would raise TypeError)
        return []

    end = variant.get("end")
    has_end = end is not None

    mappings = liftover(
        variant.get("assembly"),
        variant.get("referenceName"),
        start + 1,  # coordinates are 0-based in MatchMaker
        (end if has_end else start) + 1,  # "end" may be absent: fall back to the start position
    )
    if mappings is None:
        return []

    lifted_vars = []
    for res in mappings:
        mapped = res["mapped"]
        # Start from a copy of the original variant so that alleles and any extra keys are kept
        lifted = deepcopy(variant)
        # Overwrite build and coordinates according to the mapping result
        lifted["assembly"] = mapped["assembly"]
        lifted["referenceName"] = mapped["seq_region_name"]
        lifted["start"] = mapped["start"] - 1  # convert back to 0-based coordinates
        if has_end:
            # Only set "end" when the original variant had one, so the lifted copy keeps the same keys
            lifted["end"] = mapped["end"] - 1
        lifted_vars.append(lifted)

    return lifted_vars


def gtfeatures_to_variants(gtfeatures):
"""Extracts all variants from a list of genomic features
Expand All @@ -187,7 +226,17 @@ def gtfeatures_to_variants(gtfeatures):
variants = []
for feature in gtfeatures:
if "variant" in feature:
variant = feature["variant"]
if variant is None:
continue
# Add variant to search terms
variants.append(feature["variant"])
# Add also corresponding variant in another genome build (GRCh38 if original variant was GRCh37, and the other way around)
lifted_variants = lift_variant(feature["variant"])
if not lifted_variants:
continue # Variant could not be lifted to the other build
for lifted in lifted_variants:
variants.append(lifted)

return variants

Expand Down
5 changes: 4 additions & 1 deletion patientMatcher/server/__init__.py
@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
import coloredlogs
import os
from pymongo import MongoClient
import logging
from flask import Flask
from flask_mail import Mail
from . import views

logging.basicConfig(level=logging.INFO)
LOG = logging.getLogger(__name__)


Expand All @@ -28,6 +28,9 @@ def create_app():
app = Flask(__name__, instance_path=instance_path, instance_relative_config=True)
app.config.from_pyfile("config.py")

current_log_level = LOG.getEffectiveLevel()
coloredlogs.install(level="DEBUG" if app.debug else current_log_level)

client = MongoClient(app.config["DB_URI"])
app.client = client
app.db = client[app.config["DB_NAME"]]
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,4 +1,5 @@
## server
coloredlogs
Flask
Flask-Mail
flask-negotiate
Expand Down
27 changes: 27 additions & 0 deletions tests/conftest.py
Expand Up @@ -188,6 +188,33 @@ def match_objs():
return matches


@pytest.fixture()
def patient_37():
    """A patient with one genomic feature whose variant is in genome assembly GRCh37"""

    patient = {
        "patient": {
            "id": "patient_id",
            "contact": {"name": "Contact Name", "href": "mailto:contact_name@mail.com"},
            "features": [{"id": "HP:0009623"}],
            "genomicFeatures": [
                {
                    "gene": {"id": "GUCY2C"},
                    "variant": {
                        "assembly": "GRCh37",
                        "referenceName": "12",
                        "start": 14794075,
                        "end": 14794076,
                        "referenceBases": "C",
                        "alternateBases": "T",
                    },
                }
            ],
        }
    }
    return patient


@pytest.fixture(scope="function")
def gpx4_patients(json_patients):
"""Return all patients with variants in GPX4 gene"""
Expand Down
26 changes: 25 additions & 1 deletion tests/parse/test_parse_patient.py
@@ -1,6 +1,11 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from patientMatcher.parse.patient import features_to_hpo, disorders_to_omim, mme_patient
from patientMatcher.parse.patient import (
features_to_hpo,
disorders_to_omim,
mme_patient,
gtfeatures_to_variants,
)


def test_features_to_hpo_no_features():
Expand Down Expand Up @@ -36,3 +41,22 @@ def test_mme_patient_entrez_gene(entrez_gene_patient, database):
# After conversion formatted patient's gene id should be an Ensembl id
assert mme_formatted_patient["genomicFeatures"][0]["gene"]["id"].startswith("ENSG")
assert mme_formatted_patient["genomicFeatures"][0]["gene"]["_geneName"] # it's "KARS"


def test_gtfeatures_to_variants(patient_37):
    """Check that collecting variants from genomic features also yields the lifted-over variant"""

    # GIVEN the genomic features of a patient carrying exactly one feature
    genomic_features = patient_37["patient"]["genomicFeatures"]
    assert len(genomic_features) == 1

    # WHEN the variants are collected from those features
    collected = gtfeatures_to_variants(genomic_features)

    # THEN exactly two variants are returned
    assert len(collected) == 2
    original, lifted = collected

    # The first one is in genome build GRCh37
    assert original["assembly"] == "GRCh37"
    # And the second one is in genome build GRCh38
    assert lifted["assembly"] == "GRCh38"

0 comments on commit e44c835

Please sign in to comment.