Skip to content

Commit

Permalink
add test for elasticsearch regulome
Browse files Browse the repository at this point in the history
  • Loading branch information
mingjiecn committed Feb 1, 2022
1 parent 2454f1f commit ac1a1e0
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 0 deletions.
60 changes: 60 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,3 +451,63 @@ def value_strand_col_chip_seq():
'strand_col': 5,
'value_col': 6
}


from contextlib import contextmanager


@contextmanager
def start_elasticsearch(host='localhost', port=9203):
    """Run a throwaway Elasticsearch server for the duration of a ``with`` block.

    The ``elasticsearch`` executable is looked up on ``PATH`` and launched
    with its data and log directories inside a fresh temporary directory.
    The ``path.conf`` option is a relative path, so it is resolved against
    the current working directory of the test run.

    The function blocks until a line containing ``'started'`` shows up in the
    server output, then yields the ``subprocess.Popen`` handle.  On exit the
    server is terminated and the temporary directory is removed, including
    when start-up itself fails.

    Args:
        host: Value passed to Elasticsearch as ``network.host``.
        port: Value passed to Elasticsearch as ``http.port``.

    Yields:
        The ``subprocess.Popen`` object of the running server.

    Raises:
        OSError: If the ``elasticsearch`` executable cannot be launched.
        RuntimeError: If the server exits before reporting that it started.
    """
    import io
    import os
    import shutil
    import subprocess
    import tempfile

    data_directory = tempfile.mkdtemp()
    process = None
    # Everything after mkdtemp() is inside the try so the directory (and the
    # server, if it got launched) is cleaned up even when start-up fails.
    try:
        command = [
            'elasticsearch',
            f'-Enetwork.host={host}',
            f'-Ehttp.port={port}',
            f'-Epath.data={os.path.join(data_directory, "data")}',
            f'-Epath.logs={os.path.join(data_directory, "logs")}',
            '-Epath.conf=./genomic_data_service/rnaseq/tests/elasticsearch/conf',
        ]
        process = subprocess.Popen(
            command,
            close_fds=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        startup_lines = []
        output = io.TextIOWrapper(
            process.stdout,
            encoding="utf-8",
            errors="replace",
        )
        for line in output:
            # Lines already end with a newline; do not add a second one.
            print(line, end='')
            startup_lines.append(line)
            if 'started' in line:
                print('ES up and running')
                break
        else:
            # End of output without the marker: the server died during
            # start-up.  Fail loudly instead of yielding a dead process.
            return_code = process.wait()
            raise RuntimeError(
                f'elasticsearch exited with code {return_code} before '
                'reporting startup:\n' + ''.join(startup_lines)
            )
        # Nothing reads the server output past this point, so close the pipe
        # explicitly rather than leaving it to garbage collection.
        output.close()
        print('yielding ES')
        yield process
    finally:
        print('cleaning up ES')
        if process is not None:
            process.terminate()
            process.wait()
            if process.stdout is not None:
                process.stdout.close()
        shutil.rmtree(data_directory)


@pytest.fixture(scope='session')
def regulome_elasticsearch_client(host='127.0.0.1', port=9203):
    """Session-wide ``RegionIndexerElasticSearch`` backed by a local test server.

    An Elasticsearch process is started through ``start_elasticsearch`` and
    kept alive while the fixture is in use; it is shut down when the context
    manager exits at fixture teardown.
    """
    from genomic_data_service.region_indexer_elastic_search import (
        RegionIndexerElasticSearch,
    )
    from genomic_data_service.region_indexer import (
        SUPPORTED_CHROMOSOMES,
        SUPPORTED_ASSEMBLIES,
    )

    with start_elasticsearch(host=host, port=port):
        indexer = RegionIndexerElasticSearch(
            host,
            port,
            SUPPORTED_CHROMOSOMES,
            SUPPORTED_ASSEMBLIES,
        )
        yield indexer


@pytest.fixture
def regulome_elasticsearch_client_index(regulome_elasticsearch_client):
    """Yield the shared indexer client after calling ``setup_indices()`` on it."""
    regulome_elasticsearch_client.setup_indices()
    yield regulome_elasticsearch_client


69 changes: 69 additions & 0 deletions tests/functional/test_elasticsearch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import pytest

from genomic_data_service.region_indexer import dataset_accession

def test_indices(regulome_elasticsearch_client_index):
    """The aliases reported by Elasticsearch match the expected index names."""
    expected_indices = [
        'chr1', 'chr10', 'chr11', 'chr12', 'chr13', 'chr14', 'chr15',
        'chr16', 'chr17', 'chr18', 'chr19',
        'chr2', 'chr20', 'chr21', 'chr22',
        'chr3', 'chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9',
        'chrx', 'chry',
        'resident_regionsets', 'snp_grch38', 'snp_hg19',
    ]

    aliases = regulome_elasticsearch_client_index.es.indices.get_alias()

    assert sorted(aliases.keys()) == expected_indices


def test_index_regions(regulome_elasticsearch_client_index):
    """Index one region file and check the documents that end up in ES.

    File ``ENCFF760LBY`` and its dataset are looked up with ``encode_graph``
    and indexed with ``index_regions_from_file``.  The test then checks the
    hit count and document fields in the ``chr10`` index and in the
    ``resident_regionsets`` index.
    """
    from genomic_data_service.region_indexer_task import index_regions_from_file
    from genomic_data_service.region_indexer import encode_graph

    es = regulome_elasticsearch_client_index.es

    file_properties = encode_graph(['accession=ENCFF760LBY'])[0]
    file_uuid = file_properties['uuid']
    # The dataset accession is the third '/'-separated piece of the 'dataset'
    # value (presumably a path like '/<collection>/<accession>/' -- confirm).
    accession = file_properties['dataset'].split('/')[2]
    dataset = encode_graph(['accession=' + accession])[0]

    index_regions_from_file(es, file_uuid, file_properties, dataset, snp=False)

    # Refresh so the documents just written are visible to the searches below.
    es.indices.refresh()

    region_hits = es.search(
        index="chr10",
        body={"query": {"match_all": {}}},
    )['hits']
    assert region_hits['total'] == 18014
    region_doc = region_hits['hits'][0]["_source"]
    for field in ('coordinates', 'strand', 'value', 'uuid'):
        assert field in region_doc

    regionset_hits = es.search(
        index="resident_regionsets",
        body={"query": {"match_all": {}}},
    )['hits']
    assert regionset_hits['total'] == 1
    regionset_doc = regionset_hits['hits'][0]["_source"]
    assert regionset_doc['file']['uuid'] == file_uuid
    for field in ('chroms', 'dataset', 'dataset_type', 'uuid', 'file', 'uses'):
        assert field in regionset_doc

def test_index_snps(regulome_elasticsearch_client_index):
    """Index the bundled test SNP file and check the ``snp_hg19`` documents."""
    import uuid
    from genomic_data_service.region_indexer_task import index_regions_from_test_snp_file
    from genomic_data_service.region_indexer import TEST_SNP_FILE, FILE_HG19

    es = regulome_elasticsearch_client_index.es
    file_uuid = uuid.uuid4()

    index_regions_from_test_snp_file(es, file_uuid, TEST_SNP_FILE, FILE_HG19)

    # Refresh so the documents just written are visible to the search below.
    es.indices.refresh()

    snp_hits = es.search(
        index="snp_hg19",
        body={"query": {"match_all": {}}},
    )['hits']
    assert snp_hits['total'] == 11

    first_hit = snp_hits['hits'][0]
    assert first_hit["_type"] == 'chr10'

    snp_doc = first_hit["_source"]
    expected_fields = (
        'alt_allele_freq',
        'ref_allele_freq',
        'coordinates',
        'rsid',
        'chrom',
        'maf',
    )
    for field in expected_fields:
        assert field in snp_doc


0 comments on commit ac1a1e0

Please sign in to comment.