-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
224 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,175 @@ | ||
from __future__ import print_function, unicode_literals | ||
from ..utils import ( | ||
check_function, | ||
init_check_res, | ||
action_function, | ||
init_action_res | ||
) | ||
from dcicutils import ff_utils | ||
import requests | ||
import sys | ||
import json | ||
import datetime | ||
import time | ||
import boto3 | ||
|
||
|
||
@check_function()
def biosource_cell_line_value(connection, **kwargs):
    '''
    Checks cell line biosources to make sure they have an associated
    ontology term in their cell_line field.

    Returns a check result with WARN status (and the offending items in
    full_output/brief_output) when any cell-line-type biosource lacks
    cell_line metadata; PASS otherwise.
    '''
    check = init_check_res(connection, 'biosource_cell_line_value')

    # biosource_type values that are expected to carry cell_line metadata
    cell_line_types = ["primary cell", "primary cell line", "immortalized cell line",
                       "in vitro differentiated cells", "induced pluripotent stem cell line",
                       "stem cell", "stem cell derived cell line"]
    biosources = ff_utils.search_metadata('search/?type=Biosource&frame=object',
                                          ff_env=connection.ff_env, page_limit=200)
    missing = []
    for biosource in biosources:
        # membership test alone suffices: a missing/None biosource_type is
        # simply not in cell_line_types
        if biosource.get('biosource_type') in cell_line_types:
            # flag cell-line biosources whose cell_line field is absent/empty
            if not biosource.get('cell_line'):
                missing.append({'uuid': biosource['uuid'],
                                '@id': biosource['@id'],
                                'biosource_type': biosource.get('biosource_type'),
                                'description': biosource.get('description'),
                                'error': 'Missing cell_line metadata'})
    check.full_output = missing
    check.brief_output = [item['uuid'] for item in missing]
    if missing:
        check.status = 'WARN'
        check.summary = 'Cell line biosources found missing cell_line metadata'
        # description added for consistency with the other checks in this file
        check.description = '{} cell line biosources are missing cell_line metadata.'.format(len(missing))
    else:
        check.status = 'PASS'
        check.summary = 'No cell line biosources are missing cell_line metadata'
        check.description = '0 cell line biosources are missing cell_line metadata.'
    return check
|
||
|
||
@check_function()
def external_expsets_without_pub(connection, **kwargs):
    '''
    Flags externally-funded experiment sets that lack attribution to a
    publication (neither publications_of_set nor produced_in_pub is set).
    '''
    check = init_check_res(connection, 'external_expsets_without_pub')

    query = 'search/?award.project=External&type=ExperimentSet&frame=object'
    external_sets = ff_utils.search_metadata(query, ff_env=connection.ff_env,
                                             page_limit=50)
    no_pub = []
    for exp_set in external_sets:
        # attributed via either publication field -> nothing to report
        if exp_set.get('publications_of_set') or exp_set.get('produced_in_pub'):
            continue
        no_pub.append({'uuid': exp_set['uuid'],
                       '@id': exp_set['@id'],
                       'description': exp_set.get('description'),
                       'lab': exp_set.get('lab'),
                       'error': 'Missing attribution to a publication'})
    if no_pub:
        check.status = 'WARN'
        check.summary = 'External experiment sets found without associated publication'
        check.description = '{} external experiment sets are missing attribution to a publication.'.format(len(no_pub))
    else:
        check.status = 'PASS'
        check.summary = 'No external experiment sets are missing publication'
        check.description = '0 external experiment sets are missing attribution to a publication.'
    check.full_output = no_pub
    check.brief_output = [entry['uuid'] for entry in no_pub]
    return check
|
||
|
||
@check_function()
def expset_opfsets_unique_titles(connection, **kwargs):
    '''
    Checks experiment sets with other_processed_files to see if each
    collection of other_processed_files has a unique, non-empty title
    within that experiment set.

    full_output lists one entry per offending experiment set; brief_output
    groups the offending uuids by error kind ('missing title' /
    'duplicate title').
    '''
    check = init_check_res(connection, 'expset_opfsets_unique_titles')

    opf_expsets = ff_utils.search_metadata('search/?type=ExperimentSet&other_processed_files.files.uuid%21=No+value&frame=object',
                                           ff_env=connection.ff_env, page_limit=50)
    errors = []
    for expset in opf_expsets:
        fileset_names = [fileset.get('title') for fileset in expset['other_processed_files']]
        # a title absent from the metadata or present but empty both count as missing
        has_missing = None in fileset_names or '' in fileset_names
        # a set collapses duplicates, so a length mismatch means repeats exist
        # (no need to wrap the set in list() just to take its length)
        has_duplicate = len(set(fileset_names)) != len(fileset_names)
        if has_missing or has_duplicate:
            info = {'uuid': expset['uuid'],
                    '@id': expset['@id'],
                    'errors': []}
            if has_missing:
                info['errors'] += ['ExperimentSet {} has an other_processed_files collection with a missing title.'.format(expset['accession'])]
            if has_duplicate:
                info['errors'] += ['ExperimentSet {} has 2+ other_processed_files collections with duplicated titles.'.format(expset['accession'])]
            errors.append(info)

    if errors:
        check.status = 'WARN'
        check.summary = 'Experiment Sets found with duplicate/missing titles in other_processed_files'
        check.description = '{} Experiment Sets have other_processed_files collections with missing or duplicate titles.'.format(len(errors))
    else:
        check.status = 'PASS'
        check.summary = 'No issues found with other_processed_files of experiment sets'
        check.description = '0 Experiment Sets have other_processed_files collections with missing or duplicate titles.'
    check.full_output = errors
    # brief_output keys items by which error message text was recorded
    check.brief_output = {'missing title': [item['uuid'] for item in errors if 'missing' in ''.join(item['errors'])],
                          'duplicate title': [item['uuid'] for item in errors if 'duplicated' in ''.join(item['errors'])]}
    return check
|
||
|
||
@check_function()
def expset_opf_unique_files_in_experiments(connection, **kwargs):
    '''
    Checks experiment sets with other_processed_files and looks for
    other_processed_files collections in child experiments to make sure
    that (1) the collections have titles and (2) that if the titles are
    shared with the parent experiment set, the filenames contained within
    are unique.

    full_output lists one entry per offending experiment; brief_output
    groups the offending uuids by error kind.
    '''
    check = init_check_res(connection, 'expset_opf_unique_files_in_experiments')

    opf_expsets = ff_utils.search_metadata('search/?type=ExperimentSet&other_processed_files.files.uuid%21=No+value',
                                           ff_env=connection.ff_env, page_limit=25)
    errors = []
    for expset in opf_expsets:
        # map titled parent-level collections to their file lists so child
        # collections with the same title can be cross-checked
        expset_titles = {fileset.get('title'): fileset.get('files') for fileset in expset['other_processed_files'] if fileset.get('title')}
        if not expset.get('experiments_in_set'):
            continue
        for expt in (exp for exp in expset.get('experiments_in_set') if exp.get('other_processed_files')):
            e = []
            for opf_set in expt['other_processed_files']:
                # look for missing names
                if not opf_set.get('title'):
                    e.append('Experiment {} in Experiment Set {} has an other_processed_files set '
                             'missing a title.'.format(expt['accession'], expset['accession']))
                # look for duplicate names (membership test directly on the dict)
                elif opf_set.get('title') in expset_titles and opf_set.get('files'):
                    for opf_file in opf_set['files']:
                        # if duplicate names, look for duplicate file names
                        if opf_file in expset_titles[opf_set['title']]:
                            e.append('Experiment {} other_processed_files collection with title `{}` has file {} which '
                                     'is also present in parent ExperimentSet {} other_processed_files collection of the '
                                     'same name.'.format(expt['accession'], opf_set['title'], opf_file['accession'], expset['accession']))
            if e:
                errors.append({'uuid': expt['uuid'],
                               '@id': expt['@id'],
                               'error_details': e})
    if errors:
        check.status = 'WARN'
        check.summary = '{} experiments found with issues in other_processed_files'.format(len(errors))
        # fixed broken message text: 'non-uniquefilenames' -> 'non-unique filenames'
        check.description = ('{} Experiments found that are either missing titles for sets of other_processed_files,'
                             ' or have non-unique filenames in other_processed_files'.format(len(errors)))
    else:
        check.status = 'PASS'
        check.summary = 'No issues found with other_processed_files of experiments'
        check.description = ('0 Experiments found to be missing titles for sets of other_processed_files,'
                             ' or have non-unique filenames in other_processed_files')
    check.full_output = errors
    check.brief_output = {'missing title': [item['uuid'] for item in errors if 'missing' in ''.join(item['error_details'])],
                          'duplicate title': [item['uuid'] for item in errors if 'also present in parent' in ''.join(item['error_details'])]}
    return check
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters