Skip to content

Commit

Permalink
Merge fe4678a into 00f4991
Browse files Browse the repository at this point in the history
  • Loading branch information
terazus authored Sep 27, 2022
2 parents 00f4991 + fe4678a commit af45fad
Show file tree
Hide file tree
Showing 10 changed files with 43 additions and 74 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/buildandtestpython.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]
python-version: [3.8, 3.9]

steps:
- uses: actions/checkout@v2
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -135,4 +135,5 @@ venv/
isa-cookbook/content/notebooks/notebook-output/

# performance profiles output
performances/profiles/*
performances/profiles/*
.github/workflows/_build.yml
2 changes: 1 addition & 1 deletion isatools/isatab/load/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def read_tfile(tfile_path, index_col=None, factor_filter=None) -> IsaTabDataFram
tfile_fp.seek(0)
log.debug("Reading file into DataFrame")
tfile_fp = strip_comments(tfile_fp)
csv = read_csv(tfile_fp, dtype=str, sep='\t', index_col=index_col, memory_map=True, encoding='utf-8').fillna('')
csv = read_csv(tfile_fp, dtype=str, sep='\t', index_col=index_col, encoding='utf-8').fillna('')
tfile_df = IsaTabDataFrame(csv)
if factor_filter:
log.debug("Filtering DataFrame contents on Factor Value %s", factor_filter)
Expand Down
21 changes: 7 additions & 14 deletions isatools/net/mtbls.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,7 @@ def getj(mtbls_study_id):
def get_data_files(mtbls_study_id, factor_selection=None):
tmp_dir = get(mtbls_study_id)
if tmp_dir is None:
raise IOError(
'There was a problem retrieving study {study_id}. Does it exist?'
.format(study_id=mtbls_study_id))
raise IOError('There was a problem retrieving study {study_id}. Does it exist?'.format(study_id=mtbls_study_id))

else:
result = slice_data_files(tmp_dir, factor_selection=factor_selection)
Expand Down Expand Up @@ -206,12 +204,9 @@ def slice_data_files(dir, factor_selection=None):

with open(table_file, encoding='utf-8') as fp:
df = isatab.load_table(fp)
df = df[[x for x in df.columns if
'Factor Value' in x or 'Sample Name' in x]]
df.columns = ['sample' if 'Sample Name' in x else x for x in
df.columns]
df.columns = [x[13:-1] if 'Factor Value' in x else x for x in
df.columns]
df = df[[x for x in df.columns if 'Factor Value' in x or 'Sample Name' in x]]
df.columns = ['sample' if 'Sample Name' in x else x for x in df.columns]
df.columns = [x[13:-1] if 'Factor Value' in x else x for x in df.columns]
df.columns = [x.replace(' ', '_') for x in df.columns]
# build query
sample_names_series = df['sample'].drop_duplicates()
Expand All @@ -226,18 +221,16 @@ def slice_data_files(dir, factor_selection=None):
for factor_name, factor_value in factor_selection.items():
factor_name = factor_name.replace(' ', '_')
factor_query += '{factor_name}=="{factor_value}" and '\
.format(factor_name=factor_name,
factor_value=factor_value)
.format(factor_name=factor_name, factor_value=factor_value)
factor_query = factor_query[:-5]
try:
query_results = df.query(factor_query)[
'sample'].drop_duplicates()
query_results = df.query(factor_query)['sample'].drop_duplicates()
results = query_results.apply(lambda x: {
'sample': x,
'data_files': [],
'query_used': factor_selection
}).tolist()
except pd.core.computation.ops.UndefinedVariableError:
except pd.errors.UndefinedVariableError:
pass

# now collect the data files relating to the samples
Expand Down
Empty file.
14 changes: 5 additions & 9 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,28 +1,24 @@
graphene==2.1
graphene==3.1.1
graphql-core==3.2.3
wheel~=0.36.2
setuptools~=57.1.0
numpy~=1.21.0
numpy~=1.23.3
jsonschema~=3.2.0
pandas~=1.3.4
pandas==1.5.0
openpyxl>=2.5.0
networkx~=2.5.1
lxml~=4.6.3
lxml~=4.9.1
requests~=2.25.1
iso8601~=0.1.14
chardet~=4.0.0
jinja2~=3.0.1
beautifulsoup4~=4.9.3
mzml2isa==1.0.3
#-e git+http://github.com/ISA-tools/mzml2isa@5866b3e8e185ddbb3b784e4d6a2ef6fbbcfcb256#egg=mzml2isa
#-e git+http://github.com/ISA-tools/isa-api@4e38b09abac60c6acb787169e6eaeac0ac35c1ae#egg=isatools
biopython~=1.79
progressbar2~=3.53.1
deepdiff~=5.5.0
PyYAML~=5.4.1
bokeh~=2.3.2
# test dependencies
# tox==3.14.0
# nose==1.3.7
certifi==2021.5.30
flake8==3.9.2
ddt==1.4.2
Expand Down
30 changes: 23 additions & 7 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ def read(f_name):
name='isatools',
version='0.13.0-rc.2',
packages=['isatools',
'isatools.model',
'isatools.isatab',
'isatools.isajson',
'isatools.convert',
'isatools.create',
'isatools.errors',
Expand Down Expand Up @@ -54,27 +57,40 @@ def read(f_name):
'Intended Audience :: Developers',
'Intended Audience :: System Administrators',
'Topic :: Scientific/Engineering :: Bio-Informatics',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
],
install_requires=[
'numpy~=1.21.0',
'graphene==3.1.1',
'graphql-core==3.2.3',
'wheel~=0.36.2',
'setuptools~=57.1.0',
'numpy~=1.23.3',
'jsonschema~=3.2.0',
'pandas~=1.2.5',
'pandas==1.5.0',
'openpyxl>=2.5.0',
'networkx~=2.5.1',
'lxml~=4.6.3',
'lxml~=4.9.1',
'requests~=2.25.1',
'chardet~=4.0.0',
'iso8601~=0.1.14',
'chardet~=4.0.0',
'jinja2~=3.0.1',
'beautifulsoup4~=4.9.3',
'mzml2isa==1.0.3',
'biopython~=1.79',
'progressbar2~=3.53.1',
'deepdiff~=5.5.0',
'PyYAML~=5.4.1'
'PyYAML~=5.4.1',
'bokeh~=2.3.2',
'certifi==2021.5.30',
'flake8==3.9.2',
'ddt==1.4.2',
'behave==1.2.6',
'httpretty==1.1.3',
'sure==2.0.0',
'coveralls~=3.1.0',
'rdflib~=6.0.2',
],
test_suite='tests'
)
Empty file added test.py
Empty file.
42 changes: 2 additions & 40 deletions tests/test_mtbls.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,55 +32,17 @@ def test_get_study(self, mock_ftp_constructor):
mock_ftp.cwd.assert_called_with('/pub/databases/metabolights/studies/public/MTBLS1')
shutil.rmtree(tmp_dir)

"""Tries to do actual call on MetaboLights; uses MTBLS2 as not so big"""
# def test_get_study_as_tab(self):
# tmp_dir = MTBLS.getj('MTBLS2') # gets MTBLS ISA-Tab files
# self.assertEqual(len(os.listdir(tmp_dir)), 3)
# self.assertSetEqual(set(os.listdir(tmp_dir)), {'a_mtbl2_metabolite profiling_mass spectrometry.txt',
# 'i_Investigation.txt', 's_MTBL2.txt'})
# shutil.rmtree(tmp_dir)

# def test_get_study_as_json(self):
# isa_json = MTBLS.get('MTBLS2') # loads MTBLS study into ISA JSON
# self.assertIsInstance(isa_json, dict)
# self.assertEqual(isa_json['identifier'], 'MTBLS2')
# self.assertEqual(isa_json['studies'][0]['people'][0]['email'], 'boettch@ipb-halle.de')

# def test_get_factor_names(self):
# factors = MTBLS.get_factor_names('MTBLS2')
# self.assertIsInstance(factors, set)
# self.assertEqual(len(factors), 2)
# self.assertSetEqual(factors, {'genotype', 'replicate'})

# def test_get_factor_values(self):
# fvs = MTBLS.get_factor_values('MTBLS2', 'genotype')
# self.assertIsInstance(fvs, set)
# self.assertEqual(len(fvs), 2)
# self.assertSetEqual(fvs, {'Col-0', 'cyp79'})

# def test_get_datafiles(self):
# datafiles = MTBLS.get_data_files('MTBLS2')
# self.assertIsInstance(datafiles, list)
# self.assertEqual(len(datafiles), 16)
# factor_selection = {"genotype": "Col-0"}
# results = MTBLS.get_data_files('MTBLS2', factor_selection)
# self.assertEqual(len(results), 8)
# self.assertEqual(len(results[0]['data_files']), 1)

@patch('isatools.net.mtbls.get')
def test_get_datafiles_multiple_factors(self, mock_mtbls_get):
value = 'MTBLS1'
src = os.path.abspath(
os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'mtbls', value)
)
src = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'mtbls', value))
targets = []
for i in range(3):
dest = tempfile.mkdtemp()
targets.append(shutil.copytree(src, os.path.abspath(os.path.join(dest, value))))
it = iter(targets)
mock_mtbls_get.return_value = next(it)
factor_selection = {"Gender": "Male",
"Metabolic syndrome": "Control Group"}
factor_selection = {"Gender": "Male", "Metabolic syndrome": "Control Group"}
results = MTBLS.get_data_files(value, factor_selection)
self.assertEqual(len(results), 56)
self.assertEqual(len(results[0]['data_files']), 1)
Expand Down
3 changes: 2 additions & 1 deletion tests/utils/test_graphQL.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ def test_full_query(self):
"fileType": "Raw Data F",
"protocol": "nucleic acid ext",
"compound": "carbon diox",
"dose": "high"
"dose": "high",
"material": None
}
response = investigation.execute_query(self.query, variables)
self.assertTrue(not response.errors)
Expand Down

0 comments on commit af45fad

Please sign in to comment.