Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update project docstrings #108

Merged
merged 2 commits into from
Sep 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions autometa/binning/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def merge_annotations(mgargs):
Parameters
----------
mgargs : argparse.Namespace
metagenome args parsed from config using `config.parse_config`.
metagenome args parsed from config using `config.parse_args`.

Returns
-------
Expand Down Expand Up @@ -308,7 +308,7 @@ def main():
configs_search_str = os.path.join(args.workspace, "**", "metagenome_*.config")
config_fpaths = glob(configs_search_str, recursive=True)
for config_fpath in config_fpaths:
mgargs = config.parse_config(config_fpath)
mgargs = config.parse_args(config_fpath)
ncbi = NCBI(dirpath=mgargs.databases.ncbi)
annotations = merge_annotations(mgargs)
for domain, bin_df in annotations.items():
Expand Down
2 changes: 1 addition & 1 deletion autometa/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def update_config(fpath, section, option, value):
logger.debug(f"updated {fpath} [{section}] option: {option} : {value}")


def parse_config(fpath=None):
def parse_args(fpath=None):
"""Generate argparse namespace (args) from config file.

Parameters
Expand Down
173 changes: 125 additions & 48 deletions autometa/config/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,11 @@
import logging
import os

from configparser import NoOptionError

from autometa.config import DEFAULT_CONFIG
from autometa.config import get_config
from autometa.config import parse_config
from autometa.config import parse_args
from autometa.config import put_config
from autometa.common.utilities import make_inputs_checkpoints
from autometa.common.utilities import get_existing_checkpoints
Expand All @@ -41,7 +43,34 @@


class Project:
"""Autometa Project."""
"""Autometa Project class to configure project directory given `config_fpath`

Parameters
----------
config_fpath : str
</path/to/project.config>

Attributes
----------
dirpath : str
Path to directory containing `config_fpath`
config : config.ConfigParser
interpolated config object parsed from `config_fpath`.
n_metagenomes : int
Number of metagenomes contained in project directory
metagenomes : dict
metagenomes pertaining to project keyed by number and values of metagenome.config file path.
new_metagenome_num : int
Retrieve new minimum metagenome num from metagenomes in project.

Methods
----------
* self.save()
* self.new_metagenome_directory()
* self.setup_checkpoints_and_files()
* self.add()
* self.update()
"""

def __init__(self, config_fpath):
self.config_fpath = config_fpath
Expand All @@ -52,6 +81,13 @@ def __init__(self, config_fpath):

@property
def n_metagenomes(self):
    """Count the entries currently exposed by `self.metagenomes`.

    Returns
    -------
    int
        Length of the `self.metagenomes` mapping.
    """
    metagenome_count = len(self.metagenomes)
    return metagenome_count

@property
Expand All @@ -69,23 +105,20 @@ def metagenomes(self):
if os.path.exists(v)
}

@property
def new_metagenome_num(self):
"""Retrieve new minimum metagenome num from metagenomes in project.

Returns
-------
int
Description of returned object.

Raises
-------
ExceptionName
Why the exception is raised.
New metagenome number in project.

"""
# I.e. no metagenomes have been added to project yet.
if not self.metagenomes:
return 1
# max corresponds to highest metagenome number recovered in project directory
max_num = max(self.metagenomes)
if max_num == self.n_metagenomes:
return self.n_metagenomes + 1
Expand All @@ -97,8 +130,79 @@ def new_metagenome_num(self):
return mg_num

def save(self):
    """Persist the project config by handing it to `put_config`.

    The config object held in `self.config` is passed together with
    `self.config_fpath` as the destination.
    """
    destination = self.config_fpath
    put_config(self.config, destination)

def new_metagenome_directory(self):
    """Create a new metagenome directory in project

    The directory is named ``metagenome_NNN`` where ``NNN`` is
    `self.new_metagenome_num` zero-padded to three digits, and is created
    inside `self.dirpath`.

    Returns
    -------
    str
        Path to newly created metagenome directory contained in project

    Raises
    ------
    IsADirectoryError
        Directory that is trying to be created already exists
    """
    metagenome_name = f"metagenome_{self.new_metagenome_num:03d}"
    metagenome_dirpath = os.path.join(self.dirpath, metagenome_name)
    # Create first and translate the failure, rather than checking with
    # os.path.exists beforehand: a path that appears between the check and
    # the creation (or a dangling symlink, which os.path.exists reports as
    # missing) would otherwise leak FileExistsError instead of the
    # documented IsADirectoryError.
    try:
        os.makedirs(metagenome_dirpath)
    except FileExistsError as err:
        raise IsADirectoryError(metagenome_dirpath) from err
    return metagenome_dirpath

def setup_checkpoints_and_files(self, config, dirpath):
    """Update config files section with symlinks of existing files to metagenome output directory.
    Also get checkpoints from each existing file and write these to a checkpoints file.

    Every option in the "files" section is rewritten to a path inside
    `dirpath`, using the basename of the matching `DEFAULT_CONFIG` entry
    (with ".gz" appended when the existing input is gzipped and the default
    name is not). Options whose current path exists are symlinked to that
    new location and collected as checkpoint inputs.

    Note
    ----
    Will write checkpoints to the `config.get("files", "checkpoints")` file path
    *as it stands after the "files" section has been updated*, so the written
    file is the one the returned config refers to. Will skip writing checkpoints
    if "checkpoints" is not available in "files".

    Parameters
    ----------
    config : config.ConfigParser
        metagenome config to be updated
    dirpath : str
        Path to output metagenome directory

    Returns
    -------
    config.ConfigParser
        Updated metagenome config
    """
    # symlink any files that already exist and were specified
    checkpoint_inputs = []
    for option in config.options("files"):
        default_fname = os.path.basename(DEFAULT_CONFIG.get("files", option))
        option_fpath = os.path.realpath(config.get("files", option))
        source_exists = os.path.exists(option_fpath)
        # Keep the compression suffix so the link name reflects the content.
        if (
            source_exists
            and option_fpath.endswith(".gz")
            and not default_fname.endswith(".gz")
        ):
            default_fname += ".gz"
        full_fpath = os.path.join(dirpath, default_fname)
        if source_exists:
            os.symlink(option_fpath, full_fpath)
            checkpoint_inputs.append(full_fpath)
        config.set("files", option, full_fpath)
    # Resolve the checkpoints path only now: the loop above re-points every
    # "files" option (including "checkpoints") into `dirpath`, and reading it
    # earlier would write the table to the stale pre-update location.
    try:
        checkpoints_fpath = config.get("files", "checkpoints")
    except NoOptionError:
        logger.debug("checkpoints option unavailable, skipping.")
        checkpoints_fpath = None
    if checkpoints_fpath:
        logger.debug(
            f"Making {len(checkpoint_inputs)} checkpoints and writing to {checkpoints_fpath}"
        )
        checkpoints = make_inputs_checkpoints(checkpoint_inputs)
        checkpoints.to_csv(checkpoints_fpath, sep="\t", index=False, header=True)
    return config

def add(self, fpath):
"""Setup Autometa metagenome directory given a metagenome.config file.

Expand All @@ -113,63 +217,36 @@ def add(self, fpath):

Raises
-------
FileNotFoundError
Directory found but metagenome.config not present
IsADirectoryError
Metagenome output directory already exists
"""
# metagenome_num = 1 + self.n_metagenomes
metagenome_num = self.new_metagenome_num()
metagenome_name = f"metagenome_{metagenome_num:03d}"
metagenome_dirpath = os.path.join(self.dirpath, metagenome_name)
mg_config_fpath = os.path.join(metagenome_dirpath, f"{metagenome_name}.config")
# Check presence of metagenome directory and config
mg_config_present = os.path.exists(mg_config_fpath)
mg_dir_present = os.path.exists(metagenome_dirpath)
if not mg_config_present and mg_dir_present:
raise FileNotFoundError(
f"{mg_config_fpath} is not present but the directory exists! Either remove the directory or locate the config file before continuing."
)
if mg_dir_present:
raise IsADirectoryError(metagenome_dirpath)

os.makedirs(metagenome_dirpath)
metagenome_dirpath = self.new_metagenome_directory()
metagenome_name = os.path.basename(metagenome_dirpath)
mg_config = get_config(fpath)
# Add database and env for debugging individual metagenome binning runs.
# Add/Update database and env sections for debugging individual metagenome binning runs.
for section in ["databases", "environ", "versions"]:
if not mg_config.has_section(section):
mg_config.add_section(section)
for option, value in self.config.items(section):
mg_config.set(section, option, value)
# symlink any files that already exist and were specified
checkpoint_inputs = []
for option in mg_config.options("files"):
default_fname = os.path.basename(DEFAULT_CONFIG.get("files", option))
option_fpath = os.path.realpath(mg_config.get("files", option))
if os.path.exists(option_fpath):
if option_fpath.endswith(".gz") and not default_fname.endswith(".gz"):
default_fname += ".gz"
full_fpath = os.path.join(metagenome_dirpath, default_fname)
os.symlink(option_fpath, full_fpath)
checkpoint_inputs.append(full_fpath)
else:
full_fpath = os.path.join(metagenome_dirpath, default_fname)
mg_config.set("files", option, full_fpath)
checkpoints = make_inputs_checkpoints(checkpoint_inputs)
checkpoints_fpath = mg_config.get("files", "checkpoints")
checkpoints.to_csv(checkpoints_fpath, sep="\t", index=False, header=True)
if not mg_config.has_option(section, option):
mg_config.set(section, option, value)
# symlink any files that already exist and were specified and checkpoint existing files
self.setup_checkpoints_and_files(config=mg_config, dirpath=metagenome_dirpath)
# Set outdir parameter and add config section linking metagenome config to project config
mg_config.set("parameters", "outdir", metagenome_dirpath)
mg_config_fpath = os.path.join(metagenome_dirpath, f"{metagenome_name}.config")
mg_config.add_section("config")
mg_config.set("config", "project", self.config_fpath)
mg_config.set("config", "metagenome", mg_config_fpath)
# Save metagenome config to metagenome directory metagenome_00d.config
put_config(mg_config, mg_config_fpath)
# Only write updated project config after successful metagenome configuration.
self.config.set("metagenomes", metagenome_name, mg_config_fpath)
# Only write updated project config after successful metagenome configuration.
self.save()
logger.debug(
f"updated {self.config_fpath} metagenome: {metagenome_name} : {mg_config_fpath}"
)
return parse_config(mg_config_fpath)
return parse_args(mg_config_fpath)

def update(self, metagenome_num, fpath):
"""Update project config metagenomes section with input metagenome.config file.
Expand Down Expand Up @@ -218,7 +295,7 @@ def update(self, metagenome_num, fpath):
)
put_config(old_config, old_config_fp)
logger.debug(f"Updated {metagenome}.config with {fpath}")
return parse_config(old_config_fp)
return parse_args(old_config_fp)


def main():
Expand Down
2 changes: 1 addition & 1 deletion autometa/config/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def prepare_binning_args(self, fpath):
# 1. configure user environment
self.configure()
# 2. check workspace exists
mgargs = config.parse_config(fpath)
mgargs = config.parse_args(fpath)
workspace = os.path.realpath(mgargs.parameters.workspace)
if not os.path.exists(workspace):
os.makedirs(workspace)
Expand Down