Skip to content

Commit

Permalink
Update project docstrings (#108)
Browse files Browse the repository at this point in the history
* 📝 Add class and method docstrings.
* 🐛 save project config after adding metagenome to project dir
* 🎨📝 Add two methods: new_metagenome_directory() and setup_checkpoints_and_files()
  • Loading branch information
evanroyrees committed Sep 15, 2020
1 parent 8ebdb8e commit 79869f9
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 52 deletions.
4 changes: 2 additions & 2 deletions autometa/binning/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def merge_annotations(mgargs):
Parameters
----------
mgargs : argparse.Namespace
metagenome args parsed from config using `config.parse_config`.
metagenome args parsed from config using `config.parse_args`.
Returns
-------
Expand Down Expand Up @@ -308,7 +308,7 @@ def main():
configs_search_str = os.path.join(args.workspace, "**", "metagenome_*.config")
config_fpaths = glob(configs_search_str, recursive=True)
for config_fpath in config_fpaths:
mgargs = config.parse_config(config_fpath)
mgargs = config.parse_args(config_fpath)
ncbi = NCBI(dirpath=mgargs.databases.ncbi)
annotations = merge_annotations(mgargs)
for domain, bin_df in annotations.items():
Expand Down
2 changes: 1 addition & 1 deletion autometa/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def update_config(fpath, section, option, value):
logger.debug(f"updated {fpath} [{section}] option: {option} : {value}")


def parse_config(fpath=None):
def parse_args(fpath=None):
"""Generate argparse namespace (args) from config file.
Parameters
Expand Down
173 changes: 125 additions & 48 deletions autometa/config/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,11 @@
import logging
import os

from configparser import NoOptionError

from autometa.config import DEFAULT_CONFIG
from autometa.config import get_config
from autometa.config import parse_config
from autometa.config import parse_args
from autometa.config import put_config
from autometa.common.utilities import make_inputs_checkpoints
from autometa.common.utilities import get_existing_checkpoints
Expand All @@ -41,7 +43,34 @@


class Project:
"""Autometa Project."""
"""Autometa Project class to configure project directory given `config_fpath`
Parameters
----------
config_fpath : str
</path/to/project.config>
Attributes
----------
dirpath : str
Path to directory containing `config_fpath`
config : config.ConfigParser
interpolated config object parsed from `config_fpath`.
n_metagenomes : int
Number of metagenomes contained in project directory
metagenomes : dict
metagenomes pertaining to project keyed by number and values of metagenome.config file path.
new_metagenome_num : int
Retrieve new minimum metagenome num from metagenomes in project.
Methods
----------
* self.save()
* self.new_metagenome_directory()
* self.setup_checkpoints_and_files()
* self.add()
* self.update()
"""

def __init__(self, config_fpath):
self.config_fpath = config_fpath
Expand All @@ -52,6 +81,13 @@ def __init__(self, config_fpath):

@property
def n_metagenomes(self):
    """Count the metagenomes currently registered in the project.

    Returns
    -------
    int
        Number of entries in `self.metagenomes`.

    """
    # Read the `metagenomes` attribute exactly once, then size it.
    project_metagenomes = self.metagenomes
    return len(project_metagenomes)

@property
Expand All @@ -69,23 +105,20 @@ def metagenomes(self):
if os.path.exists(v)
}

@property
def new_metagenome_num(self):
"""Retrieve new minimum metagenome num from metagenomes in project.
Returns
-------
int
Description of returned object.
Raises
-------
ExceptionName
Why the exception is raised.
New metagenome number in project.
"""
# I.e. no metagenomes have been added to project yet.
if not self.metagenomes:
return 1
# max corresponds to highest metagenome number recovered in project directory
max_num = max(self.metagenomes)
if max_num == self.n_metagenomes:
return self.n_metagenomes + 1
Expand All @@ -97,8 +130,79 @@ def new_metagenome_num(self):
return mg_num

def save(self):
    """Write the project config to the project config file.

    Passes `self.config` and `self.config_fpath` to `put_config`.

    """
    project_config = self.config
    destination = self.config_fpath
    put_config(project_config, destination)

def new_metagenome_directory(self):
    """Create the next metagenome directory inside the project directory.

    The directory is named ``metagenome_NNN`` where ``NNN`` is
    `self.new_metagenome_num` zero-padded to three digits, and is created
    under `self.dirpath`.

    Returns
    -------
    str
        Path to the newly created metagenome directory.

    Raises
    ------
    IsADirectoryError
        The path that would be created already exists.

    """
    target_dirpath = os.path.join(
        self.dirpath, f"metagenome_{self.new_metagenome_num:03d}"
    )
    # Only create the directory when nothing occupies the target path.
    if not os.path.exists(target_dirpath):
        os.makedirs(target_dirpath)
        return target_dirpath
    raise IsADirectoryError(target_dirpath)

def setup_checkpoints_and_files(self, config, dirpath):
    """Point the config "files" section at `dirpath` and checkpoint existing files.

    Every option in the "files" section of `config` is rewritten to a path
    inside `dirpath`, named after the basename of the matching
    `DEFAULT_CONFIG` entry. Files that already exist are symlinked to that
    path and collected as checkpoint inputs.

    Note
    ----
    Checkpoints are written to `config.get("files", "checkpoints")` as it
    reads *after* the "files" section has been updated, so the file on disk
    matches the path recorded in the returned config. Writing is skipped if
    "checkpoints" is not available in "files".

    Parameters
    ----------
    config : config.ConfigParser
        metagenome config to be updated
    dirpath : str
        Path to output metagenome directory

    Returns
    -------
    config.ConfigParser
        Updated metagenome config

    """
    checkpoint_inputs = []
    for option in config.options("files"):
        default_fname = os.path.basename(DEFAULT_CONFIG.get("files", option))
        option_fpath = os.path.realpath(config.get("files", option))
        file_exists = os.path.exists(option_fpath)
        # Keep the gzip extension so downstream readers can detect compression.
        if (
            file_exists
            and option_fpath.endswith(".gz")
            and not default_fname.endswith(".gz")
        ):
            default_fname += ".gz"
        full_fpath = os.path.join(dirpath, default_fname)
        if file_exists:
            # symlink any files that already exist and were specified
            os.symlink(option_fpath, full_fpath)
            checkpoint_inputs.append(full_fpath)
        config.set("files", option, full_fpath)
    if not config.has_option("files", "checkpoints"):
        logger.debug("checkpoints option unavailable, skipping.")
        return config
    # Read the checkpoints path only now: the loop above has redirected it
    # into `dirpath`. Reading it earlier would write the table to the stale,
    # pre-update location while the config points somewhere else.
    checkpoints_fpath = config.get("files", "checkpoints")
    logger.debug(
        f"Making {len(checkpoint_inputs)} checkpoints and writing to {checkpoints_fpath}"
    )
    checkpoints = make_inputs_checkpoints(checkpoint_inputs)
    checkpoints.to_csv(checkpoints_fpath, sep="\t", index=False, header=True)
    return config

def add(self, fpath):
"""Setup Autometa metagenome directory given a metagenome.config file.
Expand All @@ -113,63 +217,36 @@ def add(self, fpath):
Raises
-------
FileNotFoundError
Directory found but metagenome.config not present
IsADirectoryError
Metagenome output directory already exists
"""
# metagenome_num = 1 + self.n_metagenomes
metagenome_num = self.new_metagenome_num()
metagenome_name = f"metagenome_{metagenome_num:03d}"
metagenome_dirpath = os.path.join(self.dirpath, metagenome_name)
mg_config_fpath = os.path.join(metagenome_dirpath, f"{metagenome_name}.config")
# Check presence of metagenome directory and config
mg_config_present = os.path.exists(mg_config_fpath)
mg_dir_present = os.path.exists(metagenome_dirpath)
if not mg_config_present and mg_dir_present:
raise FileNotFoundError(
f"{mg_config_fpath} is not present but the directory exists! Either remove the directory or locate the config file before continuing."
)
if mg_dir_present:
raise IsADirectoryError(metagenome_dirpath)

os.makedirs(metagenome_dirpath)
metagenome_dirpath = self.new_metagenome_directory()
metagenome_name = os.path.basename(metagenome_dirpath)
mg_config = get_config(fpath)
# Add database and env for debugging individual metagenome binning runs.
# Add/Update database and env sections for debugging individual metagenome binning runs.
for section in ["databases", "environ", "versions"]:
if not mg_config.has_section(section):
mg_config.add_section(section)
for option, value in self.config.items(section):
mg_config.set(section, option, value)
# symlink any files that already exist and were specified
checkpoint_inputs = []
for option in mg_config.options("files"):
default_fname = os.path.basename(DEFAULT_CONFIG.get("files", option))
option_fpath = os.path.realpath(mg_config.get("files", option))
if os.path.exists(option_fpath):
if option_fpath.endswith(".gz") and not default_fname.endswith(".gz"):
default_fname += ".gz"
full_fpath = os.path.join(metagenome_dirpath, default_fname)
os.symlink(option_fpath, full_fpath)
checkpoint_inputs.append(full_fpath)
else:
full_fpath = os.path.join(metagenome_dirpath, default_fname)
mg_config.set("files", option, full_fpath)
checkpoints = make_inputs_checkpoints(checkpoint_inputs)
checkpoints_fpath = mg_config.get("files", "checkpoints")
checkpoints.to_csv(checkpoints_fpath, sep="\t", index=False, header=True)
if not mg_config.has_option(section, option):
mg_config.set(section, option, value)
# symlink any files that already exist and were specified and checkpoint existing files
self.setup_checkpoints_and_files(config=mg_config, dirpath=metagenome_dirpath)
# Set outdir parameter and add config section linking metagenome config to project config
mg_config.set("parameters", "outdir", metagenome_dirpath)
mg_config_fpath = os.path.join(metagenome_dirpath, f"{metagenome_name}.config")
mg_config.add_section("config")
mg_config.set("config", "project", self.config_fpath)
mg_config.set("config", "metagenome", mg_config_fpath)
# Save metagenome config to metagenome directory metagenome_00d.config
put_config(mg_config, mg_config_fpath)
# Only write updated project config after successful metagenome configuration.
self.config.set("metagenomes", metagenome_name, mg_config_fpath)
# Only write updated project config after successful metagenome configuration.
self.save()
logger.debug(
f"updated {self.config_fpath} metagenome: {metagenome_name} : {mg_config_fpath}"
)
return parse_config(mg_config_fpath)
return parse_args(mg_config_fpath)

def update(self, metagenome_num, fpath):
"""Update project config metagenomes section with input metagenome.config file.
Expand Down Expand Up @@ -218,7 +295,7 @@ def update(self, metagenome_num, fpath):
)
put_config(old_config, old_config_fp)
logger.debug(f"Updated {metagenome}.config with {fpath}")
return parse_config(old_config_fp)
return parse_args(old_config_fp)


def main():
Expand Down
2 changes: 1 addition & 1 deletion autometa/config/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def prepare_binning_args(self, fpath):
# 1. configure user environment
self.configure()
# 2. check workspace exists
mgargs = config.parse_config(fpath)
mgargs = config.parse_args(fpath)
workspace = os.path.realpath(mgargs.parameters.workspace)
if not os.path.exists(workspace):
os.makedirs(workspace)
Expand Down

0 comments on commit 79869f9

Please sign in to comment.