Skip to content

Commit

Permalink
Added additional statistical distribution functions for the peak list…
Browse files Browse the repository at this point in the history
… simulation functionality.
  • Loading branch information
smelandr committed Jul 12, 2017
1 parent 3d10e10 commit aac2d89
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 47 deletions.
58 changes: 36 additions & 22 deletions nmrstarlib/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
nmrstarlib --version
nmrstarlib convert (<from_path> <to_path>) [--from_format=<format>] [--to_format=<format>] [--bmrb_url=<url>] [--nmrstar_version=<version>] [--verbose]
nmrstarlib csview <starfile_path> [--amino_acids=<aa>] [--atoms=<at>] [--csview_outfile=<path>] [--csview_format=<format>] [--bmrb_url=<url>] [--nmrstar_version=<version>] [--verbose]
nmrstarlib plsimulate (<from_path> <to_path> <spectrum>) [--from_format=<format>] [--to_format=<format>] [--plsplit=<%>] [--H_std=<std>] [--C_std=<std>] [--N_std=<std>] [--H_mean=<mean>] [--C_mean=<mean>] [--N_mean=<mean>] [--bmrb_url=<url>] [--nmrstar_version=<version>] [--spectrum_descriptions=<path>] [--verbose]
nmrstarlib plsimulate (<from_path> <to_path> <spectrum>) [--from_format=<format>] [--to_format=<format>] [--plsplit=<%>] [--distribution=<func>] [--H=<value>] [--C=<value>] [--N=<value>] [--bmrb_url=<url>] [--nmrstar_version=<version>] [--spectrum_descriptions=<path>] [--verbose]
Options:
-h, --help Show this screen.
Expand All @@ -24,15 +24,15 @@
--csview_outfile=<path> Where to save chemical shifts table.
--csview_format=<format> Format to which save chemical shift table [default: svg].
--plsplit=<%> How to split peak list into chunks by percent [default: 100].
--H_std=<ppm> Standard deviation for H dimensions [default: 0].
--C_std=<ppm> Standard deviation for C dimensions [default: 0].
--N_std=<ppm> Standard deviation for N dimensions [default: 0].
--H_mean=<ppm> Mean for H dimensions [default: 0].
--C_mean=<ppm> Mean for C dimensions [default: 0].
--N_mean=<ppm> Mean for N dimensions [default: 0].
--spectrum_descriptions=<path> Path to custom spectrum descriptions file.
--distribution=<func> Statistical distribution function [default: normal].
--H=<value> Statistical distribution parameter(s) for H dimension.
--C=<value> Statistical distribution parameter(s) for C dimension.
--N=<value> Statistical distribution parameter(s) for N dimension.
"""

import itertools
import collections
import docopt

from . import nmrstarlib
Expand Down Expand Up @@ -75,21 +75,35 @@ def main(cmdargs):
nmrstarlib.update_constants(spectrum_descriptions_cfg=cmdargs["--spectrum_descriptions"])

plsplit = tuple(float(i) for i in cmdargs["--plsplit"].split(","))
parameters = {"H_mean": tuple(float(i) for i in cmdargs["--H_mean"].split(",")),
"C_mean": tuple(float(i) for i in cmdargs["--C_mean"].split(",")),
"N_mean": tuple(float(i) for i in cmdargs["--N_mean"].split(",")),
"H_std": tuple(float(i) for i in cmdargs["--H_std"].split(",")),
"C_std": tuple(float(i) for i in cmdargs["--C_std"].split(",")),
"N_std": tuple(float(i) for i in cmdargs["--N_std"].split(","))}

# fill with zeros values for parameters that have missing values
for param_name, param_values in parameters.items():
given_values = list(param_values)
missing_values = [0.0 for _ in range(len(plsplit) - len(param_values))]
param_values = given_values + missing_values
parameters[param_name] = tuple(param_values)

noise_generator = noise.RandomNormalNoiseGenerator(parameters)
distribution_name = cmdargs["--distribution"]
distribution_parameter_names = noise.distributions[distribution_name]["parameters"]

if not distribution_parameter_names:
parameters = None
else:
parameters = collections.defaultdict(list)

for dim in ("H", "N", "C"):
params = cmdargs["--{}".format(dim)]
if params is None:
for param_name in distribution_parameter_names:
parameters["{}_{}".format(dim, param_name)].append(None)
else:
params = params.split(",")
if len(params) > len(distribution_parameter_names):
raise ValueError("Inconsistent number of parameters provided.")

for param, param_name in zip(params, distribution_parameter_names):
parameters["{}_{}".format(dim, param_name)].extend([float(val) if val else None for val in param.split(":")])

# fill with None values for parameters that have missing values
for param_name, param_values in parameters.items():
given_values = list(param_values)
missing_values = [None for _ in range(len(plsplit) - len(param_values))]
param_values = given_values + missing_values
parameters[param_name] = tuple(param_values)

noise_generator = noise.NoiseGenerator(parameters=parameters, distribution_name=distribution_name)

peaklist_file_translator = translator.StarFileToPeakList(from_path=cmdargs["<from_path>"],
to_path=cmdargs["<to_path>"],
Expand Down
87 changes: 62 additions & 25 deletions nmrstarlib/noise.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,60 @@
import numpy as np


distributions = {"normal": {"function": np.random.normal, "parameters": ["loc", "scale"]},
"beta": {"function": np.random.beta, "parameters": ["a", "b"]},
"binomial": {"function": np.random.binomial, "parameters": ["n", "p"]},
"chisquare": {"function": np.random.chisquare, "parameters": ["df"]},
"exponential": {"function": np.random.exponential, "parameters": ["scale"]},
"f": {"function": np.random.f, "parameters": ["dfnum", "dfden"]},
"gamma": {"function": np.random.gamma, "parameters": ["shape", "scale"]},
"geometric": {"function": np.random.geometric, "parameters": ["p"]},
"gumbel": {"function": np.random.gumbel, "parameters": ["loc", "scale"]},
"hypergeometric": {"function": np.random.hypergeometric, "parameters": ["ngood", "nbad", "nsample"]},
"laplace": {"function": np.random.laplace, "parameters": ["loc", "scale"]},
"logistic": {"function": np.random.logistic, "parameters": ["loc", "scale"]},
"lognormal": {"function": np.random.lognormal, "parameters": ["mean", "sigma"]},
"logseries": {"function": np.random.logseries, "parameters": ["p"]},
"negative_binomial": {"function": np.random.negative_binomial, "parameters": ["n", "p"]},
"noncentral_chisquare": {"function": np.random.noncentral_chisquare, "parameters": ["df", "nonc"]},
"noncentral_f": {"function": np.random.noncentral_f, "parameters": ["dfnum", "dfden", "nonc"]},
"pareto": {"function": np.random.pareto, "parameters": ["a"]},
"poisson": {"function": np.random.poisson, "parameters": ["lam"]},
"power": {"function": np.random.power, "parameters": ["a"]},
"rayleigh": {"function": np.random.rayleigh, "parameters": ["scale"]},
"triangular": {"function": np.random.triangular, "parameters": ["left", "mode", "right"]},
"uniform": {"function": np.random.uniform, "parameters": ["low", "high"]},
"vonmises": {"function": np.random.vonmises, "parameters": ["mu", "kappa"]},
"wald": {"function": np.random.wald, "parameters": ["mean", "scale"]},
"weibull": {"function": np.random.weibull, "parameters": ["a"]},
"zipf": {"function": np.random.zipf, "parameters": ["a"]}}


class NoiseGenerator(object):
"""Noise generator abstract class."""
"""Noise generator class."""

def __init__(self, parameters):
def __init__(self, parameters=None, distribution_name="normal"):
"""Noise generator initializer.
:param parameters: Statistical distribution parameters per each peak list split.
:param dict parameters: Statistical distribution parameters per each peak list split.
:param str distribution_name: Name of the statistical distribution function.
"""
if parameters is None:
parameters = dict()

if distribution_name not in distributions:
raise KeyError('Distribution: "{}" not in a list of allowed distributions'.format(distribution_name))

self.parameter_names = {name[2:] for name in parameters.keys()}
self.distribution_parameter_names = distributions[distribution_name]["parameters"]

if set(self.distribution_parameter_names) != self.parameter_names:
raise ValueError('Parameter names not consistent with the chosen distribution, parameters needed: {},'
'parameters provided: {}'.format(repr(self.distribution_parameter_names),
repr(self.parameter_names)))

self.parameters = parameters
self.distribution_name = distribution_name

def generate(self, labels, split_idx):
"""Generate peak-specific noise abstract method, must be reimplemented in a subclass.
Expand All @@ -32,30 +77,22 @@ def generate(self, labels, split_idx):
:return: List of noise values for dimensions ordered as they appear in a peak.
:rtype: :py:class:`list`
"""
raise NotImplementedError()

atom_labels = [label[0] for label in labels]

class RandomNormalNoiseGenerator(NoiseGenerator):
"""Random normal noise generator concrete class."""
noise = []
distribution_function = distributions[self.distribution_name]["function"]
for label in atom_labels:
params = [self.parameters["{}_{}".format(label, param)][split_idx]
for param in self.distribution_parameter_names]

def __init__(self, parameters):
"""Random normal noise generator initializer.
if None in params:
dim_noise = 0.0
else:
try:
dim_noise = distribution_function(*params)
except ValueError:
raise ValueError

:param parameters: Statistical distribution parameters from random normal distribution per each peak list split.
"""
super(RandomNormalNoiseGenerator, self).__init__(parameters)
self.parameters_per_split = list(map(dict, zip(*[[(k, v) for v in value] for k, value in parameters.items()])))
noise.append(dim_noise)

def generate(self, labels, split_idx):
"""Generate peak-specific random noise from normal distribution.
:param tuple labels: Dimension labels of a peak.
:param int split_idx: Index specifying which peak list split parameters to use.
:return: List of noise values for dimensions ordered as they appear in a peak.
:rtype: :py:class:`list`
"""
atom_labels = [label[0] for label in labels]
mean_values = [self.parameters_per_split[split_idx]["{}_{}".format(label, "mean")] for label in atom_labels]
std_values = [self.parameters_per_split[split_idx]["{}_{}".format(label, "std")] for label in atom_labels]
noise = [np.random.normal(mean, std) if std > 0 else 0.0 for mean, std in zip(mean_values, std_values)]
return noise

0 comments on commit aac2d89

Please sign in to comment.