In [1]:
#export
"""
This is for all short utilities that have a boilerplate feeling
"""
from k1lib.bioinfo.cli.init import patchDefaultDelim, BaseCli, settings, Table
import k1lib.bioinfo.cli as cli, numbers, torch, numpy as np
from typing import overload, Iterator, Any, List, Set, Union
# Public names of this module, i.e. what ``from <module> import *`` exposes.
__all__ = ["size", "shape", "item", "identity",
           "toStr", "to1Str", "toNumpy", "toTensor",
           "toList", "wrapList", "toSet", "toIter", "toRange",
           "equals", "reverse", "ignore",
           "toSum", "toAvg", "toMax", "toMin",
           "lengths", "headerIdx", "dereference"]

In [2]:
#export
class size(BaseCli):
    def __init__(self, idx=None):
        """Returns number of rows and columns in the input.

:param idx: if idx is None return (rows, columns). If 0 or 1, then rows
    or columns. The column count is taken from the first row only, and is
    None if there is no first row or if it can't be iterated over"""
        super().__init__(); self.idx = idx
    @staticmethod
    def _nColumns(row):
        """Number of elements in ``row``, or None if ``row`` is not iterable."""
        # iterating over a non-iterable (an int, None, ...) raises TypeError,
        # AttributeError is kept for backward compatibility
        try: return len(list(row))
        except (TypeError, AttributeError): return None
    def __ror__(self, it:Iterator[str]):
        super().__ror__(it)
        if self.idx == 0: # get rows only
            return sum(1 for _ in it)
        if self.idx == 1: # get #columns only
            # iter() so that lists and other non-iterator inputs work too, and
            # the None default avoids leaking StopIteration on empty input
            return self._nColumns(next(iter(it), None))
        rows = 0; columns = None
        for row in it:
            if rows == 0: columns = self._nColumns(row) # only first row decides
            rows += 1
        return rows, columns
shape = size
class item(BaseCli):
    """Grabs the first element of the input and returns it"""
    def __ror__(self, it:Iterator[str]):
        rows = iter(it)
        first = next(rows)
        return first
class identity(BaseCli):
    """Hands the input back untouched. Handy as a no-op branch when
working with multiple streams"""
    def __ror__(self, it:Iterator[Any]):
        untouched = it
        return untouched

In [3]:
#export
class toStr(BaseCli):
    """Lazily turns each incoming element (a number, for instance) into
its ``str()`` form."""
    def __ror__(self, it:Iterator[str]):
        yield from (str(element) for element in it)
class to1Str(BaseCli):
    def __init__(self, delim:str=None):
        """Joins every incoming element into a single string, with `delim`
placed between consecutive elements"""
        super().__init__()
        self.delim = patchDefaultDelim(delim)
    def __ror__(self, it:Iterator[str]):
        super().__ror__(it)
        pieces = it | toStr() # everything has to be a string before joining
        merged = self.delim.join(pieces)
        yield merged # output is a stream holding exactly one string
class toNumpy(BaseCli):
    """Collects the incoming elements and builds a numpy array out of them"""
    def __ror__(self, it:Iterator[float]):
        collected = list(it)
        return np.array(collected)
class toTensor(BaseCli):
    """Collects the incoming elements and builds a :class:`torch.Tensor`
out of them"""
    def __ror__(self, it):
        collected = list(it)
        return torch.tensor(collected)

In [4]:
#export
class toList(BaseCli):
    """Materializes the input into a list. Plain :class:`list` does the
same thing, this exists only to keep the piping style"""
    def __ror__(self, it:Iterator[Any]) -> List[Any]:
        return [element for element in it]
class wrapList(BaseCli):
    """Puts the whole input, as is, inside a new single-element list"""
    def __ror__(self, it:Any) -> List[Any]:
        wrapped = [it]
        return wrapped
class toSet(BaseCli):
    """Materializes the input into a set. Plain :class:`set` does the
same thing, this exists only to keep the piping style"""
    def __ror__(self, it:Iterator[Any]) -> Set[Any]:
        return {element for element in it}
class toIter(BaseCli):
    """Turns the input object into an iterator. Plain `iter()` does the
same thing, this exists only to keep the piping style"""
    def __ror__(self, it) -> Iterator[Any]:
        iterator = iter(it)
        return iterator
class toRange(BaseCli):
    """Yields 0, 1, 2, ... - one index per incoming element, so effectively
iter(range(len(it))) without needing to know the length up front"""
    def __ror__(self, it:Iterator[Any]) -> Iterator[int]:
        position = 0
        for _ in it:
            yield position
            position += 1
class _EarlyExp(Exception): pass # no longer raised here, kept so existing references don't break
class equals:
    """Checks if all incoming columns/streams are identical.

Takes in an iterator of streams, walks over them in lockstep and yields one
bool per position: True if every stream has the same element there (compared
against the first stream's element), False otherwise. Stops at the end of
the shortest stream, and yields nothing if there are no streams at all"""
    def __ror__(self, streams:Iterator[Iterator[str]]):
        streams = list(streams)
        for row in zip(*streams):
            first = row[0]
            # any() short-circuits on the first mismatch
            yield not any(first != elem for elem in row)
class reverse(BaseCli):
    """Prints last line first, first line last.

The whole input is loaded into memory first, then an iterator (not a
list) going over it backwards is returned"""
    def __ror__(self, it:Iterator[str]) -> Iterator[str]:
        return reversed(list(it))
class ignore(BaseCli):
    """Runs the input to exhaustion and throws away every element.
Returns nothing"""
    def __ror__(self, it:Iterator[Any]):
        for _discarded in it:
            continue

In [5]:
#export
class toSum(BaseCli):
    """Adds up all incoming numbers. Returns 0 for empty input"""
    def __ror__(self, it:Iterator[float]):
        total = 0
        for value in it:
            total += value
        return total
class toAvg(BaseCli):
    """Computes the mean of all incoming numbers. On empty input, returns
nan if ``settings["strict"]`` is falsy, else the division by zero is
left to raise"""
    def __ror__(self, it:Iterator[float]):
        total = 0
        count = 0
        for value in it:
            total += value
            count += 1
        if not settings["strict"] and count == 0:
            return float("nan")
        return total / count

In [6]:
#export
class toMax(BaseCli):
    """Finds the largest of all incoming numbers"""
    def __ror__(self, it:Iterator[float]) -> float:
        largest = max(it)
        return largest
class toMin(BaseCli):
    """Finds the smallest of all incoming numbers"""
    def __ror__(self, it:Iterator[float]) -> float:
        smallest = min(it)
        return smallest

In [7]:
#export
class lengths(BaseCli):
    """Lazily yields ``len(row)`` for every incoming row."""
    def __ror__(self, it:Iterator[List[Any]]) -> Iterator[int]:
        yield from (len(row) for row in it)
def headerIdx():
    """Takes the first line only, and pairs each of its elements with an
index column. Handy for figuring out which column index to cut out. The
first line is also stored in the context variable "header", in case you
need it later. Example::

    # returns [[0, 'a'], [1, 'b'], [2, 'c']]
    ["abc"] | headerIdx() | dereference()
    # returns "abc"
    ctx["header"]()
    """
    pipeline = item() | cli.ctx.consume("header") # grab first line, remember it
    pipeline = pipeline | cli.tableFromList()
    pipeline = pipeline | cli.insertIdColumn(True)
    return pipeline

In [8]:
# `|` binds tighter than `==`, so the whole pipeline runs first and its result is compared
assert ["abc"] | headerIdx() | cli.dereference() == [[0, 'a'], [1, 'b'], [2, 'c']]

In [9]:
#export
Number = numbers.Number; Tensor = torch.Tensor; NpNumber = np.number
class inv_dereference(BaseCli):
    def __init__(self, ignoreTensors=False):
        """Kinda the inverse to :class:`dereference`. Lazily walks over the
input and replaces every nested iterable with a generator, recursively.
None, :class:`str`, numbers and anything that can't be iterated over are
yielded as is.

:param ignoreTensors: if False, 0-dimensional :class:`torch.Tensor` are
    unwrapped using ``.item()``. Other tensors are always yielded as is"""
        super().__init__(); self.ignoreTensors = ignoreTensors
    def _nested(self, e):
        """Returns ``e | self`` if ``e`` can be iterated over, else ``e`` itself."""
        try:
            # `e | self` is lazy, so it alone would never notice that `e` is
            # not iterable. iter() raises TypeError right away in that case
            iter(e)
            return e | self
        except Exception: return e # best effort: treat as a leaf
    def __ror__(self, it:Iterator[Any]) -> Iterator[Any]:
        super().__ror__(it); ignoreTensors = self.ignoreTensors
        for e in it:
            if isinstance(e, cli.ctx.Promise): e = e()
            if e is None or isinstance(e, (Number, NpNumber, str)): yield e
            elif isinstance(e, Tensor):
                if not ignoreTensors and len(e.shape) == 0: yield e.item()
                else: yield e
            else:
                # no try/except around the yield itself, so that closing this
                # generator (GeneratorExit) is never swallowed
                yield self._nested(e)
class dereference(BaseCli):
    def __init__(self, ignoreTensors=False, maxDepth=float("inf")):
        """Recursively converts any iterator into a list. Only :class:`str`,
:class:`numbers.Number` are not converted. Example::

    # returns something like "<range_iterator at 0x7fa8c52ca870>"
    iter(range(5))
    # returns [0, 1, 2, 3, 4]
    iter(range(5)) | dereference()

You can also specify a ``maxDepth``::

    # returns something like "<list_iterator at 0x7f810cf0fdc0>"
    iter([range(3)]) | dereference(maxDepth=0)
    # returns [range(3)]
    iter([range(3)]) | dereference(maxDepth=1)
    # returns [[0, 1, 2]]
    iter([range(3)]) | dereference(maxDepth=2)

:param ignoreTensors: if True, then don't loop over :class:`torch.Tensor`
    internals
:param maxDepth: how many levels of nesting to convert. Anything at or below
    this depth is returned as is

.. warning::

    Can work well with PyTorch Tensors, but not Numpy's array as they screw things up
    with the __ror__ operator, so do torch.from_numpy(...) first."""
        super().__init__(); self.ignoreTensors = ignoreTensors
        self.maxDepth = maxDepth; self.depth = 0 # depth: current recursion level
    def __ror__(self, it:Iterator[Any]) -> List[Any]:
        super().__ror__(it); answer = []; ignoreTensors = self.ignoreTensors
        if self.depth >= self.maxDepth: return it
        self.depth += 1
        try:
            for e in it:
                if isinstance(e, cli.ctx.Promise): e = e()
                if e is None or isinstance(e, (Number, NpNumber, str)):
                    answer.append(e)
                elif isinstance(e, Tensor):
                    if not ignoreTensors and len(e.shape) == 0:
                        answer.append(e.item())
                    else: answer.append(e)
                else:
                    # recursing into something that can't be iterated over
                    # raises, in which case it's kept as a leaf
                    try: converted = e | self
                    except Exception: converted = e
                    answer.append(converted)
        finally:
            # always undo the increment, even if iterating over `it` raised.
            # Otherwise the siblings of a non-iterable element would be
            # processed at the wrong depth, and this object can't be reused
            self.depth -= 1
        return answer
    def __invert__(self) -> BaseCli:
        """Returns a :class:`~k1lib.bioinfo.cli.init.BaseCli` that makes
everything an iterator."""
        return inv_dereference(self.ignoreTensors)

In [10]:
# Same modules as the first cell's imports, repeated in this test cell
import numpy as np, numbers, torch
# a torch tensor can be piped in directly
a = torch.linspace(0, 10, 50) | dereference()
# a numpy array has to go through torch.from_numpy first (see the warning in dereference's docstring)
b = torch.from_numpy(np.linspace(0, 10, 50)) | dereference()
# both routes should give the same values
assert torch.allclose(torch.tensor(b), torch.tensor(a))

# maxDepth=1 converts the outer iterator only, maxDepth=2 converts the inner range too
assert iter([range(3)]) | dereference(maxDepth=1) == [range(3)]
assert iter([range(3)]) | dereference(maxDepth=2) == [[0, 1, 2]]

In [11]:
!../../../export.py bioinfo/cli/utils

Current dir: /home/kelvin/repos/labs/k1lib, ../../../export.py
rm: cannot remove '__pycache__': No such file or directory
Found existing installation: k1lib 0.1.12
Uninstalling k1lib-0.1.12:
  Successfully uninstalled k1lib-0.1.12
running install
running bdist_egg
running egg_info
creating k1lib.egg-info
writing k1lib.egg-info/PKG-INFO
writing dependency_links to k1lib.egg-info/dependency_links.txt
writing requirements to k1lib.egg-info/requires.txt
writing top-level names to k1lib.egg-info/top_level.txt
writing manifest file 'k1lib.egg-info/SOURCES.txt'
reading manifest file 'k1lib.egg-info/SOURCES.txt'
writing manifest file 'k1lib.egg-info/SOURCES.txt'
installing library code to build/bdist.linux-x86_64/egg
running install_lib
running build_py
creating build
creating build/lib
creating build/lib/k1lib
copying k1lib/_learner.py -> build/lib/k1lib
copying k1lib/data.py -> build/lib/k1lib
copying k1lib/selector.py -> build/lib/k1lib
copying k1lib/imports.py -> build/lib/k1lib
copying k1