In [1]:
#export
"""
This is for all short and random quality-of-life utilities."""
from k1lib.cli.init import patchDefaultDelim, BaseCli, yieldT
import k1lib.cli as cli, k1lib.cli.init as init, numbers, numpy as np, dis
from k1lib.cli.typehint import *
from typing import overload, Iterator, Any, List, Set, Union, Callable
import k1lib, time, math, os, json, dill
from collections import defaultdict
try: import torch; hasTorch = True
except: torch = k1lib.Object().withAutoDeclare(lambda: type("RandomClass", (object, ), {})); hasTorch = False
try: import PIL; hasPIL = True
except: hasPIL = False
plt = k1lib.dep.plt
try: import genpy, rosbag; hasRos1 = True
except: hasRos1 = False
try: import pandas as pd; pd.core; hasPandas = True
except: hasPandas = False
__all__ = ["size", "shape", "resize", "item", "rItem", "iden", "join", "wrapList",
           "equals", "reverse", "ignore", "rateLimit", "timeLimit", "tab", "indent",
           "clipboard", "deref", "bindec", "smooth", "disassemble",
           "tree", "lookup", "lookupRange", "getitems", "backup", "sketch", "syncStepper", "zeroes", "normalize", "branch"]

In [2]:
# Notebook-only setup (this cell has no #export marker). Presumably lets numpy
# arrays be piped into cli tools via `|` -- TODO confirm against cli.init.
cli.init.patchNumpy()

In [3]:
#export
# Module-level shortcut to the cli settings; the classes below read
# settings.arrayTypes, settings.atomic and settings.kjs from it.
settings = k1lib.settings.cli

In [4]:
#export
def exploreSize(it):
    """Peeks at a collection and measures it in one go.

Strings are measured with ``len`` but never indexed. Anything that supports both
``it[0]`` and ``len(it)`` is answered directly; everything else is consumed as a
plain iterator and counted element by element.

:param it: string, sequence or arbitrary iterable
:return: ``(first element, number of elements)``. The first element is ``None``
    for strings and for empty inputs"""
    if isinstance(it, str):
        return None, len(it)
    try:
        return it[0], len(it)
    except:
        # not indexable, not sized, or empty -> walk it as an iterator instead
        pass
    missing = object()
    stream = iter(it)
    first = next(stream, missing)
    if first is missing:
        return None, 0
    n = 1
    # drain the rest of the stream, counting as we go
    while next(stream, missing) is not missing:
        n += 1
    return first, n

In [5]:
#export
class size(BaseCli):
    def __init__(self, idx=None):
        """Returns number of rows and columns in the input.
Example::

    [[2, 3], [4, 5, 6], [3]]    | shape()  # returns (3, 2)
    [[2, 3], [4, 5, 6], [3]]    | shape(0) # returns 3
    [[2, 3], [4, 5, 6], [3]]    | shape(1) # returns 2
    [[], [2, 3]]                | shape()  # returns (2, 0)
    [2, 3, 5]                   | shape()  # returns (3,)
    [2, 3, 5]                   | shape(0) # returns 3
    [[[2, 1], [0, 6, 7]], 3, 5] | shape()  # returns (3, 2, 2)
    ["abc"]                     | shape()  # returns (1, 3)
    [torch.randn(2, 3)]         | shape()  # returns (1, 2, 3)
    shape()(np.random.randn(2, 3, 5))      # returns (2, 3, 5)
    "some_img.jpg" | toImg()    | shape()  # returns (width, height) for a particular image
    some_pandas_data_frame      | shape()  # returns dataframe's (#rows, #columns)

For nested plain iterables, only the first element at each level is explored,
so ragged inputs report the length of their first row.

:class:`shape` is an alias of this cli. Use whichever is more intuitive for you.

:param idx: if not specified, returns a tuple of ints. If specified,
    then returns the specific index of the tuple"""
        super().__init__(); self.idx = idx
        if idx is not None: self._f = cli.item(idx)
    def _all_array_opt(self, it, level):
        # the trailing dims' sizes, broadcast over every position of the leading `level` dims
        res = np.array(it.shape[level:])[tuple([None]*level)] + np.zeros(it.shape[:level], dtype=int)[(*[slice(None)]*level, None)]
        return res if self.idx is None else res | cli.rItem(self.idx).all(level)
    def _typehint(self, inp):
        if self.idx is not None: return int
        return tList(int)
    def __ror__(self, it:Iterator[str]):
        idx = self.idx
        if idx == 0: # super quick path for the really common case
            try: return len(it)
            except Exception:
                try: return exploreSize(it)[1]
                except Exception: pass # fall through to the type-specific paths below
        if hasPIL and isinstance(it, PIL.Image.Image): return it.size if idx is None else it.size[idx]
        if hasPandas and isinstance(it, pd.core.frame.DataFrame):
            # .shape is (#rows, #columns) and, unlike size//len, also works for an empty frame
            s = tuple(it.shape); return s if idx is None else s[idx]
        if hasattr(it, "_shape"): return it._shape(self.idx) # objects can report their own shape
        if idx is None:
            answer = []
            try:
                while True:
                    if isinstance(it, settings.arrayTypes):
                        return tuple(answer + list(it.shape))
                    # descend into the first element, recording the length of the current level
                    it, s = exploreSize(it); answer.append(s)
            except TypeError: pass # reached something that is not iterable
            return tuple(answer)
        return exploreSize(it | self._f)[1]
    def _jsF(self, meta):
        fIdx = init._jsFAuto(); dataIdx = init._jsDAuto()
        post = "" if self.idx is None else f"[{cli.kjs.v(self.idx)}]"
        return f"{fIdx} = ({dataIdx}) => {dataIdx}.shape(){post}", fIdx
shape = size

In [6]:
# Checks for size()/shape(): nested lists, tensors, numpy arrays, the .all() array
# path and data frames. df1 is reused by later test cells (item, rItem, reverse).
assert [[2, 3], [4, 5, 6], [3]] | tCheck() | size() == (3, 2)
assert [[2, 3], [4, 5, 6], [3]] | tCheck() | size(0) == 3
assert [[2, 3], [4, 5, 6], [3]] | tCheck() | size(1) == 2
assert [[], [2, 3]] | tCheck() | size() == (2, 0)
assert [2, 3, 5] | tCheck() | size() == (3,)
assert [2, 3, 5] | tCheck() | size(0) == 3
assert torch.randn(3, 4) | tCheck() | size() == (3, 4)
assert [[[2, 1], [0, 6, 7]], 3, 5] | tCheck() | size() == (3, 2, 2)
assert ["abc"] | tCheck() | size() == (1, 3)
assert [torch.randn(2, 3)] | tCheck() | size() == (1, 2, 3)
assert size()(np.random.randn(2, 3, 5)) | tCheck() == (2, 3, 5)
assert np.random.randn(3, 4, 5, 6) | shape().all(2) | cli.op().shape == (3, 4, 2)
df1 = pd.DataFrame({"A": [1.0,2,3,4], "B": pd.Timestamp("20130102"), "C": pd.Series(1, index=list(range(4)), dtype="float32"), "D": np.array([3] * 4, dtype="int32"), "E": pd.Categorical(["test", "train", "test", "train"]), "F": "foo",})
assert df1 | shape() == (4, 6)

In [15]:
#export
class resize(BaseCli):
    def __init__(self, width=0, height=0, max=0):
        """Resizes the image coming in to a new value.
Example::

    img = "path/some_img.jpg" | toImg() # loads image up
    img | shape()                       # returns (400, 600) in this example, meaning width is 400, height is 600
    img | resize(200)                   # resizes image to (200, 300), keeping aspect ratio
    img | resize(height=300)            # resizes image to (200, 300), keeping aspect ratio
    img | resize(200, 200)              # resizes image to (200, 200), disregarding aspect ratio
    img | resize(max=200)               # resizes image so that the biggest length is 200

Only PIL images are supported. A value of 0 for any parameter means "not specified".

:param width: target width in pixels
:param height: target height in pixels
:param max: upper bound for both sides of the result
:raises Exception: if the input is not a PIL image, or if both ``width`` and
    ``height`` are given and either of them exceeds ``max``"""
        self.width = width; self.height = height; self.max = max
    @staticmethod
    def _px(v):
        """Turns a scaled length into whole pixels. Truncates like ``int()``, except
that values within 1e-6 of a whole number snap to it, so float noise
(like ``49 * (1/49) == 0.9999999999999999``) can't drop the requested size by a pixel."""
        nearest = round(v)
        return nearest if abs(v - nearest) < 1e-6 else int(v)
    @staticmethod
    def _resize(img, size):
        """Resizes with LANCZOS resampling if possible, else with PIL's default."""
        try: return img.resize(size, resample=PIL.Image.Resampling.LANCZOS)
        except Exception: return img.resize(size) # e.g. older Pillow without Image.Resampling
    def __ror__(self, it):
        width = self.width; height = self.height; max = self.max
        if hasPIL and isinstance(it, PIL.Image.Image):
            rWidth = it.size[0]; rHeight = it.size[1]; ratio = 1 # real width & height
            if width > 0 and height > 0: # explicit size, aspect ratio is disregarded
                if (max < width or max < height) and max > 0: raise Exception(f"max value ({max}) lower than width or height ")
                return self._resize(it, (width, height))
            if width > 0 and height == 0: ratio = width / rWidth
            if height > 0 and width == 0: ratio = height / rHeight
            if max > 0: ratio = min(ratio, max/rWidth, max/rHeight) # never exceed max on either side
            return self._resize(it, (self._px(rWidth*ratio), self._px(rHeight*ratio)))
        raise Exception(f"Doesn't know how to resize object of type {type(it)}")

In [None]:
# Checks for resize(): fixed width, fixed height, max side, and explicit (width, height)
assert ["abc", "def"] | cli.toImg() | resize(50) | shape(0) == 50
assert ["abc", "def"] | cli.toImg() | resize(height=50) | shape(1) == 50
assert ["abc", "def"] | cli.toImg() | resize(max=30) | shape(1) == 30
assert ["abc", "def"] | cli.toImg() | resize(50, 50) | shape() == (50, 50)

In [6]:
#export
noFill = object() # sentinel meaning "no fill value was given"
class item(BaseCli):
    def __init__(self, amt:int=1, fill=noFill):
        """Returns the first element of the input iterator.
Example::

    # returns 0
    range(5) | item()
    # returns torch.Size([5])
    torch.randn(3,4,5) | item(2) | shape()
    # returns 3
    [] | item(fill=3)

If the input is an empty plain iterator and no ``fill`` is given, then
:class:`StopIteration` propagates out of ``next()``.

:param amt: how many times do you want to call item() back to back?
:param fill: if iterator length is 0, return this"""
        self.amt = amt; self.fill = fill
        # identity check, so fill values with element-wise comparisons (numpy arrays) work too
        self.fillP = [fill] if fill is not noFill else [] # preprocessed, to be faster
        if self.amt != 1: self._f = cli.serial(*(item(fill=self.fill) for _ in range(self.amt)))
    def _all_array_opt(self, it, level): return it[(*[slice(None, None, None) for i in range(level)], 0)]
    def _typehint(self, inp):
        if isinstance(inp, tListIterSet): return inp.child
        if isinstance(inp, tCollection): return inp.children[0]
        if isinstance(inp, tArrayTypes):
            if inp.rank is None: return inp.__class__(inp.child, None)
            if inp.rank - self.amt >= 1: return inp.__class__(inp.child, inp.rank-self.amt)
            return inp.child
        return tAny()
    def __ror__(self, it:Iterator[str]):
        if self.amt != 1: return it | self._f # delegate to a chain of single item() calls
        if isinstance(it, settings.arrayTypes): return it[0]
        if hasPandas and isinstance(it, pd.DataFrame): return it[:1].to_numpy()[0] # first row as a numpy array
        return next(iter(init.dfGuard(it)), *self.fillP)
    def _jsF(self, meta):
        fIdx = init._jsFAuto(); dataIdx = init._jsDAuto(); _slice = "".join(["[0]"]*self.amt)
        return f"{fIdx} = ({dataIdx}) => {dataIdx}{_slice}", fIdx
    def _pyF(self, expr, **kw):
        # one "[0]" per requested level, mirroring what _jsF() emits
        _slice = "[0]"*self.amt
        return "", "", f"{expr}{_slice}", {}

In [8]:
# Checks for item(): plain iterator, repeated item on a tensor, and the fill value.
# The last assert needs the image file "test/crew dragon.jpg" relative to the working directory.
assert iter(range(5)) | tCheck() | item() == 0
assert torch.randn(3,4,5) | tCheck() | item(2) | shape() == torch.Size([5])
assert [] | item(fill=3) == 3
assert "test/crew dragon.jpg" | cli.toImg() | shape() == (1280, 853)

In [13]:
# Compiles item() through toPyFunc(), checks the result and prints the generated source
a = cli.toPyFunc() | item(); assert range(10) | a == 0; print([a.program])

['\n\ndef _pyF_350_11(_pyF_350_10):return _pyF_350_10[0]']


In [9]:
#export
class rItem(BaseCli):
    def __init__(self, idx:int):
        """Combines ``rows(idx) | item()``, as this is a pretty common pattern.
Example::

    iter(range(10)) | rItem(4) # returns 4

Lists, tuples and array types are indexed directly (``it[idx]``), data frames
return the selected row as a numpy array, and any other iterable is consumed
up to and including position ``idx``.

:param idx: position of the element to pick
:raises IndexError: if a plain iterator is given a negative ``idx``, or runs
    out before reaching position ``idx``"""
        self.idx = idx; self.arrayTypes = (*settings.arrayTypes, list, tuple)
    def _all_array_opt(self, it, level:int): return it[(*[slice(None, None, None) for i in range(level)], self.idx)]
    def __ror__(self, it):
        idx = self.idx
        if isinstance(it, self.arrayTypes): return it[idx]
        if hasPandas and isinstance(it, pd.DataFrame): return it[idx:idx+1].to_numpy()[0]
        if idx < 0: raise IndexError(f"rItem({idx}): negative indexes are not supported for plain iterators")
        reached = -1
        # range comes first in zip(), so nothing past position idx is pulled from the iterator
        for reached, e in zip(range(idx+1), it): pass
        # a short iterator used to hand back its last element silently; fail like `it[idx]` does
        if reached != idx: raise IndexError(f"rItem({idx}): iterator only has {reached+1} element(s)")
        return e
    def _jsF(self, meta):
        fIdx = init._jsFAuto(); dataIdx = init._jsDAuto()
        return f"{fIdx} = ({dataIdx}) => {dataIdx}[{cli.kjs.v(self.idx)}]", fIdx
    def _pyF(self, expr, **kw): return "", "", f"{expr}[{self.idx}]", {}

In [10]:
# Checks for rItem(): plain iterators, numpy arrays (with and without .all()), and
# data frames (df1 comes from the size() test cell above)
assert iter(range(10)) | rItem(0) == 0
assert iter(range(10)) | rItem(4) == 4
assert iter(range(10)) | rItem(9) == 9
assert np.random.randn(3, 4, 5, 6) | rItem(1) | cli.op().shape == (4, 5, 6)
assert np.random.randn(3, 4, 5, 6) | rItem(1).all() | cli.op().shape == (3, 5, 6)
assert isinstance(df1 | item(), np.ndarray)
assert isinstance(df1 | rItem(1), np.ndarray)

In [14]:
#export
class iden(BaseCli):
    def __init__(self):
        """Identity cli: hands the input back untouched. Handy as a
no-op branch when working with multiple streams. Example::

    # returns range(5)
    range(5) | iden()"""
        super().__init__()
    def _all_array_opt(self, it, level):
        # nothing to do at any level
        return it
    def _typehint(self, inp):
        # output type is exactly the input type
        return inp
    def __ror__(self, it:Iterator[Any]):
        return it
    def _jsF(self, meta):
        # js equivalent: (x) => x
        fnName = init._jsFAuto()
        argName = init._jsDAuto()
        return f"{fnName} = ({argName}) => {argName}", fnName
    def _pyF(self, expr, **kw):
        # python equivalent: the incoming expression itself
        return "", "", expr, {}

In [18]:
# Checks for iden(): direct use, and compiled through toPyFunc() (prints the generated source)
assert range(5) | tCheck() | iden() == range(5)
# the comparison used to be a bare expression, so it never checked anything
a = cli.toPyFunc() | iden(); assert 3 | a == 3; print([a.program])

['\n\ndef _pyF_350_17(_pyF_350_16):return _pyF_350_16']


In [19]:
#export
class join(BaseCli):
    def __init__(self, delim:str=None):
        r"""Glues every incoming element into a single string, with `delim`
between neighbours. Elements are passed through ``str`` first, so this is
:meth:`str.join` that also accepts non-string items. Example::

    # returns '2\na'
    [2, "a"] | join("\n")

:param delim: separator; ``None`` is resolved by ``patchDefaultDelim``"""
        super().__init__()
        self.delim = patchDefaultDelim(delim)
    def _typehint(self, inp):
        return str
    def __ror__(self, it:Iterator[str]):
        pieces = init.dfGuard(it) | cli.apply(str)
        return self.delim.join(pieces)
    def _jsF(self, meta):
        fnName = init._jsFAuto()
        argName = init._jsDAuto()
        jsDelim = json.dumps(self.delim)
        return f"{fnName} = ({argName}) => {argName}.join({jsDelim})", fnName
    def _pyF(self, expr, **kw):
        # the delimiter travels as a named value instead of being spliced into the source
        delimName = init._pyFAuto()
        elemName = init._pyFAuto()
        code = f"{delimName}.join([str({elemName}) for {elemName} in {expr}])"
        return "", "", code, {delimName: self.delim}

In [21]:
# Checks for join(): direct use and compiled through toPyFunc()
assert [2, "a"] | tCheck() | join("\n") == '2\na'
assert [2, "a"] | (cli.toPyFunc() | join("\n")) == '2\na'

In [22]:
#export
class wrapList(BaseCli):
    def __init__(self):
        """Puts the input inside a brand new list. Array types get a new
leading axis instead. :meth:`~k1lib.cli.structural.unsqueeze` is the more
advanced cli built on top of this one. Example::

    # returns [5]
    5 | wrapList()"""
        super().__init__()
    def _all_array_opt(self, it, level):
        # keep the first `level` axes as-is, then insert a new axis right after them
        index = (slice(None),)*level + (None,)
        return it[index]
    def _typehint(self, inp):
        return tList(inp)
    def __ror__(self, it) -> List[Any]:
        return it[None] if isinstance(it, settings.arrayTypes) else [it]
    def _jsF(self, meta):
        fnName = init._jsFAuto()
        argName = init._jsDAuto()
        return f"{fnName} = ({argName}) => [{argName}]", fnName
    def _pyF(self, expr, **kw):
        return "", "", f"[{expr}]", {}

In [26]:
# Checks for wrapList(): the new axis on arrays (with and without .all()), and the compiled version
assert np.random.randn(3, 4, 5) | wrapList().all() | cli.op().shape == (3, 1, 4, 5)
assert np.random.randn(3, 4, 5) | wrapList() | cli.op().shape == (1, 3, 4, 5)
assert [2, 3, 1] | (cli.toPyFunc() | wrapList()) == [[2, 3, 1]]

In [17]:
#export
class _EarlyExp(Exception):
    """Private marker exception, kept as a module-level name."""
class equals:
    def __init__(self):
        """Compares several streams position by position. For every row formed
by zipping the streams together, yields ``True`` if all elements equal the
first stream's element, else ``False``. Stops at the shortest stream."""
        super().__init__()
    def __ror__(self, streams:Iterator[Iterator[str]]):
        columns = list(streams)
        for row in zip(*columns):
            reference = row[0]
            for candidate in row:
                if reference != candidate:
                    # first mismatch settles this row
                    yield False
                    break
            else:
                yield True

In [18]:
# Scratch check: slicing a range with a step of -1 reverses it
range(10)[slice(None, None, -1)] | cli.deref()

[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

In [32]:
#export
class reverse(BaseCli):
    def __init__(self):
        """Flips the order of the incoming elements. Array types and pandas
objects are reversed with a ``[::-1]`` slice; anything else is loaded into a
list first and a reversed iterator over it is returned. Example::

    # returns [3, 5, 2]
    [2, 5, 3] | reverse() | deref()"""
        super().__init__()
    def _all_array_opt(self, it, level):
        # untouched leading axes, followed by a backwards slice on the target axis
        index = (slice(None),)*level + (slice(None, None, -1),)
        return it[index]
    def _typehint(self, inp):
        return tIter(inp.child) if isinstance(inp, tListIterSet) else tAny()
    def __ror__(self, it:Iterator[str]) -> List[str]:
        if isinstance(it, settings.arrayTypes) or (hasPandas and isinstance(it, pd.core.arraylike.OpsMixin)):
            return it[::-1]
        materialized = list(it)
        return reversed(materialized)
    def _jsF(self, meta):
        fnName = init._jsFAuto()
        argName = init._jsDAuto()
        return f"{fnName} = ({argName}) => [...{argName}].reverse()", fnName
    def _pyF(self, expr, **kw):
        return "", "", f"list(reversed({expr}))", {}

In [20]:
# Checks for reverse(): lists, the compiled version, arrays via .all(), and data frames
assert [2, 5, 3] | tCheck() | reverse() | cli.deref() == [3, 5, 2]
assert [2, 5, 3] | (cli.toPyFunc() | reverse()) == [3, 5, 2]
assert np.random.randn(3, 4, 5) | reverse().all() | cli.op().shape == (3, 4, 5)
assert isinstance(df1 | reverse(), pd.DataFrame)

In [36]:
#export
class ignore(BaseCli):
    def __init__(self):
        r"""Drains the incoming iterator and throws every element away. Useful
for forcing a lazy pipeline to actually run. Always returns ``None``; array
types and pandas objects are not iterated at all. Example::

    # will just return an iterator, and not print anything
    [2, 3] | apply(lambda x: print(x))
    # will prints "2\n3"
    [2, 3] | apply(lambda x: print(x)) | ignore()"""
        super().__init__()
    def _all_array_opt(self, it, level):
        return it
    def _typehint(self, inp):
        return type(None)
    def __ror__(self, it:Iterator[Any]):
        # already-materialized containers have nothing lazy left to trigger
        if isinstance(it, settings.arrayTypes) or (hasPandas and isinstance(it, pd.core.arraylike.OpsMixin)):
            return None
        for _unused in it:
            pass
    def _jsF(self, meta):
        fnName = init._jsFAuto()
        argName = init._jsDAuto()
        return f"{fnName} = ({argName}) => {argName}", fnName
    def _pyF(self, expr, **kw):
        return "", "", f"[None for x in {expr}] and None", {}

In [41]:
# Without ignore() the lazy apply() never runs, so nothing is printed
with k1lib.captureStdout() as out:
    [2, 3] | cli.apply(lambda x: print(x))
assert len(out()) == 1
# With ignore() the pipeline is drained and both prints happen
with k1lib.captureStdout() as out:
    [2, 3] | tCheck() | cli.apply(lambda x: print(x)) | ignore()
assert out() == ["2", "3", '']
assert [2, 3] | (cli.toPyFunc() | ignore()) == None

In [23]:
#export
class rateLimit(BaseCli):
    def __init__(self, f, delay=0.1):
        """Holds elements back until a condition allows them through.
Before each element is passed on, ``f()`` is polled, sleeping ``delay``
seconds between polls, until it returns something truthy. Example::

    s = 0; semaphore = 0
    def heavyAsyncOperation(i):
        global semaphore, s
        semaphore += 1
        s += i; time.sleep(1)
        semaphore -= 1; return i**2

    # returns (20,), takes 1s to run
    range(20) | applyTh(heavyAsyncOperation, 100) | shape()
    # returns (20,), takes 4s to run (20/5 = 4)
    range(20) | rateLimit(lambda: semaphore < 5) | applyTh(heavyAsyncOperation, 100) | shape()

Nothing throttles the first pipeline, so all 20 threads start together and the
whole thing is done after about 1 second.

The second pipeline only releases a new element while fewer than 5 operations
are in flight (the semaphore check), so at most 5 threads run concurrently and
the total comes to around 4 seconds.

:param f: checking function. Should return true if execution is allowed
:param delay: delay in seconds between calling ``f()``"""
        self.f = f
        self.delay = delay
    def _typehint(self, inp):
        if isinstance(inp, tListIterSet): elem = inp.child
        elif isinstance(inp, tArrayTypes):
            rank = inp.rank
            if rank is None: elem = inp
            elif rank >= 2: elem = inp.__class__(inp.child, rank - 1) # iterating peels off one axis
            else: elem = inp.child
        elif isinstance(inp, tCollection): return inp
        else: return tAny()
        return tIter(elem)
    def __ror__(self, it):
        allowed = self.f
        pause = self.delay
        for element in init.dfGuard(it):
            # block here until the gate opens
            while not allowed():
                time.sleep(pause)
            yield element
    @staticmethod
    def cpu(maxUtilization=90):
        """Rate limiter that only lets elements through while cpu utilization
is below the given percentage. Requires the ``psutil`` package.
Example::

    # returns [0, 1, 4, 9, 16]
    range(5) | rateLimit.cpu() | apply(op()**2) | deref()"""
        import psutil
        def belowThreshold():
            return psutil.cpu_percent() < maxUtilization
        return rateLimit(belowThreshold)

In [24]:
# Timing-based checks for rateLimit(): `semaphore` counts the operations currently in flight
s = 0; semaphore = 0
def heavyAsyncOperation(i):
    global semaphore, s
    semaphore += 1
    s += i; time.sleep(1)
    semaphore -= 1; return i**2
# rate-limited to 5 concurrent operations: 20 one-second jobs take about 4s
with k1lib.timer() as t:
    assert range(20) | rateLimit(lambda: semaphore < 5) | cli.applyTh(heavyAsyncOperation, 100) | cli.shape() == (20,)
assert 3.5 < t() < 4.5
# no rate limit: everything runs at once, about 1s in total
with k1lib.timer() as t:
    assert range(20) | cli.applyTh(heavyAsyncOperation, 100) | cli.shape() == (20,)
assert 0.5 < t() < 1.5
assert range(5) | rateLimit.cpu() | cli.apply(cli.op()**2) | cli.deref() == [0, 1, 4, 9, 16]

In [25]:
#export
class timeLimit(BaseCli):
    def __init__(self, t):
        """Lets elements through for a limited amount of time, then ends
the stream. The clock starts when iteration begins, and the deadline is only
checked after an element has been passed on. Example::

    # returns 20, or roughly close to that
    repeatF(lambda: time.sleep(0.1)) | timeLimit(2) | shape(0)

:param t: time budget in seconds"""
        self.t = t
    def _typehint(self, inp):
        if isinstance(inp, tListIterSet): elem = inp.child
        elif isinstance(inp, tArrayTypes):
            rank = inp.rank
            if rank is None: elem = inp
            elif rank >= 2: elem = inp.__class__(inp.child, rank - 1) # iterating peels off one axis
            else: elem = inp.child
        elif isinstance(inp, tCollection): return inp
        else: return tAny()
        return tIter(elem)
    def __ror__(self, it):
        now = time.time
        deadline = now() + self.t
        for element in init.dfGuard(it):
            yield element
            if now() > deadline:
                return

In [26]:
# 0.1s per element with a 2s budget should give about 20 elements (tolerance: fewer than 2 off)
assert cli.repeatF(lambda: time.sleep(0.1)) | timeLimit(2) | shape(0) | cli.op() - 20 | cli.aS(abs) | cli.op() < 2

In [49]:
#export
class tab(BaseCli):
    def __init__(self, pad:str=" "*4):
        """Indents incoming string iterator.
Example::

    # prints out indented 0 to 9
    range(10) | tab() | headOut()

:class:`indent` is an alias of this cli.

:param pad: string to put in front of every element. Defaults to 4 spaces"""
        self.pad = pad
    def __ror__(self, it):
        pad = self.pad
        for x in it: yield f"{pad}{x}"
    def _pyF(self, expr, **kw):
        # repr() gives a properly escaped literal, so pads containing quotes, backslashes or
        # newlines still produce valid source. For the default pad the output is unchanged
        return "", "", f"({self.pad!r} + str(x) for x in {expr})", {}
indent = tab

In [52]:
# Checks for tab(): direct use and compiled through toPyFunc()
assert range(2) | tab() | cli.join("|") == '    0|    1'
assert range(2) | (cli.toPyFunc() | tab() | cli.join("|")) == '    0|    1'

In [29]:
#export
class clipboard(BaseCli):
    def __init__(self):
        """Copies whatever is piped in to the system clipboard, using the
``pyperclip`` package (imported when the cli is created). Example::

    # copies "abc" into the clipboard. Just use Ctrl+V to paste as usual
    "abc" | clipboard()"""
        import pyperclip
        self.pyperclip = pyperclip
    def _typehint(self, inp):
        return type(None)
    def __ror__(self, s):
        self.pyperclip.copy(s)

In [30]:
#assert "def2r" | tCheck() | clipboard() == None

In [31]:
#export
# Types registered as "atomic" for deref: instances are returned as-is, never iterated over.
# Optional entries are only added if the corresponding package could be imported.
a = [numbers.Number, np.number, str, bool, bytes, k1lib.UValue, cli.conv.Audio]
if hasTorch: a.append(torch.nn.Module)
if hasRos1: a.append(rosbag.bag.BagMessage)
if hasPandas: a.append(pd.core.arraylike.OpsMixin)
settings.atomic.add("deref", tuple(a), "used by deref")
# module-level shortcuts; `atomic.deref` is read by inv_dereference and deref below
Tensor = torch.Tensor; atomic = settings.atomic
class inv_dereference(BaseCli):
    def __init__(self, igT=False):
        """Kinda the inverse to :class:`dereference`. Lazily walks the
input: atomic values and ``None`` are yielded as-is, 0-dimensional arrays are
unwrapped with ``.item()`` (unless ``igT`` is set), and every other element is
replaced by a nested lazy iterator over it.

:param igT: short for "ignore tensor". If True, array types are always yielded as-is"""
        super().__init__(); self.igT = igT
    def __ror__(self, it:Iterator[Any]) -> List[Any]:
        for e in it:
            if e is None or isinstance(e, atomic.deref): yield e
            elif isinstance(e, settings.arrayTypes):
                if not self.igT and len(e.shape) == 0: yield e.item()
                else: yield e
            else:
                # Build the nested iterator first and yield outside the try block. A yield
                # inside a bare try/except catches the GeneratorExit raised when this generator
                # is closed early, then yields again ("generator ignored GeneratorExit")
                try: sub = e | self
                except Exception: sub = e
                yield sub
_rosmsg_tempfile = [None]; _rosmsg_autoInc = k1lib.AutoIncrement()
def rosmsg2BagMessage(msg):
    """Round-trips a ROS message through a temporary bag file, returning the first
message read back from it.

Kinda abandoned. Turns out you can't pickle a BagMessage cleanly after all, so
it has to be done the long way. If you want to be able to serialize a message,
just do ``obj | deref()``, it will wrap around using :class:`RosMsg`, which is
serializable.

:param msg: message to write to the bag, under topic "/default" """
    # lazily pick the base path; presumably `b"" | cli.file()` gives a fresh temp file name -- TODO confirm
    if _rosmsg_tempfile[0] is None: _rosmsg_tempfile[0] = b"" | cli.file()
    fn = f"{_rosmsg_tempfile[0]}_{os.getpid()}_{_rosmsg_autoInc()}"
    try:
        with rosbag.Bag(fn, "w") as bag: bag.write("/default", msg)
        # read inside a `with` so the bag is closed before its file is removed
        with rosbag.Bag(fn, "r") as bag: return bag.read_messages() | cli.item()
    finally:
        if os.path.exists(fn): os.remove(fn)
_rosmsg_tempfile2 = [None]; _rosmsg_autoInc2 = k1lib.AutoIncrement()
def _rosmsg_getFn2():
    """Builds a file name for a scratch bag file: a base path chosen once per
process, followed by the process id and an auto-incrementing counter."""
    if _rosmsg_tempfile2[0] is None:
        # only the name is needed, so the file created along with it is removed right away
        base = b"" | cli.file()
        _rosmsg_tempfile2[0] = base
        os.remove(base)
    return "{}_{}_{}".format(_rosmsg_tempfile2[0], os.getpid(), _rosmsg_autoInc2())
class RosMsg:
    """Picklable wrapper around a ROS message. Attribute access is forwarded to the
wrapped message, and pickling goes through a temporary bag file: the message is
written to a bag and the bag's raw bytes become the pickled state."""
    # The message lives in the name-mangled attribute `_RosMsg__msg`.
    # NOTE(review): the purpose of the `_ab_sentinel` flag is not visible in this file -- confirm
    def __init__(self, msg): self._ab_sentinel = True; self.__msg = msg; self._ab_sentinel = False
    # Only called for attributes not found the normal way. The literal name "__msg" lets code
    # outside this class (where no name mangling happens) reach the wrapped message
    def __getattr__(self, attr):
        if attr == "__msg": return self.__msg
        return getattr(self.__msg, attr)
    # state = raw bytes of a bag file that holds just this message, under topic "/default"
    def __getstate__(self):
        fn = _rosmsg_getFn2()
        with rosbag.Bag(fn, "w") as bag: bag.write("/default", self.__msg)
        with open(fn, "rb") as f: raw = f.read()
        os.remove(fn); return {"raw": raw}
    # writes the raw bytes back to a scratch file, then reads the first message from that bag
    def __setstate__(self, d):
        fn = _rosmsg_getFn2()
        with open(fn, "wb") as f: f.write(d["raw"])
        with rosbag.Bag(fn) as bag: self.__msg = next(bag.read_messages()).message
        os.remove(fn)
    def __repr__(self): return self.__msg.__repr__()
# array types that the RosMsgComplex helpers below pass through untouched (captured once, at import time)
_rosMsgArrayTypes = k1lib.settings.cli.arrayTypes
class RosMsgPlaceholder:
    """Stand-in for a ROS message inside a structure. Only remembers the key
under which the real message is stored."""
    def __init__(self, idx):
        self.idx = idx
def _rosmsg_complex_deref_replace(it, autoInc, msgs):
    """Recursively copies a nested structure, swapping every ROS message for a
:class:`RosMsgPlaceholder`. The real messages are collected into ``msgs``.

:param it: structure to walk
:param autoInc: callable returning a fresh key on each call
:param msgs: dict that gets filled with ``key -> message``
:return: the structure with placeholders. Generic iterables become lists, cut
    short at the first ``yieldT``"""
    def walk(child): return _rosmsg_complex_deref_replace(child, autoInc, msgs)
    if isinstance(it, np.number): return it.item()
    if isinstance(it, k1lib.settings.cli.atomic.deref): return it
    if isinstance(it, _rosMsgArrayTypes): return it
    if isinstance(it, dict): return {key: walk(value) for key, value in it.items()}
    if isinstance(it, tuple): return tuple(walk(child) for child in it)
    if isinstance(it, set): return set(walk(child) for child in it)
    if isinstance(it, genpy.message.Message):
        key = autoInc()
        msgs[key] = it
        return RosMsgPlaceholder(key)
    if isinstance(it, RosMsg):
        # unwrap the RosMsg and store the message it carries
        key = autoInc()
        msgs[key] = it.__msg
        return RosMsgPlaceholder(key)
    try: iter(it)
    except: return it # not iterable, so treat it as a leaf
    collected = []
    for child in it:
        if child is cli.yieldT: return collected
        collected.append(walk(child))
    return collected
def _rosmsg_complex_deref_reconstruct(it, msgs):
    """Recursively copies a nested structure, swapping every
:class:`RosMsgPlaceholder` for the entry of ``msgs`` stored under its ``idx``.

:param it: structure containing placeholders
:param msgs: mapping ``placeholder idx -> object to put back``
:return: the rebuilt structure. Generic iterables become lists, cut short at
    the first ``yieldT``"""
    def walk(child): return _rosmsg_complex_deref_reconstruct(child, msgs)
    if isinstance(it, np.number): return it.item()
    if isinstance(it, k1lib.settings.cli.atomic.deref): return it
    if isinstance(it, _rosMsgArrayTypes): return it
    if isinstance(it, dict): return {key: walk(value) for key, value in it.items()}
    if isinstance(it, tuple): return tuple(walk(child) for child in it)
    if isinstance(it, set): return set(walk(child) for child in it)
    if isinstance(it, RosMsgPlaceholder): return msgs[it.idx]
    try: iter(it)
    except: return it # not iterable, so treat it as a leaf
    collected = []
    for child in it:
        if child is cli.yieldT: return collected
        collected.append(walk(child))
    return collected
class RosMsgComplex:
    def __init__(self, data):
        """An attempt to speed up serialization of ROS messages.
Normally, I'd do this::

    [msg1, msg2, ...] | deref() | aS(dill.dumps) | file("...")

But this is a little inefficient as the process of writing to and reading from a temp bag file
is not that fast. So this kinda bunches up all messages, write them into a single bag file, and
have clever mechanism to reconstruct the structure.

Turns out lots of messages can bog down the system. This does reduce load time by 2 times and disk
size by 3 times. So it's effective, but just not wildly effective. This is not exposed automatically
on the docs cause I don't feel like it's fast enough to justify that, but I couldn't just delete this.

:param data: arbitrary nested structure that contains ROS messages"""
        self.data = data
    def __getstate__(self):
        """State is the dill-pickled structure (messages swapped for placeholders)
plus the raw bytes of a single bag file that holds every message."""
        fn = _rosmsg_getFn2()
        try:
            with rosbag.Bag(fn, "w") as bag:
                # each message is written under its own topic; the topic doubles as the placeholder key
                msgs = {}; struct = _rosmsg_complex_deref_replace(self.data, k1lib.AutoIncrement(prefix="/_rosmsg_"), msgs)
                for k, v in msgs.items(): bag.write(k, v)
            with open(fn, "rb") as f: raw = f.read()
            return {"struct": dill.dumps(struct), "raw": raw}
        finally:
            if os.path.exists(fn): os.remove(fn)
    def __setstate__(self, d):
        """Rebuilds ``data`` from the state produced by :meth:`__getstate__`."""
        fn = _rosmsg_getFn2()
        try:
            with open(fn, "wb") as f: f.write(d["raw"])
            # NOTE(review): values are whole bag entries (`x`), not `x.message` -- confirm that's intended
            with rosbag.Bag(fn) as bag: msgs = {x.topic:x for x in bag.read_messages()}
            # "struct" was stored with dill.dumps(), so it has to be unpickled first. Passing the
            # raw bytes used to hand back the pickled blob itself, since bytes counts as atomic
            self.data = _rosmsg_complex_deref_reconstruct(dill.loads(d["struct"]), msgs)
        finally:
            if os.path.exists(fn): os.remove(fn)

In [32]:
#export
class deref(BaseCli):
    def __init__(self, maxDepth=float("inf"), igT=True):
        """Recursively converts any iterator into a list.
Example::

    
    iter(range(5))              # returns something like "<range_iterator at 0x7fa8c52ca870>"
    iter(range(5)) | deref()    # returns [0, 1, 2, 3, 4]
    [2, 3, yieldT, 6] | deref() # returns [2, 3], yieldT stops things early

You can also specify a ``maxDepth``::

    iter([range(3)]) | deref(0) # returns something like "<list_iterator at 0x7f810cf0fdc0>"
    iter([range(3)]) | deref(1) # returns [range(3)]
    iter([range(3)]) | deref(2) # returns [[0, 1, 2]]

There are a few classes/types that are considered atomic, and :class:`deref`
will never try to iterate over it. If you wish to change it, do something like::

    settings.cli.atomic.deref = (int, float, ...)

Dicts, tuples and sets keep their container type, with their contents
dereferenced. Every other iterable becomes a list.

:param maxDepth: maximum depth to dereference. Starts at 0 for not doing anything
    at all
:param igT: short for "ignore tensor". If True, then don't loop over :class:`torch.Tensor`
    and :class:`numpy.ndarray` internals"""
        super().__init__(); self.igT = igT
        self.maxDepth = maxDepth; self.depth = 0 # depth = current nesting level while a call is in progress
        if hasTorch: self.arrayType = (torch.Tensor, np.ndarray) if k1lib.settings.startup.or_patch.numpy else torch.Tensor
        else: self.arrayType = (np.ndarray,) if k1lib.settings.startup.or_patch.numpy else ()
    def _typehint(self, inp, depth=float("inf")):
        if depth == 0: return inp
        if depth == float("inf"): depth = self.maxDepth
        if isinstance(inp, type) and issubclass(inp, atomic.deref): return inp
        if isinstance(inp, tArrayTypes):
            if self.igT: return inp
            if inp.rank is None: return tList(tAny())
            if inp.rank == 1:
                if isinstance(inp, tTensor):
                    return tList(type(torch.tensor(3, dtype=inp.child).item()))
                if isinstance(inp, tNpArray):
                    return tList(type(np.array(3, dtype=inp.child).item()))
            return tList(self._typehint(inp.item(), depth-1))
        if isinstance(inp, tListIterSet):
            return tList(self._typehint(inp.child, depth-1))
        if isinstance(inp, tCollection):
            return tCollection(*(self._typehint(e, depth-1) for e in inp.children))
        return tAny()
    def __ror__(self, it:Iterator[Any]) -> List[Any]:
        if self.depth >= self.maxDepth: return it
        elif isinstance(it, np.number): return it.item()
        elif isinstance(it, atomic.deref): return it
        elif isinstance(it, self.arrayType):
            if self.igT: return it
            if len(it.shape) == 0: return it.item()
            # arrays with at least 1 dimension fall through to the generic iteration below
        elif isinstance(it, (dict, tuple, set)):
            # try/finally, so the depth counter is restored even if an element raises
            self.depth += 1
            try:
                if isinstance(it, dict): return {k: self.__ror__(v) for k, v in it.items()}
                if isinstance(it, tuple): return tuple(self.__ror__(k) for k in it)
                return set(self.__ror__(k) for k in it)
            finally: self.depth -= 1
        elif hasRos1 and isinstance(it, genpy.message.Message): return RosMsg(it) # return rosmsg2BagMessage(it)
        try: iter(it)
        except Exception: return it # not iterable, so it's a leaf
        self.depth += 1
        try:
            answer = []
            for e in it:
                # The early exit also has to restore the depth counter. It used to leak 1 level
                # per yieldT hit, which broke reuse of an instance that has a finite maxDepth
                if e is cli.yieldT: return answer
                answer.append(self.__ror__(e))
            return answer
        finally: self.depth -= 1
    def __invert__(self) -> BaseCli:
        """Returns a :class:`~k1lib.cli.init.BaseCli` that makes
everything an iterator. Not entirely sure when this comes in handy, but it's
there."""
        return inv_dereference(self.igT)
    def _jsF(self, meta):
        fIdx = init._jsFAuto(); dataIdx = init._jsDAuto()
        return f"{fIdx} = ({dataIdx}) => {dataIdx}", fIdx
    @staticmethod
    def js():
        """Deref incoming object and turn them into a js object (NOT json string!).
Example::

    # returns "[...Array(10).keys()]"
    range(10) | deref.js()

How does it know to transpile it? Based on the dictionary at `settings.cli.kjs.jsonF`
and the object's "._jsonF" function. Say you have a custom list object, you can do
something like this::

    class CustomList:
        def __init__(self): ...
        def _jsonF(self): return "your js string here"

Or, you can do something like this::

    class CustomList: ...
    settings.cli.kjs.jsonF[CustomList] = lambda obj: "your js string here"

A variety of data types are included out of the box already for common types,
view the source code of this method to check them out."""
        jsonF = settings.kjs.jsonF
        jsonF[list] = lambda x: "[" + ", ".join([deref_js(e) for e in x]) + "]"
        jsonF[str] = lambda x: json.dumps(x)
        jsonF[tuple] = jsonF[list]; jsonF[set] = lambda x: "new Set(" + jsonF[list](x) + ")"
        jsonF[type(None)] = lambda x: "null"
        jsonF[np.ndarray] = lambda x: json.dumps(x | deref(igT=False))
        if hasTorch: jsonF[torch.Tensor] = lambda x: json.dumps(x | deref(igT=False))
        jsonF[type(iter(range(10)))] = lambda x: "[" + ", ".join([str(e) for e in x]) + "]" # range iterators
        jsonF[type((x for x in range(0)))] = jsonF[list] # generators
        jsonF[type({}.keys())] = jsonF[list]; jsonF[type({}.values())] = jsonF[list]
        jsonF[dict] = lambda x: "{" + ", ".join([f"{json.dumps(k)}: {deref_js(v)}" for k,v in x.items()]) + "}"
        jsonF[defaultdict] = jsonF[dict]
        deref.js = lambda: cli.aS(deref_js); return deref.js() # initializes at runtime, then patches deref.js() to get a faster path!

In [33]:
# Tests for deref() and ~deref(): tensors/arrays, depth limit, yieldT, settings and typehints
import numpy as np, numbers, torch
a = torch.linspace(0, 10, 50) | deref(igT=False)
b = torch.from_numpy(np.linspace(0, 10, 50)) | deref(igT=False)
assert torch.allclose(torch.tensor(b), torch.tensor(a))
# the integer argument limits how many levels get dereferenced
assert iter([range(3)]) | tCheck() | deref(1) == [range(3)]
assert iter([range(3)]) | tCheck() | deref(2) == [[0, 1, 2]]
# round trip: deref -> ~deref -> deref keeps the shape
assert torch.randn(2, 3) | tCheck() | deref(igT=False) | ~deref() | deref() | shape() == (2, 3)
# everything from yieldT onwards is dropped
assert [2, 3, cli.yieldT, 6] | deref() == [2, 3]
with k1lib.settings.cli.atomic.context(deref=(int, float, ...)): assert k1lib.settings.cli.atomic.deref == (int, float, ...)
assert np.array([[1, 2, 3], [4, 5, 6]]) | tCheck() | deref(igT=False) == [[1, 2, 3], [4, 5, 6]]
assert {"a": range(3)} | tCheck() | deref() == {'a': [0, 1, 2]}
# typehint propagation
assert deref(1, igT=False)._typehint(tTensor(float, 2)) == tList(tTensor(float, 1))
assert deref(1, igT=False)._typehint(tTensor(float, 2)) != tList(tTensor(float, 2))
assert deref(igT=False)._typehint(tTensor(float, 2)) == tList(tList(float))
assert deref()._typehint(tList(tList(int))) == tList(tList(int))

In [34]:
#export
def deref_js(obj):
    """Converts a Python object into a string of js source code that evaluates
to the equivalent js value.

Booleans and numbers are handled right here for performance reasons. Everything
else is pluggable: first the serializer registered for the object's exact type
in ``settings.cli.kjs.jsonF`` is tried, then the object's own ``._jsonF()`` method.

:param obj: object to convert
:return: js source string
:raises Exception: if there's no known way to convert the object"""
    # only 2 special cases, perf considerations, everything else is pluggable
    if isinstance(obj, (bool, np.bool_)): return "true" if obj else "false" # np.bool_ is neither a bool nor a np.number
    if isinstance(obj, (numbers.Number, np.number)):
        if isinstance(obj, (float, np.floating)): # str() of these gives "nan"/"inf", which are not valid js literals
            if math.isnan(obj): return "NaN"
            if math.isinf(obj): return "Infinity" if obj > 0 else "-Infinity"
        return str(obj)
    fn = settings.kjs.jsonF.get(type(obj), None)
    if fn: return fn(obj)
    if hasattr(obj, "_jsonF"): return obj._jsonF()
    raise Exception(f"Don't know how to transcribe object with class {type(obj)}. Either add the serialization function to `settings.cli.kjs.jsonF`, or implement the function `._jsonF()` to your custom class")

In [35]:
# One nested structure covering many of the types deref.js() is expected to handle
def g(): yield 4; yield 2
assert [range(10), 4, set(range(10)), None, np.linspace(2, 10, 5), iter(range(5)), g(), "abc", '"abc"', True, {"a": 3, "b": [3, 4]}] | deref.js() == '[[...Array(10).keys()], 4, new Set([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), null, [2.0, 4.0, 6.0, 8.0, 10.0], [0, 1, 2, 3, 4], [4, 2], "abc", "\\"abc\\"", true, {"a": 3, "b": [3, 4]}]'

In [36]:
#export
class bindec(BaseCli):
    def __init__(self, cats:List[Any], f=None):
        """Decodes a bit mask into the categories it selects.
Example::

    # returns ['a', 'c']
    5 | bindec("abcdef")
    # returns 'a,c'
    5 | bindec("abcdef", join(","))

The input is converted to an integer and its binary digits are read from the
least significant one upwards. The i-th category is kept if the i-th digit is 1.

:param cats: categories, the first one corresponds to the least significant bit
:param f: transformation function applied on the selected elements. Defaults to :class:`~k1lib.cli.conv.toList`, but others like :class:`join` are useful too"""
        self.cats = cats
        self.f = f or cli.toList()
    def __ror__(self, it):
        digits = reversed(bin(int(it))[2:]) # drops the "0b" prefix, least significant digit first
        selected = (cat for digit, cat in zip(digits, self.cats) if digit == '1')
        return selected | self.f

In [37]:
# 5 is 0b101, so the 1st and 3rd categories are selected
assert 5 | bindec("abcdef") == ['a', 'c']
assert 5 | bindec("abcdef", join(",")) == "a,c"

In [38]:
#export
settings.add("smooth", 10, "default smooth amount, used in utils.smooth")
class smooth(BaseCli):
    def __init__(self, consecutives=None, windowing=False):
        """Smoothes out the input stream.
Literally just a shortcut for::

    batched(consecutives) | toMean().all()

Example::

    # returns [4.5, 14.5, 24.5]
    range(30) | smooth(10) | deref()

Smoothing over :class:`torch.Tensor` or :class:`numpy.ndarray` will
be much faster::

    # returns torch.Tensor with shape (2)
    torch.randn(10, 3, 4) | smooth(4)

The default consecutive value is in ``settings.cli.smooth``. This
is useful if you are smoothing over multiple lists at the same
time, like this::

    # can change a single smooth value temporarily here, and all sequences will be smoothed in the same way
    with settings.cli.context(smooth=5):
        x = list(np.linspace(-2, 2, 50))
        y = x | apply(op()**2) | deref()
        plt.plot(x | smooth() | deref(), y | smooth() | deref())

:param consecutives: if not defined, then used the value inside ``settings.cli.smooth``
:param windowing: if True, groups elements using ``window(consecutives)`` instead of ``batched(consecutives)``. Not supported when transpiling to js"""
        n = consecutives or settings.smooth; self.b = cli.window(n) if windowing else cli.batched(n)
        self.consecutives = consecutives; self.windowing = windowing
        self._n = n # resolved group size. The settings default is read once, here at construction time
    def _all_array_opt(self, it, level): return it | (self.b | cli.toMean().all()).all(level)
    def __ror__(self, it): return init.dfGuard(it) | self.b | cli.toMean().all()
    def _jsF(self, meta):
        if self.windowing: raise Exception(f"._jsF() does not support windowing in smooth() yet")
        fIdx = init._jsFAuto(); dataIdx = init._jsDAuto()
        # passes the resolved size, not the raw `consecutives` (which can be None), so that js groups exactly like the python side
        return f"{fIdx} = ({dataIdx}) => {dataIdx}.smooth({cli.kjs.v(self._n)})", fIdx

In [39]:
# Explicit size, then the default size taken from settings
assert range(30) | smooth(10) | deref() == [4.5, 14.5, 24.5]
assert range(30) | smooth() | deref() == [4.5, 14.5, 24.5]
# Temporarily overriding the default: 50 elements in groups of 5 gives 10 outputs
with settings.context(smooth=5):
    x = list(np.linspace(-2, 2, 50))
    y = x | cli.apply(cli.op()**2) | deref()
    assert x | smooth() | shape(0) == 10
    assert y | smooth() | shape(0) == 10
# Tensor and array inputs
assert torch.randn(10, 3, 4) | smooth(4) | shape() | cli.op() == (2,)
assert np.random.randn(10,4,3) | smooth(2, True).all() | cli.aS(type) == np.ndarray

In [40]:
#export
def _f(): pass
_code = type(_f.__code__)
def disassemble(f=None):
    """Prints out the code object's attributes and bytecode of whatever is given to it.
Normal usage::

    def f(a, b):
        return a**2 + b
    # both of these print out disassembled info
    f | disassemble()
    disassemble(f)
    
    # you can pass in lambdas
    disassemble(lambda x: x + 3)
    
    # or even raw code
    "lambda x: x + 3" | disassemble()

Code objects found among the constants are disassembled too, and their
output is printed with a "|" prefix, indented.

:param f: function, code object or string of source code. If not specified, returns a cli that disassembles whatever is piped into it"""
    if f is None: return cli.aS(disassemble)
    code = compile(f, "", "exec") if isinstance(f, str) else f
    try: code = code.__code__ # functions and methods carry their code object here
    except: pass
    if not isinstance(code, _code): raise RuntimeError(f"`{code}` is not a code object/function/class method/string code")
    attrs = ["co_argcount", "co_cellvars", "co_consts", "co_filename", "co_firstlineno",
             "co_flags", "co_freevars", "co_kwonlyargcount", "co_lnotab", "co_name",
             "co_names", "co_nlocals", "co_posonlyargcount", "co_stacksize", "co_varnames"]
    for attr in attrs:
        value = getattr(code, attr)
        if attr == "co_lnotab": value = value | cli.apply(str) | join(' ') # bytes, shown as space-separated numbers
        print(f"{attr}: {value}")
    print(f"Disassembly:"); dis.disassemble(code)
    # nested code objects are disassembled recursively, with their output captured so that it can be prefixed and indented
    with k1lib.captureStdout() as out:
        inner = code.co_consts | cli.filt(lambda x: "code" in str(type(x)))
        inner | cli.tee(lambda _: "----------------------- inner code object -----------------------\n") | cli.apply(disassemble) | cli.ignore()
    lines = out() | cli.filt(cli.op().strip() != "")
    lines | cli.apply("|" + cli.op()) | cli.indent() | cli.stdout()

In [41]:
# Smoke test: every documented way of calling disassemble() prints something
def f(a, b):
    return a**2 + b
with k1lib.captureStdout() as out:
    f | disassemble(); disassemble(f);
    "lambda x: x + 3" | disassemble()
    disassemble(lambda x: x + 3)
assert out() | cli.shape(0) > 0

In [42]:
#export
shortName = lambda s: s.split(os.sep)[-1]
def tree(fL=10, dL=10, depth=float("inf"), ff:Callable[[str], bool]=(lambda s: True), df:Callable[[str], bool]=(lambda s: True)):
    """Recursively lists files and folders under the directory piped in. The
output format is a bit unusual, so this is mainly meant for visualization.
Example::

    "." | tree() | deref()

This is way less powerful and structured than clis from the module :mod:`k1lib.cli.ktree`.
Check that out. This cli is mainly for backwards compatibility.

:param fL: max number of file per directory included in output
:param dL: max number of child directories per directory included in output
:param depth: explore depth
:param ff: optional file filter function
:param df: optional directory filter function"""
    def nameAndPath(path): return [shortName(path), path]
    def descend(path):
        # goes one level deeper, until the depth budget runs out
        if depth > 0: return path | tree(fL, dL, depth-1, ff, df)
        return []
    subTrees = cli.apply(nameAndPath) | cli.apply(descend, 1) | cli.toDict()
    files = cli.filt(os.path.isfile) | cli.filt(ff) | cli.head(fL) | cli.apply(shortName) | cli.aS(set)
    folders = ~cli.filt(os.path.isfile) | cli.filt(df) | cli.head(dL) | subTrees
    return cli.ls() | ~cli.sortF(os.path.isfile) | (files & folders)

In [43]:
# Smoke test: walking the current directory gives a non-empty result
assert "." | tree() | shape() | shape(0) > 0

In [53]:
#export
class lookup(BaseCli):
    def __init__(self, d:dict, col:int=None, fill=None, mode:str="error"):
        """Looks up items from a dictionary/object. Example::

    d = {"a": 3, "b": 5, "c": 52}
    "abcca" | lookup(d) | deref() # returns [3, 5, 52, 52, 3]

    "abccad" | lookup(d) | deref()                     # raises Exception, as key "d" does not exist
    "abccad" | lookup(d, fill="(not found)") | deref() # returns [3, 5, 52, 52, 3, '(not found)'], mode automatically switched to "fill"
    "abccad" | lookup(d, mode="fill")  | deref()       # returns [3, 5, 52, 52, 3, None]. Do this when you really want to return None
    "abccad" | lookup(d, fill=input)   | deref()       # returns [3, 5, 52, 52, 3, 'd'], mode automatically switched to "input"
    "abccad" | lookup(d, mode="input") | deref()       # returns [3, 5, 52, 52, 3, 'd'], similar to above
    "abccad" | lookup(d, mode="rm")    | deref()       # returns [3, 5, 52, 52, 3], removing the unknown element

    [range(5), "abcca"] | transpose() | lookup(d, 1) | deref() # returns [[0, 3], [1, 5], [2, 52], [3, 52], [4, 3]]

The ``mode`` param needs a little explaining. It specifies what should happen when an element is not found
within the given dictionary. There are 4 modes total:

- error: if ``fill`` is None, then throws an error. If ``fill`` is the builtin ``input``, then this acts like mode "input" instead. If ``fill`` is anything else, then this acts like mode "fill" instead
- input: returns whatever the input element is
- rm: removes (aka ignore) the element
- fill: returns the arg ``fill``

:param d: any object that can be sliced with the inputs. Modes other than "error" also need it to have a ``.get(key, default)`` method
:param col: if None, lookup on each row, else lookup a specific column only
:param fill: fill value for elements that are not in the provided dictionary. Explained more above
:param mode: "error", "input", "rm", "fill". Explained more above"""
        if mode == "error": # override .mode so that it's backwards compatible
            if fill is input: mode = "input"; fill = None
            elif fill is not None: mode = "fill"
        # .fill is stored after the override above, so that the builtin `input` (just a marker) never reaches ._jsF() and ._pyF()
        self.d = d; self.col = col; self.fill = fill
        self.mode = mode; self.rmSentinel = rmSentinel = object()
        if mode == "error": f = lambda e: d[e]
        elif mode == "input": f = lambda e: d.get(e, e)
        elif mode == "rm": f = lambda e: d.get(e, rmSentinel) # sentinel marks elements to be dropped later on
        elif mode == "fill": f = lambda e: d.get(e, fill)
        else: raise Exception("Invalid mode. Only 'error', 'input', 'rm' and 'fill' are allowed")
        self.f = f
        def fa(it, col):
            if mode == "rm": return it | cli.apply(f, col) | cli.filt(lambda x: x is not rmSentinel, col)
            return it | cli.apply(f, col)
        self.fa = fa
    def _typehint(self, inp):
        t = inferType(list(self.d.values()))
        if isinstance(t, tListIterSet): return tIter(t.child)
        if isinstance(t, tCollection): return tIter(tLowest(*t.children))
        return tIter(tAny())
    def __ror__(self, it):
        col = self.col
        if hasPandas and isinstance(it, pd.DataFrame):
            if col is None: it = init.dfGuard(it)
            else: # replaces the column in place of the data frame, so that the output is still a data frame
                f = self.f; rmSentinel = self.rmSentinel; c = [f(e) for e in it[list(it)[col]]]; it = it.replaceCol(list(it)[col], c)
                return it.iloc[[i for i, e in enumerate(c) if e is not rmSentinel]] if self.mode == "rm" else it
                # return pd.DataFrame({getattr(c, "name", ogName if i == col else next(genName)):c for i,c in enumerate(cols)})
        return self.fa(it, col)
    def _jsF(self, meta):
        if self.mode not in ("input", "rm", "fill"): raise Exception(f"lookup()._jsF() only supports modes 'input', 'rm' and 'fill'. Either specify a mode, or a default fill value")
        fIdx = init._jsFAuto(); dictIdx = f"{init._jsDAuto()}_{round(time.time())}"; dataIdx = init._jsDAuto()
        return f"//k1_moveOutStart\n{dictIdx} = {json.dumps(self.d)}; //k1_moveOutEnd\n{fIdx} = ({dataIdx}) => {dataIdx}.lookup({dictIdx}, {cli.kjs.v(self.col)}, {cli.kjs.v(self.fill)}, `{self.mode}`)", fIdx
    def _pyF(self, expr, **kw):
        if self.col is not None: return None, None, NotImplemented, None
        mode = self.mode; dN = init._pyFAuto(); xN = init._pyFAuto(); vD = {dN: self.d}
        if mode == "error": return "", "", f"({dN}[{xN}] for {xN} in {expr})", vD
        if mode == "input": return "", "", f"({dN}.get({xN}, {xN}) for {xN} in {expr})", vD
        if mode == "rm": return "", "", f"({dN}[{xN}] for {xN} in {expr} if {xN} in {dN})", vD
        if mode == "fill": fillN = init._pyFAuto(); vD[fillN] = self.fill; return "", "", f"({dN}.get({xN}, {fillN}) for {xN} in {expr})", vD

In [61]:
# Plain lookup() in every mode
d = {"a": 3, "b": 5, "c": 52}
assert "abcca" | lookup(d) | cli.deref() == [3, 5, 52, 52, 3]
try: "abccad" | lookup(d) | cli.deref(); raise Exception("Failed")
except KeyError: pass
assert "abccad" | lookup(d, fill="(not found)") | cli.deref() == [3, 5, 52, 52, 3, '(not found)']
assert "abccad" | lookup(d, fill=input) | cli.deref() == [3, 5, 52, 52, 3, 'd']
assert [range(5), "abcca"] | cli.transpose() | lookup(d, 1) | cli.deref() == [[0, 3], [1, 5], [2, 52], [3, 52], [4, 3]]
assert lookup(d)._typehint(3) == tIter(int)
assert "abccad" | lookup(d, mode="rm") | cli.deref() == [3, 5, 52, 52, 3]


# Same checks, but going through toPyFunc()
assert "abcca" | (cli.toPyFunc() | lookup(d) | cli.deref()) == [3, 5, 52, 52, 3]
try: "abccad" | (cli.toPyFunc() | lookup(d)) | cli.deref(); raise Exception("Failed")
except KeyError: pass
assert "abccad" | (cli.toPyFunc() | lookup(d, fill="(not found)")) | cli.deref() == [3, 5, 52, 52, 3, '(not found)']
assert "abccad" | (cli.toPyFunc() | lookup(d, fill=input)) | cli.deref() == [3, 5, 52, 52, 3, 'd']
assert [range(5), "abcca"] | (cli.toPyFunc() | cli.transpose() | lookup(d, 1)) | cli.deref() == [[0, 3], [1, 5], [2, 52], [3, 52], [4, 3]]
assert "abccad" | (cli.toPyFunc() | lookup(d, mode="rm")) | cli.deref() == [3, 5, 52, 52, 3]

In [None]:
# lookup() on a pandas DataFrame column should give back a DataFrame
# NOTE(review): `df1` is not defined in the visible part of this notebook and this cell has no
# execution count - presumably it's a DataFrame built in an earlier cell, confirm that this survives "Run All"
assert isinstance(df1 | lookup({1:2,2:3,3:4,4:5}, 0), pd.DataFrame)
assert df1 | lookup({1:2,2:3,3:4}, 0, mode="rm") | cli.shape() == (3, 6)

In [47]:
#export
_sorted = sorted
class lookupRange(BaseCli):
    def __init__(self, ranges, col:int=None, sorted=True, fill=None, mode="error"):
        """Looks up values within some range.
Example::

    ranges = [[2, 3, "a"], [4, 5, "b"], [6, 7, "c"]]
    vs = [1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5]
    vs | lookupRange(ranges, mode="error") | deref() # raises an exception cause it can't find "1" in any ranges
    vs | lookupRange(ranges, mode="fill")  | deref() # returns [None, None, 'a', 'a', None, None, 'b', 'b', None, None]
    vs | lookupRange(ranges, mode="rm")    | deref() # returns ['a', 'a', 'b', 'b']
    vs | lookupRange(ranges, mode="input") | deref() # returns [1, 1.5, 'a', 'a', 3, 3.5, 'b', 'b', 5, 5.5]
    vs | lookupRange(ranges, fill=input)   | deref() # same as mode="input"
    
    vs = list(zip([1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5], "abcdefghij"))
    vs | lookupRange(ranges, 0, mode="rm") | deref() # returns [['a', 'c'], ['a', 'd'], ['b', 'g'], ['b', 'h']]

So, ``ranges`` should be a table with 3 columns: start, stop and value. This cli will search across all ranges,
and if the input iterator has values within a single range, it will yield that range's value. The exact
comparison expression is "start <= input < stop". Internally, there're 2 implementations:

First implementation assumes the ranges are not overlapping, activated by "sorted=True". This will
assume the ranges are sorted based on the start values, then it searches for the value using binary
search. Time complexity is O(n*log(m)), where n is the input size, m is the ranges's length

Second implementation doesn't assume the ranges are not overlapping, activated by "sorted=False".
This won't sort the ranges, and searches for the value using linear search, yielding the first
range that contains the value. Time complexity is O(n*m)

See also: :class:`lookup`

:param ranges: table of size (N, 3), with each row (start, stop, value)
:param col: column to act upon
:param sorted: if True, use binary search, else use linear search. Explained more above
:param fill: if specified, and if no ranges contain the value, then yield this value instead. If it's the builtin ``input``, then yield the input value instead
:param mode: explained above. See :class:`lookup` as well"""
        try: ranges[:]; len(ranges)
        except: ranges = ranges | deref(2) # can't be sliced or measured, so it's dereferenced into nested lists first
        if mode == "error": # override .mode so that it's backwards compatible
            if fill is input: mode = "input"; fill = None # has to be checked first, as `input` is also "not None"
            elif fill is not None: mode = "fill"
        self.ranges = ranges; self.col = col; self.sorted = sorted; self.fill = fill; self.mode = mode
        if mode not in ("error", "rm", "fill", "input"): raise Exception(f".mode can only be 'error', 'rm', 'fill' or 'input'")
    def __ror__(self, it):
        ranges = self.ranges; col = self.col; fill = self.fill; mode = self.mode
        missing = object(); it = init.dfGuard(it) # `missing` marks values that aren't inside any range
        if self.sorted:
            def find(v): # binary search, assumes ranges are sorted by start value and don't overlap
                start = 0; end = len(ranges)-1
                while start <= end:
                    mid = round((start + end)/2)
                    r = ranges[mid]
                    if r[0] <= v < r[1]: return r[2]
                    if v < r[0]: end = mid-1
                    else: start = mid+1
                return missing
        else:
            def find(v): # linear search, first range that contains the value wins
                return next((vv for x,y,vv in ranges if x <= v < y), missing)
        for row in it:
            v = row if col is None else row[col]
            e = find(v)
            if e is missing:
                if mode == "rm": continue
                if mode == "error": raise KeyError(f"Can't find element {v} in any ranges")
                e = fill if mode == "fill" else v # only "fill" and "input" are left at this point
            if col is None: yield e
            else: row = list(row); row[col] = e; yield row # copies the row, so that the input is left untouched

In [48]:
# Each mode on a flat list of values. Ranges are half-open, so 3 and 5 don't match anything
ranges = [[2, 3, "a"], [4, 5, "b"], [6, 7, "c"]]
vs = [1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5]
assert vs | lookupRange(ranges, mode="fill")  | cli.deref() == [None, None, 'a', 'a', None, None, 'b', 'b', None, None]
assert vs | lookupRange(ranges, mode="rm")    | cli.deref() == ['a', 'a', 'b', 'b']
assert vs | lookupRange(ranges, mode="input") | cli.deref() == [1, 1.5, 'a', 'a', 3, 3.5, 'b', 'b', 5, 5.5]
try: vs | lookupRange(ranges, mode="error")   | cli.deref(); assert False
except KeyError: pass
# binary search and linear search implementations have to agree
assert (vs | lookupRange(ranges, mode="rm") | cli.deref()) == (vs | lookupRange(ranges, mode="rm", sorted=False) | cli.deref())
# lookup on the first column of a table
vs = list(zip([1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5], "abcdefghij"))
assert vs | lookupRange(ranges, 0, mode="rm") | cli.deref() == [['a', 'c'], ['a', 'd'], ['b', 'g'], ['b', 'h']]

In [66]:
#export
class getitems(BaseCli):
    def __init__(self, *fields, default=None):
        """Basically [input[x] for x in fields], where fields that can't be
retrieved are replaced by ``default``.
Example::

    # returns [3, 1, None]
    {"a": 1, "b": 2, "c": 3} | getitems("c", "a", "d")
    # returns [3, 1, '']
    {"a": 1, "b": 2, "c": 3} | getitems("c", "a", "d", default="")

:param fields: keys/indexes to grab from the input, in the order they should appear in the output
:param default: value to use for any field that can't be retrieved"""
        self.fields = fields; self.default = default
    def __ror__(self, d):
        ans = []; default = self.default
        for f in self.fields:
            try: ans.append(d[f])
            except Exception: ans.append(default) # any lookup failure falls back to the default, but KeyboardInterrupt and SystemExit still propagate
        return ans

In [68]:
# the missing key "d" falls back to the default value, which is None
assert {"a": 1, "b": 2, "c": 3} | getitems("c", "a", "d") == [3, 1, None]

In [51]:
#export
class backup(BaseCli):
    def __init__(self):
        """Backs up a file/folder.
Example::

    "some/folderOrFile" | backup()
    "some/folderOrFile" | backup.restore()

The copy is stored right next to the original, at "some/folderOrFile.backup".
Any existing backup at that location is replaced.

Really straightforward. Uses bash internally to copy files recursively, so
not available on Windows.

Both operations raise :class:`FileNotFoundError` if the thing to copy from
doesn't exist, instead of deleting the destination and then failing silently."""
        pass
    @staticmethod
    def _paths(it):
        """Returns (path, path of its backup), with "~" expanded and trailing slashes removed."""
        it = os.path.expanduser(it)
        it = it.rstrip(os.sep) or it # "a/b/" + ".backup" would put the backup inside the folder itself
        return it, f"{it}.backup"
    @staticmethod
    def _replace(src, dst):
        """Deletes `dst`, then copies `src` over to `dst` recursively."""
        import shlex # local import, as the module doesn't import it at the top
        if not os.path.lexists(src): raise FileNotFoundError(f"Can't copy from '{src}', as it does not exist")
        # paths are quoted so that spaces, quotes and the like can't break (or hijack) the command.
        # "--" stops paths starting with "-" from being parsed as options
        src = shlex.quote(src); dst = shlex.quote(dst)
        None | cli.cmd(f"rm -rf -- {dst}") | cli.ignore()
        None | cli.cmd(f"cp -r -- {src} {dst}") | cli.ignore()
    def __ror__(self, it):
        original, saved = backup._paths(it)
        backup._replace(original, saved)
    @staticmethod
    def restore():
        def inner(it):
            original, saved = backup._paths(it)
            backup._replace(saved, original)
        return cli.aS(inner)

In [52]:
#export
sketch_interceptor = {} # maps matplotlib functions to functions that generate the equivalent js statement. Used when transpiling transforms
class sketch(BaseCli):
    _jsF_ctxIdx = None # name of the js variable collecting plot commands, only set while ._jsF() is transpiling
    def __init__(self, transforms:List[Callable]=[], titles:List[str]=None, im:bool=False, ncols:int=None, n:int=None, axes:int=None):
        """Convenience tool to plot multiple matplotlib plots at the same
time, while still keeping everything short and in 1 line. For this example,
we're trying to plot x^1, x^2, ..., x^8 on 2 separate plots, one left one
right. The left will have x^1 till x^4, the right will have x^5 to x^8.

How you would do this normally::

    x = np.linspace(-2, 2); exps = range(1, 9)

    fig, axes = plt.subplots(1, 2, figsize=(10, 4))
    # simplest solution
    plt.sca(axes[0]); plt.plot(x, x**1); plt.plot(x, x**2); plt.plot(x, x**3); plt.plot(x, x**4); plt.legend([1, 2, 3, 4]); plt.xlabel("x axis")
    # solution using a little bit of cli
    plt.sca(axes[1]); range(5, 9) | apply(lambda a: [x, x**a]) | ~apply(plt.plot) | ignore();     plt.legend([5, 6, 7, 8]); plt.xlabel("x axis")

But this is long, and I'm incredibly lazy to write it all out. So here's how
it's going to work using this cli::

    # plotting the first 4 lines only, in a single plot. Should be familiar and make sense to you before moving on
    exps | apply(lambda a: [x, x**a]) | batched(4) | item() | ~apply(plt.plot) | ignore()

    # plotting 8 lines across 2 plots. Simplest example using sketch(). It kinda captures clis after it and use it to plot each plot
    exps | apply(lambda a: [x, x**a]) | batched(4) | (sketch() | ~apply(plt.plot))

    # same as above, but adding a grid and x axis label to all plots. Transformation functions can be anything you would
    # put inside a normal cli (`plt` will be passed as argument): string code, op()-capture, lambda functions, other cli tools
    transforms = ["x.grid(True)", op().xlabel("x axis"), lambda x: x.ylabel("y axis")]
    exps | apply(lambda a: [x, x**a]) | batched(4) | (sketch(transforms) | ~apply(plt.plot))

    # same as above, but adding legends. [x, x**a] will eventually be directed to ~apply(plt.plot), while f"x^{a}" will be directed to aS(plt.legend)
    exps | apply(lambda a: [[x, x**a], f"x^{a}"]) | batched(4) | (sketch() | transpose() | ~apply(plt.plot) + iden() | deref() | rItem(1) | aS(plt.legend)) | deref()

Last line will generate this plot:

.. image:: ../images/probScale.png

Is it worth the extra confusion? Afterall, it just saves you 2-3 lines of
code. To me, it is worth it, because you can quickly change styles (add
a grid, make y axis log)

See also: :class:`~k1lib.cli.output.plotImgs`

Check out a gallery of more examples at `kapi/9-mpl <https://mlexps.com/kapi/9-mpl/>`_.

:param transforms: transform functions to be run when drawing every plot. ``plt`` (aka ``matplotlib.pyplot``) will be passed in
:param titles: if specified, use these titles for each plot. Kinda hacky I have to admit
:param im: if True, returns a PIL image and closes the sketch, else return nothing but still have the sketch open
:param ncols: if specified, will sketch with this number of columns
:param n: if specified, use this number of sketch instead of figuring out automatically. The unused axes are then returned instead of being removed
:param axes: if specified, forgo calculating #axes and initialization altogether and just use the provided axes"""
        super().__init__(capture=True); self.titles = titles; self.im = im
        self.transforms = [cli.fastF(t) for t in transforms]; self.ncols = ncols; self.n = n; self.axes = axes
    def _hasAxes(self):
        """Whether the user provided a non-empty collection of axes. A plain truthiness
check can't be used here, as it throws on numpy arrays with more than 1 element."""
        axes = self.axes
        if axes is None: return False
        try: return len(axes) > 0
        except TypeError: return True
    def __ror__(self, it):
        it = list(it); n = self.n or len(it); s = self.capturedSerial; transforms = self.transforms
        # max(1, ...) so that an empty input gives an empty figure instead of a ZeroDivisionError
        ncols = self.ncols or max(1, math.ceil(n**0.5)); nrows = max(1, math.ceil(n/ncols))
        if self._hasAxes(): axes = self.axes
        else:
            _, axes = plt.subplots(nrows, ncols, figsize=(ncols*5, nrows*4))
            if nrows*ncols == 1: axes = [axes] # plt.subplots() returns a bare Axes object in this case
        if axes | cli.shape() | cli.shape(0) > 1: axes = axes.flatten()
        i = -1 # index of the last axes drawn on. Stays at -1 if nothing is drawn, so axes[i+1:] below means "all of them"
        for i, [ax, e, title] in enumerate(zip(axes, it, self.titles or ("" | cli.repeat()))):
            plt.sca(ax); e | s | cli.deref()
            if title: plt.title(title)
            for trans in transforms: trans(plt)
        if self.n is None: axes[i+1:] | cli.op().remove().all() | cli.deref(); plt.tight_layout()
        if self.im: return plt.gcf() | cli.toImg()
        if self.n: return axes[i+1:]
    def _jsF(self, meta):
        if self.n: raise Exception("sketch()._jsF() doesn't support .n parameter yet")
        if self._hasAxes(): raise Exception("sketch()._jsF() doesn't support .axes parameter yet")
        fIdx = init._jsFAuto(); dataIdx = init._jsDAuto(); ctxIdx = init._jsDAuto()
        # generate all child functions here
        sketch._jsF_ctxIdx = ctxIdx
        header, _fIdx, _async = k1lib.kast.asyncGuard(self.capturedSerial._jsF(meta))
        # then generate all transforms here, using a tracing compiler
        tfStmts = ""
        if len(self.transforms) > 0:
            class Interceptor: # stands in for `plt`, turns every call on it into a js statement
                def __getattr__(self, attr):
                    if getattr(plt, attr) not in sketch_interceptor: raise Exception(f"Transpiling function `plt.{attr}` is not supported at the moment")
                    return lambda *args, **kwargs: sketch_interceptor[getattr(plt, attr)](*args, **kwargs)
            tfStmts = self.transforms | cli.apply(cli.init.fastF) | cli.op()(Interceptor()).all() | cli.join("; ")
        sketch._jsF_ctxIdx = None
        return f"""\
{ctxIdx} = null;\n{header}
{fIdx} = async ({dataIdx}) => {{ // dataIdx should have
    const ctx = []; // this is the object that will be sent to the rendering server!
    const titles = {json.dumps(self.titles)} ?? Array({dataIdx}.length);
    for (const i of [...Array({dataIdx}.length).keys()]) {{
        {ctxIdx} = [];
        // actually executing function and plotting function downstream
        {'await ' if _async else ''}{_fIdx}({dataIdx}[i]);
        if (titles[i]) {ctxIdx}.push(["title", titles[i]]);
        // inject all transforms here
        {tfStmts};
        ctx.push({ctxIdx}); {ctxIdx} = null;
    }}
    // console.log(ctx);
    // console.log(JSON.stringify(ctx));
    const res = await (await fetch("https://local.mlexps.com/routeServer/kapi_9-mpl", {{
      method: "POST",
      body: JSON.stringify({{ "ctx": JSON.stringify(ctx) }}),
      headers: {{ "Content-Type": "application/json" }}
    }})).json()
    if (res.success) {{
        const base64 = res.data;
        console.log("mpl fetched");
        return `<img src="data:image/jpg;base64, ${{base64}}" />`
    }} else {{ throw new Error(res.reason); }}
    // return ctx;
}}""", fIdx
def _jsF_plt_ctxGuard():
    """Returns ``sketch._jsF_ctxIdx``, the value that :meth:`sketch._jsF` sets
while it is transpiling.

:raises Exception: if that value is None at the moment"""
    ctxIdx = sketch._jsF_ctxIdx
    if ctxIdx is not None: return ctxIdx
    raise Exception("Have to wrap any plotting operations around sketch(). So, transform your code from `data | (toJsFunc() | ~aS(plt.plot))` into `[data] | (toJsFunc() | (sketch() | ~aS(plt.plot)))`")
# Registers js transpilers for a handful of matplotlib functions, only if matplotlib can be imported.
# Note that this rebinds the module-level name `plt`.
# There are 2 flavors:
# - `settings.kjs.jsF[plt.xxx]`: returns (js function definition, function name). Arguments arrive on the js side
# - `sketch_interceptor[plt.xxx]`: arguments are known on the python side, returns a single js statement
# Both push a command like ["plot", x, y] onto the js variable named by _jsF_plt_ctxGuard(), so both
# throw if that guard throws.
try: import matplotlib.pyplot as plt; hasMpl = True
except: hasMpl = False
if hasMpl:
    # plt.plot(x, y), or plt.plot(y), in which case x defaults to 0..len(y)-1
    def _jsF_plt_plot(meta, c=None):
        fIdx = init._jsFAuto(); xIdx = init._jsDAuto(); yIdx = init._jsDAuto(); ctxIdx = _jsF_plt_ctxGuard()
        return f"""\
    {fIdx} = ({xIdx}, {yIdx}=null) => {{
        if (!{yIdx}) {{ // handle only xIdx is available case
            {yIdx} = {xIdx}; {xIdx} = [...Array({yIdx}.length).keys()];
        }}
        {ctxIdx}.push(["plot", {xIdx}, {yIdx}]);
    }}""", fIdx
    settings.kjs.jsF[plt.plot] = _jsF_plt_plot
    def _jsF_plt_title(meta): # version that passes args in js side
        fIdx = init._jsFAuto(); titleIdx = init._jsDAuto(); ctxIdx = _jsF_plt_ctxGuard()
        return f"""{fIdx} = ({titleIdx}) => {{ {ctxIdx}.push(["title", {titleIdx}]); }}""", fIdx
    settings.kjs.jsF[plt.title] = _jsF_plt_title # below is version that passes args in python side, returns statement, instead of (header, fIdx) like usual
    # NOTE(review): the title is put raw inside a js template literal, unlike the entries below that go
    # through cli.kjs.v() - presumably a title containing a backtick or "${" would break the generated js, confirm
    sketch_interceptor[plt.title] = lambda title: f"""{_jsF_plt_ctxGuard()}.push(["title", `{title}`])"""
    def _jsF_plt_grid(meta):
        fIdx = init._jsFAuto(); tfIdx = init._jsDAuto(); ctxIdx = _jsF_plt_ctxGuard()
        return f"""{fIdx} = ({tfIdx}) => {{ {ctxIdx}.push(["grid", {tfIdx}]); }}""", fIdx
    settings.kjs.jsF[plt.grid] = _jsF_plt_grid; sketch_interceptor[plt.grid] = lambda tf=True: f"""{_jsF_plt_ctxGuard()}.push(["grid", {cli.kjs.v(tf)}])"""
    # here, framealpha is fixed on the python side, while the legend itself arrives on the js side
    def _jsF_plt_legend(meta, framealpha=1):
        fIdx = init._jsFAuto(); legendIdx = init._jsDAuto(); ctxIdx = _jsF_plt_ctxGuard()
        return f"""{fIdx} = ({legendIdx}) => {{ {ctxIdx}.push(["legend", {legendIdx}, {framealpha}]); }}""", fIdx
    settings.kjs.jsF[plt.legend] = _jsF_plt_legend; sketch_interceptor[plt.legend] = lambda legend=None, framealpha=1: f"""{_jsF_plt_ctxGuard()}.push(["legend", {cli.kjs.v(legend)}, {cli.kjs.v(framealpha)}])"""

    # these are only available as transforms, there's no settings.kjs.jsF entry for them
    sketch_interceptor[plt.xlim] = lambda left=None, right=None: f"""{_jsF_plt_ctxGuard()}.push(["xlim", {cli.kjs.v(left)}, {cli.kjs.v(right)}])"""
    sketch_interceptor[plt.ylim] = lambda bottom=None, top=None: f"""{_jsF_plt_ctxGuard()}.push(["ylim", {cli.kjs.v(bottom)}, {cli.kjs.v(top)}])"""
    sketch_interceptor[plt.xscale] = lambda scale: f"""{_jsF_plt_ctxGuard()}.push(["xscale", {cli.kjs.v(scale)}])"""
    sketch_interceptor[plt.xlabel] = lambda label: f"""{_jsF_plt_ctxGuard()}.push(["xlabel", {cli.kjs.v(label)}])"""
    sketch_interceptor[plt.ylabel] = lambda label: f"""{_jsF_plt_ctxGuard()}.push(["ylabel", {cli.kjs.v(label)}])"""

In [53]:
# Runs the legend example from sketch()'s docstring (8 curves, 4 per plot) and saves the resulting image into the docs folder
x = np.linspace(-2, 2); exps = range(1, 9)
exps | cli.apply(lambda a: [[x, x**a], f"x^{a}"]) | cli.batched(4) | (sketch(im=True) | cli.transpose() | ~cli.apply(plt.plot) + cli.iden() | cli.deref() | cli.rItem(1) | cli.aS(plt.legend)) | cli.toBytes() | cli.file("../../docs/images/utils_sketch.png")

'../../docs/images/utils_sketch.png'

In [54]:
#export
import numbers, sys; from collections import deque
class syncStepper(BaseCli):
    def __init__(self, col=0, sort=False):
        """Steps forward all streams at a time, yielding same results from min to max.
That's a bit vague, so let's see an example::

    a = [["a", 1], ["b", 7 ], ["c", 4], ["e", 6]]
    b = [["b", 5], ["c", 1 ], ["d", 3], ["f", 5]]
    c = [["a", 2], ["c", -4], ["d", 9], ["e", 4]]

    [a, b, c] | syncStepper() | deref() # sync-step by the 1st column
    [a, b, c] | syncStepper(1, True) | deref() # sync-step by the 2nd column. Have to sort it explicitly

The first line returns this::

    [[['a', 1], None, ['a', 2]],
     [['b', 7], ['b', 5], None],
     [['c', 4], ['c', 1], ['c', -4]],
     [None, ['d', 3], ['d', 9]],
     [['e', 6], None, ['e', 4]],
     [None, ['f', 5], None]]

The second line returns this::

    [[None, None, ['c', -4]],
     [['a', 1], ['c', 1], None],
     [None, None, ['a', 2]],
     [None, ['d', 3], None],
     [['c', 4], None, ['e', 4]],
     [None, ['b', 5], None],
     [['e', 6], None, None],
     [['b', 7], None, None],
     [None, None, ['d', 9]]]

``col`` can be None, but it's quite a strange use case::

    [['a', 'b', 'c', 'e'], ['b', 'c', 'd', 'f'], ['a', 'c', 'd', 'e']] | syncStepper(None) | deref()

It returns this::

    [[['a'], None, ['a']],
     [['b'], ['b'], None],
     [['c'], ['c'], ['c']],
     [None, ['d'], ['d']],
     [['e'], None, ['e']],
     [None, ['f'], None]]

As you can see, for each line, it kinda yields elements with the same column. If
that element doesn't exist, it'll just put None there. This expects the input
streams are sorted at the column of interest. If they are not, specify ``sort=True``.

It has roughly the same vibe as :class:`~k1lib.cli.structural.groupBy`, in that
it groups everything by a specific column. The main difference here is that you
can sync-step them line-by-line, loading very little into memory, so you can run
this on giant datasets and not have to worry about running out of memory.

With k streams each having n elements, you should expect memory complexity to be
O(k), and the time complexity to be O(n*k^2/2). That k^2 term is kinda worrying,
but in most use cases, k is small and so k^2 can be treated as a constant

The column values across all streams have to be either all numbers or all strings,
else an exception is raised. An exception is also raised if a stream turns out to
not be sorted while stepping through it.

See also: :class:`~k1lib.cli.structural.latch`

:param col: column where it should compare values and merge them together. Can be None, but that would be quite a weird use case
:param sort: whether to sort the streams or not. This cli requires it, but it's
    not turned on by default because it's an intensive operation"""
        # col=None means each element is the value itself, so wrap every element in a list and compare column 0
        if col is None: self.col = 0; self.colPreprocess = cli.wrapList().all()
        else: self.col = col; self.colPreprocess = cli.iden()
        # bank: [stream index, column value, element] entries, kept in ascending order of column value (see _append)
        # sentinel: placeholder element fed in once a stream has run out of real elements
        self.bank = deque(); self.sentinel = object(); self._sort = sort
    def _append(self, stIdx1, val1, elem1): # append to bank in the correct position
        """Inserts ``[stIdx1, val1, elem1]`` right before the first bank entry whose
value is >= ``val1``, or at the very end if there's no such entry."""
        i = 0; val2 = self.minObj
        for i, [stIdx2, val2, elem2] in enumerate(self.bank):
            if val1 <= val2: break
        if val1 <= val2: self.bank.insert(i, [stIdx1, val1, elem1])
        else: self.bank.append([stIdx1, val1, elem1])
    def _yieldNext(self): # yield the next set of values
        """Collects the elements sharing the smallest value currently in the bank,
then replaces each of them with the next element from its own stream.

:return: ``(res, changed)``. ``res[i]`` is the element of stream ``i`` having the
    smallest value, or None if that stream doesn't have it. ``changed`` is False
    when the bank starts with the sentinel, aka every stream has been exhausted"""
        n = len(self.sts); res = [None]*n; last = None; hasInit = False; changed = False; bank = self.bank; sentinel = self.sentinel
        for stIdx, val, elem in bank:
            if not hasInit and elem is sentinel: return res, changed
            if last == val or not hasInit: changed = True; res[stIdx] = elem
            else: break # bank is sorted, so the first different value ends the current group
            hasInit = True; last = val
        while bank[0][1] == last: # popping the values off
            stIdx, val1, elem1 = bank.popleft(); val2, elem2 = next(self.sts[stIdx])
            if val1 > val2: raise Exception(f"Stream {stIdx} has not been sorted yet! Please sort all streams before passing it into syncStepper")
            self._append(stIdx, val2, elem2)
        return res, changed
    def __ror__(self, sts): # sts = "streams"
        # guard the incoming streams themselves. Used to be `it = init.dfGuard(it)`, but `it` doesn't
        # exist in this function, so that threw UnboundLocalError as soon as the result is iterated over
        col = self.col; sts = init.dfGuard(sts)
        # fresh bank for every run, so that reusing this cli, or abandoning a previous run midway, can't leak stale entries into this one
        self.bank = deque()
        # --------------------- All of this is just to figure out the type of the column dynamically. So painful ---------------------
        samples, sts = sts | self.colPreprocess.all() | cli.apply(cli.peek()) | cli.transpose() | cli.cut(col) + cli.iden() | cli.apply(list)
        if len([e for e in sts if e != []]) == 0: return # no elements to yield at all!
        n_nums = sum([1 if isinstance(e, numbers.Number) else 0 for e in samples])
        n_strs = sum([1 if isinstance(e, str) else 0 for e in samples]); n = len(samples)
        # both products are zero only if the samples are all numbers, all strings, or none of either
        if n_nums*(n-n_nums) + n_strs*(n-n_strs) > 0: raise Exception("The requested column in some of the streams is not purely of numeric or string type, a requirement of syncStepper(). Please fix your data structure and try again.")
        if n_nums + n_strs == 0: raise Exception("The requested column in some of the streams is not of numeric or string type, so can't compare them to sync-step them")
        # n = 3; n_strs = 1
        # minObj/maxObj: values that compare below/above any real value of the detected type. senObj is what exhausted streams keep on returning
        text = n_strs > 0; self.minObj = "" if text else float("-inf"); self.maxObj = chr(sys.maxunicode) if text else float("inf"); senObj = [self.maxObj, self.sentinel]
        # --------------------- And here's the meat of the cli ---------------------
        # every stream becomes an endless iterator of [value, element], padded with senObj after the real elements run out
        sts = sts | (cli.sort(col, not text).all() if self._sort else cli.iden()) | cli.apply(lambda st: [st | cli.apply(lambda elem: [elem[col], elem]), senObj | cli.repeat()] | cli.joinStreams()) | cli.aS(list)
        # seeds the bank with the first entry of every stream
        sts | cli.apply(next) | cli.insertIdColumn() | ~cli.apply(lambda idx,e: self._append(idx, *e)) | cli.ignore(); self.sts = sts
        while True:
            res, changed = self._yieldNext()
            if not changed: break
            yield res

In [55]:
a = [["a", 1], ["b", 7],  ["c", 4], ["e", 6]]
b = [["b", 5], ["c", 1],  ["d", 3], ["f", 5]]
c = [["a", 2], ["c", -4], ["d", 9], ["e", 4]]

# yields first element [["a", 1], None, ["a", 2]]
#[a, b, c] | sort(1).all() | syncStepper(1) | deref()
assert [a, b, c] | syncStepper() | deref() == [[['a', 1], None, ['a', 2]], [['b', 7], ['b', 5], None], [['c', 4], ['c', 1], ['c', -4]], [None, ['d', 3], ['d', 9]], [['e', 6], None, ['e', 4]], [None, ['f', 5], None]]
assert [a, b, c] | syncStepper(1, True) | deref() == [[None, None, ['c', -4]], [['a', 1], ['c', 1], None], [None, None, ['a', 2]], [None, ['d', 3], None], [['c', 4], None, ['e', 4]], [None, ['b', 5], None], [['e', 6], None, None], [['b', 7], None, None], [None, None, ['d', 9]]]
assert [['a', 'b', 'c', 'e'], ['b', 'c', 'd', 'f'], ['a', 'c', 'd', 'e']] | syncStepper(None) | deref() == [[['a'], None, ['a']], [['b'], ['b'], None], [['c'], ['c'], ['c']], [None, ['d'], ['d']], [['e'], None, ['e']], [None, ['f'], None]]

In [69]:
#export
class zeroes(BaseCli):
    def __init__(self, col:int=None, log=False, offset:float=0):
        """Shifts the specified column so that its first element lands on zero
(or on ``offset``, if given).
Example::

    range(13, 20)   | zeroes()         | deref() # returns [0, 1, 2, 3, 4, 5, 6]
    range(13, 20)   | zeroes(offset=5) | deref() # returns [5, 6, 7, 8, 9, 10, 11]
    [2, 3, 1, 4, 7] | zeroes()         | deref() # returns [0, 1, -1, 2, 5]

It's the first element that gets transformed to zero, not the smallest one, thus
the last example. There's also a log mode, where the natural log of the values
is what gets shifted to zero::

    # returns [1.0, 1.5, 0.5, 2.0, 3.5]
    [2, 3, 1, 4, 7] | zeroes(log=True)           | aS(round, 2).all() | deref()
    # returns [2.72, 4.08, 1.36, 5.44, 9.51]
    [2, 3, 1, 4, 7] | zeroes(offset=1, log=True) | aS(round, 2).all() | deref()

The 1st example is the same as dividing everything by 2, so that the first element
turns into 1. The 2nd example is equivalent to multiplying everything by e/2.

Works on tables too (.col != None)::

    # returns [[0, 'a'], [1, 'b'], [2, 'c'], [3, 'd'], [4, 'e'], [5, 'f'], [6, 'g']]
    [[13, 'a'], [14, 'b'], [15, 'c'], [16, 'd'], [17, 'e'], [18, 'f'], [19, 'g']] | zeroes(0) | deref()

When inverted (``~zeroes()``), it acts across multiple lists of numbers::

    data = [[2, 3, 1, 4, 7], [1, 4, 3, 6, 9]]
    data2 = [[[2, 'b'], [3, 'c'], [1, 'a'], [4, 'd'], [7, 'g']], [[1, 'a'], [4, 'd'], [3, 'c'], [6, 'f'], [9, 'i']]]

    # returns [[0, 1, -1, 2, 5], [5, 8, 7, 10, 13]]
    data | ~zeroes() | deref()
    # returns [[1, 2, 0, 3, 6], [6, 9, 8, 11, 14]]
    data | ~zeroes(offset=1) | deref()
    # returns [[1.0, 1.5, 0.5, 2.0, 3.5], [3.5, 14.0, 10.5, 21.0, 31.5]]
    data | ~zeroes(log=True) | aS(round, 2).all(2) | deref()

    # returns [[[0, 'b'], [1, 'c'], [-1, 'a'], [2, 'd'], [5, 'g']], [[5, 'a'], [8, 'd'], [7, 'c'], [10, 'f'], [13, 'i']]]
    data2 | ~zeroes(0) | deref()

So the offsets are adjusted such that the first element of each list starts from
the last element of the previous (already shifted) list.

Numpy arrays and torch tensors come back as arrays/tensors of the same kind,
everything else comes back as a lazy generator.

:param col: column to shift values. None to shift the elements themselves
:param offset: value that the first element gets shifted to, defaulted to zero.
    In log mode, it's the log of the first element that gets shifted to this value
:param log: whether to zero it linearly or zero it logarithmically"""
        self.col = col
        self.log = log
        self.offset = offset
        self.inverted = False
    def __invert__(self):
        """Returns a copy of this cli that runs in chained, multi-list mode."""
        chained = zeroes(self.col, self.log, self.offset)
        chained.inverted = True
        return chained
    def __ror__(self, it):
        col, log, offset = self.col, self.log, self.offset
        it = init.dfGuard(it)
        if self.inverted:
            def chain():
                runningOffset = offset
                for arr in it:
                    arr = arr | zeroes(col, log, runningOffset)
                    if isinstance(arr, settings.arrayTypes):
                        if isinstance(arr, np.ndarray): bm = np
                        elif hasTorch and isinstance(arr, torch.Tensor): bm = torch
                        else: bm = None
                        if bm:
                            # next list starts where this one ended (in log space if log mode)
                            tail = arr[-1] if col is None else arr[-1][col]
                            runningOffset = bm.log(tail) if log else tail
                            yield arr
                            continue
                    # yes, the result has to be materialized here, even though perf will suffer. Say
                    # the user then does rItem(3), discarding elements 0, 1 and 2. Those would never
                    # get evaluated, so element 3 would have no idea what its offset should be!
                    if col is None:
                        arr = list(arr)
                        tail = arr[-1]
                    else:
                        arr = [list(row) for row in arr]
                        tail = arr[-1][col]
                    runningOffset = math.log(tail) if log else tail
                    yield arr
            return chain()
        if isinstance(it, settings.arrayTypes):
            isNumpy = isinstance(it, np.ndarray)
            if isNumpy: bm = np
            elif hasTorch and isinstance(it, torch.Tensor): bm = torch
            else: bm = None
            if bm:
                cloneF = np.copy if isNumpy else torch.clone
                if col is None:
                    if log:
                        minValue = bm.log(it[0]) - offset
                        return bm.exp(bm.log(it) - minValue)
                    minValue = it[0] - offset
                    return it - minValue
                # single column: work on a clone, so that the input array is left untouched
                if log:
                    minValue = bm.log(it[0, col]) - offset
                    it = cloneF(it)
                    it[:,col] = bm.exp(bm.log(it[:,col]) - minValue)
                    return it
                minValue = it[0, col] - offset
                it = cloneF(it)
                it[:,col] = it[:,col] - minValue
                return it
        # generic iterables: peek at the first row to figure out the shift, then transform lazily
        first, it = it | cli.peek()
        if it == []: return []
        if col is None:
            if log:
                mlog = math.log; mexp = math.exp
                minValue = mlog(first) - offset
                return (mexp(mlog(e) - minValue) for e in it)
            minValue = first - offset
            return (e - minValue for e in it)
        if log:
            mlog = math.log; mexp = math.exp
            minValue = mlog(first[col]) - offset
            return ([*r[:col], mexp(mlog(r[col]) - minValue), *r[col+1:]] for r in it)
        minValue = first[col] - offset
        return ([*r[:col], r[col] - minValue, *r[col+1:]] for r in it)

In [70]:
assert range(13, 20) | zeroes(offset=5) | cli.deref() == [5, 6, 7, 8, 9, 10, 11]
assert range(13, 20) | zeroes() | cli.deref() == [0, 1, 2, 3, 4, 5, 6]
assert [2, 3, 1, 4, 7] | zeroes() | cli.deref() == [0, 1, -1, 2, 5]
assert [2, 3, 1, 4, 7] | zeroes(log=True) | cli.aS(round, 2).all() | cli.deref() == [1.0, 1.5, 0.5, 2.0, 3.5]
assert [2, 3, 1, 4, 7] | zeroes(offset=1, log=True) | cli.aS(round, 2).all() | cli.deref() == [2.72, 4.08, 1.36, 5.44, 9.51]
assert [[13, 'a'], [14, 'b'], [15, 'c'], [16, 'd'], [17, 'e'], [18, 'f'], [19, 'g']] | zeroes(0) | cli.deref() == [[0, 'a'], [1, 'b'], [2, 'c'], [3, 'd'], [4, 'e'], [5, 'f'], [6, 'g']]
data = [[2, 3, 1, 4, 7], [1, 4, 3, 6, 9]]
data2 = data | cli.aS(lambda x: [x, chr(x+97-1)]).all(2) | cli.deref()
data3 = data | cli.aS(lambda x: [x, x+10]).all(2) | cli.deref()
assert data               | ~zeroes() | cli.deref()          == [[0, 1, -1, 2, 5], [5, 8, 7, 10, 13]]
assert np.array(data)     | ~zeroes() | cli.deref(igT=False) == [[0, 1, -1, 2, 5], [5, 8, 7, 10, 13]]
assert torch.Tensor(data) | ~zeroes() | cli.deref(igT=False) == [[0, 1, -1, 2, 5], [5, 8, 7, 10, 13]]
assert data | ~zeroes(offset=1) | cli.deref() == [[1, 2, 0, 3, 6], [6, 9, 8, 11, 14]]
assert data | ~zeroes(log=True) | cli.aS(round, 2).all(2) | cli.deref() == [[1.0, 1.5, 0.5, 2.0, 3.5], [3.5, 14.0, 10.5, 21.0, 31.5]]
assert data2 | ~zeroes(0) | cli.deref() == [[[0, 'b'], [1, 'c'], [-1, 'a'], [2, 'd'], [5, 'g']], [[5, 'a'], [8, 'd'], [7, 'c'], [10, 'f'], [13, 'i']]]
assert data3 | cli.aS(np.array) | ~zeroes(0) | cli.deref(igT=False) == [[[0, 12], [1, 13], [-1, 11], [2, 14], [5, 17]], [[5, 11], [8, 14], [7, 13], [10, 16], [13, 19]]]
x = np.random.randn(100)+10; xT = torch.randn(100)+10
assert x | zeroes()         | shape() == (100,)
assert x | zeroes(log=True) | shape() == (100,)
try: x | zeroes(1); raise Exception("Failed")
except IndexError: pass
try: x | zeroes(1, log=True); raise Exception("Failed")
except IndexError: pass
assert xT | zeroes()         | shape() == (100,)
assert xT | zeroes(log=True) | shape() == (100,)
try: xT | zeroes(1); raise Exception("Failed")
except IndexError: pass
try: xT | zeroes(1, log=True); raise Exception("Failed")
except IndexError: pass
x = np.random.randn(100, 30)+10; xT = torch.randn(100, 30)+10
assert x | zeroes()  | shape() == (100, 30)
assert x | zeroes(3) | shape() == (100, 30)
assert x | zeroes(log=True)    | shape() == (100, 30)
assert x | zeroes(3, log=True) | shape() == (100, 30)
assert xT | zeroes()  | shape() == (100, 30)
assert xT | zeroes(3) | shape() == (100, 30)
assert xT | zeroes(log=True)    | shape() == (100, 30)
assert xT | zeroes(3, log=True) | shape() == (100, 30)

In [91]:
#export
class normalize(BaseCli):
    def __init__(self, col:int=None, mode:int=0):
        """Normalize the data going in.
Example::

    arr = np.random.randn(100)+10
    arr | normalize()       # returns array with mean around 0
    arr | normalize(mode=1) # returns array with mean around 0.5, min 0, max 1

    arr = np.random.randn(100, 20)+10
    arr | normalize(2)         # returns array with 2nd (0-indexing!) column have mean around 0. Other columns not touched
    arr | normalize(2, mode=1) # returns array with 2nd (0-indexing!) column have mean around 0.5

Modes:

- 0: ``(x - x.mean()) / x.std()``
- 1: ``(x - x.min()) / (x.max() - x.min())``
- 2: ``a = log10(x); (a - a.min()) / (a.max() - a.min())``

The input is never modified: a new array/tensor/list is always returned.

:param col: column to apply the normalization to. If None, normalizes over all elements
:param mode: see above"""
        self.col = col; self.mode = mode
    def _all_array_opt(self, it, level):
        """Vectorized version that normalizes every sub-array at depth ``level`` of
``it`` independently, in one go. Presumably this is what gets called when this cli
is used like ``normalize().all()`` on a numpy array or torch tensor - TODO confirm
against BaseCli.

:param it: numpy array or torch tensor
:param level: number of leading dimensions to treat as batch dimensions"""
        col = self.col; n = len(it.shape); s = slice(None, None, None); mode = self.mode
        log10 = np.log10 if isinstance(it, np.ndarray) else torch.log10
        if col is None:
            # (*level, N, *rest (>0)) -> (*level, N, rest) -> (*level, N*rest) -> (*level) (this is mean & std) -> (*level, N, *rest)
            if level+1 == len(it.shape): it = it[(*[s]*len(it.shape), None)]; n += 1
            b = it | cli.joinSt(n-level-2).all(level+1); c = b | cli.joinSt().all(level)
            if mode == 0:
                mean = c.mean(level)[(*[s]*level,None,None)]
                std = c.std(level)[(*[s]*level,None,None)]
                return ((b - mean)/std).reshape(it.shape)
            elif mode == 1:
                min_ = c.min(level)[(*[s]*level,None,None)]
                max_ = c.max(level)[(*[s]*level,None,None)]
                return ((b - min_)/(max_ - min_)).reshape(it.shape)
            else:
                min_ = log10(c).min(level)[(*[s]*level,None,None)]
                max_ = log10(c).max(level)[(*[s]*level,None,None)]
                return ((log10(b) - min_)/(max_ - min_)).reshape(it.shape)
        else:
            # (*level, N, F, *rest (>0)) -> (*level, N, *rest) -> (*level, N, rest) -> (*level, N*rest) -> (*level) (this is mean & std) -> (*level, N, F, *rest)
            # `a` is a copy of the input, and the results are written into it through `b[:] = ...`
            a = np.copy(it) if isinstance(it, np.ndarray) else torch.clone(it); unsqueezed = False; s = slice(None, None, None)
            if level+2 == len(a.shape): a = a[(*[s]*len(a.shape), None)]; unsqueezed = True; n += 1
            b = a[(*[slice(None,None,None)]*(level+1),col)] | cli.joinSt(n-level-3).all(level+1) # (*level, N, rest (>0, hence unsqueeze))
            c = b | cli.joinSt(len(b.shape)-level-1).all(level) # (*level, N*rest)
            if mode == 0:
                mean = c.mean(level)[(*[s]*level,None,None)]
                std = c.std(level)[(*[s]*level,None,None)]
                b[:] = (b - mean)/std
            elif mode == 1:
                min_ = c.min(level)[(*[s]*level,None,None)]
                max_ = c.max(level)[(*[s]*level,None,None)]
                b[:] = (b - min_)/(max_ - min_)
            else:
                min_ = log10(c).min(level)[(*[s]*level,None,None)]
                max_ = log10(c).max(level)[(*[s]*level,None,None)]
                b[:] = (log10(b) - min_)/(max_ - min_)
            return (a | cli.joinSt().all(len(a.shape)-2)) if unsqueezed else a
        return NotImplemented # never reached, as both branches above always return
    def __ror__(self, x):
        col = self.col; mode = self.mode; x = init.dfGuard(x)
        if isinstance(x, k1lib.settings.cli.arrayTypes):
            log10 = np.log10 if isinstance(x, np.ndarray) else torch.log10
            if col is None:
                if mode == 0: return (x - x.mean())/x.std()
                elif mode == 1: return (x - x.min())/(x.max() - x.min())
                else: x = log10(x); return (x - x.min())/(x.max() - x.min())
            else:
                # work on a copy, just like _all_array_opt() does. Writing into x[:,col] directly
                # would silently overwrite the column inside the caller's own array/tensor
                x = np.copy(x) if isinstance(x, np.ndarray) else torch.clone(x)
                if mode == 0: xc = x[:,col]; x[:,col] = (xc - xc.mean())/xc.std(); return x
                elif mode == 1: xc = x[:,col]; x[:,col] = (xc - xc.min())/(xc.max() - xc.min()); return x
                else: xc = log10(x[:,col]); x[:,col] = (xc - xc.min())/(xc.max() - xc.min()); return x
        # not an array: without a column, just convert to a numpy array and reuse the array path above
        if col is None: return np.array(list(x)) | self
        else:
            # table of rows: materialize it, compute the column's statistics, then rebuild every row as a fresh list
            it = x; ans = []; it = it | cli.deref(2); log10 = math.log10
            if len(it) == 0: return []
            if mode == 0:
                mean = [row[col] for row in it] | cli.toMean()
                std = [row[col] for row in it] | cli.toStd()
                for row in it: row = list(row); row[col] = (row[col]-mean)/std; ans.append(row)
            elif mode == 1:
                _min = min([row[col] for row in it])
                _max = max([row[col] for row in it])
                for row in it: row = list(row); row[col] = (row[col]-_min)/(_max-_min); ans.append(row)
            else:
                _min = min([log10(row[col]) for row in it])
                _max = max([log10(row[col]) for row in it])
                for row in it: row = list(row); row[col] = (log10(row[col])-_min)/(_max-_min); ans.append(row)
            return ans

In [105]:
x = np.random.randn(100)+10; xT = torch.randn(100)+10
assert abs(x  | normalize()  | cli.toMean()) < 1e-4
assert abs(xT | normalize()  | cli.toMean()) < 1e-4
assert abs(x  | cli.deref(igT=False) | normalize()  | cli.toMean()) < 1e-4
assert abs(xT | cli.deref(igT=False) | normalize()  | cli.toMean()) < 1e-4
x = np.random.randn(100, 30)+10; xT = torch.randn(100, 30)+10
assert abs(x  | normalize()  | cli.toMean()) < 1e-4
assert abs(xT | normalize()  | cli.toMean()) < 1e-4
assert abs(x  | normalize(2) | cli.toMean()) > 8
assert abs(xT | normalize(2) | cli.toMean()) > 8
# abs() is needed here, just like the checks above: without it, any negative mean passes trivially
assert abs(x  | normalize(2) | cli.op()[:,2] | cli.toMean()) < 1e-4
assert abs(xT | normalize(2) | cli.op()[:,2] | cli.toMean()) < 1e-4
assert (x  | cli.deref(igT=False) | normalize(2) | cli.toMean().all() | cli.toMean()) > 8
assert (xT | cli.deref(igT=False) | normalize(2) | cli.toMean().all() | cli.toMean()) > 8
assert sum((x+10 | normalize(mode=1) != x+10 | normalize(mode=2)).flatten()) > 1500
# _all_array_opts
a = np.random.randn(3,4,5,6); ogA = np.copy(a); a | shape()
assert (abs((a | normalize(1).all()) - a) < 1e-5) | cli.joinSt(3) | cli.count() | ~cli.sort() | cli.item(2) == 288 # 360/5 = 3*4*5*6/5
assert isinstance(a | normalize(1).all(), np.ndarray)
assert isinstance(a | normalize().all(), np.ndarray)
assert not np.allclose(a | normalize(), a | normalize(1))
a = np.array([[[1,2], [3,4], [5,6], [7,8.0]]])
assert np.allclose(a | normalize(1, 0).all(), np.array([[[ 1.        , -1.34164079], [ 3.        , -0.4472136 ], [ 5.        ,  0.4472136 ], [ 7.        ,  1.34164079]]]))
assert np.allclose(np.array([[[1,2], [3,4], [5,6], [7,8.0]]]) | normalize(1, 0).all(), np.array([[[ 1.        , -1.34164079], [ 3.        , -0.4472136 ], [ 5.        ,  0.4472136 ], [ 7.        ,  1.34164079]]]))
assert np.allclose(np.array([[[1,2], [3,4], [5,6], [7,8.0]]]) | normalize(1, 1).all(), np.array([[[1.        , 0.        ], [3.        , 0.33333333], [5.        , 0.66666667], [7.        , 1.        ]]]))

In [102]:
#export
class branch(BaseCli):
    def __init__(self, f, f1, f2):
        """Works like an if statement, for when making a separate function
feels too time consuming.
Example::

    3 | branch(lambda x: x>2, lambda x: x+4, lambda x: x+5) # returns 7
    3 | branch(op()>2, op()+4, op()+5)                      # returns 7
    3 | branch("x>2", "x+4", "x+5")                         # returns 7

    3 | aS(lambda x: (x + 4) if (x > 2) else (x + 5))       # returns 7

The first 3 lines kinda do the same thing as the 4th line. Is it worth it? Debatable,
but wrapping expressions in parentheses just to make sure nothing weird is going on
takes long enough to disrupt my thought process, so this cli exists.

:param f: predicate function. If returns True, use the first function (f1), else use the second function (f2)
:param f1: function to transform the input with if the predicate holds
:param f2: function to transform the input with if the predicate doesn't hold"""
        # keep the raw arguments around, next to their cli.fastF()-processed callables
        self.f = f
        self._fC = cli.fastF(f)
        self.f1 = f1
        self._fC1 = cli.fastF(f1)
        self.f2 = f2
        self._fC2 = cli.fastF(f2)
    def __ror__(self, it):
        # predicate runs first, and only the selected function ever sees the input
        chosen = self._fC1 if self._fC(it) else self._fC2
        return chosen(it)

In [103]:
assert 3 | branch(lambda x: x>2, lambda x: x+4, lambda x: x+5) == 7
assert 3 | branch("x>2", "x+4", "x+5") == 7
assert 3 | branch(cli.op()>2, cli.op()+4, cli.op()+5) == 7
assert 3 | cli.aS(lambda x: x+4 if x > 2 else x+5) == 7

In [26]:
!../../export.py cli/utils --upload=True

./export started up - /home/quang/miniconda3/envs/torch/bin/python3
----- exportAll
16076   0   61%   
10444   1   39%   
Found existing installation: k1lib 1.7
Uninstalling k1lib-1.7:
  Successfully uninstalled k1lib-1.7
Looking in indexes: https://pypi.org/simple, http://10.104.0.3:3141/
Processing /home/quang/k1lib
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: k1lib
  Building wheel for k1lib (setup.py) ... [?25ldone
[?25h  Created wheel for k1lib: filename=k1lib-1.7-py3-none-any.whl size=5103384 sha256=b087769196aca5501f351fbd9351a71c6c3dc05bcc151f56e61c81ec2db540e3
  Stored in directory: /tmp/pip-ephem-wheel-cache-9rhfjz5a/wheels/11/94/07/711323eb4091c7ef1b180ccc3793fc75a96521821bdd2932ac
Successfully built k1lib
Installing collected packages: k1lib
Successfully installed k1lib-1.7


In [64]:
!../../export.py cli/utils

./export started up - /home/quang/miniforge3/bin/python
----- exportAll
16748   0   60%   
11071   1   40%   
installing...
Found existing installation: k1lib 1.8
Uninstalling k1lib-1.8:
  Successfully uninstalled k1lib-1.8
[33mDEPRECATION: Loading egg at /home/quang/miniforge3/lib/python3.12/site-packages/aigu-0.1-py3.12.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330[0m[33m
[0mLooking in indexes: https://pypi.org/simple, http://10.104.0.3:3141/
Processing /home/quang/k1lib
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: k1lib
  Building wheel for k1lib (setup.py) ... [?25ldone
[?25h  Created wheel for k1lib: filename=k1lib-1.8-py3-none-any.whl size=5134291 sha256=2c17cc8639da43d0b8d9b0cfb8e002d5c94e67346b897c5b0edfa0f992c8efdc
  Stored in directory: /tmp/pip-ephem-wheel-cache-mht5c3u7/wheels/b5/

In [60]:
!../../export.py cli/utils --bootstrap=True

Traceback (most recent call last):
  File "/home/kelvin/repos/labs/k1lib/k1lib/cli/../../export.py", line 10, in <module>
    try: from k1lib.imports import *; hasK1 = True
  File "/home/kelvin/repos/labs/k1lib/k1lib/__init__.py", line 9, in <module>
    from . import cli
  File "/home/kelvin/repos/labs/k1lib/k1lib/cli/__init__.py", line 17, in <module>
    from .utils import *
  File "/home/kelvin/repos/labs/k1lib/k1lib/cli/utils.py", line 339, in <module>
    if hasPandas: a.append(pd.core.arraylike.OpsMixin)                               # clipboard
AttributeError: module 'pandas' has no attribute 'core'
./export started up - /home/kelvin/anaconda3/envs/ray2/bin/python3
----- bootstrapping
Current dir: /home/kelvin/repos/labs/k1lib, /home/kelvin/repos/labs/k1lib/k1lib/cli/../../export.py
rm: cannot remove '__pycache__': No such file or directory
Found existing installation: k1lib 1.6
Uninstalling k1lib-1.6:
  Successfully uninstalled k1lib-1.6
running install
running bdist_egg
runni