In [1]:
#export
from typing import List, Iterator, Any, NewType, TypeVar, Generic
import k1lib.cli as cli; from numbers import Number
import k1lib, itertools, copy, xml, warnings, traceback, sys, random, ast, time; import numpy as np
from collections import deque
import xml.etree.ElementTree
try: import torch; hasTorch = True
except: hasTorch = False; torch = k1lib.dep.torch
try: import pandas as pd; pd.core; hasPandas = True
except: hasPandas = False
__all__ = ["BaseCli", "Table", "T", "fastF", "yieldT",
           "serial", "oneToMany", "mtmS"]

In [2]:
# Calls the installed k1lib.cli.init.patchNumpy (not the definition further down in this notebook),
# so that numpy arrays can be piped into clis in the test cells below
cli.init.patchNumpy()

In [3]:
#export
# Settings container for the cli module; it is registered on the global `k1lib.settings` under the name "cli" on the last line of this cell
settings = k1lib.Settings()
# Nested container for groups of "atomic" types; individual groups (e.g. "baseAnd") are added further down in this file
atomic = k1lib.Settings()
settings.add("atomic", atomic, "classes/types that are considered atomic and specified cli tools should never try to iterate over them")
settings.add("defaultDelim", "\t", "default delimiter used in-between columns when creating tables. Defaulted to tab character.")
settings.add("defaultIndent", "  ", "default indent used for displaying nested structures")
settings.add("strict", False, "turning it on can help you debug stuff, but could also be a pain to work with")
settings.add("inf", float("inf"), "infinity definition for many clis. Here because you might want to temporarily not loop things infinitely")
k1lib.settings.add("cli", settings, "from k1lib.cli module")

Settings:                                                               
- displayCutoff       = 50                                              ​cutoff length when displaying a Settings object                                                                                                                                                                                                      
- svgScale            = 0.7                                             ​default svg scales for clis that displays graphviz graphs                                                                                                                                                                                            
- wd                  = /home/kelvin/repos/labs/k1lib/k1lib/cli         ​default working directory, will get from `os.getcwd()`. Will update using `os.chdir()` automatically when changed                                                                                                       

In [39]:
#export
# Unique sentinel object, exported through __all__.
# NOTE(review): its consumers are not visible in this file — presumably compared by identity (`x is yieldT`); confirm against callers
yieldT = object()
def patchDefaultDelim(st:str):
    """Resolves an optional column delimiter.

:param st: delimiter requested by the caller

    - if not None, it is returned unchanged
    - if None, the ``defaultDelim`` value of the cli settings is returned instead"""
    if st is None: return settings.defaultDelim
    return st
def patchDefaultIndent(st:str):
    """Resolves an optional indent string.

:param st: indent requested by the caller

    - if not None, it is returned unchanged
    - if None, the ``defaultIndent`` value of the cli settings is returned instead"""
    if st is None: return settings.defaultIndent
    return st

In [5]:
#export
# Type variable shared by the generic helpers of this module; `Table` below is parameterized by it
T = TypeVar("T")
"""Generic type variable"""
class _MetaType(type):
    def __getitem__(self, generic):
        d = {"__args__": generic, "_n": self._n, "__doc__": self.__doc__}
        return _MetaType(self._n, (), d)
    def __repr__(self):
        def main(self):
            def trueName(o):
                if isinstance(o, _MetaType): return main(o)
                try: return o.__name__
                except: return f"{o}"
            if hasattr(self, "__args__"):
                if isinstance(self.__args__, tuple):
                    return f"{self._n}[{', '.join([trueName(e) for e in self.__args__])}]"
                else: return f"{self._n}[{trueName(self.__args__)}]"
            return self._n
        return main(self)
def newTypeHint(name, docs=""):
    """Builds a type hint class that can be subscripted and still renders nicely,
both in ``repr()`` and in sphinx. The heavy lifting is done by a small metaclass, see
https://stackoverflow.com/questions/100003/what-are-metaclasses-in-python for
background on how those work.

Example::

    Table = newTypeHint("Table", "some docs")
    Table[int] # prints out as "Table[int]", and sphinx fell for it too
    Table[Table[str], float] # prints out as "Table[Table[str], float]"

:param name: display name of the type hint
:param docs: docstring attached to the generated class"""
    namespace = {"_n": name, "__doc__": docs}
    return _MetaType(name, (), namespace)
#Table = newTypeHint("Table", """Essentially just Iterator[List[T]]. This class is just here so that I can generate the docs with nicely formatted types like "Table[str]".""")
#Table = NewType("Table", List)
class Table(Generic[T]):
    """Stand-in for ``Iterator[List[T]]``. It carries no behavior of its own and only
exists so that the generated docs can show nicely formatted types like "Table[str]"."""
Table._name = "Table"
#Table.__module__ = "cli"
class Row(list):
    """Plain :class:`list` subclass, reserved for a potential future feature. Not
really used at the moment."""

In [6]:
#export
# Name generators used by the `_jsF` transpiler methods below: `_jsFAuto()` names the generated JS
# function, `_jsDAuto()` names its data argument. Each prefix embeds a random 3-digit number and the
# unix timestamp taken when this line runs.
# NOTE(review): presumably there to avoid name clashes between separately generated snippets — confirm
_jsFAuto = k1lib.AutoIncrement(prefix=f"_jsF_{random.randint(100, 999)}_{round(time.time())}_")
_jsDAuto = k1lib.AutoIncrement(prefix=f"_jsD_{random.randint(100, 999)}_{round(time.time())}_")
# NOTE(review): never raised in this file; presumably related to `BaseCli._all_array_opt` — confirm against callers
class ArrayOptException(Exception): pass
class BaseCli:
    """A base class for all the cli stuff. You can definitely create new cli tools that
have the same feel without extending from this class, but advanced stream operations
(like ``+``, ``&``, ``.all()``, ``|``) won't work.

At the moment, you don't have to call super().__init__() and super().__ror__(),
as __init__'s only job right now is to solidify any :class:`~k1lib.cli.modifier.op`
passed to it (and to set up the capture attributes), and __ror__ just returns
:data:`NotImplemented`."""
    def __init__(self, fs:list=[], capture=False):
        """Not expected to be instantiated by the end user.

**fs param**

Expected to use it like this::

    class A(BaseCli):
        def __init__(self, f):
            fs = [f]; super().__init__(fs); self.f = fs[0]

Where ``f`` is some (potentially exotic) function. This will replace f with a "normal"
function that's executable. See source code of :class:`~k1lib.cli.filt.filt` for an
example of why this is useful. Currently, it will:

- Replace with last recorded ``4 in op()``, if ``f`` is :data:`True`, because Python does
  not allow returning complex objects from __contains__ method
- Solidifies every :class:`~k1lib.cli.modifier.op`.

:param fs: list of functions, modified in place. Tuples are rejected with an AttributeError
:param capture: whether to capture all clis to the right of it and make it accessible under capturedClis and capturedSerial properties"""
        # NOTE: the default for `fs` is a shared mutable list. It is cleared and re-extended below, which is a no-op as long as it stays empty
        if isinstance(fs, tuple): raise AttributeError("`fs` should not be a tuple. Use a list instead, so that new functions can be returned")
        _k1_init_l = []
        # NOTE(review): the return value of `cli.op.solidify` is discarded here, while `capturedClis` below uses it — presumably solidify works in place; confirm
        for _k1_init_f in fs: cli.op.solidify(_k1_init_f); _k1_init_l.append(_k1_init_f) # this is supposed to turn the exotic function into a normal function and leave normal functions alone. Purposefully don't do heavy optimizations here, cause we might want to poke around and change its internal representation
        fs.clear(); fs.extend(_k1_init_l);
        self.capture = capture; self._capturedClis = []; self._capturedSerial = None
    @property
    def capturedClis(self):
        """Clis captured by ``__or__`` while in capture mode. On first access, every
captured element is passed through ``cli.op.solidify`` and the result is frozen into a
tuple, which is then cached and returned on every later access."""
        if isinstance(self._capturedClis, list):
            ans = []
            for e in self._capturedClis: ans.append(cli.op.solidify(e))
            # NOTE(review): after this point `_capturedClis` is a tuple, so a later capture in `__or__` (which calls `.append`) would fail — confirm nothing is captured after first access
            self._capturedClis = tuple(ans)
        return self._capturedClis
    @property
    def capturedSerial(self):
        """:data:`None` if capture mode is off. Else a :class:`serial` built from
``capturedClis``, created on first access and cached afterwards."""
        if not self.capture: return None
        if self._capturedSerial is None: self._capturedSerial = serial(*self.capturedClis)
        return self._capturedSerial
    def hint(self, _hint:"cli.typehint.tBase"):
        """Specifies output type hint."""
        self._hint = _hint; return self
    # True only if a hint was stored on this instance (via `.hint()`) and it is not None
    @property
    def hasHint(self): return "_hint" in self.__dict__ and self._hint is not None
    # Default type hint: the stored hint if there is one on this instance, else `tAny()`. The input hint `inp` is ignored here
    def _typehint(self, inp:"cli.typehint.tBase"=None) -> "cli.typehint.tBase": return cli.typehint.tAny() if "_hint" not in self.__dict__ else self._hint
    def __and__(self, cli:"BaseCli") -> "oneToMany":
        """Duplicates input stream to multiple joined clis.
Example::

    # returns [[5], [0, 1, 2, 3, 4]]
    range(5) | (shape() & iden()) | deref()

Kinda like :class:`~k1lib.cli.modifier.apply`. There're just multiple ways of doing
this. This I think, is more intuitive, and :class:`~k1lib.cli.modifier.apply` is more
for lambdas and columns mode. Performances are pretty much identical."""
        # an existing oneToMany on either side is copied and extended, so `a & b & c` stays flat and the operands are not mutated
        if isinstance(self, oneToMany): return self._copy()._after(cli)
        if isinstance(cli, oneToMany): return cli._copy()._before(self)
        return oneToMany(self, cli)
    def __add__(self, cli:"BaseCli") -> "mtmS":
        """Parallel pass multiple streams to multiple clis.
Example::

    # returns [8, 15]
    [2, 3] | ((op() * 4) + (op() * 5)) | deref()"""
        # same flattening strategy as in __and__, but for mtmS
        if isinstance(self, mtmS): return self._copy()._after(cli)
        if isinstance(cli, mtmS): return cli._copy()._before(self)
        return mtmS(self, cli)
    def all(self, n:int=1) -> "BaseCli":
        """Applies this cli to all incoming streams.
Example::

    # returns (3,)
    torch.randn(3, 4) | toMean().all() | shape()
    # returns (3, 4)
    torch.randn(3, 4, 5) | toMean().all(2) | shape()

:param n: how many times should I chain ``.all()``? ``0`` returns this cli itself, negative values raise AttributeError"""
        if n < 0: raise AttributeError(f"Does not make sense for `n` to be \"{n}\"")
        s = self
        for i in range(n): s = cli.apply(s) # each level wraps the previous one in another apply()
        return s
    def __or__(self, cli_) -> "BaseCli": # cli is guaranteed (by typical usage, not law) that it's a BaseCli
        """Joins clis end-to-end.
Example::

    c = apply(op() ** 2) | deref()
    # returns [0, 1, 4, 9, 16]
    range(5) | c"""
        # capture mode (never for `op` instances): record the right-hand cli instead of chaining and return self.
        # `hasattr` is needed because subclasses are allowed to skip BaseCli.__init__
        if not isinstance(self, cli.op) and hasattr(self, "capture") and self.capture: self._capturedClis.append(cli_); return self
        if isinstance(self, serial): return self._after(cli_)
        if isinstance(cli_, serial): return cli_._before(self)
        return serial(self, cli_)
    # Base implementation: signals that piping data into this cli is not implemented. Subclasses override this
    def __ror__(self, it): return NotImplemented
    def f(self):
        """Creates a normal function :math:`f(x)` which is equivalent to
``x | self``."""
        return lambda it: self.__ror__(it)
    def __lt__(self, it):
        """Backup pipe symbol `>`, purely for style, so that you can do something like
this::

    range(4) > file("a.txt")"""
        return self.__ror__(it)
    def __call__(self, it, *args):
        """Another way to do ``it | cli``. If multiple arguments are fed, then the
argument list is passed to cli instead of just the first element. Example::

    @applyS
    def f(it):
        return it
    f(2) # returns 2
    f(2, 3) # returns [2, 3]"""
        if len(args) == 0: return self.__ror__(it)
        else: return self.__ror__([it, *args])
    def __neg__(self):
        """Alias for __invert__, for clis that support inverting stuff."""
        return ~self
    def _all_array_opt(self, it, level:int):
        """Array types optimization for ``operator.all(level)``.

Essentially, a lot of times, I'm trying to do ``array | op()[3].all()``,
or ``array | transpose().all()``. But without this optimization, that ``.all()``
function kinda loops through each element and operates on them in vanilla Python,
which is super slow. So, this is a mechanism to speed it up. Here's how it works::

    # you wrote this
    array | operator.all() | deref()
    # apply() detects that you're trying to operate on an array type. It then figures
    # out how many nested apply() levels are there. In this case it's 1, so apply() returns this instead
    operator._all_array_opt(array, 1)
    # if that throws an error or returns NotImplemented, then it'll just loop through the array normally
    
    # if you wrote this instead
    array | operator.all(3) | deref()
    # or this
    array | apply(apply(operator.all())) | deref()
    # apply() will try to execute this instead
    operator._all_array_opt(array, 3)

Also, if the operator is a complex one, made of an entire pipeline, then ``serial`` can break
them apart and do this kind of optimization on each simple operator like this::

    operator = op()[3] | transpose()
    array | operator.all() | deref()
    # that gets transformed into this
    array | op()[3].all() | transpose().all() | deref()
    # then, array() will be called 2 times
    arr2 = op()[3]._all_array_opt(array, 1)
    transpose()._all_array_opt(arr2, 1)

It also works on something more complicated and nested like this::

    # returns 
    np.random.randn(3,4,5,6,7,8) | apply(transpose().all(3) | item()) | shape()

This breakdown also happens with op() (anticipated feature, not implemented yet)::

    array | op()[3][:4].all() | deref()
    # this will be broken down into
    array | op()[3].all() | op()[:4].all() | deref()
    # each piece will now have a chance to optimize the array structure independently,
    # so even if op()[:4] can't be done, op()[3] still have a chance to do the C-optimized version

Why don't I build a more standardized structure for these optimization passes? Well
I did, along the lines of LLVM. But, the whole optimization process kinda takes a long
time and I'm not sure if it's truly flexible for the kinds of workloads that I'm thinking
about. So, I'll just do this quick dumb optimization hack to get it over with, and when
I can think more clearly about this, I might move this mechanism back to LLVM.

The base implementation opts out by returning :data:`NotImplemented`."""
        return NotImplemented
    # The transpiler hooks below all opt out by default by returning NotImplemented; subclasses override the ones they support
    def _jsF(self, meta):
        """JS transpiler default function. See "JS transpiler" section in the docs"""
        return NotImplemented
    def _pyF(self, meta):
        """Cli to Python transpiler default function."""
        return NotImplemented
    def _cppF(self, meta):
        """C++ transpiler default function"""
        return NotImplemented
    def _javaF(self, meta):
        """Java transpiler default function"""
        return NotImplemented
    def _sqlF(self, meta):
        """SQL transpiler default function"""
        return NotImplemented

In [7]:
# Smoke tests mirroring the examples in the BaseCli docstrings (`+`, `.all()`, `|`, `>` and the array optimization).
# The two `torch.randn` asserts need torch to be installed; the `>` assert writes to "test/init.txt"
assert [2, 3] | ((cli.op() * 4) + (cli.op() * 5)) | cli.deref() == [8, 15]
assert torch.randn(3, 4) | cli.toMean().all() | cli.shape() == (3,)
assert torch.randn(3, 4, 5) | cli.toMean().all(2) | cli.shape() == (3, 4)
c = cli.apply(cli.op() ** 2) | cli.deref()
assert range(5) | c == [0, 1, 4, 9, 16]
assert (range(4) > cli.file("test/init.txt")) == "test/init.txt"
assert np.random.randn(3,4,5,6,7,8) | cli.apply(cli.transpose().all(3) | cli.item()) | cli.op().shape == (3, 5, 6, 8, 7)

In [8]:
#export
def _k1_init_frames():
    _k1_init_frames_count = 0
    try:
        while True:
            yield sys._getframe(_k1_init_frames_count) # `sys._getframe()` trick stolen from pd.DataFrame.query
            _k1_init_frames_count += 1
    except: pass
def _k1_global_frame():
    """Merges the local variables of every frame on the current call stack into one
dict. Frames are visited from the outermost one inwards, so on a name clash the
innermost frame wins. Returns an empty dict if anything goes wrong.

:func:`fastF` passes the result to ``eval()`` as the namespace for string expressions.

NOTE(review): the locals here use long ``_k1_init_frames_`` prefixed names — presumably
because this function's own frame is on the stack too, so its locals end up in the
returned dict and short names could shadow user variables. Confirm before renaming."""
    try:
        _k1_init_frames_ans = {}
        # reversed(): outermost frame first, so that later (inner) frames override earlier (outer) ones
        for _k1_init_frames_frame in reversed(list(_k1_init_frames())):
            _k1_init_frames_ans = {**_k1_init_frames_ans, **_k1_init_frames_frame.f_locals}
        return _k1_init_frames_ans
    except: return {} # best effort: any failure degrades to an empty namespace

In [28]:
#export
def fastF(c, x=None):
    """Tries to figure out what's going on, is it a normal function, or an applyS,
or a BaseCli, etc., and return a really fast function for execution. Example::

    # both returns 16, fastF returns "lambda x: x**2", so it's really fast
    fastF(op()**2)(4)
    fastF(applyS(lambda x: x**2))(4)

Strings are accepted too. They are either a full lambda expression, or an expression
in terms of ``x``, which gets wrapped as ``lambda x: <expression>``::

    fastF("x**2")(4) # returns 16
    fastF("lambda x: x**2")(4) # returns 16

At the moment, parameter ``x`` does nothing, but potentially in the future, you can
pass in an example input to the cli, so that this returns an optimized, C compiled
version.

:param c: string, :class:`~k1lib.cli.modifier.op`, applyS, :class:`BaseCli` or any other object (returned unchanged)
:param x: sample data for the cli"""
    if isinstance(c, str):
        # NOTE(security): the string is executed with eval(), so only ever pass trusted code in here.
        # Names inside the string are resolved against the locals of every frame on the call stack
        _k1_expr = ast.parse(c).body[0].value # the string has to start with an expression statement
        if isinstance(_k1_expr, ast.Lambda): return fastF(eval(c, _k1_global_frame()))
        else: return fastF(eval(f"lambda x: {c}", _k1_global_frame()))
    if isinstance(c, cli.op): return c.ab_fastF()
    if isinstance(c, cli.applyS):
        f = fastF(c.f)
        if len(c.args) == 0 and len(c.kwargs) == 0: return f
        # extra call-time args/kwargs are accepted but ignored; the ones stored on the applyS are passed instead
        else: return lambda x, *args, **kwargs: f(x, *c.args, **c.kwargs)
    if isinstance(c, BaseCli): return c.__ror__ # bound method, skips the `|` operator dispatch
    return c # anything else is handed back untouched

In [32]:
# fastF smoke tests: op() input, bare expression strings (wrapped as `lambda x: ...`) and full lambda strings, including default arguments
assert fastF(cli.op() ** 2)(4) == 16
assert fastF("x**2")(4) == 16
assert fastF("lambda x: x**2")(4) == 16
assert fastF("lambda x=3: x**2")(4) == 16
assert fastF("lambda x=3: x**2")() == 9

In [22]:
#export
def dfGuard(x):
    """Converts pandas dataframes into regular tables (via ``.to_numpy()``), so that they
interoperate well with clis. Anything else is returned unchanged.

This is not very performant, as the conversion will likely give back an object array,
which can't use C-accelerated functions. Clis that have a faster way of handling
dataframes shouldn't use this."""
    if hasPandas and isinstance(x, pd.core.frame.DataFrame): return x.to_numpy()
    return x
def preprocessPd(it, col:"int|None", f, farr=None):
    """Given either a series or a dataframe, a function and a column, return f(it[:,col]) 1d numpy array.

The vectorized call ``(farr or f)(data)`` is tried first. If it throws, then ``f`` is
applied element by element instead and the results are collected into a numpy array.

:param it: pandas Series (``col`` must be None) or DataFrame (``col`` must be given)
:param col: positional index of the column to operate on
:param f: eltwise operation. Can be vectorized
:param farr: explicitly vectorized operation. Optional
:raises ValueError: if ``col`` doesn't fit the kind of input (see above)"""
    ndim = 1 if isinstance(it, pd.core.series.Series) else len(it | cli.shape())
    if ndim == 1:
        if col is not None: raise ValueError("Can't apply to Series as .col is not None. Use a DataFrame or set .col to None")
        data = it
    elif ndim >= 2:
        # message fixed: this branch fires when .col IS None, the old text claimed the opposite
        if col is None: raise ValueError("Can't apply to DataFrame as .col is None. Use a Series or set .col to some value")
        data = it[list(it)[col]] # list(it) gives the column labels, so this selects the col-th column
    else: return None # 0-dimensional input: nothing to apply to, same result as before but explicit
    try: return (farr or f)(data)
    except Exception: return np.array([f(e) for e in data]) # vectorized call failed, fall back to eltwise

In [23]:
# preprocessPd smoke tests (need pandas): column of a dataframe, plain series, eltwise fallback for
# a function that can't be vectorized (str.startswith), and the error when a column is requested on a series
df1 = pd.DataFrame({"A": 1.0, "B": pd.Timestamp("20130102"), "C": pd.Series(1, index=list(range(4)), dtype="float32"), "D": np.array([3] * 4, dtype="int32"), "E": pd.Categorical(["test", "train", "test", "train"]), "F": "foo",})
assert preprocessPd(df1, 3, lambda x: x**2) | cli.shape() == (4,)
assert preprocessPd(df1["D"], None, lambda x: x**2) | cli.shape() == (4,)
assert (preprocessPd(df1, 4, lambda x: x.startswith("te")) == np.array([True, False, True, False])).all()
try: preprocessPd(df1["D"], 3, lambda x: x**2); assert False
except ValueError: pass

In [34]:
#export
def checkRor(c):
    """Makes sure that ``c`` can take part in a pipeline, and returns it as a cli.

- :class:`BaseCli` instances are returned as-is
- objects that define ``__ror__`` get that method wrapped in ``cli.aS``
- other callables get wrapped in ``cli.aS`` directly
- strings are compiled by :func:`fastF` first, then wrapped in ``cli.aS``

Anything else raises an :class:`Exception`."""
    if isinstance(c, BaseCli): return c
    if hasattr(c, "__ror__"): target = c.__ror__
    elif callable(c): target = c
    elif isinstance(c, str): target = fastF(c)
    else: raise Exception("Trying to add an operator to the pipeline, but the given object is not derived from BaseCli nor does it define a __ror__ method")
    return cli.aS(target)
class serialRepeat(BaseCli):
    """Applies a single function ``n`` times in a row, feeding each output back in as
the next input. Created by :meth:`serial.repeat`."""
    def __init__(self, f, n:int):
        """
:param f: function to execute. Compiled once up front by :func:`fastF`
:param n: how many times to execute this function serially"""
        self.f = f; self.n = n; self._fC = fastF(f)
    def __ror__(self, it):
        f = self._fC
        for i in range(self.n): it = f(it)
        return it
    def _jsF(self, meta):
        """JS transpiler function. Returns a tuple of (generated JS source, name of
the generated function). The repeat count is emitted as a plain JS ``for`` loop.

:raises Exception: if the wrapped function can't be transpiled"""
        f = self.f; fIdx = _jsFAuto(); dataIdx = _jsDAuto(); res = k1lib.kast.asyncGuard(k1lib.kast.prepareFunc3(f, meta))
        # report the class of the wrapped function. The bare name `cli` is the k1lib.cli module in
        # this scope, so the old message always said "<class 'module'>"
        if res is NotImplemented: raise Exception(f"{f.__class__} can't be transpiled into js. Either it doesn't make sense, or it hasn't been built yet")
        header, fn, _async = res
        return f"""\
{header}\n{fIdx} = {'async ' if _async else ''}({dataIdx}) => {{
    for (let i = 0; i < {self.n}; i++) {{
        {dataIdx} = {'await ' if _async else ''}{fn}({dataIdx});
    }}
    return {dataIdx};
}}""", fIdx
class serial(BaseCli):
    def __init__(self, *clis:List[BaseCli]):
        """Chains several clis into a single one, where the output of each cli is fed
straight into the next. This is what allows a pipeline to be defined before the
data (the "prime iterator") shows up. Without it, the second line here fails::

    [1, 2] | a() | b() # runs
    c = a() | b(); [1, 2] | c # doesn't run if this class doesn't exist"""
        stages = [checkRor(c) for c in clis]
        super().__init__(stages)
        self.clis = stages
        self._runOpt()
    def _runOpt(self):
        # pipelines containing a trace() have to go through the slow `|` path, see __ror__
        self._hasTrace = False
        for c in self.clis:
            if isinstance(c, cli.trace): self._hasTrace = True; break
        self._cliCs = list(map(fastF, self.clis))
        return self
    def _typehint(self, inp=None):
        # thread the hint through every stage, falling back to tAny() whenever a stage gives back nothing
        hint = inp
        for c in self.clis:
            hint = c._typehint(hint) or cli.typehint.tAny()
        return hint
    def __ror__(self, it:Iterator[Any]) -> Iterator[Any]:
        if self._hasTrace: # slower, but tracable
            for stage in self.clis: it = it | stage
            return it
        # faster, but not tracable
        for stage in self._cliCs: it = stage(it)
        return it
    def _before(self, c): return serial(checkRor(c), *self.clis)
    def _after (self, c): return serial(*self.clis, checkRor(c))
    def _jsF(self, meta):
        # returns (JS source, name of the generated function). The stages are composed as nested calls around the data argument
        fIdx = _jsFAuto(); dataIdx = _jsDAuto()
        headers = []; expr = dataIdx; anyAsync = False
        for c in self.clis:
            res = k1lib.kast.asyncGuard(c._jsF(meta))
            if res is NotImplemented: raise Exception(f"{c.__class__} can't be transpiled into js. Either it doesn't make sense, or it hasn't been built yet")
            header, fn, _async = res
            headers.append(header)
            expr = f"{'await ' if _async else ''}{fn}({expr})"
            if _async: anyAsync = True
        return "\n".join(headers) + f"""\n{fIdx} = {'async ' if anyAsync else ''}({dataIdx}) => {{ return {expr}; }};""", fIdx
    @staticmethod
    def repeat(f, n:int):
        """Runs a function on its own output, n times in a row.
Example::

    # returns 6561, or ((3^2)^2)^2
    3 | serial.repeat(op()**2, 3)

The same result can be had by spelling the chain out::

    3 | serial(*[lambda x: x**2]*3)

But with this method, n stays a variable when the pipeline is transpiled to JS.

:param f: function to execute
:param n: how many times to execute this function serially"""
        return serialRepeat(f, n)

In [43]:
# Smoke tests for BaseCli.__call__ (single vs. multiple arguments), the `&` operator, nesting a serial
# inside a pipeline, and serial.repeat with a string expression
@cli.applyS
def f(it): return it
assert f(2) == 2
assert f(2, 3) == [2, 3]
assert range(5) | (cli.shape() & cli.iden()) | cli.deref() == [(5,), [0, 1, 2, 3, 4]]
assert isinstance([np.random.randn(2, 3, 4)] | (cli.item() | serial(cli.shape())), tuple)
assert 3 | serial(*[lambda x: x**2]*3) == 6561
assert 3 | serial.repeat("x**2", 3) == 6561

In [12]:
# Examples taken from the cli rst docs: calling a cli directly, and piping into a `|`-joined pipeline
assert cli.shape()(np.random.randn(2, 3, 5)) == (2, 3, 5)
assert [np.random.randn(2, 3, 5)] | (cli.item() | cli.shape()) == (2, 3, 5)

In [13]:
#export
# Types that `oneToMany.__ror__` (the `&` operator) hands to every cli as-is, instead of splitting them with itertools.tee.
# torch.Tensor is only included when torch could be imported
atomic.add("baseAnd", (Number, np.number, str, dict, bool, bytes, list, tuple, *([torch.Tensor] if hasTorch else []), np.ndarray, xml.etree.ElementTree.Element), "used by BaseCli.__and__")
def addAtomic(klass):
    """Registers ``klass`` as an atomic type, by appending it to both the ``baseAnd``
and the ``deref`` groups of the atomic settings.

:param klass: class to register"""
    for group in ("baseAnd", "deref"):
        setattr(atomic, group, (*getattr(atomic, group), klass))
def _iterable(it):
    try: iter(it); return True
    except: return False
class oneToMany(BaseCli):
    def __init__(self, *clis:List[BaseCli]):
        """Fans 1 stream out into multiple streams, one for each cli in the
list. This is what the "a & b" joining operator creates. See also: :meth:`BaseCli.__and__`"""
        branches = [checkRor(c) for c in clis]
        super().__init__(branches)
        self.clis = branches
        self._cache()
    def _typehint(self, inp):
        def safe(f):
            # a branch that can't figure out its output type counts as tAny()
            try: return f._typehint(inp)
            except: return cli.typehint.tAny()
        return cli.typehint.tCollection(*[safe(f) for f in self.clis]).reduce()
    def __ror__(self, it:Iterator[Any]) -> Iterator[Iterator[Any]]:
        # atomic inputs, splitSeek objects and non-iterables are handed to every branch as-is
        passThrough = isinstance(it, atomic.baseAnd) or isinstance(it, k1lib.cli.splitSeek) or not _iterable(it)
        if passThrough:
            for f in self._cliCs: yield f(it)
            return
        # everything else is duplicated, so that each branch can consume its own copy of the stream
        copies = itertools.tee(it, len(self.clis))
        for f, stream in zip(self._cliCs, copies): yield f(stream)
    def _cache(self):
        # precompiled fast versions of all branches, has to be refreshed whenever self.clis changes
        self._cliCs = list(map(fastF, self.clis))
        return self
    def _before(self, c):
        self.clis = [checkRor(c)] + self.clis
        return self._cache()
    def _after(self, c):
        self.clis = self.clis + [checkRor(c)]
        return self._cache()
    def _copy(self): return oneToMany(*self.clis)
    def _jsF(self, meta):
        # returns (JS source, name of the generated function). The function returns an array with 1 result per branch
        fIdx = _jsFAuto(); dataIdx = _jsDAuto()
        headers = []; calls = []; anyAsync = False
        for c in self.clis:
            res = k1lib.kast.asyncGuard(c._jsF(meta))
            if res is NotImplemented: raise Exception(f"{c.__class__} can't be transpiled into js. Either it doesn't make sense, or it hasn't been built yet")
            header, fn, _async = res
            headers.append(header)
            calls.append(f"{'await ' if _async else ''}{fn}({dataIdx})")
            if _async: anyAsync = True
        body = f"[{', '.join(calls)}]"
        return "\n".join(headers) + f"""\n{fIdx} = {'async ' if anyAsync else ''}({dataIdx}) => {body};""", fIdx

In [14]:
# Type hint of `a & b` is the collection of the type hints of each branch
assert oneToMany(cli.iden(), cli.wrapList())._typehint(str) == cli.typehint.tCollection(str, cli.typehint.tList(str))

In [15]:
#export
class mtmS(BaseCli):
    def __init__(self, *clis:List[BaseCli]):
        """Runs multiple clis on multiple streams independently: the i-th incoming
stream goes through the i-th cli. This is what the "a + b" joining operator
creates. See also: :meth:`BaseCli.__add__`.

The weird name is short for "many to many specific"."""
        lanes = [checkRor(c) for c in clis]
        super().__init__(fs=lanes)
        self.clis = lanes
        self._cache()
    def _inpTypeHintExpand(self, t):
        # splits the input hint into 1 hint per cli, all tAny() if the hint can't be expanded
        count = len(self.clis)
        expandable = (cli.typehint.tCollection, *cli.typehint.tListIterSet, cli.typehint.tArrayTypes)
        if not isinstance(t, expandable): return [cli.typehint.tAny()]*count
        return t.expand(count)
    def _typehint(self, t):
        def safe(c, hint):
            # a cli that can't figure out its output type counts as tAny()
            try: return c._typehint(hint)
            except: return cli.typehint.tAny()
        outTs = [safe(c, hint) for c, hint in zip(self.clis, self._inpTypeHintExpand(t))]
        return cli.typehint.tCollection(*outTs).reduce()
    def _cache(self):
        # precompiled fast versions of all clis, has to be refreshed whenever self.clis changes
        self._cliCs = list(map(fastF, self.clis))
        return self
    def _before(self, c):
        self.clis = [checkRor(c)] + self.clis
        return self._cache()
    def _after (self, c):
        self.clis = self.clis + [checkRor(c)]
        return self._cache()
    def __ror__(self, its:Iterator[Any]) -> Iterator[Any]:
        # zip() stops at whichever runs out first, the clis or the incoming streams
        for fn, stream in zip(self._cliCs, its): yield fn(stream)
    @staticmethod
    def f(f, i:int, n:int=100):
        """Shortcut for applying a function to a single stream and leaving all
the others alone. So this::

    mtmS(iden(), op()**2, iden(), iden(), iden())
    # also the same as this btw:
    (iden() + op()**2 + iden() + iden() + iden())

can be written as this::

    mtmS.f(op()**2, 1, 5)

Example::

    # returns [5, 36, 7, 8, 9]
    range(5, 10) | mtmS.f(op()**2, 1, 5) | deref()

:param f: function to apply
:param i: where should I put the function?
:param n: how many clis in total? Defaulted to 100"""
        before = [cli.iden()]*i
        after = [cli.iden()]*(n-i-1)
        return mtmS(*before, f, *after)
    def _copy(self): return mtmS(*self.clis)
    def _jsF(self, meta):
        # returns (JS source, name of the generated function). The i-th cli gets applied to the i-th element of the data argument
        fIdx = _jsFAuto(); dataIdx = _jsDAuto()
        headers = []; calls = []; anyAsync = False
        for i, c in enumerate(self.clis):
            res = k1lib.kast.asyncGuard(c._jsF(meta))
            if res is NotImplemented: raise Exception(f"{c.__class__} can't be transpiled into js. Either it doesn't make sense, or it hasn't been built yet")
            header, fn, _async = res
            headers.append(header)
            calls.append(f"{'await ' if _async else ''}{fn}({dataIdx}[{i}])")
            if _async: anyAsync = True
        body = f"[{', '.join(calls)}]"
        return "\n".join(headers) + f"""\n{fIdx} = {'async ' if anyAsync else ''}({dataIdx}) => {body};""", fIdx

In [16]:
# Chained `+` stays flat (5 clis, no nesting), and mtmS.f puts the function at index 1 out of 5
assert len((cli.iden() + cli.op()**2 + cli.iden() + cli.iden() + cli.iden()).clis) == 5
assert len(mtmS(cli.iden(), cli.op()**2, cli.iden(), cli.iden(), cli.iden()).clis) == 5
assert range(5, 10) | mtmS.f(cli.op()**2, 1, 5) | cli.deref() == [5, 36, 7, 8, 9]

In [17]:
#export
def patchNumpy():
    """Patches numpy arrays and data types, so that piping like
this work::

    a = np.random.randn(3)
    a | shape() # returns (3,)

The patched ``__or__`` returns :data:`NotImplemented` whenever the right hand side
is a :class:`BaseCli`, so that Python falls back to the cli's ``__ror__``. For everything
else, the original ``__or__`` is called, and if that fails, the traceback is emitted as
a warning and None is returned.

Patching happens at most once (tracked by ``np._k1_patched``), and needs the
``forbiddenfruit`` package. If anything goes wrong, a warning is emitted instead of
an exception."""
    if getattr(np, "_k1_patched", False): return
    try:
        import forbiddenfruit, inspect; #forbiddenfruit.reverse(np.ndarray, "__or__") # old version
        def _guardedOr(oldOr):
            # Each replacement gets its own `oldOr`. Defining the replacement directly inside the loop
            # below would make all of them share 1 variable, and thus call the original __or__ of the
            # last type patched (closures bind late)
            def _newOr(self, v):
                if isinstance(v, BaseCli): return NotImplemented
                try: return oldOr(self, v)
                except Exception: warnings.warn(traceback.format_exc())
            return _newOr
        forbiddenfruit.curse(np.ndarray, "__or__", _guardedOr(np.ndarray.__or__))
        a = [getattr(np, dk) for dk in np.__dict__.keys()] # patching all numpy's numeric types
        # integer types are left alone
        for _type in [x for x in a if inspect.isclass(x) and issubclass(x, np.number) and not issubclass(x, np.integer)]:
            forbiddenfruit.curse(_type, "__or__", _guardedOr(_type.__or__))
        np._k1_patched = True
    except Exception as e: warnings.warn(f"Tried to patch __or__ operator of built-in type `np.ndarray` but can't because: {e}")

In [18]:
#export
# The dict view types are not available as builtin names, so grab them from a throwaway dict.
# The original `__or__` implementations are saved here, before patchDict() replaces them, so that the patched version can delegate to them
dict_keys = type({"a": 3}.keys());   oldDKOr = dict_keys.__or__
dict_items = type({"a": 3}.items()); oldDIOr = dict_items.__or__
oldSetOr = set.__or__
def patchDict():
    """Patches dictionaries's items and keys, so that piping
works::

    d = {"a": 3, "b": 4}
    d.keys() | deref() # returns ["a", "b"]
    d.items() | deref() # returns [["a", 3], ["b", 4]]

Patching happens at most once (tracked by ``np._k1_dict_patched``), and needs the
``forbiddenfruit`` package. If anything goes wrong, a warning is emitted instead of
an exception."""
    try:
        if np._k1_dict_patched: return # already patched. The flag lives on the numpy module
    except: pass # flag not there yet, so not patched yet
    try:
        import forbiddenfruit, traceback
        def _newDOr(self, v):
            """Why is this so weird? For some reason, if you patch dict_keys, you will
            also patch dict_items. So, if you were to have 2 functions, one for each,
            then they will override each other. The way forward is to have 1 single
            function detect whether it's dict_keys or dict_items, and call the correct
            original function. So why are there 2 curses? Well cause I'm lazy to check
            for this behavior in multiple python versions, so just have 2 to make sure."""
            if isinstance(v, BaseCli): return NotImplemented # lets Python fall back to the cli's __ror__
            try:
                # print(self, type(self), v, type(v))
                if isinstance(self, dict_keys): return oldDKOr(self, v)
                # NOTE(review): this tests for `dict`, not `dict_items`. A dict_items `self` matches none of
                # the branches, so the call appears to fall through and return None — confirm whether
                # `isinstance(self, dict_items)` was intended
                elif isinstance(self, dict):
                    if isinstance(v, dict_keys): return oldSetOr(set(self.keys()), set(v))
                    return oldDIOr(self, v)
                elif isinstance(self, set):
                    if isinstance(v, dict_keys): return oldSetOr(self, set(v))
                    return oldSetOr(self, v)
            except:
                # NOTE(review): looks like leftover debugging output, in addition to the warning below — confirm it's intended
                print(self, type(self), v, type(v))
                warnings.warn(traceback.format_exc())
                return NotImplemented
        forbiddenfruit.curse(dict_keys, "__or__", _newDOr)
        forbiddenfruit.curse(dict_items, "__or__", _newDOr)
        np._k1_dict_patched = True
    except Exception as e: warnings.warn(f"Tried to patch __or__ operator of built-in type `dict_keys` and `dict_items` but can't because: {e}")

In [1]:
!../../export.py cli/init --upload=True

./export started up - /home/quang/miniconda3/envs/torch/bin/python3
----- exportAll
16134   0   61%   
10444   1   39%   
Found existing installation: k1lib 1.7
Uninstalling k1lib-1.7:
  Successfully uninstalled k1lib-1.7
Looking in indexes: https://pypi.org/simple, http://10.104.0.3:3141/
Processing /home/quang/k1lib
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: k1lib
  Building wheel for k1lib (setup.py) ... [?25ldone
[?25h  Created wheel for k1lib: filename=k1lib-1.7-py3-none-any.whl size=5105965 sha256=11cad2524f7f1577a65e67daca6c2c6a34fcba46cf582ec78c5756bdd2b371be
  Stored in directory: /tmp/pip-ephem-wheel-cache-pgff0e2i/wheels/11/94/07/711323eb4091c7ef1b180ccc3793fc75a96521821bdd2932ac
Successfully built k1lib
Installing collected packages: k1lib
Successfully installed k1lib-1.7


In [43]:
!../../export.py cli/init

2024-03-08 07:13:58,046	INFO worker.py:1458 -- Connecting to existing Ray cluster at address: 192.168.1.17:6379...
2024-03-08 07:13:58,053	INFO worker.py:1633 -- Connected to Ray cluster. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m
./export started up - /home/kelvin/anaconda3/envs/ray2/bin/python3
----- exportAll
15684   0   61%   
10059   1   39%   
rm: cannot remove '__pycache__': No such file or directory
Found existing installation: k1lib 1.6.2
Uninstalling k1lib-1.6.2:
  Successfully uninstalled k1lib-1.6.2
running install
running bdist_egg
running egg_info
creating k1lib.egg-info
writing k1lib.egg-info/PKG-INFO
writing dependency_links to k1lib.egg-info/dependency_links.txt
writing requirements to k1lib.egg-info/requires.txt
writing top-level names to k1lib.egg-info/top_level.txt
writing manifest file 'k1lib.egg-info/SOURCES.txt'
reading manifest file 'k1lib.egg-info/SOURCES.txt'
adding license file 'LICENSE'
writing manifest file 'k1lib.egg-info/SOURCES.txt'
install

In [37]:
!../../export.py cli/init --bootstrap=True

Traceback (most recent call last):
  File "/home/kelvin/repos/labs/k1lib/k1lib/cli/../../export.py", line 10, in <module>
    try: from k1lib.imports import *; hasK1 = True
  File "/home/kelvin/repos/labs/k1lib/k1lib/__init__.py", line 9, in <module>
    from . import cli
  File "/home/kelvin/repos/labs/k1lib/k1lib/cli/__init__.py", line 17, in <module>
    from .utils import *
  File "/home/kelvin/repos/labs/k1lib/k1lib/cli/utils.py", line 339, in <module>
    if hasPandas: a.append(pd.core.arraylike.OpsMixin)                               # clipboard
AttributeError: module 'pandas' has no attribute 'core'
./export started up - /home/kelvin/anaconda3/envs/ray2/bin/python3
----- bootstrapping
Current dir: /home/kelvin/repos/labs/k1lib, /home/kelvin/repos/labs/k1lib/k1lib/cli/../../export.py
rm: cannot remove '__pycache__': No such file or directory
Found existing installation: k1lib 1.6
Uninstalling k1lib-1.6:
  Successfully uninstalled k1lib-1.6
running install
running bdist_egg
runni