In [1]:
#export
"""This module is for tools that will likely start the processing stream."""
from typing import Iterator, Union, Any
import urllib, subprocess, warnings, os, k1lib, threading
from k1lib.cli import BaseCli; import k1lib.cli as cli
__all__ = ["cat", "curl", "wget", "ls", "cmd", "requireCli"]

In [13]:
#export
def _catSimple(fileName:str=None, text:bool=True, _all:bool=False) -> Iterator[Union[str, bytes]]:
    fileName = os.path.expanduser(fileName)
    if text:
        if _all:
            with open(fileName) as f:
                lines = f.read().splitlines()
                yield lines
        else:
            with open(fileName) as f:
                while True:
                    line = f.readline()
                    if line == "": return
                    if line[-1] == "\n": yield line[:-1]
                    else: yield line
    else:
        with open(fileName, "rb") as f: yield f.read()
def _catWrapper(fileName:str, text:bool, _all:bool):
    """Calls :meth:`_catSimple` and unwraps its result when it is a single item.

In lazy text mode (``text`` and not ``_all``) the generator itself is returned.
In every other mode the generator only ever yields one object (list of lines
or a bytes blob), so that object is returned directly."""
    gen = _catSimple(fileName, text, _all)
    lazyLines = text and (not _all)
    if lazyLines: return gen
    return next(gen)
class _cat(BaseCli):
    """Cli form of :meth:`cat`: remembers the read options and reads whatever
file name gets piped into it."""
    def __init__(self, text, _all:bool):
        self.text = text
        self._all = _all
    def __ror__(self, fileName:str) -> Union[Iterator[str], bytes]:
        """Reads the piped-in ``fileName`` with the stored options."""
        return _catWrapper(fileName, text=self.text, _all=self._all)
def cat(fileName:str=None, text:bool=True, _all=False):
    """Reads a file, by default lazily and line by line.
Example::

    # display first 10 lines of file
    cat("file.txt") | headOut()
    # piping in also works
    "file.txt" | cat() | headOut()
    
    # rename file
    cat("img.png", False) | file("img2.png", False)

:param fileName: file to read. If None, then a :class:`~k1lib.cli.init.BaseCli`
    is returned instead, which reads the file name that is piped into it
:param text: if True, read text file, else read binary file
:param _all: if True, read the entire file at once and return all of its
    lines, instead of yielding them one by one. Faster, but uses more memory.
    Only matters in text mode, as binary mode always reads the entire file"""
    if fileName is not None: return _catWrapper(fileName, text, _all)
    return _cat(text, _all)

In [14]:
# Both call styles (file name piped in, file name passed directly) must give 10 lines after cli.head().
# NOTE: "inp.py" is a relative path, so it has to be reachable from the current working directory.
assert len("inp.py" | cat() | cli.head() | cli.toList()) == 10
assert len(cat("inp.py") | cli.head() | cli.toList()) == 10

In [4]:
#export
def curl(url:str) -> Iterator[str]:
    """Gets file from url and yields it line by line, without the trailing
newline. File can't be a binary blob, as every line is decoded to :class:`str`.
Example::

    # prints out first 10 lines of the website
    curl("https://k1lib.github.io/") | headOut()

:param url: anything :func:`urllib.request.urlopen` accepts
:return: lazy iterator of decoded lines"""
    # a bare ``import urllib`` does not load the ``request`` submodule, so import it explicitly
    import urllib.request
    # the response is closed once the generator is exhausted or closed
    with urllib.request.urlopen(url) as response:
        for line in response:
            line = line.decode()
            yield line[:-1] if line.endswith("\n") else line
def wget(url:str, fileName:str=None):
    """Downloads a file. Also returns the file name, in case you want to pipe it
to something else.

:param url: The url of the file
:param fileName: if None, then tries to infer it from the last segment of the
    url's path (query string and fragment are ignored). Falls back to
    ``index.html`` if the path does not end with a file name
:return: the file name the content was saved to"""
    # a bare ``import urllib`` does not load these submodules, so import them explicitly
    import urllib.parse, urllib.request
    if fileName is None:
        # only the path component names the file, "?query" and "#fragment" are not part of it
        fileName = urllib.parse.urlsplit(url).path.split("/")[-1]
        # url ends with "/" or has no path at all, so there is no name to reuse
        if fileName == "": fileName = "index.html"
    urllib.request.urlretrieve(url, fileName)
    return fileName

In [5]:
# Fetches the live site, so this cell needs network access; only checks that at least one line comes back.
assert curl("https://k1lib.github.io/") | cli.shape(0) > 0

In [6]:
#export
def ls(folder:str=None):
    """Lists everything (files and folders) directly inside the given folder.
Example::

    # returns List[str]
    ls("/home")
    # same as above
    "/home" | ls()
    # only outputs files, not folders
    ls("/home") | filt(os.path.isfile)

:param folder: folder to list. If None, returns a cli that lists the
    folder piped into it"""
    lister = _ls()
    return lister if folder is None else (folder | lister)
class _ls(BaseCli):
    """Cli form of :meth:`ls`: lists the folder that is piped into it."""
    def __ror__(self, folder:str):
        """Returns the entries of ``folder``, each prefixed with the folder path.

:param folder: folder to list, ``~`` is expanded. Trailing separators are ignored
:return: List[str] of paths"""
        # stripping trailing separators turns "/" into "", which os.listdir rejects,
        # so keep the path untouched when nothing would be left
        trimmed = folder.rstrip(os.sep) or folder
        folder = os.path.expanduser(trimmed)
        # os.path.join does not double the separator when folder is the root
        return [os.path.join(folder, e) for e in os.listdir(folder)]

In [7]:
# piping the folder in and passing it directly must agree
assert len(ls("/home")) == len("/home" | ls())
assert len(ls("/home")) > 0
# absolute home folder, resolved at run time instead of being hardcoded to one user's machine
assert len(os.path.expanduser("~") | ls() | cli.filt(os.path.isfile) | cli.deref()) > 0
# "~" has to be expanded by ls itself
assert len("~" | ls() | cli.filt(os.path.isfile) | cli.deref()) > 0

In [8]:
#export
# Registers the "quiet" cli setting that printStderr checks before printing anything
# (the second argument, False, is presumably its default value - confirm against k1lib.settings).
k1lib.settings.cli.add("quiet", False, "whether to mute extra outputs from clis or not")
newline = b'\n'[0] # integer value of the newline byte, to compare against ``line[-1]``
class lazySt:
    """Lazy, line-based view over a stream with a ``readline()`` method."""
    def __init__(self, st, text:bool):
        """Converts byte stream into lazy text/byte stream, with nice __repr__.

:param st: stream to read lines from
:param text: if True, lines are decoded as utf-8, else they are yielded as is"""
        self.st = st
        self.text = text
    def __iter__(self):
        """Yields lines one by one, without the trailing newline, until the
stream returns an empty line."""
        asText = self.text # decided once, when iteration starts
        while True:
            chunk = self.st.readline()
            if len(chunk) == 0: return
            if chunk[-1] == newline: chunk = chunk[:-1]
            yield chunk.decode("utf-8") if asText else chunk
    def __repr__(self):
        """Pipes the lines to ``cli.stdout()`` instead of returning them."""
        self | cli.stdout()
        return ""
def executeCmd(cmd:str, inp:bytes, text):
    """Runs a command, and returns stdout and stderr streams

:param cmd: command line, executed through the shell in ``k1lib.settings.wd``
:param inp: bytes to send to the process' stdin, or None to send nothing
:param text: passed on to :class:`lazySt`, decides between str and bytes lines
:return: ``(stdout, stderr)`` as 2 :class:`lazySt` objects"""
    p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=k1lib.settings.wd)
    def feedStdin():
        # A process is free to exit or close its stdin without reading everything,
        # which is not an error on our side. Same policy as Popen.communicate().
        try:
            if inp is not None: p.stdin.write(inp)
        except BrokenPipeError: pass
        finally:
            try: p.stdin.close()
            except BrokenPipeError: pass
    if inp is None: feedStdin()
    # Writing from this thread can deadlock: once the input is bigger than the
    # pipe buffer, write() blocks until the process reads more, while the process
    # itself may be blocked writing to a stdout that nobody is reading yet
    else: threading.Thread(target=feedStdin, daemon=True).start()
    return lazySt(p.stdout, text), lazySt(p.stderr, text)
def printStderr(err):
    """Prints the content of ``err`` in red, below an "Error encountered:"
header. Does nothing if the ``cli.quiet`` setting is on, or if ``cli.peek()``
hands back ``[]`` for the stream."""
    if k1lib.settings.cli.quiet: return
    _first, it = err | cli.peek()
    hasOutput = it != []
    if hasOutput:
        it | cli.insert("\nError encountered:\n") | cli.apply(k1lib.fmt.txt.red) | cli.stdout()
def requireCli(cliTool:str):
    """Searches for a particular cli tool (eg. "ls"), throws ImportError if not
found, else do nothing

The tool is looked up on the ``PATH`` and is not executed.

:param cliTool: name (or path) of the executable
:raises ImportError: if the executable can't be found"""
    # The previous implementation ran the tool and called len() on its stderr
    # stream, which has no __len__, so it failed with TypeError for every tool.
    # Looking the name up also avoids executing the tool just to probe for it.
    import shutil
    if shutil.which(cliTool) is None: raise ImportError(f"""Can't find cli tool {cliTool}. Please install it first.""")

In [9]:
#export
class cmd(BaseCli):
    def __init__(self, cmd:str, mode:int=1, text=True, block=False): # 0: return (stdout, stderr). 1: return stdout, 2: return stderr
        """Runs a command, and returns the output line by line. Can pipe in some
inputs. If no inputs then have to pipe in :data:`None`. Example::

    # return detailed list of files
    None | cmd("ls -la")
    # return list of files that ends with "ipynb"
    None | cmd("ls -la") | cmd('grep ipynb$')

It might be tiresome to pipe in :data:`None` all the time. So, you can use ">"
operator to yield values right away::

    # prints out first 10 lines of list of files
    cmd("ls -la") > headOut()

If you're using Jupyter notebook/lab, then if you were to display a :class:`cmd`
object, it will print out the outputs. So, a single command ``cmd("mkdir")``
displayed at the end of a cell is enough to trigger creating the directory.

Reminder that ">" operator in here sort of has a different meaning to that of
:class:`~k1lib.cli.init.BaseCli`. So you kinda have to be careful about this::

    # returns a serial cli, cmd not executed
    cmd("ls -la") | deref()
    # executes cmd with no input stream and pipes output to deref
    cmd("ls -la") > deref()
    # returns a serial cli
    cmd("ls -la") > grep("txt") > headOut()
    # executes pipeline
    cmd("ls -la") > grep("txt") | headOut()

General advice is, right after a :class:`cmd`, use ">", and use "|" everywhere else.

Let's see a few more exotic examples. File ``a.sh``:

.. code-block:: bash

    #!/bin/bash

    echo 1; sleep 0.5
    echo This message goes to stderr >&2
    echo 2; sleep 0.5
    echo $(</dev/stdin)
    sleep 0.5; echo 3

Examples::

    # returns [b'1', b'2', b'45', b'3'] and prints out the error message
    "45" | cmd("./a.sh", text=False) | deref()
    # returns [b'This message goes to stderr']
    "45" | cmd("./a.sh", mode=2, text=False) | deref()
    # returns [[b'1', b'2', b'45', b'3'], [b'This message goes to stderr']]
    "45" | cmd("./a.sh", mode=0, text=False) | deref()

Performance-wise, stdout and stderr will yield values right away as soon
as the process outputs it, so you get real time feedback. However, this will
convert the entire input into a :class:`bytes` object, and not feed it bit by
bit lazily, so if you have a humongous input, it might slow you down a little.

Settings:
- cli.quiet: if True, won't display errors in mode 1

:param mode: if 0, returns ``(stdout, stderr)``. If 1, returns ``stdout`` and prints
    ``stderr`` if there are any errors. If 2, returns ``stderr``
:param text: whether to decode the outputs into :class:`str` or return raw :class:`bytes`
:param block: whether to wait for the task to finish before returning to Python or not"""
        super().__init__(); self.cmd = cmd; self.mode = mode
        self.text = text; self.block = block; self.ro = k1lib.RunOnce()
    def __ror__(self, it:Union[None, str, bytes, Iterator[Any]]) -> Iterator[Union[str, bytes]]:
        """Pipes in lines of input, or if there's nothing to
pass, then pass None"""
        # has to be bound outside of the run-once branch below, else the mode
        # dispatch at the end fails with UnboundLocalError when that branch is skipped
        mode = self.mode
        if not self.ro.done():
            # identity check: "!=" would go through the custom __ne__ of whatever is piped in
            if it is not None:
                # anything that's not already a single str/bytes is treated as lines of input
                if not isinstance(it, (str, bytes)): it = it | cli.toStr() | cli.join("\n")
                if not isinstance(it, bytes): it = it.encode("utf-8")
            self.out, self.err = executeCmd(self.cmd, it, self.text)
        if self.block:
            # reading both streams to the end waits for the process to close them
            self.out = self.out | cli.deref()
            self.err = self.err | cli.deref()
        if mode == 0: return (self.out, self.err)
        elif mode == 1:
            # stderr is printed from another thread, so that stdout can be returned right away
            threading.Thread(target=lambda: printStderr(self.err)).start()
            return self.out
        elif mode == 2: return self.err
    def __gt__(self, it):
        """Executes the command with no input, and pipes the result into ``it``"""
        return None | self | it
    def __repr__(self):
        """Executes the command with no input, and returns the repr of the result"""
        return (None | self).__repr__()

In [10]:
# NOTE: in every assert below "|" binds tighter than ">" and "==", so the whole pipeline runs before the comparison.
# Plain run, blocking run, and feeding one command's output into another.
assert None | cmd("ls -la") | cli.shape(0) > 0
assert None | cmd("ls -la", block=True) | cli.shape(0) > 0
assert None | cmd("ls -la") | cmd('grep ipynb$') | cli.shape(0) > 0
# ">" right after a cmd executes it with no input; here the result is printed, so check the captured stdout.
with k1lib.captureStdout() as out: cmd("ls -la") > cli.headOut()
assert len(out()) > 3
# "|" right after a cmd executes nothing, and gives back an object whose type is named "serial".
assert type(cmd("ls -la") | cli.deref()).__name__ == "serial"
assert len(cmd("ls -la") > cli.deref()) > 0
assert None | cmd("ls -la") | cmd("grep ipynb") | cli.deref() | cli.shape(0) > 0
# The 3 modes, run against test/a.sh (presumably the a.sh script listed in cmd's docstring - confirm).
# Mode 1 (default): stdout is returned, and something is expected to be printed as well.
with k1lib.captureStdout() as out: assert "45" | cmd("test/a.sh", text=False) | cli.deref() == [b'1', b'2', b'45', b'3']
assert len(out()) > 2
# Mode 2: stderr only. Mode 0: both stdout and stderr.
assert "45" | cmd("test/a.sh", mode=2, text=False) | cli.deref() == [b'This message goes to stderr']
assert "45" | cmd("test/a.sh", mode=0, text=False) | cli.deref() == [[b'1', b'2', b'45', b'3'], [b'This message goes to stderr']]

In [16]:
!../../export.py cli/inp

Current dir: /home/kelvin/repos/labs/k1lib, ../../export.py
rm: cannot remove '__pycache__': No such file or directory
Found existing installation: k1lib 0.16a1
Uninstalling k1lib-0.16a1:
  Successfully uninstalled k1lib-0.16a1
running install
running bdist_egg
running egg_info
creating k1lib.egg-info
writing k1lib.egg-info/PKG-INFO
writing dependency_links to k1lib.egg-info/dependency_links.txt
writing requirements to k1lib.egg-info/requires.txt
writing top-level names to k1lib.egg-info/top_level.txt
writing manifest file 'k1lib.egg-info/SOURCES.txt'
reading manifest file 'k1lib.egg-info/SOURCES.txt'
adding license file 'LICENSE'
writing manifest file 'k1lib.egg-info/SOURCES.txt'
installing library code to build/bdist.linux-x86_64/egg
running install_lib
running build_py
creating build
creating build/lib
creating build/lib/k1lib
copying k1lib/_learner.py -> build/lib/k1lib
copying k1lib/fmt.py -> build/lib/k1lib
copying k1lib/_context.py -> build/lib/k1lib
copying k1lib/selector.py ->