In [1]:
#export
import k1lib, json, base64, threading, time, random, struct, collections, os, sys, tempfile, contextlib, math, functools, inspect, random, pprint, html; import k1lib.cli as cli; k1 = k1lib
from collections import deque

Similar to `_basics`, but this module assumes that the environment is fully loaded, so it can use any functionality, not just pure Python alone.

In [2]:
#export
__all__ = ["log", "aes_encrypt", "aes_decrypt", "aes_encrypt_json", "aes_decrypt_json", "tempObj", "TimeSeries", "speed", "compileCExt"]

In [3]:
#export
# Shared state between log() and its background sender thread
_logObj = {"loaded": False, "logMsgs": deque(), "path": None}
def _thTarget():
    """Thread body used by :meth:`log`. Opens a single websocket connection to the
ingest endpoint and forwards queued messages in FIFO order, sleeping 10ms
whenever the queue is empty. Only stops if an exception is raised."""
    import asyncio; from k1lib import kws
    async def _pump():
        async with kws.WsClient("wss://ws.logs.mlexps.com/_k1_ingest") as ws:
            while True:
                pending = _logObj["logMsgs"]
                if pending: await ws.send(pending.popleft())
                else: await asyncio.sleep(0.01)
    loop = asyncio.new_event_loop()
    loop.run_until_complete(_pump())
def log(path:str, obj:"any"):
    """Logs random debug statements to logs.mlexps.com server.
Example::

    k1.log("ggdrive/topic1", "some message")
    k1.log("ggdrive/topic1/sub2", {"some": "json", "object": 2})
    
    # I typically do it like this, so that I can filter down only the messages that I want based on severity
    k1.log("ggdrive/info", {"some": "json", "object": 2})
    k1.log("ggdrive/error", {"some": "json", "object": 2})

Visit the website https://logs.mlexps.com/watch/ggdrive, or
/watch/ggdrive/topic1, or /watch/ggdrive/topic1/sub2 to view all logs
coming in.

This function never blocks on the network: the message is only put on an
in-memory queue, and a background thread (started on the first call) sends
it. That thread is a daemon thread, so it does not keep the interpreter
alive, which also means messages still queued at exit may not be sent.

:param path: topic path of the message
:param obj: message. str, float and int are sent as-is, everything else is
    json-serialized and then base64-encoded"""
    if not _logObj["loaded"]:
        _logObj["loaded"] = True
        # daemon=True: the sender loops forever, a non-daemon thread would prevent the process from ever exiting
        threading.Thread(target=_thTarget, daemon=True).start()
    if not isinstance(obj, (str, float, int)):
        obj = base64.b64encode(json.dumps(obj).encode()).decode()
    _logObj["logMsgs"].append(f"{path}/{obj}")

In [4]:
#export
if k1lib.settings.startup.import_optionals:
    try:
        from scipy import stats
        __all__.append("pValue")
        def pValue(zScore):
            """2-sided p value of a particular z score. Requires :mod:`scipy`.

:param zScore: z score, sign is ignored
:return: 2 times the survival function of the standard normal distribution at ``abs(zScore)``"""
            return stats.norm.sf(abs(zScore))*2
    except Exception: pass # scipy is optional. Not a bare except, so that KeyboardInterrupt/SystemExit still propagate

In [5]:
#export
try:
    Crypto = k1lib.dep("Crypto", "pycryptodome", url="https://pycryptodome.readthedocs.io/en/latest/")
    # Placeholder versions, only kept if the real imports below fail. Their body just touches
    # Crypto.Cipher. NOTE(review): presumably the object returned by k1lib.dep() raises a helpful
    # "please install" error on attribute access when the package is missing; confirm
    def aes_encrypt(plaintext:bytes, key:bytes=None) -> str: Crypto.Cipher
    def aes_decrypt(ciphertext:str, key:bytes=None) -> bytes: Crypto.Cipher
    def aes_encrypt_json(obj:dict, key:bytes=None) -> str: Crypto.Cipher
    def aes_decrypt_json(ciphertext:str, key:bytes=None) -> dict: Crypto.Cipher
    from Crypto.Cipher import AES
    from Crypto.Random import get_random_bytes
    from Crypto.Util.Padding import pad, unpad
    def aes_encrypt(plaintext:bytes, key:bytes=None) -> str:
        """Encrypts a message using AES.
Example::

    res = k1.aes_encrypt(b"some message") # can return '3HV7PKKQL2DLWQWBBTETQTXNMC4Q6DJ2FSS73A7NCRAX6K4ZZKXQ===='
    k1.aes_decrypt(res) # returns b"some message"

After encrypting, this is encoded using base32, ready to be used in urls. This function
is a convenience function meant for small messages here and there, and is not intended
for heavy duty encryption.

The key is automatically generated, and is configurable via ``settings.cred.aes.key``

See also: :meth:`aes_encrypt_json`

:param plaintext: plaintext to encrypt. If it's not bytes, it's converted to str and utf-8 encoded first
:param key: 128 bit key, if not specified then will auto generate one on library load at ``settings.cred.aes.key``
:return: base32 string of the random iv followed by the AES-CBC ciphertext"""
        if not isinstance(plaintext, bytes): plaintext = f"{plaintext}".encode()
        cipher = AES.new(key or k1lib.settings.cred.aes.key, AES.MODE_CBC); ciphertext = cipher.encrypt(pad(plaintext, AES.block_size))
        # base32 output only contains A-Z, 2-7 and "=", so the 2 replaces below never change anything. Kept for parity with aes_decrypt()
        return base64.b32encode(cipher.iv + ciphertext).decode().replace(*"/_").replace(*"+-")
    def aes_decrypt(ciphertext:str, key:bytes=None) -> bytes:
        """Decrypts a message using AES.
See :meth:`aes_encrypt` for more information.

:param ciphertext: ciphertext to decrypt
:param key: 128 bit key, if not specified then will auto generate one on library load at ``settings.cred.aes.key``
:return: the original plaintext bytes"""
        ciphertext = base64.b32decode(ciphertext.replace(*"-+").replace(*"_/").encode()); iv = ciphertext[:AES.block_size]; cipher = AES.new(key or k1lib.settings.cred.aes.key, AES.MODE_CBC, iv)
        return unpad(cipher.decrypt(ciphertext[AES.block_size:]), AES.block_size)
    def aes_encrypt_json(obj:dict, key:bytes=None) -> str:
        """Encrypts a Python object using AES.
Example::

    a = k1.aes_encrypt_json({"a": 3})
    k1.aes_decrypt_json(a) # returns {"a": 3}

    k1.aes_decrypt_json(k1.aes_encrypt_json([1, 2, 3])) # returns [1, 2, 3]
    k1.aes_decrypt_json(k1.aes_encrypt_json("abc"))     # returns "abc"

See also: :meth:`aes_encrypt`

:param obj: any json-serializable object
:param key: optional key, forwarded to :meth:`aes_encrypt`"""
        return aes_encrypt(json.dumps(obj).encode(), key)
    def aes_decrypt_json(ciphertext:str, key:bytes=None) -> dict:
        """Decrypts a ciphertext created by :meth:`aes_encrypt_json` back to a Python object.

:param ciphertext: ciphertext to decrypt
:param key: optional key, forwarded to :meth:`aes_decrypt`"""
        return json.loads(aes_decrypt(ciphertext, key).decode())
    k1lib.settings.cred.add("aes", k1lib.Settings().add("key", get_random_bytes(16), "16-byte aes key, used in aes_encrypt() and aes_decrypt()", sensitive=True), "anything related to AES block cipher")
except Exception: pass # pycryptodome is optional. Not a bare except, so that KeyboardInterrupt/SystemExit still propagate

In [6]:
# Round-trip smoke tests for the AES helpers: raw bytes first, then a few json-serializable objects
assert aes_decrypt(aes_encrypt(b"some message")) == b"some message"
assert all(aes_decrypt_json(aes_encrypt_json(sample)) == sample for sample in ({"a": 3}, [1, 2, 3], "abc"))

In [7]:
#export
# Default lifetime setting plus module-level bookkeeping used by k1.tempObj()
k1lib.settings.add("tempObjLifetime", 60, "Default lifetime in seconds used in k1.tempObj()");
_tempObjs = {}                                                # key -> stored object
_tempTimeouts = {}                                            # key -> unix time after which the object gets deleted
_tempObj_autoInc = k1lib.AutoIncrement(prefix="_k1_tempObj_") # generates the keys returned by tempObj()
_tempObjThreadStarted = [False]                               # 1-element list, so it can be mutated from inside tempObj()
def tempObj(x, timeout=None):
    """Stores an object that's meant to exist for a short amount of time,
and then will be automatically deleted. Example::

    key = k1.tempObj("Suika Ibuki", 10) # stores some string that will only last for 10 seconds
    k1.tempObj(key)                     # returns "Suika Ibuki"
    time.sleep(20)
    k1.tempObj(key)                     # returns None

The default timeout value is 60 seconds, configurable in :data:`~k1lib.settings`.tempObjLifetime

Because keys are recognized by their "_k1_tempObj_" prefix, a string that
starts with that prefix is always treated as a lookup and can't be stored.

:param x: object to store, or a key previously returned by this function
:param timeout: lifetime in seconds. If None, uses ``settings.tempObjLifetime``
:return: key of the newly stored object, or the stored object (None if not found) when given a key"""
    if not _tempObjThreadStarted[0]:
        # flip the flag first, so that the cleanup job is registered exactly once instead of once per call
        _tempObjThreadStarted[0] = True
        k1lib.cron(delay=1)(_tempCleanupThread)
    if isinstance(x, str) and x.startswith("_k1_tempObj_"): return _tempObjs.get(x, None)
    k = _tempObj_autoInc()
    if timeout is None: timeout = k1lib.settings.tempObjLifetime
    _tempObjs[k] = x; _tempTimeouts[k] = time.time() + timeout; return k
def _tempCleanupThread():
    """Deletes every temp object whose deadline has passed. Registered by :meth:`tempObj` via ``k1lib.cron(delay=1)``"""
    cutoff = time.time()
    expired = [key for key, deadline in list(_tempTimeouts.items()) if cutoff > deadline]
    for key in expired:
        del _tempObjs[key]
        del _tempTimeouts[key]

In [8]:
# Smoke test: an object stored with a 1 second lifetime is retrievable right away, and gone 2 seconds later
x = tempObj("Suika Ibuki", 1)
assert tempObj(x) == "Suika Ibuki"
time.sleep(2)
assert tempObj(x) is None

In [72]:
import pprint

In [None]:
#export
# Module-level bookkeeping for TimeSeries
_time = time.time                                       # alias used when timestamping new data points
_timeSeriesD = {}                                       # TimeSeries.name -> TimeSeries
_timeSeriesID = {}                                      # TimeSeries.idx -> TimeSeries
_timeSeriesAutoInc = k1.AutoIncrement(prefix="_k1_ts_") # generates a name when none is given
_timeSeriesIdxAutoInc = k1.AutoIncrement()              # generates TimeSeries.idx
class TimeSeries:
    def __init__(self, name:str=None, fn:str=None, storeRaw:bool=True, retention:int=7*86400, coldStore:bool=False):
        """Manages time series data, compresses it, and backs it up on disk if necessary.
Example::

    ts1 = k1.TimeSeries(name="ts1")
    ts1.append(3, 4, 5) # do this anywhere you'd like. This saves 1 data point containing 3 floats to the sqlite database
    
    for i in range(600): # deposits 600 samples over 1 minute time span
        ts1.append(random.random(), random.random(), random.random())
        time.sleep(0.1)
    
    ts1.getRaw()  # returns something like [[1737213223.4139452, (3, 4, 5)], ...]
    ts1.getRate() # returns something like [(1737213313.0752494, 10.066128035852211), ...]
    ts1.getPert() # returns something like [[1737213568.9260075, [(0.009, 0.07, 0.47, 0.89, 0.99), (0.001, 0.11, 0.56, 0.90, 0.99), (0.0006, 0.08, 0.46, 0.89, 0.99)]], ...]

For :meth:`getRate`, first number is the timestamp, second is the number of data points/second.
For :meth:`getPert`, this will return the percentiles of the input data (0%, 10%, 50%, 90%, 100%) for each variable

Why does this functionality exist? Well, it's because managing time series usually involves
a lot of pain. You need to set up a time series database like Prometheus, or Postgresql to be
extra simple. But setting up all that infrastructure takes a lot of effort, and again, if it's
hard, you won't do it, or will be incentivised not to do it. So this class is meant to be an
object that manages time series data. It manages it in such a way so that you can spam this
all over the place and get lots of functionalities right out of the box, without an external
server. All data is stored in several tables inside a sqlite database. Each time series gets
its own sqlite database. Some performance numbers to keep in mind:

- Data write speed: 100k data points/s
- Data read speed:  400k data points/s
- Disk space used: 50 bytes/data point for small amounts of variables (say ~3)

Other features include the ability to auto delete old data so as not to accumulate over time.
When old data is deleted, there's also an option to save the deleted data in a separate file
for cold storage, so that it's more efficient storage-wise than sqlite, but harder to access.
Cold storage space used: 14 + nVars * 4. This is 5x smaller than sqlite for 3 variables

There will be scans every 10 seconds on another thread, that compress the raw data into a
usable form. If there are too few data points (<20 data points), then it will skip that scan
cycle. Data (refined and raw) will be saved into a sqlite database, stored at ``{fn}.db``.
If no file name is specified, then the sqlite database lives in memory only.

For every method, you can also specify an index number::

    ts1 = k1.TimeSeries(name="ts1")
    ts1.appendIdx(2, 3, 4, 5) # index 2 with 3 values (3, 4, 5)
    
    ts1.getRaw(idx=2)  # returns all data points with idx=2, something like [[1737213223.4139452, (3, 4, 5)], ...]
    ts1.getPert(idx=2) # returns all percentiles with idx=2, something like [[1737213568.9260075, [(0.009, 0.07, 0.47, 0.89, 0.99), (0.001, 0.11, 0.56, 0.90, 0.99), (0.0006, 0.08, 0.46, 0.89, 0.99)]], ...]

This is useful in situations like when you have lots of sensors for different devices. Then
each idx can be the device id, and so you can store lots of variables in 1 go for 1 specific
device. Then you can query the time series later on for 1 specific device only.

You can get all TimeSeries via :meth:`allData`.

.. note::

    Performance details
    
    This is a more detailed section going over what actually happens underneath in case you
    care about performance. Everything is stored in sqlite. If file name is not given, then
    it still uses sqlite, but in memory instead.
    
    When a new .append() happens, no database interaction happens at all. It's simply just
    appended to a totally normal, internal list, so <1us. Then there are 2 background
    threads to help collate and store the data. One is fast (10s scan) and one is slow
    (60s scan). The fast one distributes new data points to 3 internal stacks, "rawRaw",
    "rateRaw" and "pertD". If rawRaw has any elements, it will be stored in sqlite. If
    rateRaw has at least 10 elements, it will calculate the rate and store in sqlite. If
    pertD (indexed by idx) has at least 100 elements, it will calculate percentiles and
    store in sqlite.
    
    This architecture has several ramifications:
    
    * Time taken to execute .append() is very fast
    * If no .append() are called for a long time, no sqlite queries will be executed
    * If too few .append() are called, data might not show up in rate and percentile views at all
    * Might have to wait for at least 10 seconds before .getRaw() has the newest data
    * If python process exited, there may still be data stored in memory that's not recorded on sqlite yet
    
    For the second loop, it grabs all rows from sqlite that are older than ``retention``
    seconds, compresses them to an efficient binary format, then appends to the cold
    storage file. My code is not as bulletproof as sqlite, but still works fine. Because
    the loop is very slow, it shouldn't affect performance much.

:param name: just for cosmetic, to remind you what this does. Has to be unique, and can't contain "/". Auto generated if not specified
:param fn: sqlite file name (without the ".db" extension) to store this time series data. If not specified, then stores database in memory
:param storeRaw: whether to store raw data points
:param retention: seconds before deleting old data point permanently, default 1 week
:param coldStore: if True, when data points past retention time, it will be
    packed into a single binary file for cold storage
:raises Exception: if ``coldStore`` is requested without ``fn``, if the name contains "/", or if the name is already taken"""
        if coldStore and fn is None: raise Exception("If using cold storage, has to specify file name!") # TODO: ts1 | aS(repr), ts1 | toHtml()
        self._initialized = False; self.name = name or _timeSeriesAutoInc(); self.idx = _timeSeriesIdxAutoInc(); self.fn = fn; self.storeRaw = storeRaw; self.retention = retention; self.coldStore = coldStore; self.dbLock = threading.Lock()
        if "/" in self.name: raise Exception("Can't have forward slash in the name")
        if self.name in _timeSeriesD: raise Exception(f"Name '{self.name}' has appeared before. Please use a different name")
        # registers this instance in the global name/idx registries, then sets up sqlite. ._initialized only flips to True once everything is ready
        self._raw = []; self._rawRaw = []; self._rateRaw = []; _timeSeriesD[self.name] = _timeSeriesID[self.idx] = self; self._setupDb(); self._pertD = collections.defaultdict(lambda: []); self._initialized = True # maps idx -> [time, values]
        self._latest = [] # latest values
    def _setupDb(self):
        fn = self.fn
        if fn is not None and os.path.exists(f"{fn}.db"): self._s = s = cli.sql(f"{fn}.db", mode="lite")["default"]
        else: self._s = s = cli.sql(":memory:" if fn is None else f"{fn}.db", mode="lite")["default"]
        s.query("CREATE TABLE IF NOT EXISTS rate (id INTEGER PRIMARY KEY AUTOINCREMENT, time INTEGER, rate REAL);"); s.query("CREATE INDEX IF NOT EXISTS rate_time ON rate (time);"); 
        s.query("CREATE TABLE IF NOT EXISTS raw (id INTEGER PRIMARY KEY AUTOINCREMENT, time INTEGER, idx INTEGER, data BLOB);"); s.query("CREATE INDEX IF NOT EXISTS raw_time ON raw (time);"); s.query("CREATE INDEX IF NOT EXISTS raw_idx ON raw (idx);"); # .data is struct.pack("ffff", values)
        s.query("CREATE TABLE IF NOT EXISTS pert (id INTEGER PRIMARY KEY AUTOINCREMENT, time INTEGER, idx INTEGER, data BLOB);"); s.query("CREATE INDEX IF NOT EXISTS pert_time ON pert (time);"); s.query("CREATE INDEX IF NOT EXISTS pert_idx ON pert (idx);"); # .data is struct.pack of [*n*[0, 10, 50, 90, 100]]
        s.query("CREATE TABLE IF NOT EXISTS tfs (id INTEGER PRIMARY KEY AUTOINCREMENT, method INTEGER, idx INTEGER, dIdx INTEGER, coeffs BLOB, mplJson TEXT, docs TEXT);"); s.query("CREATE INDEX IF NOT EXISTS tfs_idx ON tfs (idx);"); s.query("CREATE INDEX IF NOT EXISTS tfs_dIdx ON tfs (dIdx);"); # .data is struct.pack, depending on type, .method is the method to transform (0: linear, y = a*x+b)
        while s["tfs"] is None: print("."); time.sleep(0.1) # hangs until tables are created
        self._dbRaw = s["raw"]; self._dbRate = s["rate"]; self._dbPert = s["pert"]; self._dbTfs = s["tfs"]
    @staticmethod
    def allData():
        """Returns the global registry of all TimeSeries, a dict of name -> TimeSeries"""
        return _timeSeriesD
    @staticmethod
    def allIData():
        """Returns the global registry of all TimeSeries, a dict of TimeSeries.idx -> TimeSeries"""
        return _timeSeriesID
    @k1.cache(timeout=10, name="ts_idxs", docs="caches k1.TimeSeries idxs, aka all available idx")
    def idxs(self) -> "list[int]":
        """Grabs all available idxs. Looks at the raw table if ``.storeRaw`` is True, else at the percentile table"""
        if self.storeRaw: table, sql = self._dbRaw, "select distinct idx from raw"
        else: table, sql = self._dbPert, "select distinct idx from pert"
        return [row[0] for row in table.query(sql)]
    @k1.cache(timeout=30, name="ts_dIdx", docs="caches k1.TimeSeries dIdx")
    def getDIdx(self, idx:int=0, timeStr:str="1 day", limit:int=10000) -> int:
        """Grabs all available dIdx of a particular idx in the past 1 day.
Internally, this scans for all raw data points and grab the largest length"""
        x = [len(values) for t, values in self.getRaw(*k1.parseTimeStr(timeStr), idx=idx, limit=limit)]
        return max(x) if len(x) > 0 else 0
    def append(self, *values):
        """Queues 1 data point under idx 0. No database interaction happens here. Returns the values passed in"""
        now = _time(); packed = struct.pack("f"*len(values), *values)
        self._raw.append([now, 0, values, packed])
        return values
    def appendIdx(self, idx, *values):
        """Queues 1 data point under the given idx. No database interaction happens here. Returns the values passed in"""
        now = _time(); packed = struct.pack("f"*len(values), *values)
        self._raw.append([now, idx, values, packed])
        return values
    def getLatest(self, idx:int=0) -> "float, list[float]":
        """Grabs data of current instance that has been committed. Returns something like ``(1737213223.4139452, (3, 4, 5))``. If no data, returns ``(0, [])``"""
        res = self._dbRaw.query(f"select time, data from raw where idx = {idx} order by id desc limit 1")
        if len(res) == 0: return [0, []]
        t, data = res[0]; return [t, struct.unpack((len(data)//4)*"f", data)]
    def getLatestTf(self, idx:int=0) -> "float, list[float]":
        res = self._dbRaw.query(f"select time, data from raw where idx = {idx} order by id desc limit 1")
        if len(res) == 0: return [0, []]
        t, data = res[0]; data = struct.unpack((len(data)//4)*"f", data)
        return [t, [f(x) for x,f in zip(data, [self.getTf(idx, dIdx)[4] for dIdx in range(len(data))])]]
    def setTf(self, idx:int=0, dIdx:int=0, method:int=0, coeffs:list[float]=None, mplJson:object=None, docs:str=""):
        coeffs = coeffs or [1, 0]; coeffs = struct.pack(len(coeffs)*"f", *coeffs); mplJson = json.dumps(mplJson or {}); res = self.getTf(idx, dIdx)
        if res[0] is None: self._dbTfs.query(f"insert into tfs (idx, dIdx, method, coeffs, mplJson, docs) values (?, ?, ?, ?, ?, ?)", idx, dIdx, method, coeffs, mplJson, docs)
        else: self._dbTfs.query(f"update tfs set idx = ?, dIdx = ?, method = ?, coeffs = ?, mplJson = ?, docs = ? where id = {res[0]}", idx, dIdx, method, coeffs, mplJson, docs)
    @k1.cache(timeout=30, maxsize=300, name="ts_getTf", docs="TimeSeries.getTf() cache")
    def getTf(self, idx:int=0, dIdx:int=0):
        res = self._dbTfs.query(f"select id, method, coeffs, mplJson, docs from tfs where idx = ? and dIdx = ?", idx, dIdx)
        if len(res) == 0: return [None, 0, [1, 0], {}, lambda x: x, ""]
        tfId, method, coeffs, mplJson, docs = res[0]; f = lambda x: x
        if method == 0: coeffs = struct.unpack("ff", coeffs); a, b = coeffs; f = lambda x: x*a+b
        return tfId, method, list(coeffs), json.loads(mplJson), f, docs
    def getRaw(self, startTime:int=None, stopTime:int=None, idx:int=0, limit:int=1000_000):
        """Grabs raw data of this time series. Returns something like ``[[1737213223.4139452, (3, 4, 5)], ...]`` """
        if not self.storeRaw: raise Exception(".storeRaw is False, so all raw data has been deleted")
        s = f"select time, data from raw where true"
        if idx != "all": s += f" and idx = {idx}"
        if startTime: s += f" and time >= {startTime}"
        if stopTime:  s += f" and time <  {stopTime}"
        s += f" order by time limit {limit}"; data = self._dbRaw.query(s)
        if len(data) == 0: return []
        s = (len(data[0][1])//4)*"f"; res = []
        try: # fast way, caches "s" computation
            for t, vs in data: res.append([t, struct.unpack(s, vs)])
        except: # slow way, in case number of variables are different
            for t, vs in data: res.append([t, struct.unpack((len(vs)//4)*"f", vs)])
        return res
    def getRawTf(self, startTime:int=None, stopTime:int=None, idx:int=0, limit:int=1000_000):
        """Grabs transformed data of this time series. Returns something like ``[[1737213223.4139452, (3, 4, 5)], ...]`` """
        if not self.storeRaw: raise Exception(".storeRaw is False, so all raw data has been deleted")
        res = self.getRaw(startTime, stopTime, idx, limit); maxVars = max(len(x[1]) for x in res) if len(res) else 0; tfFs = [self.getTf(idx, i)[4] for i in range(maxVars)]
        for t, values in res: yield t, [f(x) for x,f in zip(values, tfFs)]
    def getRate(self, startTime:int=None, stopTime:int=None, limit:int=10000):
        """Grabs data ingest rate of this time series. Returns something like ``[(1737213313.0752494, 10.066128035852211), ...]``"""
        s = f"select time, rate from rate where true"
        if startTime: s += f" and time >= {startTime}"
        if stopTime:  s += f" and time  <  {stopTime}"
        s += f" order by time limit {limit}"; data = self._dbRate.query(s)
        return data
    def getPert(self, startTime:int=None, stopTime:int=None, idx:int=0, limit:int=10000):
        """Grabs data percentiles of this time series. Returns something like ``[[1737213568.9260075, [(0.009, 0.07, 0.47, 0.89, 0.99), (0.001, 0.11, 0.56, 0.90, 0.99), (0.0006, 0.08, 0.46, 0.89, 0.99)]], ...]``"""
        def _batched(x): return [x[5*i:5*(i+1)] for i in range(len(x)//5)]
        s = f"select time, data from pert where idx = {idx}"
        if startTime: s += f" and time >= {startTime}"
        if stopTime:  s += f" and time <  {stopTime}"
        s += f" order by time limit {limit}"; data = self._dbPert.query(s)
        if len(data) == 0: return []
        nvars = len(data[0][1])//4; s = nvars*"f"; res = []
        try: # fast way, caches "s" computation
            for t, vs in data: x = struct.unpack(s, vs); res.append([t, [x[5*i:5*(i+1)] for i in range(nvars//5)]])
        except: # slow way, in case number of variables are different between lines
            for t, vs in data: x = struct.unpack((len(vs)//4)*"f", vs); res.append([t, [x[5*i:5*(i+1)] for i in range(len(x)//5)]])
        return res
    def _ls(self): return self.getRaw(limit=100)
    @k1.cache(timeout=60, maxsize=1000, name="ts_len", docs="caches k1.TimeSeries lengths")
    def __len__(self):
        if not self.storeRaw: raise Exception(f"TimeSeries '{self.name}'.storeRaw is False, no length available")
        res = self._dbRaw.query("""select max(id) from raw limit 1;"""); return res[0][0] if len(res) > 0 else 0
    def __repr__(self): return f"<TimeSeries name='{self.name}' fn='{self.fn}' storeRaw={self.storeRaw} retention={self.retention} coldStore={self.coldStore}>"
    def plotRaw(self, startTime:float, stopTime:float, idx:int=0, dIdx:"int|str"="all", window:int=1):
        import matplotlib.pyplot as plt; s = self; data = s.getRaw(startTime, stopTime, idx=idx) | ~cli.apply(lambda x,y: [[x, e] for e in y]) | cli.T() | cli.apply(cli.T() | (cli.apply(cli.window(10) | cli.toMean().all()) if window > 1 else cli.iden())) | cli.deref()
        with k1.mplLock:
            if len(data) == 0: return "No data available"
            if dIdx == "all": data | cli.apply(~cli.aS(plt.dplot, 7, True, ".-")) | cli.ignore()
            else: data | cli.rItem(dIdx) | ~cli.aS(plt.dplot, 7, True, ".-")
            plt.title(f"Raw '{self.name}', idx {idx}, dIdx {dIdx}, window {window}"); plt.tight_layout(); im = plt.gcf() | cli.toImg()
        return im | cli.toHtml()
    @staticmethod
    def splotRaw(name:str, startTime:float, stopTime:float, idx:int=0, dIdx:"int|str"="all", window:int=1):
        d = k1.TimeSeries.allData(); s = d.get(name, None)
        if s is None: return f"No TimeSeries with the name '{name}' found"
        return s.plotRaw(startTime, stopTime, idx=idx, dIdx=dIdx, window=window)
    def plotRawTf(self, startTime:float, stopTime:float, idx:int=0, dIdx:"int|str"="all", window:int=1):
        import matplotlib.pyplot as plt; s = self; data = s.getRawTf(startTime, stopTime, idx=idx) | ~cli.apply(lambda x,y: [[x, e] for e in y]) | cli.T() | cli.apply(cli.T() | (cli.apply(cli.window(10) | cli.toMean().all()) if window > 1 else cli.iden())) | cli.deref()
        with k1.mplLock:
            if len(data) == 0: return "No data available"
            if dIdx == "all":
                data | cli.apply(~cli.aS(plt.dplot, 7, True, ".-")) | cli.ignore()
                plt.title(f"Raw tf '{self.name}', idx {idx}, dIdx {dIdx}, window {window}")
            else:
                data | cli.rItem(dIdx) | ~cli.aS(plt.dplot, 7, True, ".-"); tfId, method, coeffs, mplJson, f, docs = self.getTf(idx, dIdx)
                if mplJson.get("header", None): plt.title(mplJson["header"])
                if mplJson.get("ylabel", None): plt.ylabel(mplJson["ylabel"])
                if mplJson.get("xlabel", None): plt.ylabel(mplJson["xlabel"])
            plt.tight_layout(); im = plt.gcf() | cli.toImg()
        return im | cli.toHtml()
    @staticmethod
    def splotRawTf(name:str, startTime:float, stopTime:float, idx:int=0, dIdx:"int|str"="all", window:int=1):
        d = k1.TimeSeries.allData(); s = d.get(name, None)
        if s is None: return f"No TimeSeries with the name '{name}' found"
        return s.plotRawTf(startTime, stopTime, idx=idx, dIdx=dIdx, window=window)
    def plotRate(self, startTime:float, stopTime:float, window:int=1):
        import matplotlib.pyplot as plt; s = self; data = s.getRate(startTime, stopTime) | (cli.T.wrap(cli.apply(cli.window(window) | cli.toMean().all())) if window > 1 else cli.iden()) | cli.T() | cli.deref()
        with k1.mplLock:
            if len(data) == 0: return "No data available"
            data | ~cli.aS(plt.dplot, 7, True, ".-"); plt.xlabel("Time"); plt.ylabel("Rate (calls/s)"); plt.title(f"Rate '{self.name}', window {window}"); plt.tight_layout(); im = plt.gcf() | cli.toImg()
        return im | cli.toHtml()
    @staticmethod
    def splotRate(name:str, startTime:float, stopTime:float, window:int=1):
        d = k1.TimeSeries.allData(); s = d.get(name, None)
        if s is None: return f"No TimeSeries with the name '{name}' found"
        return s.plotRate(startTime, stopTime, window=window)
    def plotPert(self, startTime:float, stopTime:float, idx:int=0, dIdx:"int|str"=0, window:int=1):
        import matplotlib.pyplot as plt; s = self; data = s.getPert(startTime, stopTime, idx) | cli.apply(lambda x: x[dIdx], 1) | ~cli.apply(lambda x,y: [[x, e] for e in y]) | cli.T() | cli.apply(cli.T() | (cli.apply(cli.window(window) | cli.toMean().all()) if window > 1 else cli.iden())) | cli.deref()
        with k1.mplLock:
            if len(data) == 0: return "No data available"
            data | cli.apply(~cli.aS(plt.dplot, 7, True, ".-")) | cli.ignore(); plt.legend(["0%", "10%", "50%", "90%", "100%"]); plt.xlabel("Time"); plt.ylabel("Value"); plt.title(f"Percentile '{self.name}', idx {idx}, dIdx {dIdx}, window {window}"); plt.tight_layout(); im = plt.gcf() | cli.toImg()
        return im | cli.toHtml()
    @staticmethod
    def splotPert(name:str, startTime:float, stopTime:float, idx:int=0, dIdx:int=0, window:int=1):
        d = k1.TimeSeries.allData(); s = d.get(name, None)
        if s is None: return f"No TimeSeries with the name '{name}' found"
        return s.plotPert(startTime, stopTime, idx=idx, dIdx=dIdx, window=window)
    @staticmethod
    def flask(app, **kwargs):
        """Attaches a TimeSeries management plane to a flask app.
Example::

    app = flask.Flask(__name__)
    k1.TimeSeries.flask(app)
    app.run(host="0.0.0.0", port=80)

Then, you can access the route "/k1/ts" to see an overview of all TimeSeries

:param app: flask app object
:param kwargs: extra random kwargs that you want to add to ``app.route()`` function"""
        bootstrapJs = """
async function dynamicLoad(selector, endpoint, rawHtml=null) { // loads a remote endpoint containing html and put it to the selected element. If .rawHtml is available, then don't send any request, and just use that html directly
    const elem = document.querySelector(selector); elem.innerHTML = rawHtml ? rawHtml : (await (await fetch(endpoint)).text());
    await new Promise(r => setTimeout(r, 100)); let currentScript = "";
    try { for (const script of elem.getElementsByTagName("script")) { currentScript = script.innerHTML; eval(script.innerHTML); }
    } catch (e) { console.log(`Error encountered: `, e, e.stack, currentScript); }
}"""; bootstrapHtml = f"""
<head>
    <meta charset="UTF-8"><title>DHCP low level server</title><meta name="viewport" content="width=device-width, initial-scale=1.0">
    <link href="https://static.aigu.vn/daisyui.css" rel="stylesheet" type="text/css" />
    <style>
        h1 {{ font-size: 2.25rem !important; line-height: 2.5rem !important; }}
        h2 {{ font-size: 1.5rem !important; line-height: 2rem !important; margin: 10px 0px !important; }}
        h3 {{ font-size: 1.125rem !important; line-height: 1.75rem !important; margin: 6px 0px !important; }}
        textarea {{ border: 1px solid; padding: 8px 12px !important; border-radius: 10px !important; }}
        body {{ padding: 12px; }}
    </style><script>{bootstrapJs}</script>
</head>"""; tss = k1.TimeSeries.allData()
        # time series stuff
        @app.route("/k1/ts", **kwargs)
        def k1_ts_index():
            pre = cli.init._jsDAuto(); ui1 = tss.items() | ~cli.apply(lambda k,v: [k, v.fn, v.storeRaw, v.retention, v.coldStore, v | (cli.tryout() | cli.aS(len))]) | cli.deref() | (cli.toJsFunc("term") | cli.grep("${term}") | k1.viz.Table(["name", "fn", "storeRaw", "retention", "coldStore", "#hits"], height=600, onclickFName=f"{pre}_select", selectable=True, sortF=True)) | cli.op().interface() | cli.toHtml()
            return f"""{bootstrapHtml}<h1>TimeSeries</h1><div style="overflow-x: auto; margin-top: 12px">{ui1}</div><div id="{pre}_details"></div>
    <script>\nfunction {pre}_select(row, i, e) {{ dynamicLoad("#{pre}_details", `/k1/ts/fragment/${{row[0]}}`); }}</script>"""
        @app.route("/k1/ts/fragment/<name>", **kwargs)
        def k1_ts_fragment_overview(name):
            """Gets overview of a particular TimeSeries"""
            s = tss.get(name, None)
            if s is None: return f"No TimeSeries '{name}' found"
            idxs = s.idxs(); pre = cli.init._jsDAuto(); ui1 = idxs | cli.wrapList().all() | (cli.toJsFunc("idx") | cli.grep("${idx}") | k1.viz.Table(sortF=True, onclickFName=f"{pre}_selectIdx", height=400, selectable=True)) | cli.op().interface() | cli.toHtml(); return f"""
<h2>TimeSeries '{s.name}'</h2><div style="border: 1px solid black; padding: 8px">{ui1}</div>
<div id="{pre}_details"></div><script>function {pre}_selectIdx(row, i, e) {{ document.querySelector("#{pre}_details").innerHTML = "(loading...)"; dynamicLoad("#{pre}_details", `/k1/ts/fragment/{name}/${{row[0]}}`); }}</script>"""
        @app.route("/k1/ts/fragment/<name>/<int:idx>", **kwargs)
        def k1_ts_fragment_idx_overview(name, idx:int):
            pre = cli.init._jsDAuto(); s = tss.get(name, None)
            if s is None: return f"No TimeSeries '{name}' found"
            t, values = s.getLatestTf(idx); values = {i:x for i,x in enumerate(values)}
            ui1 = range(s.getDIdx(idx)) | cli.apply(lambda dIdx: [dIdx, *s.getTf(idx, dIdx), values.get(dIdx, None)]) | cli.cut(0, 2, 3, 4, 6, 7) | (cli.aS(str) | cli.aS(html.escape)).all(2) | (cli.toJsFunc("term") | cli.grep("${term}") | k1.viz.Table(["dIdx", "method", "coeff", "mplJson", "docs", "latest tf value"], onclickFName=f"{pre}_selectDIdx", selectable=True, height=400)) | cli.op().interface() | cli.toHtml(); return f"""
<h3>idx {idx}</h3><div style="border: 1px solid black; padding: 8px">{ui1}</div>
<div id="{pre}_details"></div><script>function {pre}_selectDIdx(row, i, e) {{ document.querySelector("#{pre}_details").innerHTML = "(loading...)"; dynamicLoad("#{pre}_details", `/k1/ts/fragment/{name}/{idx}/${{row[0]}}`) }}</script>"""
        @app.route("/k1/ts/fragment/<name>/<int:idx>/<int:dIdx>", **kwargs)
        def k1_ts_fragment_idx_dIdx_overview(name, idx:int, dIdx:int):
            pre = cli.init._jsDAuto(); s = tss.get(name, None)
            if s is None: return f"No TimeSeries '{name}' found"
            tfId, method, coeffs, mplJson, f, docs = s.getTf(idx, dIdx); return f"""
<h3>dIdx {dIdx}</h3>
<div>Transform</div>
<div style="display: grid; grid-template-columns: auto auto; row-gap: 8px; column-gap: 8px; align-items: center; margin-top: 12px; width: fit-content">
    <div>method: </div><input id="{pre}_method" class="input input-bordered" value="{method}" />
    <div>coeffs: </div><input id="{pre}_coeffs" class="input input-bordered" value="{coeffs}" />
    <div>mplJson: </div><textarea id="{pre}_mplJson" class="textarea textarea-bordered">{json.dumps(mplJson, ensure_ascii=False)}</textarea>
    <div>docs: </div><textarea id="{pre}_docs" class="textarea textarea-bordered">{docs}</textarea>
</div><button id="{pre}_saveTfBtn" class="btn" style="margin-top: 8px">Save transform</button>
<div>Plots</div>
<div style="display: grid; grid-template-columns: auto auto; row-gap: 8px; column-gap: 8px; align-items: center; margin-top: 12px; width: fit-content">
    <div>timeStr: </div><input id="{pre}_timeStr" class="input input-bordered" value="1 day" />
    <div>window: </div><input id="{pre}_window" class="input input-bordered" value="1" />
</div><div style="margin: 8px 0px">
    <button id="{pre}_graphBtn" class="btn">Graph</button>
    <button id="{pre}_rawBtn" class="btn" style="margin-left: 8px">View raw</button>
    <button id="{pre}_rawTfBtn" class="btn" style="margin-left: 8px">View raw transformed</button>
</div>
<div id="{pre}_detailsRawTf">{k1_ts_fragment_idx_rawTf(name, idx, dIdx, 1, '1 day')}</div>
<div id="{pre}_detailsRate">{k1_ts_fragment_idx_rate(name, 1, '1 day')}</div>
<div id="{pre}_detailsPert">{k1_ts_fragment_idx_pert(name, 1, dIdx, 1, '1 day')}</div>
<div id="{pre}_rawRaw"></div><script>
    (async () => {{
        let dS = (x) => document.querySelector(x); dS("#{pre}_graphBtn").onclick = async () => {{
            let window = dS("#{pre}_window").value; let timeStr = dS("#{pre}_timeStr").value;
            dS("#{pre}_detailsRawTf").innerHTML= "(loading...)"; dynamicLoad("#{pre}_detailsRawTf",`/k1/ts/fragment/{name}/{idx}/rawTf/{dIdx}/${{window}}/${{timeStr}}`);
            dS("#{pre}_detailsRate").innerHTML = "(loading...)"; dynamicLoad("#{pre}_detailsRate", `/k1/ts/fragment/{name}/0/rate/0/${{window}}/${{timeStr}}`);
            dS("#{pre}_detailsPert").innerHTML = "(loading...)"; dynamicLoad("#{pre}_detailsPert", `/k1/ts/fragment/{name}/{idx}/pert/{dIdx}/${{window}}/${{timeStr}}`);
        }}; dS("#{pre}_rawBtn").onclick = async () => {{ let timeStr = dS("#{pre}_timeStr").value; window.open(`/k1/ts/api/{name}/{idx}/raw/${{timeStr}}`, "_blank"); }};
        dS("#{pre}_rawTfBtn").onclick = async () => {{ let timeStr = dS("#{pre}_timeStr").value; window.open(`/k1/ts/api/{name}/{idx}/rawTf/${{timeStr}}`, "_blank"); }};
        dS("#{pre}_saveTfBtn").onclick = async () => {{
            await fetch("/k1/ts/api/{name}/{idx}/{dIdx}/saveTf", {{ method: "POST", headers: {{ "Content-Type": "application/json" }}, body: JSON.stringify({{ method: dS("#{pre}_method").value, coeffs: dS("#{pre}_coeffs").value, mplJson: dS("#{pre}_mplJson").value, docs: dS("#{pre}_docs").value }}) }}); alert("Saved!"); }}
    }})();
</script>"""
        @app.route("/k1/ts/api/<name>/<int:idx>/<int:dIdx>/saveTf", methods=["POST"], **kwargs)
        def k1_ts_api_idx_dIdx_saveTf(name, idx, dIdx):
            from flask import request; js = request.json; pre = cli.init._jsDAuto(); s = tss.get(name, None)
            if s is None: return f"No TimeSeries '{name}' found"
            s.setTf(idx, dIdx, method=int(js["method"]), coeffs=json.loads(js["coeffs"]), mplJson=json.loads(js["mplJson"]), docs=js["docs"]); return "ok"
        @app.route("/k1/ts/api/<name>/<int:idx>/latest", **kwargs)
        def k1_ts_api_idx_latest(name, idx):
            """Returns latest raw data, looks like [timestamp, [v1, v2, v3, ...]]"""
            d = k1.TimeSeries.allData(); s = d.get(name, None); return f"No TimeSeries with the name '{name}' found" if s is None else s.getLatest(idx)
        @app.route("/k1/ts/api/<name>/<int:idx>/latestTf", **kwargs)
        def k1_ts_api_idx_latestTf(name, idx):
            """Returns latest transformed data, looks like [timestamp, [v1, v2, v3, ...]]"""
            d = k1.TimeSeries.allData(); s = d.get(name, None); return f"No TimeSeries with the name '{name}' found" if s is None else s.getLatestTf(idx)
        @app.route("/k1/ts/api/<name>/<int:idx>/raw/<timeStr>", **kwargs)
        def k1_ts_api_idx_raw(name, idx, timeStr):
            """Returns raw data and present it in a pre"""
            d = k1.TimeSeries.allData(); s = d.get(name, None)
            if s is None: return f"No TimeSeries with the name '{name}' found"
            return s.getRaw(*k1.parseTimeStr(timeStr), idx=idx)
        @app.route("/k1/ts/fragment/<name>/<int:idx>/raw/<dIdx>/<int:window>/<timeStr>", **kwargs)
        def k1_ts_fragment_idx_raw(name, idx, dIdx, window, timeStr):
            """Plots raw data"""
            return k1.TimeSeries.splotRaw(name, *k1.parseTimeStr(timeStr), idx, "all" if dIdx == "all" else int(dIdx), window)
        @app.route("/k1/ts/api/<name>/<int:idx>/rawTf/<timeStr>", **kwargs)
        def k1_ts_api_idx_rawTf(name, idx, timeStr):
            """Returns raw data and present it in a pre"""
            d = k1.TimeSeries.allData(); s = d.get(name, None)
            if s is None: return f"No TimeSeries with the name '{name}' found"
            return list(s.getRawTf(*k1.parseTimeStr(timeStr), idx=idx))
        @app.route("/k1/ts/fragment/<name>/<int:idx>/rawTf/<dIdx>/<int:window>/<timeStr>", **kwargs)
        def k1_ts_fragment_idx_rawTf(name, idx, dIdx, window, timeStr):
            """Plots transformed data data"""
            return k1.TimeSeries.splotRawTf(name, *k1.parseTimeStr(timeStr), idx, "all" if dIdx == "all" else int(dIdx), window)
        @app.route("/k1/ts/fragment/<name>/0/rate/0/<int:window>/<timeStr>", **kwargs)
        def k1_ts_fragment_idx_rate(name, window, timeStr):
            """Plots call rate graph"""
            return k1.TimeSeries.splotRate(name, *k1.parseTimeStr(timeStr), window)
        @app.route("/k1/ts/fragment/<name>/<int:idx>/pert/<dIdx>/<int:window>/<timeStr>", **kwargs)
        def k1_ts_fragment_idx_pert(name, idx, dIdx, window, timeStr):
            """Plots percentile graph"""
            if dIdx == "all": return "(Only available with specific dIdx)"
            return k1.TimeSeries.splotPert(name, *k1.parseTimeStr(timeStr), idx, int(dIdx), window)
    def dummyLoad(self, niter=600, frac=0.1):
        """Simulates a dummy load on this time series. Read source of
this method to understand what it does.

:param niter: number of iterations
:param frac: fraction of new random numbers and extra time to wait. 0 for more deterministic, 1 for more chaotic"""
        a = random.random(); b = random.random(); c = random.random()
        for i in range(niter) | cli.tee().crt():
            a += (random.random()-0.5)*frac; b += (random.random()-0.5)*frac; c += (random.random()-0.5)*frac
            self.append(a, b, c); print(self.getRaw() | cli.shape(0), end=""); time.sleep(0.1 + random.random()*frac)

In [10]:
#export
@k1.cron(delay=11, daemon=True, delayedStart=5, name="ts_main", docs="k1.TimeSeries fast scan thread")
def _timeSeriesThread():
    """Fast scan over every initialized TimeSeries. For each one, it:

- moves freshly appended samples from ``_raw`` into the rate, percentile and raw buffers
- once more than 10 samples are buffered, stores their average call rate in ``_dbRate``
- once an idx has more than 100 buffered samples, stores the min, 10%, 50%, 90% and
  max of each value column in ``_dbPert``
- if ``storeRaw`` is on, flushes the buffered raw samples into ``_dbRaw``"""
    # snapshot the dict, like the retention thread does, so that registering a new TimeSeries mid-scan can't raise "dictionary changed size during iteration"
    for _, ts in list(_timeSeriesD.items()):
        if not ts._initialized: continue
        if len(ts._raw) > 0: # raw format: [time, idx, values, pack values]. Transfer new raw data to other buffers to be processed later
            _raw = ts._raw; ts._raw = []; ts._rateRaw.extend(_raw); ts._rawRaw.extend(_raw)
            for t,idx,vs,hvs in _raw: ts._pertD[idx].append([t, vs])
        # NOTE(review): when storeRaw is off, nothing visible here ever empties ts._rawRaw, so it looks like it grows forever - confirm whether another method consumes it
        if len(ts._rateRaw) > 10: # more than 10 data points before collating
            _rateRaw = ts._rateRaw; ts._rateRaw = []
            times = [x[0] for x in _rateRaw]; _min = min(times); deltaT = max(times) - _min
            if deltaT > 0: ts._dbRate.insert(time=_min, rate=len(_rateRaw)/deltaT)
            else: ts._rateRaw = _rateRaw + ts._rateRaw # all samples share 1 timestamp (coarse clock), so the rate is undefined. Keep them for the next scan instead of dividing by 0
        for idx, tvs in list(ts._pertD.items()):
            if len(tvs) > 100:
                # per value column: sort, then pick min, 10%, 50%, 90%, max. All columns are flattened into 1 list of floats
                ts._pertD[idx] = []; n = len(tvs); data = tvs | cli.cut(1) | cli.T() | cli.sort(None).all() | cli.apply(lambda vs: [vs[0], vs[n//10], vs[n//2], vs[9*n//10], vs[-1]]) | cli.joinSt() | cli.aS(list)
                ts._dbPert.insert(time=tvs[0][0], idx=idx, data=struct.pack("f"*len(data), *data))
        if ts.storeRaw and len(ts._rawRaw) > 0:
            rr = ts._rawRaw; ts._rawRaw = []
            # time and idx are numbers produced by this module, so they're inlined. Only the packed bytes go through a "?" placeholder
            ts._dbRaw.query(f"""INSERT INTO raw ( time, idx, data ) VALUES """ + ", ".join(f"({t}, {idx}, ?)" for t,idx,vs,hvs in rr), *[hvs for t,idx,vs,hvs in rr])
@k1.cron(delay=61, daemon=True, delayedStart=11, name="ts_retention", docs="k1.TimeSeries slow scan thread for retention control")
def _timeSeriesRetentionThread(): # scans to see whether there's old outdated data in sqlite, and delete them and store in cold storage
    """Slow scan that enforces ``retention`` on every initialized TimeSeries.
Rows older than ``now - retention`` are deleted from the pert, rate and raw
tables. If ``coldStore`` is on, the raw rows about to be deleted are appended
to the file ``{fn}.cold`` as binary records."""
    for _, ts in list(_timeSeriesD.items()):
        if not ts._initialized: continue
        if ts._dbRaw is None: print(f"TS retention thread: {ts.name} does not have _dbRaw!"); continue
        now = time.time(); t = now - ts.retention # timestamp to slice off
        # trim the aggregated tables regardless of what's in the raw table. The raw table stays empty when storeRaw is off, and gating on it meant pert and rate were never trimmed in that case
        ts._dbPert.query(f"delete from pert where time < {t}"); ts._dbRate.query(f"delete from rate where time < {t}")
        data = ts._dbRaw.query("select time from raw order by time limit 1")
        if len(data) == 0: continue
        beginTime = data[0][0]
        if now - beginTime > ts.retention:
            if ts.coldStore: data = ts._dbRaw.query(f"select time, idx, data from raw where time < {t} order by time")
            ts._dbRaw .query(f"delete from raw  where time < {t}")
            # cold record layout: a zero byte, then 1 length byte, then struct.pack("di", time, idx), then the packed values
            # NOTE(review): rows are deleted before the cold file is written, and the "B" length byte can't hold records longer than 253 bytes - confirm this is acceptable
            if ts.coldStore: data | ~cli.apply(lambda t,idx,data: struct.pack("di", t, idx) + data) | cli.apply(lambda x: b"\x00" + struct.pack("B", len(x)+2) + x) >> cli.file(f"{ts.fn}.cold")

In [12]:
#notest
import random
# manual smoke test: append 3 random values every 0.1s for 600 iterations (about 60s),
# printing the shape of the raw data after each append. retention=60 makes old
# data eligible for removal by the retention thread while this is still running
s1 = TimeSeries(name="s1", retention=60)
for i in range(600) | cli.tee().crt():
    s1.append(random.random(), random.random(), random.random())
    print(s1.getRaw() | cli.shape(), end="")
    time.sleep(0.1)

599) 599, 60s elapsed, throughput: 9.85 /s(558, 2)                       (0,)     (0,)                                                                      (15, 2)                              (15, 2)                                   (15, 2)                                                                           (15, 2)     (15, 2)                                                                                                                                                           (125, 2)                                                                 (125, 2)                    (125, 2)                                                                                                                                  (233, 2)                                                                                                                             (233, 2)                                                                                                                                 

In [13]:
#notest
# keeps printing the shape of s1's raw data once a second, to watch it change over time. Runs forever, interrupt the kernel to stop
while True: print(s1.getRaw() | cli.shape(), end="\r"); time.sleep(1)

(232, 2)


KeyboardInterrupt



In [None]:
#export
_speedAutoInc = k1.AutoIncrement(prefix="_k1_speed_"); _speedData = {} # Dict[name -> {name, docs, func, ts}]
class speed(cli.BaseCli):
    def __init__(self, name=None, fn=None, docs=None, coldStore=False):
        """Tracks and benchmarks certain functions, and monitor them through time
with reports in order to deploy them absolutely everywhere. Example::

    @k1.speed(name="some func description")
    def f(x):
        return x*3

You can get a list of all speed k1.TimeSeries objects via ``k1.TimeSeries.allData``

:param name: optional name to show up in :meth:`allData`. Auto-generated if not
    specified. Can't contain a forward slash
:param fn: file name. If specified, will store speed data in sqlite database
    at this path, else store in memory
:param docs: optional docs to show up in :meth:`allData`
:param coldStore: if True, stores old speed data in condensed binary file. See more at :class:`TimeSeries`"""
        self.name = name or _speedAutoInc(); self.fn = fn; self.docs = docs; self.coldStore = coldStore
        if "/" in self.name: raise Exception("Can't have forward slash in the name")
    def __call__(self, f):
        """Decorates ``f``: registers it under this object's name, creates the
TimeSeries "speed: {name}" and returns a wrapper that appends the duration (in
seconds) of every call that returns normally. Calls that raise are not recorded.

:param f: function to benchmark
:return: wrapped function, with ``f``'s metadata copied over
:raises Exception: if the name has been registered before"""
        if self.name in _speedData: raise Exception(f"Name '{self.name}' has appeared before. Please use a different name")
        _speedData[self.name] = self.obj = {"name": self.name, "docs": self.docs, "func": f, "ts": k1.TimeSeries(name=f"speed: {self.name}", fn=self.fn, coldStore=self.coldStore)}
        # perf_counter is monotonic and high resolution. Wall clock time.time() can jump when the system clock is adjusted and is too coarse for short calls
        ts = self.obj["ts"]; _time = time.perf_counter
        def wrapper(*args, **kwargs): beginTime = _time(); res = f(*args, **kwargs); duration = _time() - beginTime; ts.append(duration); return res
        functools.update_wrapper(wrapper, f); return wrapper
    @staticmethod
    def allData():
        """Returns the dict of all registered benchmarks, looks like ``{name: {"name", "docs", "func", "ts"}}``"""
        return _speedData
    @staticmethod
    def flask(app, **kwargs):
        """Attaches a speed management plane to a flask app.
Example::

    app = flask.Flask(__name__)
    k1.speed.flask(app)
    app.run(host="0.0.0.0", port=80)

Then, you can access the route "/k1/speed" to see an overview of all speed
benchmarks. However, doing ``k1.TimeSeries.flask(app)`` and access at "/k1/ts"
would be more beneficial, as that contains all the graphs and data

:param app: flask app object
:param kwargs: extra random kwargs that you want to add to ``app.route()`` function"""
        def _funcName(func): return getattr(func, "__name__", repr(func)) # functools.partial and callable instances have no __name__
        def _funcFile(func): # inspect.getfile() raises on builtins and other callables without a source file. Don't let 1 of those take the whole page down
            try: return inspect.getfile(func)
            except (TypeError, OSError): return "(unknown)"
        @app.route("/k1/speed", **kwargs)
        def k1_speed_index():
            d = k1.speed.allData(); ui1 = d.items() | ~cli.apply(lambda k,v: [k, _funcName(v["func"]), _funcFile(v["func"]), v["ts"].name, v["docs"]]) | cli.deref() | (cli.toJsFunc("term") | cli.grep("${term}") | k1.viz.Table(["name", "func's name", "func's file name", "TimeSeries name", "docs"], height=600, sortF=True)) | cli.op().interface() | cli.toHtml()
            return f"""<h1>Speed</h1><div style="overflow-x: auto">{ui1}</div>"""

In [43]:
#export
@contextlib.contextmanager
def idenContext():
    """Do-nothing context manager: no setup, no cleanup, and exceptions pass
straight through. The value bound by ``with ... as`` is ``True``."""
    yield True
# _cextMods: Dict[moduleName -> loaded module], filled in by compileCExt(). Also registers the "cExt" settings group with the list of headers that get included in every generated C++ source
_cextMods = {}; k1.settings.add("cExt", k1.Settings().add("includes", ["fstream", "iostream", "sstream", "mutex", "string", "vector", "cmath", "random"], "header files to include"), "k1.compileCExt()-related settings");
def compileCExt(cppCode, moduleName, verbose=False):
    """Conveniently compiles a python C extension module and returns it.
Example::

    mod = k1.compileCExt(\"\"\"
        // pure math func, simple data types
        double func1(double x) { for (int i = 0; i < 1000000; i++) x = std::cos(x); return x; }
        // takes in array
        double func2(std::vector<double>& arr) { double sum = 0; for (auto v : arr) sum += v; return sum; }
        // returns array
        std::vector<int> func3(int x, int n) { std::vector<int> ans; for (int i = 0; i < n; i++) ans.push_back(x+i); return ans; }
        // nested arrays
        std::vector<std::vector<int>> func4(int x, int n) {
            std::vector<std::vector<int>> ans; std::vector<int> ans1, ans2;
            for (int i = 0; i < n; i++) ans1.push_back(x+i);
            for (int i = 0; i < n; i++) ans2.push_back(x+i*2);
            ans.push_back(ans1); ans.push_back(ans2); return ans;
        }
        // complex string manipulation, splitting things like "A,3\\nB,4", std::vector<std::pair<std::string, int>>
        std::vector<std::pair<std::string, int>> func5(std::string text) {
            std::vector<std::pair<std::string, int>> ans; std::string line;
            std::istringstream f(text); std::pair<std::string, int> pair;
            while (std::getline(f, line)) {
                int pos = line.find(","); pair.first = line.substr(0, pos);
                pair.second = std::stoi(line.substr(pos+1)); ans.push_back(pair);
            } return ans;
        }

        PYBIND11_MODULE(genM1, m) {
            m.def("func1", &func1); m.def("func2", &func2); m.def("func3", &func3);
            m.def("func4", &func4); m.def("func5", &func5);
    }\"\"\", "genM1", verbose=True) # this takes around 15s to run. Yes it's slow, but it works

    # python-equivalent functions
    def func1(x):
        for i in range(1000000): x = math.cos(x)
        return x
    def func2(arr): return sum(arr)
    def func3(x, n): return [x+i for i in range(n)]
    def func4(x, n): return [[x+i for i in range(n)], [x+i*2 for i in range(n)]]
    def func5(s): return [(x, int(y)) for x,y in [x.split(",") for x in s.split("\\n")]]

    mod.func1(3)     # 22.8 ms ± 1.83 ms, 7.6x faster
    func1(3)         # 174 ms ± 24.1 ms

    x = list(range(100))
    mod.func2(x)     # 7.25 μs ± 761 ns, 3.1x slower
    func2(x)         # 2.33 μs ± 299 ns

    mod.func3(3, 10) # 1.16 μs ± 97 ns, 1.2x slower
    func3(3, 10)     # 946 ns ± 128 ns

    mod.func4(3, 10) # 2.23 μs ± 188 ns, 1.25x faster
    func4(3, 10)     # 2.78 μs ± 292 ns

    s = "A,3\\nB,4\\nC,5\\nD,6\\nE,7\\nF,8\\nG,9"
    mod.func5(s)     # 4.5 μs ± 286 ns, 1.07x faster
    func5(s)         # 4.81 μs ± 866 ns

Behind the scenes, this function generates a C++ source file, compiles it into a
python C extension module, then loads it in the current interpreter session. So
purpose of this is to very quickly drop down to C++ whenever the need arises.
Solutions like Cython are neat and all, but they're quite awkward to code in, and
don't have the full power of C++. Meanwhile, doing it like this gives you full
C++ features, as well as an easy python binding interface via pybind11.

Several header files are included by default, so you don't have to include them, like
<string>, <fstream>, etc. The list of them is in ``settings.cExt.includes``. You can
get a dict of all compiled modules (module name -> module) via ``k1.compileCExt.mods``

Also, as you can see from the tiny benchmark results, it's not always faster to use
the C++ version, if input and output translation operations takes longer than the
function itself. So although there's a lot of potential for speedups, you have to
be really careful about this, or else you risk slowing it down and wasting a bunch
of time.

:param cppCode: C++ source code. Common headers are included
:param moduleName: name of the module. The source file and the extension are named
    after it, so it should match the name used in ``PYBIND11_MODULE(...)``, like in the example
:param verbose: if True, prints the temp build directory and runs the build as-is,
    else the build runs inside ``k1.captureStdout()``
:return: the loaded extension module"""
    # code mostly written by ChatGPT 4o. Verified to work tho
    # fresh temp dir to hold the generated source and the build output. Nothing here removes it afterwards
    import pybind11; from setuptools import setup, Extension; from setuptools.command.build_ext import build_ext; import importlib.util; temp_dir = tempfile.mkdtemp()
    print(f"temp_dir: {temp_dir}\n" if verbose else "", end=""); incls = k1.settings.cExt.includes | cli.apply(lambda x: f"#include <{x}>") | cli.join("\n")
    # source = pybind11 prelude + default includes + user code. NOTE(review): relies on `| cli.file(...)` returning the written file's path, as the result is used as the extension's source - confirm
    cpp_file = f"""#include <pybind11/pybind11.h>\n#include <pybind11/stl.h>\nnamespace py = pybind11;\n{incls}\n""" + cppCode | cli.file(os.path.join(temp_dir, f"{moduleName}.cpp"))
    ext_modules = [Extension(moduleName, sources=[cpp_file], include_dirs=[pybind11.get_include()], language="c++", extra_compile_args=["-O3", "-std=c++17"])]
    class BuildExt(build_ext):
        def run(self): build_ext.run(self)
        # makes sure the output folder exists before handing over to the stock build step
        def build_extension(self, ext): ext_path = self.get_ext_fullpath(ext.name); os.makedirs(os.path.dirname(ext_path), exist_ok=True); build_ext.build_extension(self, ext)
    with (idenContext() if verbose else k1.captureStdout()):
        # the built library is taken to be the single entry in temp_dir whose name contains "cpython". NOTE(review): that naming presumably only holds for CPython on Linux/macOS - confirm
        setup(name=moduleName, ext_modules=ext_modules, cmdclass={"build_ext": BuildExt}, script_args=["build_ext", "--inplace"], options={"build_ext": {"build_lib": temp_dir}}); so_file = temp_dir | cli.ls() | cli.grep("cpython") | cli.item()
        # load the library straight from its file path, then remember it so that it shows up in compileCExt.mods
        spec = importlib.util.spec_from_file_location(moduleName, so_file); module = importlib.util.module_from_spec(spec); spec.loader.exec_module(module); _cextMods[moduleName] = module; return module
compileCExt.mods = _cextMods

In [19]:
%%time
# builds the example module from compileCExt's docstring. The cells below compare each C++ function against a pure python equivalent
mod = compileCExt("""
// pure math func, simple data types
double func1(double x) { for (int i = 0; i < 1000000; i++) x = std::cos(x); return x; }
// takes in array
double func2(std::vector<double>& arr) { double sum = 0; for (auto v : arr) sum += v; return sum; }
// returns array
std::vector<int> func3(int x, int n) { std::vector<int> ans; for (int i = 0; i < n; i++) ans.push_back(x+i); return ans; }
// nested arrays
std::vector<std::vector<int>> func4(int x, int n) {
    std::vector<std::vector<int>> ans; std::vector<int> ans1, ans2;
    for (int i = 0; i < n; i++) ans1.push_back(x+i);
    for (int i = 0; i < n; i++) ans2.push_back(x+i*2);
    ans.push_back(ans1); ans.push_back(ans2); return ans;
}
// complex string manipulation, splitting things like "A,3\\nB,4", std::vector<std::pair<std::string, int>>
std::vector<std::pair<std::string, int>> func5(std::string text) {
    std::vector<std::pair<std::string, int>> ans; std::string line;
    std::istringstream f(text); std::pair<std::string, int> pair;
    while (std::getline(f, line)) {
        int pos = line.find(","); pair.first = line.substr(0, pos);
        pair.second = std::stoi(line.substr(pos+1)); ans.push_back(pair);
    } return ans;
}

PYBIND11_MODULE(genM1, m) {
    m.def("func1", &func1); m.def("func2", &func2); m.def("func3", &func3);
    m.def("func4", &func4); m.def("func5", &func5);
}""", "genM1", verbose=True)

temp_dir: /tmp/tmp40bnre6w
CPU times: user 421 ms, sys: 219 ms, total: 640 ms
Wall time: 16.1 s


In [22]:
# pure python baseline of the C++ func1: applies cos() to x 1,000,000 times
def func1(x):
    for i in range(1000000): x = math.cos(x)
    return x
# both versions have to agree exactly before their speed is compared
assert mod.func1(3) == func1(3)

In [23]:
%%timeit
mod.func1(3)

22.8 ms ± 1.83 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [24]:
%%timeit
func1(3)

174 ms ± 24.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [25]:
# pure python baseline of the C++ func2: sum of a list
def func2(arr): return sum(arr)
# arr is reused by the timing cells below
arr = list(range(100)); assert mod.func2(arr) == func2(arr)

In [26]:
%%timeit
mod.func2(arr)

7.25 μs ± 761 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [27]:
%%timeit
func2(arr)

2.33 μs ± 299 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [28]:
# pure python baseline of the C++ func3: [x, x+1, ..., x+n-1]
def func3(x, n): return [x+i for i in range(n)]
assert mod.func3(3, 10) == func3(3, 10)

In [29]:
%%timeit
mod.func3(3, 10)

1.16 μs ± 97 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [30]:
%%timeit
func3(3, 10)

946 ns ± 128 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [31]:
# pure python baseline of the C++ func4: 2 nested lists, stepping by 1 and by 2
def func4(x, n): return [[x+i for i in range(n)], [x+i*2 for i in range(n)]]
assert mod.func4(3, 10) == func4(3, 10)

In [32]:
%%timeit
mod.func4(3, 10)

2.23 μs ± 188 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [33]:
%%timeit
func4(3, 10)

2.78 μs ± 292 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [34]:
# pure python baseline of the C++ func5: parses "name,number" lines into (name, number) tuples
def func5(s): return [(x, int(y)) for x,y in [x.split(",") for x in s.split("\n")]]
# s is reused by the timing cells below
s = "A,3\nB,4\nC,5\nD,6\nE,7\nF,8\nG,9"; assert mod.func5(s) == func5(s)

In [35]:
%%timeit
mod.func5(s)

4.5 μs ± 286 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [36]:
%%timeit
func5(s)

4.81 μs ± 866 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [122]:
!../export.py _advanced --upload=True

./export started up - /home/quang/miniforge3/bin/python
----- exportAll
17034   0   60%   
11269   1   40%   
installing...
Found existing installation: k1lib 1.8
Uninstalling k1lib-1.8:
  Successfully uninstalled k1lib-1.8
[33mDEPRECATION: Loading egg at /home/quang/miniforge3/lib/python3.12/site-packages/aigu-0.1-py3.12.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330[0m[33m
[0mLooking in indexes: https://pypi.org/simple, http://10.104.0.3:3141/
Processing /home/quang/k1lib
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: k1lib
  Building wheel for k1lib (setup.py) ... [?25ldone
[?25h  Created wheel for k1lib: filename=k1lib-1.8-py3-none-any.whl size=5145459 sha256=f6c9d35cec912a76e8a535a511517291f20de84515d7523cdfcac664aa536b75
  Stored in directory: /tmp/pip-ephem-wheel-cache-21_gp2hr/wheels/b5/

In [81]:
!../export.py _advanced

^C
Traceback (most recent call last):
  File "/home/quang/k1lib/k1lib/../export.py", line 7, in <module>
  File "/home/quang/miniforge3/lib/python3.12/site-packages/fire/__init__.py", line 21, in <module>
    from fire.core import Fire
  File "/home/quang/miniforge3/lib/python3.12/site-packages/fire/core.py", line 74, in <module>
    from fire.console import console_io
  File "/home/quang/miniforge3/lib/python3.12/site-packages/fire/console/console_io.py", line 27, in <module>
    from fire.console import console_attr
  File "/home/quang/miniforge3/lib/python3.12/site-packages/fire/console/console_attr.py", line 99, in <module>
    from fire.console import console_attr_os
  File "<frozen importlib._bootstrap>", line 1360, in _find_and_load
  File "<frozen importlib._bootstrap>", line 1322, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 1262, in _find_spec
  File "<frozen importlib._bootstrap_external>", line 1528, in find_spec
  File "<frozen importlib._bootstr


KeyboardInterrupt



In [89]:
!../export.py _advanced --bootstrap=True

./export started up - /home/quang/miniforge3/bin/python
----- bootstrapping
Current dir: /home/quang/k1lib, /home/quang/k1lib/k1lib/../export.py
installing...
Found existing installation: k1lib 1.8
Uninstalling k1lib-1.8:
  Successfully uninstalled k1lib-1.8
[33mDEPRECATION: Loading egg at /home/quang/miniforge3/lib/python3.12/site-packages/aigu-0.1-py3.12.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330[0m[33m
[0mLooking in indexes: https://pypi.org/simple, http://10.104.0.3:3141/
Processing /home/quang/k1lib
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: k1lib
  Building wheel for k1lib (setup.py) ... [?25ldone
[?25h  Created wheel for k1lib: filename=k1lib-1.8-py3-none-any.whl size=5135457 sha256=835a5c176fde2d4187d5dedf9e95613cccd2603439893134abdebb5d3f04bcf2
  Stored in directory: /tmp/pip-ep