In [1]:
#export
"""
This module is for Lua's serpent module, which serializes objects into a string
similar to json. But maddeningly, no one has actually written serialization/deserialization
code for it in Python before, and I desperately need it for a factorio project. Because
this was written in a rush, I'm in no way guaranteeing that it will work on all
serialized objects out there, but from my testing, it seems pretty robust. This
is exposed automatically with::

   from k1lib.imports import *
   serpent.loads(...) # exposed
"""
import k1lib, json, re, io; from collections import deque
__all__ = ["deconstruct", "listCorrection", "loads_monolith", "loads_fragments", "loads", "dumps"]



In [2]:
#export
def _deconstruct(s:str, i:int, meta, stack, ctx, autoInc):
    """Advances the bracket scan by one event (one opening or one closing curly bracket).

:param s: the whole lua string being scanned
:param i: index to resume searching from
:param meta: output list, receives each context as soon as its closing bracket is found
:param stack: parent contexts that are still open
:param ctx: innermost open context, ``[idx, start, end, isDict, parentIdx]``
:param autoInc: callable that returns a fresh unique index for every new bracket
:return: ``(index to resume from, context that is now the innermost one)``, or
    ``(-1, None)`` if there is no closing bracket at or after ``i``
:raises IndexError: if a closing bracket shows up while no parent context is left on the stack"""
    opening = s.find("{", i); closing = s.find("}", i)
    if closing < 0: return -1, None # nothing left to close, tell the caller to stop
    isOpening = 0 <= opening < closing
    boundary = opening if isOpening else closing
    # decided once per context, from the text between its opening bracket and the first
    # bracket that follows: an "=" in there means it holds key=value pairs
    if ctx[3] is None: ctx[3] = "=" in s[ctx[1]:boundary]
    if isOpening: # opens a new context and saves parent context to stack
        stack.append(ctx); return opening+1, [autoInc(), opening, None, None, ctx[0]]
    # closes the current context and goes back to the parent context
    ctx[2] = closing; meta.append(ctx); return closing+1, stack.pop()
def deconstruct(s:str) -> "list[list[5]]":
    """Not intended for the end user. Deconstructs and grabs metadata of some
lua objects. Example::

    a = '{1,2,3,{a=3,b={c=6,d={5,6,7}}},{b=3}}'
    serpent.deconstruct(a)

That returns::

    [['_v3', 21, 27, False, '_v2'],
     ['_v2', 14, 28, True, '_v1'],
     ['_v1', 7, 29, True, '_v0'],
     ['_v4', 31, 35, True, '_v0'],
     ['_v0', 0, 36, False, 'root']]

The columns are: [unique index of bracket, start index, end index, is it a dictionary?, parent index]

Rows are listed in the order the brackets close. Scanning stops at the last closing
bracket: whatever comes after it is ignored, and brackets that are never closed are not
reported, so a string without any complete bracket pair gives an empty list. Brackets
are located with plain string search, so ones inside string literals are counted too.

This is a crucial step within :meth:`listCorrection`

:param s: lua string to analyze
:return: one row per closed bracket pair, see the columns above"""
    autoInc = k1lib.AutoIncrement(prefix="_v"); meta = []; stack = deque()
    i = 0; n = len(s); ctx = ["root", None, None, None, None] # [idx, sB, eB, isDict?, parentIdx]
    # _deconstruct() answers -1 once no closing bracket is left; without the lower bound
    # that -1 would satisfy `i < n` forever and the scan would never terminate
    while 0 <= i < n: i, ctx = _deconstruct(s, i, meta, stack, ctx, autoInc)
    return meta
def listCorrection(s:str) -> str:
    """Not intended for the end user. Corrects for lists in Lua.
Example::

    a = '{1,2,3,{a=3,b={c=6,d={5,6,7}}},{b=3}}'
    serpent.listCorrection(a) # returns '[1,2,3,{a=3,b={c=6,d=[5,6,7]}},{b=3}]'

See how some curly brackets have been replaced with square brackets?

This is because there are no list or tuple types in lua, and there are also no sets
in json, so kinda have to roll my own solution

:param s: lua string
:return: same string, where every bracket pair that :meth:`deconstruct` does not
    flag as a dictionary is turned into square brackets"""
    # deconstruct() reports character offsets, so patch characters and not utf-8 bytes:
    # the two only line up as long as the text is pure ascii
    chars = list(s)
    for idx, sB, eB, isDict, pIdx in deconstruct(s):
        if not isDict: chars[sB] = "["; chars[eB] = "]"
    return "".join(chars)

In [3]:
# reference input, also read by the round-trip check further down
a = '{1,2,3,{a=3,b={c=6,d={5,6,7}}},{b=3}}'
# debug view, left commented out: a | insId() | T() | aS(repr).all(2) | display()
assert deconstruct(a) == [
    ['_v3', 21, 27, False, '_v2'],
    ['_v2', 14, 28, True, '_v1'],
    ['_v1', 7, 29, True, '_v0'],
    ['_v4', 31, 35, True, '_v0'],
    ['_v0', 0, 36, False, 'root'],
]
assert listCorrection(a) == '[1,2,3,{a=3,b={c=6,d=[5,6,7]}},{b=3}]'

In [None]:
# NOTE(review): scratch cell; the `In [None]` marker suggests it was not executed in the saved run.
# It rebinds `a`, which the round-trip assert further down reads - confirm which input that check should use.
# NOTE(review): the path points into a private local repo, so this presumably only works on the author's machine.
fn = "~/repos/labs/mlexps/private/15-pyanodons/recipes"
# presumably reads the file and concatenates its lines into a single string - verify against k1lib.cli
a = k1lib.cli.cat(fn) | k1lib.cli.join("")

In [5]:
#export
# def _loads(lua):
#     matches = re.findall("[a-zA-Z_\-\"\[\]]+[ ]*=", lua); replacements = [m.rstrip("= ").strip("[]\"'") for m in matches]
#     for x, y in zip(matches, replacements): lua = lua.replace(x, f'"{y}": ')
#     return json.loads(lua.replace(" ", ""))
# Matches a table key together with the "=" that follows it, e.g. `a=`, `a_1 =` or `["a-b"]=`.
# Raw string, so the backslashes reach the regex engine as written instead of being
# (invalid) string escapes that Python warns about
p1 = re.compile(r'[a-zA-Z0-9_\-"\[\]]+[ ]*=')
def loads_monolith(lua:str) -> object:
    """Not intended for the end user. Core loading mechanism. See :meth:`loads`

Turns the lua text into json and hands it to :func:`json.loads`: list-like brackets
are converted by :meth:`listCorrection`, then every ``key=`` (also ``["key"]=``) is
rewritten to ``"key": ``.

All keys are rewritten in a single regex pass, instead of one whole-string
``.replace()`` per key, so the cost no longer grows with the number of keys.
Anything in the text that looks like ``key=`` is rewritten, including inside string values.

:param lua: preprocessed lua string, see :meth:`preprocess`
:return: the parsed Python object
:raises json.JSONDecodeError: if the converted text is not valid json"""
    def _toJsonKey(match):
        # `["a-b"] =` -> `a-b`: drop the trailing "=" and spaces, then the brackets and quotes
        key = match.group(0).rstrip("= ").strip("[]\"'")
        return f'"{key}": '
    return json.loads(p1.sub(_toJsonKey, listCorrection(lua)))
def loads_fragments(lua:str) -> object:
    """Not intended for the end user. See :meth:`loads`. Deserializes lua objects,
breaking up the work into multiple fragments. So here's the general gist::

    s = "{1, 2, 3, {4, 5, 6}, {7, 8, 9}}"
    
    # then, we grab the fragments, which are the top level {} blocks, assigning a unique key (the character \ue000 and autoInc index)
    fragments = {"\ue0000": {4, 5, 6}, "\ue0001": {7, 8, 9}}
    # then, we replace the fragments with their keys
    s = "{1, 2, 3, "\ue0000", "\ue0001"}"
    
    # then we load s, it will run fast since the fragments are just simple strings
    s = serpent.loads_monolith(s)
    
    # then we patch s, replacing the keys with actual parsed objects
    s = {1, 2, 3, {4, 5, 6}, {7, 8, 9}}

Why so convoluted? Well turns out, loads_monolith is pretty slow. It has a for loop there,
and there's a .replace() within, which is a hidden for loop that copies the entire
string over and over again, which slows it down. Haven't done extensive testing, but
feels like O(n^2) time complexity while I was working with it.

So this optimization assumes that the top level {} blocks are small, but there're many
of them, thus this assigns less work (shorter string, hence faster .replace()) to each
loads_monolith() call. So if there're 10k fragments, this can potentially be 10k times faster.

This assumption of course is not that great and not very general, and you can easily find
ways around it. But it's just enough for my use case right now, which is to analyze factorio.
The correct way would be to dive deeper and benchmark everything more clearly, but I don't
have time for that.

:param lua: preprocessed lua string, see :meth:`preprocess`
:return: the parsed Python object. If the text has no fragments to patch (a plain
    number or string), whatever :meth:`loads_monolith` parsed is returned as is"""
    # splits into fragments: every bracket pair sitting directly inside the outermost one ("_v0")
    fragments = [(f"\ue000{i}", sB, eB) for i,(idx,sB,eB,isDict,pIdx) in enumerate(deconstruct(lua)) if pIdx == "_v0"]

    # parses each fragment on its own, and rebuilds the text with the pua placeholders in their place
    parsed = {}; pieces = []; cursor = 0
    for pua, sB, eB in fragments:
        parsed[pua] = loads_monolith(lua[sB:eB+1])
        pieces.append(lua[cursor:sB]); pieces.append(f'"{pua}"'); cursor = eB+1
    pieces.append(lua[cursor:])
    skeleton = loads_monolith("".join(pieces))

    # injecting
    if isinstance(skeleton, dict): return {k:parsed.get(v, v) for k,v in skeleton.items()}
    if isinstance(skeleton, list): return [parsed.get(v, v) for v in skeleton]
    return skeleton # a scalar has nothing to patch, but must not get lost either
def preprocess(lua:str):
    """Not intended for the end user. Cleans up lua text before parsing.

Outside of string literals, this drops ``--`` comments (up to the end of their
line) and all spaces, tabs and line breaks. Text inside single or double quoted
strings is kept as is, so a value like ``"iron plate"`` or ``"a--b"`` survives intact
(raw line breaks are still dropped there, and a string is considered finished at an
unescaped one). Finally, every backslash is doubled.

:param lua: raw lua string
:return: compacted lua string"""
    out = []; quote = None; escaped = False; i = 0; n = len(lua)
    while i < n:
        ch = lua[i]
        if quote is not None: # inside a string literal
            if ch == "\n":
                if not escaped: quote = None # a quoted lua string can't span a raw line break
                escaped = False
            else:
                out.append(ch)
                if escaped: escaped = False # this character can't close the string
                elif ch == "\\": escaped = True
                elif ch == quote: quote = None
        elif lua.startswith("--", i): # comment, skip ahead to the line break (dropped on the next turn)
            lineEnd = lua.find("\n", i); i = n if lineEnd < 0 else lineEnd; continue
        elif ch == '"' or ch == "'": quote = ch; out.append(ch)
        elif ch not in " \t\r\n": out.append(ch)
        i += 1
    return "".join(out).replace("\\", "\\\\")
def loads(lua:str):
    """Parses a lua object, given as a string, into Python objects.
Example::

    # returns [1, 2, 3, {'a': 3, 'b': {'c': 6, 'd': [5, 6, 7]}}, {'b': 3}]
    loads("{ 1, 2, 3, { a = 3, b = { c = 6, d = {5, 6, 7} } }, { b = 3 } }")

See also: :meth:`dumps`

How fast is this? Everything is written in Python, so it's expected to be slower
than json. Some benchmark results to put a number on it:

|             | lua  | json | binary |
| ----------- | ---- | ---- | ------ |
| from python | 21us | 11us | 184us  |
| to   python | 92us | 10us | 5.8us  |

Column "lua" is "serpent.loads()", column "json" is "json.loads()", and column
"binary" is "dill.loads()"

:param lua: lua string to deserialize
:return: the deserialized Python object"""
    cleaned = preprocess(lua)
    # short texts get parsed in one go, longer ones are split into fragments first
    if len(cleaned) > 1000: return loads_fragments(cleaned)
    return loads_monolith(cleaned)
# what lua accepts as a bare `name=` key inside a table constructor
_luaIdent = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")
_luaKeywords = frozenset("and break do else elseif end false for function goto if in local nil not or repeat return then true until while".split())
def _dumps_key(k) -> str:
    """Formats a dictionary key for a lua table constructor: bare if it's a string
that is a valid lua name, ``[...]`` around its json form otherwise (strings with
dashes or spaces, reserved words, numbers, ...)."""
    if isinstance(k, str) and _luaIdent.fullmatch(k) and k not in _luaKeywords: return k
    return f"[{json.dumps(k)}]"
def _dumps_dict(obj):
    """Yields one ``key=value`` entry for each item of dictionary ``obj``."""
    for k,v in obj.items(): yield f"{_dumps_key(k)}={dumps(v)}"
def dumps(obj) -> str:
    """Serialize Python objects into lua string.
Example::

    # returns '{1,2,3,{a=3,b={c=6,d={5,6,7}}},{b=3}}'
    serpent.dumps([1, 2, 3, {'a': 3, 'b': {'c': 6, 'd': [5, 6, 7]}}, {'b': 3}])
    # returns '{["a-b"]=1,[2]="x"}'
    serpent.dumps({"a-b": 1, 2: "x"})

Dictionaries, lists and tuples become lua tables, everything else is written
the way :func:`json.dumps` writes it.

See also: :meth:`loads`

:param obj: object to serialize
:return: the lua string
:raises TypeError: if a value or a bracketed key can't be handled by :func:`json.dumps`"""
    if isinstance(obj, dict): return "{" + ",".join(_dumps_dict(obj)) + "}"
    if isinstance(obj, (list, tuple)): return "{" + ",".join([dumps(e) for e in obj]) + "}"
    return json.dumps(obj)

In [6]:
# round trip: dumping a parsed object and loading the result again must give back the same object
assert loads(dumps(loads(a))) == loads(a)

In [9]:
!../export.py serpent # --upload=True

2023-12-25 21:53:28,974	INFO worker.py:1458 -- Connecting to existing Ray cluster at address: 192.168.1.19:6379...
2023-12-25 21:53:28,982	INFO worker.py:1633 -- Connected to Ray cluster. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m
----- exportAll
13214   0   60%   
8738    1   40%   
rm: cannot remove '__pycache__': No such file or directory
Found existing installation: k1lib 1.4.4.5
Uninstalling k1lib-1.4.4.5:
  Successfully uninstalled k1lib-1.4.4.5
running install
running bdist_egg
running egg_info
creating k1lib.egg-info
writing k1lib.egg-info/PKG-INFO
writing dependency_links to k1lib.egg-info/dependency_links.txt
writing requirements to k1lib.egg-info/requires.txt
writing top-level names to k1lib.egg-info/top_level.txt
writing manifest file 'k1lib.egg-info/SOURCES.txt'
reading manifest file 'k1lib.egg-info/SOURCES.txt'
adding license file 'LICENSE'
writing manifest file 'k1lib.egg-info/SOURCES.txt'
installing library code to build/bdist.linux-x86_64/egg
running inst

In [35]:
!../export.py serpent --bootstrap=True

2023-12-25 19:38:57,239	INFO worker.py:1458 -- Connecting to existing Ray cluster at address: 192.168.1.19:6379...
2023-12-25 19:38:57,254	INFO worker.py:1633 -- Connected to Ray cluster. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m
----- bootstrapping
Current dir: /home/kelvin/repos/labs/k1lib, /home/kelvin/repos/labs/k1lib/k1lib/../export.py
rm: cannot remove '__pycache__': No such file or directory
Found existing installation: k1lib 1.4.4.5
Uninstalling k1lib-1.4.4.5:
  Successfully uninstalled k1lib-1.4.4.5
running install
running bdist_egg
running egg_info
creating k1lib.egg-info
writing k1lib.egg-info/PKG-INFO
writing dependency_links to k1lib.egg-info/dependency_links.txt
writing requirements to k1lib.egg-info/requires.txt
writing top-level names to k1lib.egg-info/top_level.txt
writing manifest file 'k1lib.egg-info/SOURCES.txt'
reading manifest file 'k1lib.egg-info/SOURCES.txt'
adding license file 'LICENSE'
writing manifest file 'k1lib.egg-info/SOURCES.txt'
installin