In [None]:
from __future__ import annotations

import copy
import dataclasses
from dataclasses import dataclass
from typing import List, TypeVar, Any, Tuple, Optional, Dict, Union

import numpy as np
import pandas as pd
from typing import Generic
from typing import Literal

from autora.variable import VariableCollection
from sklearn.base import BaseEstimator

## First attempt – a dataframe wrapper which holds deltas

In [None]:
class DataFrameWithDeltas:
    """Hold a DataFrame as a history of ``(kind, frame)`` deltas.

    The resolved value is not stored; it is recomputed from ``deltas`` every
    time :attr:`df` is read.
    """

    # Chronological history of (kind, frame) pairs; __init__ seeds it with ("new", initial).
    deltas: List[Tuple[str, pd.DataFrame]]

    def __init__(self, initial: pd.DataFrame):
        """Start the history with ``initial`` recorded as a "new" delta."""
        self.deltas = [("new", initial)]

    def update(self, delta: pd.DataFrame, kind: Literal["new", "extend"]):
        """Append ``(kind, delta)`` to the history.

        ``kind`` is stored as given and only interpreted when :attr:`df` is
        read, so an unsupported kind is reported there, not here.
        """
        self.deltas.append((kind, delta))

    @property
    def df(self):
        """Resolve the history into the current value.

        A "new" delta discards everything before it; each "extend" delta is
        row-concatenated (indices are kept as-is) onto the value so far.

        Returns:
            The frame of the latest "new" delta itself when nothing extends it,
            otherwise a concatenation of that frame and the following "extend"
            frames; ``None`` if ``deltas`` is empty.

        Raises:
            AssertionError: an "extend" delta has no DataFrame to extend.
            NotImplementedError: a delta has a kind other than "new"/"extend".
        """
        # Frames that make up the current value: the latest "new" frame followed
        # by the "extend" frames after it. They are concatenated once at the end
        # instead of re-copying the accumulated rows for every "extend" delta.
        pending = []
        for kind, delta in self.deltas:
            if kind == "new":
                pending = [delta]
            elif kind == "extend":
                assert pending and isinstance(pending[0], pd.DataFrame)
                pending.append(delta)
            else:
                raise NotImplementedError(f"{kind=}")
        if not pending:
            return None
        if len(pending) == 1:
            # No extension: hand back the stored frame, not a copy.
            return pending[0]
        return pd.concat(pending)


In [None]:
# Seed the wrapper with a three-row frame, then resolve the (single-entry) history.
d = DataFrameWithDeltas(
    pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
)
d.df

Unnamed: 0,a,b
0,1,a
1,2,b
2,3,c


In [None]:
# Record one extra row as an "extend" delta and resolve again.
d.update(
    pd.DataFrame({"a": [4], "b": ["d"]}),
    kind="extend",
)
d.df


Unnamed: 0,a,b
0,1,a
1,2,b
2,3,c
0,4,d


In [None]:
# A "new" delta supersedes everything recorded before it when resolved.
d.update(
    pd.DataFrame({"a": [5], "b": ["e"]}),
    kind="new",
)
d.df

Unnamed: 0,a,b
0,5,e


In [None]:
# The full history is still there: every (kind, frame) pair recorded so far, in order.
d.deltas

[('new',
     a  b
  0  1  a
  1  2  b
  2  3  c),
 ('extend',
     a  b
  0  4  d),
 ('new',
     a  b
  0  5  e)]

## State with Deltas

literature_search / seed_data -> {conditions: […], observations: […], experimental_data: […], model: […], status:
"expected",
variables:
VariableCollection(…), kind: "new", source: "seed"}

experimentalist -> {conditions: […], status: "proposed", kind: "extend", source: "experimentalist"}. Note: the
experiment_runner only wants the "last" proposed conditions, so we need to be able to access the "last proposed
experiment".

experiment_runner -> {experimental_data: [...], conditions: view(data), observations: view(data), status:
“observed”, kind: "extend", source: "experiment_runner"}

theorist -> {model: TheModel(), (fitted_)data: [...], status: “fitted”, kind: "extend", source: "theorist"}

Open question: can "kind" just be the "extension" function itself, rather than an abstract name?

Idea: we store all these results as "deltas":
all_data = [seed_data, experimentalist_result_1, experiment_runner_result_1, theorist_result_1, ... ]
... and then resolve the deltas whenever we need a particular value.


In [None]:
from autora.workflow.state.delta import State, StateDelta

In [None]:
# Adding an "extend" delta to a State holding a DataFrame.
(
    State(data=pd.DataFrame({"a": [1], "b": ["f"]}))
    + StateDelta(kind="extend", data=pd.DataFrame({"a": [2], "b": ["s"]}))
)

State(data=   a  b
0  1  f
1  2  s)

In [None]:
# Adding a "replace" delta to a State holding a DataFrame.
(
    State(data=pd.DataFrame({"a": [1], "b": ["f"]}))
    + StateDelta(kind="replace", data=pd.DataFrame({"a": [2], "b": ["s"]}))
)

State(data=   a  b
0  2  s)

In [None]:
# Deltas chain left to right: one "replace" followed by two "extend"s.
(
    State(data=pd.DataFrame({"a": [1]}))
    + StateDelta(kind="replace", data=pd.DataFrame({"a": [2]}))
    + StateDelta(kind="extend", data=pd.DataFrame({"a": [3]}))
    + StateDelta(kind="extend", data=pd.DataFrame({"a": [4]}))
)

State(data=   a
0  2
1  3
2  4)

In [None]:
# Check the concrete type np.array produces before trying arrays as State data below.
type(np.array((1,2,3)))

numpy.ndarray

In [None]:
# A State can also be constructed around a (1 x 3) numpy array instead of a DataFrame.
State(data=np.array([(1,2,3)]))

State(data=array([[1, 2, 3]]))

In [None]:
# Adding an "extend" delta to a State holding a numpy array.
(
    State(data=np.array([(1, 2, 3)]))
    + StateDelta(kind="extend", data=np.array([(4, 5, 6)]))
)

State(data=array([[1, 2, 3],
       [4, 5, 6]]))

In [None]:
# An "extend" followed by a "replace" on array data.
(
    State(data=np.array([(1, 2, 3)]))
    + StateDelta(kind="extend", data=np.array([(4, 5, 6)]))
    + StateDelta(kind="replace", data=np.array([(7, 8, 9)]))
)

State(data=array([[7, 8, 9]]))

In [None]:
# "extend", then "replace", then "extend" again on array data.
(
    State(data=np.array([(1, 2, 3)]))
    + StateDelta(kind="extend", data=np.array([(4, 5, 6)]))
    + StateDelta(kind="replace", data=np.array([(7, 8, 9)]))
    + StateDelta(kind="extend", data=np.array([(10, 11, 12)]))
)

State(data=array([[ 7,  8,  9],
       [10, 11, 12]]))

In [None]:
from functools import reduce

# Fold the deltas onto the starting State with "+", left to right.
# The State is passed as reduce's initial value rather than as the first list element.
reduce(
    lambda state, delta: state + delta,
    [
        StateDelta(kind="replace", data=pd.DataFrame({"a": [2]})),
        StateDelta(kind="extend", data=pd.DataFrame({"a": [3]})),
        StateDelta(kind="extend", data=pd.DataFrame({"a": [4]})),
    ],
    State(data=pd.DataFrame({"a": [1]})),
)

State(data=   a
0  2
1  3
2  4)

In [None]:
# Baseline: the built-in sum adds up the items of an iterable of numbers.
sum([1,2,3])

6

In [None]:
# sum(iterable, start) computes start + item for each item in turn, so the deltas
# go in the iterable and the State is the start value. Passing the State as the
# first argument fails because sum tries to iterate over it.
sum(
    [StateDelta(kind="replace", data=pd.DataFrame({"a": [2]}))],
    State(data=pd.DataFrame({"a": [1]})),
)

TypeError: 'State' object is not iterable

## Generic State and StateDelta

In [None]:
class State:
    """Sketch: a state described by an initial value plus a list of deltas (annotations only)."""

    initial: Any
    # Quoted because StateDelta is defined below and State is still being defined here.
    deltas: List["StateDelta[State]"]


S = TypeVar("S")


# A TypeVar instance cannot be used as a base class; a class is made generic
# over S by inheriting from Generic[S].
class StateDelta(Generic[S]):
    """Sketch: a change carrying a value of type ``S`` (annotations only)."""

    value: S
    # Presumably how `value` is combined with the existing state, mirroring the
    # "replace"/"extend" kinds used with the imported StateDelta earlier in the notebook.
    kind: Literal["replace", "extend"]


TypeError: TypeVar(name, constraint, ...): constraints must be types. Got (~S,).

In [None]:
# The type variable is bound to a name matching its declared name ("S"), which
# also avoids rebinding the notebook-level name `State`.
S = TypeVar("S")


class StateWithDeltas(Generic[S]):
    """Sketch: an initial value plus the deltas applied to a state of type ``S`` (annotations only)."""

    initial: Any
    # Quoted because StateDelta is defined below.
    deltas: List["StateDelta[S]"]


class StateDelta(Generic[S]):
    """Sketch: a change carrying a value of type ``S`` (annotations only)."""

    value: S
    # Literal needs at least one option; these are the two kinds used with the
    # imported StateDelta earlier in the notebook.
    kind: Literal["replace", "extend"]


SyntaxError: invalid syntax (2210526089.py, line 9)

In [None]:
# The sketch cells above bind the names `State` and `StateDelta` themselves, so
# the library classes are imported under aliases and the aliases are used for
# every term of the chain.
from autora.workflow.state.delta import State as StateImport
from autora.workflow.state.delta import StateDelta as StateDeltaImport

(
    StateImport(data=np.array([(1, 2, 3)]))
    + StateDeltaImport(kind="extend", data=np.array([(4, 5, 6)]))
    + StateDeltaImport(kind="replace", data=np.array([(7, 8, 9)]))
    + StateDeltaImport(kind="extend", data=np.array([(10, 11, 12)]))
)

State(data=array([[ 7,  8,  9],
       [10, 11, 12]]))