In [None]:
#default_exp types

# Types
> PyFrames are plain-Python-compatible tabular data. Every entry is a Python object. The canonical form for a PyFrame is a list of records.
> Refs, Ranges, and operators are the building blocks used to represent parsed cell references and formulas.

In [1]:
#export
from typing import List, Dict, Any
from collections import defaultdict

class PyFrame:
    """Tabular data held as a list of records (one ``dict`` per row).

    Every row must have the same keys in the same order.  Attributes that are
    not defined here are forwarded to the underlying list (``self.data``).

    Indexers:
        ``frame[row, col]``  -- a single cell (get / set)
        ``frame.col[name]``  -- one column as a list (get / set)
        ``frame.cols[names]``-- several columns, as a list of sub-records
        ``frame.row[i]``     -- one record (get / set)
        ``frame.rows[idxs]`` -- several records, as ``{i: record}``
    """

    def __init__(self, rows: List[Dict[str, Any]]):
        # Key *tuples* are compared, so the check is order-sensitive; an empty
        # ``rows`` produces an empty set and is rejected too.
        assert len(set(tuple(r.keys()) for r in rows)) == 1, 'Mismatched row keys or empty data.'
        self.data = rows

    def __getitem__(self, index):
        """Return the cell at ``index == (row, col)``."""
        row, col = index
        return self.data[row][col]

    def __setitem__(self, index, value):
        """Overwrite the cell at ``index == (row, col)``."""
        row, col = index
        self.data[row][col] = value

    @property
    def columns(self):
        """Column names, taken from the first record."""
        return self.data[0].keys()

    @property
    def col(self):
        """Indexer for reading or assigning a whole column by name."""
        class ColIndexer:
            def __getitem__(self_idx, idx):
                return [r[idx] for r in self]

            def __setitem__(self_idx, idx, value):
                assert len(value) == len(self.data), f'length of values to set should be {len(self.data)}'
                # Assigning an unknown name adds a new column to every row.
                for r, v in zip(self, value):
                    r[idx] = v
        return ColIndexer()

    @property
    def cols(self):
        """Indexer returning, per row, a record restricted to the given columns."""
        class MultipleColumnsIndexer:
            def __getitem__(self_idx, indices):
                return [{col: r[col] for col in indices} for r in self]
        return MultipleColumnsIndexer()

    @property
    def row(self):
        """Indexer for reading or replacing a single record by position."""
        class RowIndexer:
            def __getitem__(self_idx, idx):
                return self.data[idx]

            def __setitem__(self_idx, idx, value):
                assert not (missing := self.data[0].keys() - value.keys()), f'{missing} not in value to be set'
                self.data[idx] = value
        return RowIndexer()

    @property
    def rows(self):
        """Indexer returning ``{position: record}`` for several positions."""
        class MultipleRowsIndexer:
            def __getitem__(self_idx, indices):
                return {idx: self.data[idx] for idx in indices}
        return MultipleRowsIndexer()

    def __getattr__(self, name):
        # ``__getattr__`` only runs when normal lookup fails.  If ``data``
        # itself is missing (a bare instance, e.g. while being copied or
        # unpickled) forwarding would recurse forever, so fail cleanly.
        if name == 'data':
            raise AttributeError(name)
        return getattr(self.data, name)

    def __iter__(self):
        return iter(self.data)

    def __len__(self):
        return len(self.data)

    def __repr__(self):
        return repr(self.data)

    @classmethod
    def from_record_of_lists(cls, record: Dict[str, List]):
        """Build a frame from ``{column: [values...]}``; all lists must have equal length."""
        assert len(set(len(v) for v in record.values())) == 1, 'Mismatched list lengths.'
        return cls([dict(zip(record.keys(), v)) for v in zip(*record.values())])

    @classmethod
    def from_list_of_rows(cls, rows: List[List], header: List[str]):
        """Build a frame from positional rows plus a ``header`` of column names."""
        assert set(len(row) for row in rows) == {len(header)}, 'Mismatched list lengths.'
        return cls([dict(zip(header, row)) for row in rows])

    def set_index(self, idx):
        """Return an ``IndexedPyFrame`` keyed by one column or a tuple of columns.

        ``idx`` is a column name, or an iterable of column names (the key is
        then a tuple of the row's values in that order).  When several rows
        share a key, the last one wins.
        """
        if isinstance(idx, str):
            assert idx in self.data[0].keys(), f'{idx} not in data'
            data = {r[idx]: r for r in self.data}
        else:
            assert not (missing_keys := (set(idx) - self.data[0].keys())), f'{missing_keys} not in data'
            # The key is built from the row's *values*, not the column names.
            data = {tuple(r[col] for col in idx): r for r in self.data}
        return IndexedPyFrame(data)

    def collect_index(self, idx):
        """Return an ``IndexedPyFrame`` mapping each key to the list of rows having it.

        ``idx`` is interpreted as in ``set_index``; unlike ``set_index``, rows
        that share a key are all kept, in their original order.
        """
        data = defaultdict(list)
        if isinstance(idx, str):
            assert idx in self.data[0].keys(), f'{idx} not in data'
            for r in self:
                data[r[idx]].append(r)
        else:
            assert not (missing_keys := (set(idx) - self.data[0].keys())), f'{missing_keys} not in data'
            for r in self:
                data[tuple(r[col] for col in idx)].append(r)
        # Hand over a plain dict so that looking up a missing key raises
        # KeyError instead of silently inserting an empty group.
        return IndexedPyFrame(dict(data))


class IndexedPyFrame:
    """Records addressed by an index key instead of a list position.

    ``self.data`` maps each key either to one record (as built by
    ``PyFrame.set_index``) or to a list of records (as built by
    ``PyFrame.collect_index``).  Attributes not defined here are forwarded to
    that dict, and iteration yields the index keys.

    NOTE(review): ``frame[key, col]``, ``col`` and ``cols`` index straight into
    the stored value and therefore assume one record per key.
    """

    def __init__(self, rows: Dict[Any, Any]):
        assert len(set(tuple(r.keys()) for r in self._iter_records(rows))) == 1, 'Mismatched row keys.'
        self.data = rows

    @staticmethod
    def _iter_records(rows):
        """Yield every record in ``rows``, flattening list-valued groups."""
        for value in rows.values():
            if isinstance(value, list):
                yield from value
            else:
                yield value

    def __getitem__(self, index):
        """Return the cell at ``index == (key, col)``."""
        row, col = index
        return self.data[row][col]

    @property
    def columns(self):
        """Column names, taken from the first stored record."""
        # Keys of ``self.data`` are index values, so position 0 is meaningless
        # here; look at an actual record instead.
        return next(self._iter_records(self.data)).keys()

    @property
    def col(self):
        """Indexer for reading or assigning a column as ``{key: value}``."""
        class ColIndexer:
            def __getitem__(self_idx, idx):
                return {k: r[idx] for k, r in self.items()}

            def __setitem__(self_idx, idx, value):
                assert len(value) == len(self.data), f'length of values to set should be {len(self.data)}'
                assert not (missing := value.keys() - self.data.keys()), f'{missing} not in index'
                for k, v in value.items():
                    self.data[k][idx] = v
        return ColIndexer()

    @property
    def cols(self):
        """Indexer returning a list of ``{key: sub-record}`` single-entry dicts."""
        class MultipleColumnsIndexer:
            def __getitem__(self_idx, indices):
                return [{k: {col: r[col] for col in indices}} for k, r in self.items()]
        return MultipleColumnsIndexer()

    @property
    def row(self):
        """Indexer for reading or replacing the value stored under one key."""
        class RowIndexer:
            def __getitem__(self_idx, idx):
                return self.data[idx]

            def __setitem__(self_idx, idx, value):
                assert not (missing := self.columns - value.keys()), f'{missing} not in value to be set'
                self.data[idx] = value
        return RowIndexer()

    @property
    def rows(self):
        """Indexer returning ``{key: stored value}`` for several keys."""
        class MultipleRowsIndexer:
            def __getitem__(self_idx, indices):
                return {idx: self.data[idx] for idx in indices}
        return MultipleRowsIndexer()

    def __getattr__(self, name):
        # See PyFrame.__getattr__: avoid infinite recursion on bare instances.
        if name == 'data':
            raise AttributeError(name)
        return getattr(self.data, name)

    def __iter__(self):
        return iter(self.data)

    def __len__(self):
        return len(self.data)

    def __repr__(self):
        return repr(self.data)

In [None]:
f = PyFrame.from_record_of_lists({'a': [0, 1, 2], 'b': [3, 4, 5]})
f

[{'a': 0, 'b': 3}, {'a': 1, 'b': 4}, {'a': 2, 'b': 5}]

In [None]:
f.rows[(1, 2)]

{1: {'a': 1, 'b': 4}, 2: {'a': 2, 'b': 5}}

In [None]:
for r in f:
    print(r)

{'a': 0, 'b': 3}
{'a': 1, 'b': 4}
{'a': 2, 'b': 5}


In [None]:
len(f)

3

In [None]:
f_i = f.set_index('b')

In [None]:
f_i.rows[(3, 4)]

{3: {'a': 0, 'b': 3}, 4: {'a': 1, 'b': 4}}

In [None]:
f.col['c'] = [7, 8, 9]

In [None]:
f

[{'a': 0, 'b': 3, 'c': 7}, {'a': 1, 'b': 4, 'c': 8}, {'a': 2, 'b': 5, 'c': 9}]

# Parsing types

In [None]:
#export
class ParseError(ValueError):
    """Signals that a piece of text could not be parsed into the expected type."""

In [None]:
#export
import re
from ebb.util import colname_to_num, num_to_colname
from collections import namedtuple

# Matches a cell reference such as ``A4``, ``$A4``, ``A$4`` or ``$AB$12``:
# optional ``$`` (cfix), upper-case column letters (col), optional ``$`` (rfix)
# and a row number with no leading zero (row).  A raw string is used so that
# ``\$`` reaches the regex engine without being an invalid string escape.
re_ref = re.compile(r'^(?P<cfix>\$)?(?P<col>[A-Z]+)(?P<rfix>\$)?(?P<row>[1-9][0-9]*)$')

# Todo(Rik): Maybe worried about negative rows/cols? Input validation?
class Ref:
    """A single cell reference such as ``A4`` or ``$A$4``.

    Attributes:
        row: zero-based row (the text ``A4`` has ``row == 3``).
        column: the column as given to the constructor; ``from_string``
            stores the column letters.
        fixed_row / fixed_column: whether the row / column part carries a
            ``$`` marker.

    NOTE(review): ``colname_to_num`` / ``num_to_colname`` are imported next to
    this class but not used, and the notebook's own checks compare
    ``Ref.from_string('A4')`` with ``Ref(3, 0)``.  The column was probably
    meant to be numeric -- confirm the helpers' base before switching.
    """

    def __init__(self, row, column, fixed_row=False, fixed_column=False):
        self.row, self.column = row, column
        self.fixed_row, self.fixed_column = fixed_row, fixed_column

    def __eq__(self, other):
        # Defer to the other operand for foreign types instead of failing on
        # objects that have no ``__dict__``.
        if not isinstance(other, Ref):
            return NotImplemented
        return self.__dict__ == other.__dict__

    def __hash__(self):
        # The string form encodes all four attributes, so equal refs hash equal.
        return hash(self.to_string())

    def __repr__(self):
        return self.to_string()

    @classmethod
    def from_string(cls, s):
        """Parse text such as ``'$A4'`` into a ``Ref``.

        Raises:
            ParseError: if ``s`` does not match ``re_ref``.
        """
        m = re_ref.match(s)
        if not m:
            raise ParseError(f'{s} is not a reference')
        # The pattern only accepts rows >= 1, so the zero-based row is >= 0.
        row, fixed_row = int(m['row']) - 1, bool(m['rfix'])
        column, fixed_column = m['col'], bool(m['cfix'])
        return cls(row, column, fixed_row, fixed_column)

    def to_string(self):
        """Render the reference back to text, e.g. ``'A$4'``."""
        col = f'{"$" if self.fixed_column else ""}{self.column}'
        # The row's ``$`` depends on ``fixed_row`` (not ``fixed_column``).
        row = f'{"$" if self.fixed_row else ""}{self.row+1}'
        return col + row

# Record types for operator applications: the operator token plus its
# operand(s).  Presumably these are the nodes of the parsed formula tree --
# TODO confirm against the parser.
InfixOp = namedtuple('InfixOp', ['op', 'left', 'right'])
PrefixOp = namedtuple('PrefixOp', ['op', 'arg'])
PostfixOp = namedtuple('PostfixOp', ['op', 'arg'])
# We write operators in precedence order
# Maps each operator token to the record type used for it.  Several keys are
# written as escaped regex fragments (r'\^', r'\*', r'\+', r'\&'), presumably
# because they get spliced into a pattern -- verify against the caller.
# NOTE(review): '-' appears twice in this literal.  A dict keeps the position
# of the first occurrence but the value of the last one, so '-' maps to
# InfixOp only and the PrefixOp (unary minus) entry is silently dropped.
operators = {
    # Todo(Rik): support for intersection op (which idiot ever thought that should be a space)
    ':': InfixOp, # ' ': InfixOp
    '-': PrefixOp, '%': PostfixOp, r'\^': InfixOp,
    r'\*': InfixOp, '/': InfixOp, r'\+': InfixOp,
    '-': InfixOp, r'\&': InfixOp, '=': InfixOp,
    '<': InfixOp, '>': InfixOp, '<=': InfixOp,
    '>=': InfixOp, '<>': InfixOp, ',': InfixOp, 
}

# Record type for a function call: the function's name and its arguments.
Function = namedtuple('Function', ['name', 'args'])

In [None]:
import pytest

with pytest.raises(ParseError):
    Ref.from_string('foo')
with pytest.raises(ParseError):
    Ref.from_string('A0')
assert Ref.from_string('A4') == Ref(3, 0, fixed_row=False, fixed_column=False)
assert Ref.from_string('$A4') == Ref(3, 0, fixed_row=False, fixed_column=True)
assert Ref.from_string('A$4') == Ref(3, 0, fixed_row=True, fixed_column=False)
assert Ref.from_string('$A$4') == Ref(3, 0, fixed_row=True, fixed_column=True)
assert Ref(10, 10) == Ref(10, 10, fixed_row=False, fixed_column=False)
assert Ref.from_string('A4').to_string() == 'A4'