# Array

In [69]:
class Arr:
    """A minimal, NumPy-like array that keeps its values in a flat Python list.

    Every item is cast to one common ``dtype`` (``int``, ``float`` or ``str``,
    as chosen by the module-level ``dtype`` function).  An ``Arr`` is 1-D when
    created and can be turned into a 2-D array with ``reshape``; ``data`` stays
    flat in both cases and ``shape`` says how to read it, row by row.

    Attributes:
        dtype: The common type every item was cast to.
        data: Flat list holding the items.
        size: Total number of items (``len(data)``).
        shape: ``(size,)`` for 1-D arrays, ``(rows, cols)`` for 2-D arrays.
        ndim: Number of dimensions (``len(shape)``).
    """

    def __init__(self, data):
        """Build a 1-D array from any iterable of values."""
        data = list(data)  # Materialise once: `data` may be a one-shot iterator
        self.dtype = dtype(data)
        self.data = [self.dtype(item) for item in data]
        self.size = len(self.data)  # len(arr) reports shape[0], so keep the item count here
        self.shape = (self.size,)
        self.ndim = len(self.shape)

    def __str__(self):
        """Return ``"Arr: \\n"`` followed by the list (1-D) or one text row per array row (2-D)."""
        ret = "Arr: \n"  # We will add more strings to this "main" string
        if self.ndim < 2:
            ret += str(self.data)
        else:
            rows, cols = self.shape
            for i, val in enumerate(self.data):
                ret += str(val).center(6)  # Pad every value to a 6-character column
                if i % cols == cols - 1:  # i is the last column of its row
                    ret += '\n'

        return ret

    __repr__ = __str__

    def reshape(self, rows, cols=None):
        """Return a new ``Arr`` with the same data and a different shape.

        Args:
            rows: Number of rows, or ``-1`` to infer it from ``cols``.
            cols: Number of columns, ``-1`` to infer it from ``rows``, or
                ``None`` to get a 1-D array of ``rows`` items.

        Raises:
            AssertionError: If the requested shape does not hold exactly
                ``size`` items, or if both ``rows`` and ``cols`` are ``-1``.
        """
        ret = Arr(self.data)
        if cols is None:
            # Validate through the 2-D path, then flatten the shape back to 1-D
            ret = ret.reshape(rows, 1)
            ret.shape = (ret.shape[0],)
            ret.ndim = len(ret.shape)
            return ret
        # Without this guard (-1, -1) slips through: (-1) * (-size) == size
        assert not (rows == -1 and cols == -1), "only one of rows and cols can be -1"
        if cols == -1:
            assert self.size % rows == 0, f"rows must be divide {self.size} without a remainder"
            cols = self.size // rows
        elif rows == -1:
            assert self.size % cols == 0, f"cols must be divide {self.size} without a remainder"
            rows = self.size // cols
        assert rows * cols == len(self.data), (f"cannot reshape data with {self.size}"
                                               f" values to shape ({rows},{cols})")
        ret.shape = (rows, cols)
        ret.ndim = len(ret.shape)
        return ret

    def __getitem__(self, items):
        """Delegate indexing and slicing to the module-level ``arr_getter``."""
        return arr_getter(self, items)

    def __len__(self):
        """Return the size of the first dimension (items for 1-D, rows for 2-D)."""
        return self.shape[0]

    def __iter__(self):
        """Iterate over the flat data, regardless of shape."""
        return iter(self.data)

    ### Math
    def sum(self):
        """Return the sum of all items."""
        return sum(self)

    def count(self):
        """Return the total number of items.

        Uses ``size`` rather than ``len(self)``: for a 2-D array ``len`` is the
        number of rows, which would make ``mean`` divide by the wrong count.
        """
        return self.size

    def mean(self):
        """Return ``sum() / count()``."""
        return self.sum() / self.count()

    ### Arithmetic

    def __add__(self, other):
        """Element-wise ``self + other`` via ``zip_apply``."""
        return zip_apply(self, other, self.dtype.__add__)

    def __sub__(self, other):
        """Element-wise ``self - other`` via ``zip_apply``."""
        return zip_apply(self, other, self.dtype.__sub__)

    def __mul__(self, other):
        """Element-wise ``self * other`` via ``zip_apply``."""
        return zip_apply(self, other, self.dtype.__mul__)

    def __truediv__(self, other):
        """Element-wise ``self / other`` via ``zip_apply``."""
        return zip_apply(self, other, self.dtype.__truediv__)

    def __abs__(self):
        """Return a new 1-D ``Arr`` holding the absolute value of every item."""
        return Arr(map(self.dtype.__abs__, self.data))


def zip_apply(left, right, f):
    """Combine two arrays item by item with the binary function ``f``.

    Args:
        left: Left operand (iterable with a length).
        right: Right operand (iterable with a length).
        f: Function called as ``f(left_item, right_item)`` for each pair.

    Returns:
        A new ``Arr`` built from the results.

    Raises:
        AssertionError: If the operands differ in ``len`` or in ``dtype``.
    """
    # Guard 1: both operands report the same length
    same_length = len(left) == len(right)
    assert same_length, 'arrays are not of same shape'
    # Guard 2: both operands resolve to the same dtype
    same_dtype = dtype(left) == dtype(right)
    assert same_dtype, 'Arrays are not of same dtype'
    # Both guards passed, so pair the items up and apply f
    pairs = zip(left, right)
    return Arr([f(*pair) for pair in pairs])


def dtype(obj):
    """Return the narrowest common type (``int`` < ``float`` < ``str``) for the items of ``obj``.

    Any ``str`` item makes the result ``str``; otherwise any ``float`` item
    makes it ``float``; otherwise (including an empty ``obj``) it is ``int``.
    Only exact types are considered, not subclasses.
    """
    seen_float = False
    for element in obj:
        kind = type(element)
        if kind == str:
            # str is the widest type, nothing later can change the answer
            return str
        if kind == float:
            # Remember the float, but keep scanning in case a str follows
            seen_float = True
    return float if seen_float else int

def indices2len(start, stop, step):
    """Return how many positions ``start, start + step, ...`` lie below ``stop``.

    Intended for the ``(start, stop, step)`` triple returned by
    ``slice.indices``.

    Args:
        start: First position.
        stop: Exclusive upper bound.
        step: Distance between positions; must not be negative.

    Returns:
        The number of selected positions, never less than 0.  A ``step`` of 0
        yields 1.

    Raises:
        ValueError: If ``step`` is negative (reversed slices are not supported).
    """
    if step > 0:
        # Clamp at 0: slice.indices may return stop < start (e.g. slice(5, 3)),
        # which would otherwise produce a negative length.
        return max(0, (stop - start - 1) // step + 1)
    if step == 0:
        return 1
    # No reversing
    raise ValueError("negative steps (reversed slices) are not supported")
        

def coord2idx(arr, row, col):
    """Translate a ``(row, col)`` coordinate into a position in the flat, row-by-row data."""
    n_cols = arr.shape[1]
    skipped = row * n_cols  # items in the full rows above `row`
    return skipped + col

def idx2coord(arr, idx):
    """Translate a position in the flat, row-by-row data into ``(row, col)``.

    This is the inverse of ``coord2idx``: both the row and the column are
    derived from the number of columns, ``arr.shape[1]``.
    """
    row, col = divmod(idx, arr.shape[1])
    return row, col

def arr_getter(arr, items):
    """Implement ``arr[items]`` for 1-D and 2-D arrays.

    Args:
        arr: The array being indexed (uses ``ndim``, ``shape`` and ``data``).
        items: A single int/slice, or a tuple with one slicer per dimension.

    Returns:
        For a 1-D array, whatever indexing the underlying list with the
        slicer returns.  For a 2-D array, the result of ``array_iterator``
        for the selected rows and columns.

    Raises:
        AssertionError: If more slicers than dimensions are given.
        ValueError: If a slice has a negative step (from ``indices2len``).
    """
    if not isinstance(items, tuple):  # got only one slicer
        items = (items,)
    assert arr.ndim >= len(items), f'More slicers ({len(items)}) than dimensions ({arr.ndim})'
    if arr.ndim == 1:
        return arr.data[items[0]]
    if arr.ndim == 2:
        if len(items) < 2:
            # Missing slicers mean "take everything" along that dimension
            return arr_getter(arr, items + (slice(None),))
        # Turn int positions into one-element slices.  `item + 1 or None`
        # keeps -1 working: slice(-1, 0) would be empty, while
        # slice(-1, None) selects the last row/column.
        items = tuple(
            slice(item, item + 1 or None) if isinstance(item, int) else item
            for item in items
        )
        r_start, r_stop, r_step = items[0].indices(arr.shape[0])
        c_start, c_stop, c_step = items[1].indices(arr.shape[1])
        new_shape = (indices2len(r_start, r_stop, r_step), indices2len(c_start, c_stop, c_step))
        return array_iterator(arr, new_shape, r_start, r_step, r_stop, c_step, c_stop, c_start)
    
def array_iterator(arr, new_shape, r_start, r_step, r_stop, c_step, c_stop, c_start):
    """Collect the items of a 2-D selection and return them as an ``Arr`` of shape ``new_shape``.

    Walks the selected columns of each selected row, left to right and top to
    bottom, reading each item from the flat ``arr.data``.

    Args:
        arr: Source array (uses ``shape`` and ``data``).
        new_shape: ``(rows, cols)`` of the selection.
        r_start, r_step, r_stop: Row range of the selection.
        c_step, c_stop, c_start: Column range of the selection.
    """
    wanted = new_shape[0] * new_shape[1]
    row, col = r_start, c_start  # Cursor starts at the top-left corner of the selection
    picked = []
    for _ in range(wanted):
        if col >= c_stop:
            # Ran past the last selected column: wrap to the first one, next row
            col = c_start
            row += r_step
        if row >= r_stop:
            # Ran past the last selected row: nothing left to collect
            break
        flat_position = row * arr.shape[1] + col  # Where (row, col) lives in the flat data
        try:
            picked.append(arr.data[flat_position])
        except IndexError:
            break
        col += c_step
    return Arr(picked).reshape(*new_shape)

# Index
An index is an object that labels the entries of a Series. We want it to have:
1. Values that represent the actual indices
2. A mapping between indices to numerical indices

In [167]:
from collections import defaultdict
class Idx:
    """Labels for a Series, plus a lookup from each label to its positions.

    Attributes:
        values: ``Arr`` holding the labels, in order.
        mapping: ``defaultdict(list)`` mapping each distinct label to the list
            of positions where it occurs.
    """

    def __init__(self, data):
        """Build the index from any iterable of labels."""
        self.values = Arr(data)
        self.mapping = defaultdict(list)
        # Walk the stored values, not `data`: a one-shot iterator has already
        # been consumed by Arr(data), and the Arr holds the labels after the
        # dtype cast, so keys and values stay consistent.
        for i, key in enumerate(self.values):
            self.mapping[key].append(i)

    def __len__(self):
        """Return the number of labels."""
        return len(self.values)

    def unique(self):
        """Return an ``Arr`` of the distinct labels, in order of first appearance."""
        return Arr(self.mapping)

    def __getitem__(self, item):
        """Return the label(s) at position or slice ``item``."""
        return self.values[item]

    def __str__(self):
        return f"Index with values: {self.unique()}"

In [168]:
# Repeated labels: every label maps to the list of all positions where it occurs
idx = Idx(list('abbabaacd'))
idx.mapping

defaultdict(list, {'a': [0, 3, 5, 6], 'b': [1, 2, 4], 'c': [7], 'd': [8]})

In [169]:
print (idx)

Index with values: Arr: 
['a', 'b', 'c', 'd']


# Series
A series is an object that has 3 things:
1. Array `Arr` of values
2. Index
3. Name

In [42]:
# `or` returns its right operand when the left one is falsy, so this evaluates to the new Idx
None or Idx(range(len(Arr(range(5)))))

<__main__.Idx at 0x10c9016a0>

In [123]:
class Srs:
    """A named array of values paired with an index of the same length.

    Attributes:
        values: ``Arr`` built from ``data``.
        index: The ``Idx`` labelling the values.
        name: Name of the series.
    """

    def __init__(self, data, name=None, index=None):
        """Create the series.

        Args:
            data: Iterable of values.
            name: Series name; any falsy value is replaced by ``"new_series"``.
            index: Index for the values; any falsy value is replaced by the
                positions ``0 .. len(values) - 1``.

        Raises:
            AssertionError: If the index and the values differ in length.
        """
        self.values = Arr(data)
        self.index = index if index else Idx(range(len(self.values)))
        assert len(self.values) == len(self.index)
        self.name = name if name else "new_series"

    def __len__(self):
        """Return the number of values."""
        return len(self.values)

## Iterate Over a Series
When iterating over a series we want to iterate along with the index. We will `zip` them together.

In [124]:
class Srs(Srs):
    """Extends ``Srs`` with iteration and a text rendering."""

    def __iter__(self):
        """Return an iterator of ``(label, value)`` pairs."""
        labels = self.index.values
        return zip(labels, self.values)

    def __str__(self):
        """Return the name line followed by one ``label value`` line per entry."""
        header = f"{self.name}: \n"
        # Iterating over self yields (label, value) pairs thanks to __iter__
        lines = [
            f"{str(label).center(6)}{str(value).center(6)}\n"
            for label, value in self
        ]
        return header + "".join(lines)

    __repr__ = __str__

In [125]:
idx = Idx(list('abcde'))
srs = Srs(range(5), 'foo', idx)
srs

foo: 
  a     0   
  b     1   
  c     2   
  d     3   
  e     4   

# Copying array behaviour with `__getattr__`
`__getattr__` is called only when normal attribute lookup fails, i.e. when the attribute does not exist. [Example](Supplemental%20-%20Minimals.ipynb#Get-Attr).  
If the series itself does not have the method, we want to copy the behaviour of the underlying array (`self.values`). The way to know whether the underlying array has the method is to call `hasattr`.  
`hasattr`, `getattr` and `setattr` all take the attribute name as a string, so they let us look up (and then call) a method by its name.

In [126]:
class Srs(Srs):
    """Extends ``Srs`` so that unknown attributes fall back to the underlying ``Arr``."""

    def __getattr__(self, item):
        """Return ``getattr(self.values, item)`` when the array has that attribute.

        Python calls this only after normal attribute lookup has failed.

        Raises:
            AttributeError: If the underlying array has no such attribute
                either, or if ``values`` itself has not been set yet.
        """
        # If `values` is the missing attribute (instance created without
        # running __init__), reading self.values below would re-enter
        # __getattr__ and recurse until RecursionError.
        if item != 'values' and hasattr(self.values, item):
            return getattr(self.values, item)
        raise AttributeError(
            f"'{type(self).__name__}' object has no attribute '{item}'"
        )

In [127]:
arr = Arr([10,5,2])
srs = Srs([10,5,2], 'foo')

In [128]:
arr.sum()

17

In [129]:
srs.sum()

17

In [130]:
# This will sadly not work: `+` looks for __add__ on the Srs type itself,
# so the __getattr__ fallback to the array is never consulted for operators.
srs + srs

TypeError: unsupported operand type(s) for +: 'Srs' and 'Srs'

# Arithmetic

In [153]:
def series_apply(left, right, f_name):
    """Apply the array method named ``f_name`` to the values of two series.

    Args:
        left: Series whose values provide the method and whose index is reused.
        right: Series whose values are passed to that method.
        f_name: Name of the ``Arr`` method to call, e.g. ``'__add__'``.

    Returns:
        A new ``Srs`` holding the result, indexed like ``left``.
    """
    # Both series are assumed to share the same index; aligning different
    # indices is not handled here.
    method = getattr(left.values, f_name)  # Look the method up by its name
    return Srs(method(right.values), index=left.index)

class Srs(Srs):
    """Extends ``Srs`` with element-wise arithmetic, routed through ``series_apply``."""

    def __add__(self, other):
        """Return ``self + other`` as a new series."""
        return series_apply(self, other, f_name='__add__')

    def __sub__(self, other):
        """Return ``self - other`` as a new series."""
        return series_apply(self, other, f_name='__sub__')

    def __mul__(self, other):
        """Return ``self * other`` as a new series."""
        return series_apply(self, other, f_name='__mul__')

    def __truediv__(self, other):
        """Return ``self / other`` as a new series."""
        return series_apply(self, other, f_name='__truediv__')

In [154]:
s1 = Srs([1,2,3])
s2 = Srs([30,20,10])

In [158]:
print (s1+s2)  # element-wise sum, printed through Srs.__str__
print (s1*s2)  # element-wise product
print ((s1-s2).mean())  # mean of [-29, -18, -7]; .mean() is resolved on the underlying Arr
print ((s1/s2).sum())  # 1/30 + 2/20 + 3/10


new_series: 
  0     31  
  1     22  
  2     13  

new_series: 
  0     30  
  1     40  
  2     30  

-18.0
0.43333333333333335


# `getitem` by index location


In [159]:
def listify(val):
    """Return ``val`` unchanged if it is a list, otherwise wrap it in a one-item list."""
    return val if isinstance(val, list) else [val]

In [172]:
class Srs(Srs):
    """Extends ``Srs`` with selection by index label."""

    def __getitem__(self, items):
        """Return the entries whose index label is in ``items``.

        Args:
            items: A single label, a list of labels, or ``slice(None)``
                (``srs[:]``), which returns the series itself.

        Returns:
            A new ``Srs`` with the same name holding every entry that carries
            one of the requested labels, ordered by position in this series.

        Raises:
            AssertionError: If a requested label is not in the index.
        """
        if items == slice(None):
            # To solve things like srs[:]
            return self
        mapping = self.index.mapping
        positions = []
        for item in listify(items):
            # Report the label that is actually missing, not the whole request
            assert item in mapping, f'{item} is not in the series index'
            positions.extend(mapping.get(item))

        positions.sort()
        vals = [self.values[i] for i in positions]
        new_index = [self.index[i] for i in positions]  # This is a bit overhead, but will suffice for now
        return Srs(vals, name=self.name, index=Idx(new_index))

In [171]:
idx = Idx(list('abede'))  # 'e' appears twice, at positions 2 and 4
srs = Srs(range(5), 'bar', idx)
print(srs[['e']])  # a repeated label returns every matching row
print(srs[['e', 'a']])  # rows come back in positional order, so 'a' is printed before 'e'

bar: 
  e     2   
  e     4   

bar: 
  a     0   
  e     2   
  e     4   



# Exercise: Implement `Srs.value_counts()`
[`pd.Series.value_counts()`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.value_counts.html) counts how many times each distinct value occurs in a series.

In [176]:
# Ex
class Srs(Srs):
    """Extends ``Srs`` with ``value_counts``."""

    def as_index(self):
        """Return an ``Idx`` built from this series' values."""
        # Hint
        return Idx(self.values)

    def value_counts(self):
        """Return a series mapping each distinct value to its number of occurrences.

        The result is named ``"<name> value counts"`` and is indexed by the
        distinct values, in order of first appearance.
        """
        # BOE
        positions_by_value = self.as_index().mapping
        labels = list(positions_by_value)
        # Each value occurs once per position recorded for it
        counts = [len(positions) for positions in positions_by_value.values()]
        return Srs(counts, name=f'{self.name} value counts', index=Idx(labels))
        # EOE

In [177]:
vc = Srs('abbbbabdbedb', name='letters')

In [178]:
print(vc)

letters: 
  0     a   
  1     b   
  2     b   
  3     b   
  4     b   
  5     a   
  6     b   
  7     d   
  8     b   
  9     e   
  10    d   
  11    b   



In [179]:
print(vc.value_counts())

letters value counts: 
  a     2   
  b     7   
  d     2   
  e     1   

