In [2]:
# Execute the Array notebook inside this kernel. `Arr` is used throughout this
# notebook but is not defined in it, so it presumably comes from this run.
%run "./obj/Array.ipynb"

# Index
An index is an object that indexes a Series. It has:
1. Values that represent the index labels.
2. A mapping from each label to the numerical locations where it appears.

In [3]:
from collections import defaultdict
class Idx:
    """Index of a series: the labels plus a label -> positions lookup.

    Attributes:
        values: ``Arr`` built from ``data``, holding the labels in order.
        mapping: ``defaultdict(list)`` from each label to the list of
            positions at which it occurs in ``data``.
    """

    def __init__(self, data):
        """Store the labels and record every position of every label."""
        self.values = Arr(data)
        self.mapping = defaultdict(list)
        for position, label in enumerate(data):
            # A label may repeat, so positions accumulate in a list.
            self.mapping[label].append(position)

    def __len__(self):
        """Return the number of labels (repeats included)."""
        return len(self.values)

    def unique(self):
        """Return an ``Arr`` of the distinct labels, taken from the mapping keys."""
        return Arr(self.mapping.keys())

    def __getitem__(self, item):
        """Delegate item access to the underlying ``Arr`` of labels."""
        return self.values.__getitem__(item)

    def __str__(self):
        """Describe the index by its distinct labels."""
        distinct = self.unique()
        return f"Index with values: {distinct}"

In [4]:
# Labels repeat on purpose: the mapping should list every position of each label.
idx = Idx(list('abbabaacd'))
idx.mapping

defaultdict(list, {'a': [0, 3, 5, 6], 'b': [1, 2, 4], 'c': [7], 'd': [8]})

In [5]:
# Sanity check that `Arr` is available in this kernel (it is not defined in this notebook).
Arr

__main__.Arr

In [6]:
# print() goes through Idx.__str__, which reports only the distinct labels.
print (idx)

Index with values: Arr: 
['a', 'b', 'c', 'd']


# Series
A series is an object that has 3 things:
1. Array `Arr` of values
2. Index
3. Name

In [7]:
class Srs:
    """A series: an ``Arr`` of values, an ``Idx`` of labels, and a name.

    Args:
        data: Anything ``Arr`` accepts; becomes ``self.values``.
        name: Series name. A falsy name (``None``, ``""``) falls back to
            ``"new_series"``.
        index: ``Idx`` labelling the values. When omitted, positions
            ``0 .. len(values) - 1`` are used as labels.

    Raises:
        AssertionError: If the index and the values differ in length.
    """

    def __init__(self, data, name=None, index=None):
        self.values = Arr(data)
        # Compare against None rather than relying on truthiness: Idx defines
        # __len__, so an explicitly supplied *empty* index is falsy and
        # `index or ...` would silently swap it for a default one, hiding a
        # length mismatch from the check below.
        if index is None:
            index = Idx(range(len(self.values)))
        self.index = index
        assert len(self.values) == len(self.index), \
            "values and index must have the same length"
        self.name = name or "new_series"

    def __len__(self):
        """Return the number of values in the series."""
        return len(self.values)

## Iterate Over a Series
When iterating over a series we want to iterate along with the index. We will `zip` them together.

In [8]:
class Srs(Srs):
    """Extends ``Srs`` with iteration and a textual representation."""

    def __iter__(self):
        """Iterate over ``(label, value)`` pairs, index and values zipped together."""
        return zip(self.index.values, self.values)

    def __str__(self):
        """Render the name on the first line, then one ``label value`` row per entry."""
        header = f"{self.name}: \n"
        # Iterating over self yields (label, value) pairs thanks to __iter__.
        # Each cell is centred in a 6-character column.
        rows = [
            str(label).center(6) + str(value).center(6) + '\n'
            for label, value in self
        ]
        return header + ''.join(rows)

    # The notebook's bare-expression display uses repr, so reuse the same text.
    __repr__ = __str__

In [9]:
# Explicit labels and name; the bare `srs` on the last line is displayed
# through __repr__, which is aliased to __str__.
idx = Idx(list('abcde'))
srs = Srs(range(5), 'foo', idx)
srs

foo: 
  a     0   
  b     1   
  c     2   
  d     3   
  e     4   

# Copying array behaviour with `__getattr__`
`__getattr__` is called only when the attribute is not found through normal lookup. [Example](Part%209%20-%20Minimals.ipynb#Get-Attr).  
If a method does not exist on the series, we want to copy the behaviour of the underlying array. The way to know whether the underlying array has the method is by calling `hasattr`.  
`getattr`, `hasattr` and `setattr` all access attributes (including methods) by name, given as a string.

In [10]:
class Srs(Srs):
    """Extends ``Srs`` so that unknown attributes fall through to ``self.values``."""

    def __getattr__(self, item):
        """Resolve attributes missing on the series against the underlying ``Arr``.

        Python calls this only after normal attribute lookup has failed.

        Raises:
            AttributeError: If neither the series nor ``self.values`` has ``item``.
        """
        # If `values` itself is missing (e.g. an instance created without
        # __init__, which is what copy/pickle do), reading self.values below
        # would re-enter this method and recurse without bound.
        if item == "values":
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute 'values'"
            )
        try:
            return getattr(self.values, item)
        except AttributeError:
            # Report the failure against the series, naming the attribute.
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{item}'"
            ) from None

In [11]:
# Same data as a raw Arr and as a Srs, to compare how `sum` resolves on each.
arr = Arr([10,5,2])
srs = Srs([10,5,2], 'foo')

In [12]:
# Baseline: `sum` called directly on the Arr.
arr.sum()

17

In [13]:
# Srs defines no `sum`; the call is resolved by __getattr__ on the underlying Arr.
srs.sum()

17

In [14]:
# This will sadly not work: Srs does not define __add__ yet, and Python does
# not consult __getattr__ when it looks up operator methods implicitly.
srs + srs

TypeError: unsupported operand type(s) for +: 'Srs' and 'Srs'

# Arithmetic

In [15]:
def series_apply(left, right, f_name):
    """Combine two series by calling a named method of the left values on the right values.

    Args:
        left: Series whose ``values`` provides the method and whose index is reused.
        right: Series whose ``values`` is passed as the method's argument.
        f_name: Name of the method to call, e.g. ``'__add__'``.

    Returns:
        A new ``Srs`` holding the result, labelled with ``left.index`` and
        carrying the default series name.
    """
    # Both series are assumed to share the same index; no alignment is done.
    # getattr fetches the method by its string name.
    combined = getattr(left.values, f_name)(right.values)
    return Srs(combined, index=left.index)

class Srs(Srs):
    """Extends ``Srs`` with element-wise ``+``, ``-``, ``*`` and ``/`` between series."""

    def _arith(self, other, f_name):
        """Apply the operator method ``f_name`` to ``self`` and ``other``.

        Returns ``NotImplemented`` when ``other`` has no ``values`` attribute,
        so Python can try the reflected operation or raise its usual
        ``TypeError`` instead of leaking an ``AttributeError`` from
        ``series_apply``.
        """
        if not hasattr(other, 'values'):
            return NotImplemented
        return series_apply(self, other, f_name)

    def __add__(self, other):
        """Return ``self + other`` as a new series."""
        return self._arith(other, '__add__')

    def __sub__(self, other):
        """Return ``self - other`` as a new series."""
        return self._arith(other, '__sub__')

    def __mul__(self, other):
        """Return ``self * other`` as a new series."""
        return self._arith(other, '__mul__')

    def __truediv__(self, other):
        """Return ``self / other`` as a new series."""
        return self._arith(other, '__truediv__')

In [16]:
# Two series of equal length with the default positional index, so no alignment is needed.
s1 = Srs([1,2,3])
s2 = Srs([30,20,10])

In [18]:
# Results of + and * are series, printed through Srs.__str__.
print(s1 + s2)
print(s1 * s2)
# mean/sum are not defined on Srs; they resolve on the underlying Arr.
print((s1 - s2).mean())
print((s1 / s2).sum())


new_series: 
  0     31  
  1     22  
  2     13  

new_series: 
  0     30  
  1     40  
  2     30  

-18.0
0.43333333333333335


# `getitem` by index location


In [19]:
def listify(val):
    """Return ``val`` unchanged if it is a list, otherwise wrap it in a one-element list.

    Only ``list`` instances pass through; strings, tuples and other
    iterables are treated as a single value.
    """
    return val if isinstance(val, list) else [val]

In [21]:
class Srs(Srs):
    """Extends ``Srs`` with selection by index label."""

    def __getitem__(self, items):
        """Select the entries whose label is ``items`` (or is in the list ``items``).

        Args:
            items: A single label, a list of labels, or ``slice(None)``
                (i.e. ``srs[:]``), which returns the series itself.

        Returns:
            A new ``Srs`` with the same name containing every entry that
            carries one of the requested labels, ordered by original position.

        Raises:
            AssertionError: If a requested label is not in the index.
        """
        if items == slice(None):
            # To solve things like srs[:]
            return self

        mapping = self.index.mapping
        positions = []
        for item in listify(items):
            # Name the offending label, not the whole request. Membership is
            # tested first because the mapping is a defaultdict.
            assert item in mapping.keys(), f'{item} is not in the series index'
            positions.extend(mapping.get(item))

        positions.sort()
        vals = [self.values[i] for i in positions]
        # Rebuilding the labels one by one is a bit of overhead, but will suffice for now.
        new_index = [self.index[i] for i in positions]
        return Srs(vals, name=self.name, index=Idx(new_index))

In [24]:
# The label 'e' appears twice, so selecting it returns two rows.
idx = Idx(list('abede'))
srs = Srs(range(5), 'bar', idx)
print(srs['e']) # A single label works thanks to listify
print(srs[['e', 'a']]) # Rows come back in original position order, not request order

bar: 
  e     2   
  e     4   

bar: 
  a     0   
  e     2   
  e     4   



# Exercise: Implement `Srs.value_counts()`
[`pd.Series.value_counts()`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.value_counts.html)

In [34]:
# Ex
class Srs(Srs):
    """Extends ``Srs`` with ``value_counts``."""

    def as_index(self):
        """Return an ``Idx`` built from this series' values."""
        # Hint
        return Idx(self.values)

    def value_counts(self):
        """Count how many times each distinct value occurs.

        Returns:
            A new ``Srs`` named ``"<name> value counts"`` whose index holds
            the distinct values (in the order of the index mapping) and whose
            values are their occurrence counts.
        """
        # BOE
        # The Idx mapping already groups positions by value; the count of a
        # value is simply how many positions it has.
        occurrences = self.as_index().mapping
        keys = list(occurrences.keys())
        counts = [len(positions) for positions in occurrences.values()]
        return Srs(counts, name=f'{self.name} value counts', index=Idx(keys))
        # EOE

In [35]:
# A string is passed as data; judging by the counts printed below, each character becomes one value.
vc = Srs('abbbbabdbedb', name='letters')

In [37]:
# The result is itself a Srs: distinct values as the index, their counts as the values.
print(vc.value_counts())

letters value counts: 
  a     2   
  b     7   
  d     2   
  e     1   

