## Q1.

Add a `__setitem__` method to the Python linked list implementation from the lecture (this past Wednesday).

In the following code, I added a `__setitem__` method, along with some simple doctest examples that exercise it.

In [6]:
from doctest import run_docstring_examples as dtest
import numbers
import reprlib
class LL:
    """Singly linked list whose nodes are two-element lists ``[value, next]``.

    ``_headNode`` holds the first node, or ``None`` when the list is empty;
    the last node has ``None`` in its ``next`` slot.  Only non-negative
    integer indices are supported: negative or too-large indices raise
    ``IndexError`` and non-integer indices (e.g. slices) raise ``TypeError``.

    >>> A = LL()
    >>> A[0]
    Traceback (most recent call last):
        ...
    IndexError: trying to index an empty LL
    >>> A.insert_front(1)
    >>> A[0]
    1
    >>> A.insert_back(2)
    >>> A[1]
    2
    >>> A
    LL([1,...])
    >>> myll = LL.from_components([1,2])
    >>> myll[1]
    1
    >>> len(myll)
    2
    >>> myll[2]
    Traceback (most recent call last):
        ...
    IndexError: LL index out of range
    >>> myll[0:1]
    Traceback (most recent call last):
        ...
    TypeError: LL indices must be integers
    >>> myll[0] = 3
    >>> myll[0]
    3
    >>> myll[1] = 4
    >>> myll[1]
    4
    >>> myll[2] = 5
    Traceback (most recent call last):
        ...
    IndexError: LL index out of range
    >>> myll[0:1] = 6
    Traceback (most recent call last):
        ...
    TypeError: LL indices must be integers
    >>> B = LL()
    >>> B[0] = 1
    Traceback (most recent call last):
        ...
    IndexError: trying to index an empty LL
    >>> B.insert_back(7)
    >>> B[0]
    7
    >>> len(LL.from_components([]))
    0
    """

    @classmethod
    def from_components(cls, components):
        """Build a list by pushing each component onto the front.

        Because every element is inserted at the front, the resulting list
        holds the components in REVERSE order (the last component becomes
        index 0).  An empty iterable yields an empty list.
        """
        inst = cls()
        for component in components:
            inst.insert_front(component)
        return inst

    def __init__(self, head=None):
        """Create an empty list, or a one-element list holding ``head``.

        ``head=None`` means "no initial element", so ``None`` cannot be
        stored through the constructor; use ``insert_front`` for that.
        """
        if head is None:
            self._headNode = None
        else:
            self._headNode = [head, None]

    def insert_front(self, element):
        """Prepend ``element``; works on empty and non-empty lists."""
        self._headNode = [element, self._headNode]

    def insert_back(self, element):
        """Append ``element`` by walking to the last node."""
        new_node = [element, None]
        if self._headNode is None:
            # Empty list: there is no tail to link from, so the new node
            # becomes the head.
            self._headNode = new_node
            return
        tail = self._headNode
        while tail[1] is not None:
            tail = tail[1]
        tail[1] = new_node

    def __repr__(self):
        """Return ``ClassName([first,...])`` showing only the first element."""
        class_name = type(self).__name__
        if self._headNode is None:
            components = ""
        else:
            components = reprlib.repr(self._headNode[0])
        return '{}([{},...])'.format(class_name, components)

    def __len__(self):
        """Count the nodes by traversing the whole list."""
        count = 0
        node = self._headNode
        while node is not None:
            count += 1
            node = node[1]
        return count

    def _node_at(self, index):
        """Return the node at ``index``; shared by item get and set.

        Raises:
            TypeError: ``index`` is not an integer.
            IndexError: the list is empty, or ``index`` is negative or
                past the last node.
        """
        class_name = type(self).__name__
        if not isinstance(index, numbers.Integral):
            msg = '{class_name} indices must be integers'
            raise TypeError(msg.format(class_name=class_name))
        node = self._headNode
        if node is None:
            msg = 'trying to index an empty {class_name}'
            raise IndexError(msg.format(class_name=class_name))
        position = 0
        # A negative index can never match a position, so skip the walk.
        while index >= 0 and node is not None:
            if position == index:
                return node
            node = node[1]
            position += 1
        msg = '{class_name} index out of range'
        raise IndexError(msg.format(class_name=class_name))

    def __getitem__(self, index):
        """Return the value stored at integer position ``index``."""
        return self._node_at(index)[0]

    def __setitem__(self, index, value):
        """Overwrite the value stored at integer position ``index``.

        The list is never extended: assigning past the end, or into an
        empty list, raises ``IndexError``.
        """
        self._node_at(index)[0] = value

In [9]:
# Run the doctest examples embedded in LL's class docstring, reporting each one.
dtest(LL, globals(), verbose = True)

Finding tests in NoName
Trying:
    A = LL()  
Expecting nothing
ok
Trying:
    A[0]
Expecting:
    Traceback (most recent call last):
        ...
    IndexError: trying to index an empty LL
ok
Trying:
    A.insert_front(1)
Expecting nothing
ok
Trying:
    A[0]
Expecting:
    1
ok
Trying:
    A.insert_back(2)
Expecting nothing
ok
Trying:
    A[1]
Expecting:
    2
ok
Trying:
    A
Expecting:
    LL([1,...])
ok
Trying:
    myll = LL.from_components([1,2])
Expecting nothing
ok
Trying:
    myll[1]
Expecting:
    1
ok
Trying:
    len(myll)
Expecting:
    2
ok
Trying:
    myll[2]
Expecting:
    Traceback (most recent call last):
        ...
    IndexError: LL index out of range
ok
Trying:
    myll[0:1]
Expecting:
    Traceback (most recent call last):
        ...
    TypeError: LL indices must be integers
ok
Trying:
    myll[0] = 3
Expecting nothing
ok
Trying:
    myll[0]
Expecting:
    3
ok
Trying:
    myll[1] = 4
Expecting nothing
ok
Trying:
    myll[1]
Expecting:
    4
ok
Trying:
    myll

## Q2.

An online mean and standard deviation algorithm.

Below is a function to generate a potentially infinite stream of 1-D data.

In [35]:
from random import normalvariate, random
from itertools import count
def make_data(m, stop=None):
    """Yield noisy samples centred on 1e9.

    Each sample is ``1.0e09`` plus a normal deviate with mean 0 whose
    standard deviation is itself random: ``m`` times a uniform draw.

    A falsy ``stop`` (``None`` or ``0``) gives an unbounded stream.
    Otherwise samples are produced for indices ``0..stop`` inclusive,
    i.e. ``stop + 1`` values in total.
    """
    index = 0
    while True:
        if stop and index > stop:
            return
        yield 1.0e09 + normalvariate(0, m*random())
        index += 1
        

Here is an implementation of an online mean algorithm; see http://www.johndcook.com/blog/standard_deviation/ and the article linked from it, http://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/. (Convince yourselves of the formulas.)

In [36]:
def online_mean(iterator):
    """Yield the running mean after each value drawn from ``iterator``.

    Uses the incremental update ``mean_n = mean_{n-1} + (x_n - mean_{n-1}) / n``
    so no past values need to be stored.  Yields nothing for empty input.
    """
    running_mean = 0
    for seen, sample in enumerate(iterator, start=1):
        running_mean = running_mean + (sample - running_mean) / seen
        yield running_mean

We use our generator functions to implement iterators:

In [37]:
# With stop=10 the generator yields for indices 0..10, i.e. 11 samples.
g = make_data(5, 10)
list(g)

[1000000001.3387809,
 999999997.7676711,
 999999994.4817712,
 999999995.3750169,
 999999998.5171537,
 1000000000.200478,
 1000000001.396329,
 1000000000.9860353,
 999999991.3612254,
 1000000002.5616441,
 999999998.2833676]

In [38]:
# Chain the generators: online_mean lazily consumes make_data and yields
# one running mean per sample; list() drains the whole pipeline.
g = online_mean(make_data(5, 100))
print(type(g))
list(g)

<class 'generator'>


[999999999.9980669,
 999999999.3561436,
 999999999.1999214,
 999999998.5204223,
 999999998.0037779,
 999999998.3088945,
 999999999.6059519,
 999999999.3397826,
 999999999.4964558,
 999999999.7687683,
 1000000000.0368434,
 1000000000.2324959,
 1000000000.3356546,
 1000000000.3643652,
 1000000000.2801886,
 1000000000.3053945,
 1000000000.1235442,
 1000000000.3862027,
 1000000000.3581358,
 1000000000.4920787,
 1000000000.3572879,
 1000000000.2109491,
 1000000000.1541876,
 1000000000.1481762,
 1000000000.418504,
 1000000000.4350363,
 1000000000.3886176,
 1000000000.4172977,
 1000000000.3899544,
 1000000000.3843002,
 1000000000.3659649,
 1000000000.3574309,
 1000000000.199853,
 1000000000.2845091,
 1000000000.2304503,
 1000000000.2061335,
 1000000000.178766,
 1000000000.225028,
 1000000000.3758626,
 1000000000.3974831,
 1000000000.4282348,
 1000000000.4050058,
 1000000000.4093843,
 1000000000.293729,
 1000000000.3169991,
 1000000000.3109092,
 1000000000.2863685,
 1000000000.2850714,
 100000

### 2.1

Implement the standard deviation algorithm as a generator function as

```python
def online_mean_dev(iterator):
    BLA BLA
    if n > 1:
        stddev = math.sqrt(dev_accum/(n-1))
        yield (n, value, mu, stddev)
```

In [56]:
# your code here
import math
def online_mean_dev(iterator):
    """Yield running statistics for each value after the first.

    Maintains the running mean and the accumulated sum of squared
    deviations incrementally, so the stream is processed in one pass.

    Yields:
        ``(n, value, mu, stddev)`` for every ``n > 1``, where ``stddev`` is
        the sample standard deviation (divisor ``n - 1``) of the first
        ``n`` values.  Nothing is yielded for the first value because the
        sample standard deviation is undefined for a single point.
    """
    mean = 0
    sq_dev_sum = 0
    for n, value in enumerate(iterator, start=1):
        prev_mean = mean
        mean = prev_mean + (value - prev_mean) / n
        # Product of the deviations from the new and the old mean.
        sq_dev_sum = sq_dev_sum + (value - mean) * (value - prev_mean)
        if n <= 1:
            continue
        yield (n, value, mean, math.sqrt(sq_dev_sum / (n - 1)))

Here we make a data stream of roughly 100000 elements and run this iterator on it (imagine running this on a time series being slowly read from disk).

In [57]:
# Both calls only create generators; no data is produced until
# data_with_stats is iterated.
data_with_stats = online_mean_dev(make_data(5, 100000))

## Q3.

Let's do Anomaly detection. Write a routine `is_ok`:

```python
def is_ok(level, t)
```

which takes a tuple like the one yielded by your code above and returns `True` if the value lies within `level` standard deviations of the mean, i.e. $|\text{value} - \mu| < \text{level}\cdot\sigma$.

In [58]:
def is_ok(level, t):
    """Return True when the value in ``t`` is strictly within ``level`` sigmas.

    ``t`` is indexed as ``t[1]`` = value, ``t[2]`` = mean and
    ``t[3]`` = standard deviation.  A value exactly ``level`` standard
    deviations from the mean is NOT ok.
    """
    distance = abs(t[1] - t[2])
    tolerance = t[3] * level
    return bool(distance < tolerance)

We use this function to create a predicate passed through to `itertools.filterfalse` which is then used to obtain an iterator on the anomalies.

In [59]:
from itertools import filterfalse
def pred(t):
    """Return True when tuple ``t`` passes the 5-sigma ``is_ok`` check."""
    return is_ok(5, t)


# filterfalse keeps exactly the tuples for which pred is false: the anomalies.
anomalies = filterfalse(pred, data_with_stats)

We materialize the anomalies...

In [60]:
# Drain the lazy pipeline and collect every flagged (n, value, mu, stddev) tuple.
list(anomalies)#materialize

[(4768, 999999984.68036, 1000000000.0127765, 2.922086981522264),
 (7047, 1000000014.8247367, 1000000000.0463713, 2.93124597705474),
 (10392, 1000000015.3078771, 1000000000.040401, 2.935966304016386),
 (12574, 999999985.1463597, 1000000000.0132565, 2.9233181790765195),
 (14209, 999999985.1987454, 1000000000.0058945, 2.9207681467643574),
 (15722, 999999983.1718143, 1000000000.007707, 2.920155027230007),
 (21022, 999999985.2008264, 1000000000.0056747, 2.9182422791432208),
 (22261, 1000000015.7997742, 1000000000.0023355, 2.9128400720095846),
 (23794, 1000000014.9619192, 1000000000.0049596, 2.903872410625253),
 (29934, 999999982.4327077, 1000000000.0037544, 2.908835621319776),
 (33823, 1000000017.2040555, 999999999.9990014, 2.8977439893141375),
 (36910, 1000000014.5597478, 999999999.9988799, 2.8995012663531803),
 (40798, 1000000014.791176, 1000000000.0010875, 2.896351770421046),
 (45604, 1000000015.711511, 999999999.9922448, 2.89439541843344),
 (46343, 999999985.0274818, 999999999.9917256, 

## To think of, but not hand in

What kinds of anomalies will this algorithm pick up? What kinds would a shorter "window" of anomaly detection, like 100 points around the time in question, pick up? How might you create an algorithm which does window-based averaging? (Hint: the window size is small compared to the time series size.)

Finally, think a bit about how you might implement all of this in a production environment. Remember that data streaming in might get backed up while you handle an anomaly.

(Some inspiration might accrue if you look at the docs for `collections.deque`).