In [35]:
# Initialize Otter
import otter
grader = otter.Notebook("ps5.ipynb")

### Question 1
In this question you will implement a class called `Matrix`. A matrix will be modeled as a collection of vectors all of the same dimension, and you may use the `Vector` class that we explored in class (included with this problem set as `vector.py`) to implement it:

In [36]:
from vector import Vector

In [37]:
class Matrix:
    '''
    Matrix stored column-wise as a tuple of equal-dimension ``Vector`` objects.

    Supports ``str``/``repr``, iteration over its columns, ``+`` between
    matrices of identical dimensions, multiplication by a number, element
    access ``M[i, j]``, the matrix product ``@``, equality comparison and
    ``transpose()``.
    '''

    def __init__(self, *args) -> None:
        '''
        Build a matrix whose columns are the given vectors, in order.

        Raises:
            ValueError: if no vector is supplied, or if the vectors do not
                all share the same ``dim``.
            TypeError: if any argument is not a ``Vector``.
        '''
        # Local import: the class does not rely on a module-level import.
        from vector import Vector

        if not args:
            raise ValueError("Matrix should require at least one vector for initialization")

        if not all(isinstance(arg, Vector) for arg in args):
            raise TypeError("Input arguments should be vectors")

        if not all(args[0].dim == arg.dim for arg in args):
            raise ValueError("Vectors should all have the same dimension")

        # Columns are kept in an immutable tuple.
        self._val = tuple(args)

    def __str__(self) -> str:
        '''Return one text line per row, e.g. ``(1, 4)`` for a two-column row.'''
        if self.dims[1] == 1:
            # str() of a 1-tuple would render as "(1,)", so single-column
            # rows are formatted by hand.
            return "\n".join('(' + str(entry) + ')' for entry in self._val[0])
        # zip(*columns) yields the rows as tuples.
        return "\n".join(map(str, zip(*self._val)))

    def __repr__(self) -> str:
        '''Same text as ``str(self)``.'''
        return str(self)

    def __iter__(self):
        '''Iterate over the columns (the stored ``Vector`` objects).'''
        return iter(self._val)

    def __add__(self, other):
        '''
        Column-wise sum of two matrices with identical dimensions.

        Raises:
            ValueError: if ``other`` is not a ``Matrix`` with the same
                ``dims`` (non-matrix operands get this error as well).
        '''
        if not self._conformable(other):
            raise ValueError("Cannot add matrices of different dimensions")
        return Matrix(*[a + b for a, b in zip(self, other)])

    def __radd__(self, other):
        '''Reflected addition; applies the same rules as ``__add__``.'''
        return self.__add__(other)

    def __mul__(self, other):
        '''
        Multiply every column by the number ``other``.

        Returns ``NotImplemented`` for non-numeric operands so that Python
        raises ``TypeError`` for expressions such as ``"a" * M``.
        '''
        import numbers
        if not isinstance(other, numbers.Number):
            return NotImplemented
        return Matrix(*(column * other for column in self._val))

    def __rmul__(self, other):
        '''Reflected scalar multiplication (``2 * M``).'''
        return self.__mul__(other)

    def __getitem__(self, items):
        '''
        Return the entry at ``M[row, column]`` (both zero-based).

        Raises:
            IndexError: for anything that is not a pair of in-range ints,
                including a bare scalar index such as ``M[0]``.
        '''
        n_rows, n_cols = self.dims
        legal = (
            # Checked first so len() is never applied to an unsized index.
            isinstance(items, (tuple, list))
            and len(items) == 2
            and all(isinstance(i, int) for i in items)
            and 0 <= items[0] < n_rows
            and 0 <= items[1] < n_cols
        )
        if not legal:
            raise IndexError("Illegal Index")
        row, col = items
        # Storage is column-major: pick the column, then the row inside it.
        return self._val[col][row]

    def __matmul__(self, other):
        '''
        Matrix product ``self @ other``.

        Raises:
            ValueError: if ``other`` is not a ``Matrix`` whose row count
                equals this matrix's column count.
        '''
        # Local import: the class does not rely on a module-level import.
        from vector import Vector
        if not self._conformable_mat(other):
            raise ValueError("Incompatible matrix dimensions")

        # Rows of the left operand are the same for every output column,
        # so build them once.
        rows = list(zip(*self._val))
        newcols = [
            Vector(*(sum(ik * kj for ik, kj in zip(row, col_j)) for row in rows))
            for col_j in other
        ]
        return Matrix(*newcols)

    def __eq__(self, __value: object) -> bool:
        '''True only for a ``Matrix`` with the same ``dims`` and equal columns.'''
        if self._conformable(__value):
            return all(s == v for s, v in zip(self._val, __value._val))
        return False

    @property
    def dims(self) -> tuple:
        '''``(number of rows, number of columns)``.'''
        return (self._val[0].dim, len(self._val))

    def _conformable(self, other) -> bool:
        '''True if ``other`` is a ``Matrix`` with exactly the same ``dims``.'''
        return isinstance(other, Matrix) and self.dims == other.dims

    def _conformable_mat(self, other) -> bool:
        '''True if ``other`` is a ``Matrix`` that ``self @ other`` can use.'''
        return isinstance(other, Matrix) and self.dims[1] == other.dims[0]

    def transpose(self):
        '''Return a new ``Matrix`` whose columns are this matrix's rows.'''
        # Local import: the class does not rely on a module-level import.
        from vector import Vector
        return Matrix(*(Vector(*row) for row in zip(*self._val)))
    

**1(a)** (1 pt) The constructor of `Matrix` should accept one or more `Vector` objects, which will represent the *columns* of the matrix.

```
>>> v1 = Vector(1, 2, 3)
>>> v2 = Vector(5, 6, 7)
>>> v3 = Vector(8, 9, 10)
>>> Matrix(v1)
<__main__.Matrix at 0x10efbabb0>
>>> Matrix(v1, v2)
<__main__.Matrix at 0x117023640>
>>> Matrix(v1, v2, v3)
<__main__.Matrix at 0x117023040>
```

Additionally, it should perform the following checks:

1.  It should verify that each argument is of the correct type:
    ```
    >>> Matrix(1, "", v1)
    TypeError: Input arguments should be vectors
    ```
    
2.  It should verify that each argument has the same dimension.

    ```
    >>> v1 = Vector(1, 2, 3)
    >>> v2 = Vector(5, 6)
    >>> Matrix(v1, v2)
    ValueError: Vectors should all have the same dimension
    ```

In [38]:
# NOTE(review): every other check in this notebook uses a "q" prefix
# ("q1b", "q1c", ...); confirm that "1a" (not "q1a") is the test name Otter expects.
grader.check("1a")

**1(b)** (2 pts) Calling `str` on a `Matrix` object should produce a nice-looking text representation:

```
>>> v1 = Vector(1, 2, 3)
>>> v2 = Vector(4, 5, 6)
>>> v3 = Vector(7, 8, 9)
>>> M = Matrix(v1, v2, v3)
>>> print(M)
(1, 4, 7)
(2, 5, 8)
(3, 6, 9)
```

In [39]:
grader.check("q1b")

**1(c)** (1 pt.) A matrix should have an attribute `dims` which returns a tuple of its dimensions (number of rows and columns):
```
>>> v1 = Vector(1, 2, 3)
>>> v2 = Vector(4, 5, 6)
>>> M = Matrix(v1, v2)
>>> M.dims
(3, 2)
```

In [40]:
grader.check("q1c")

**1(d)** (1 pt.) A matrix should be iterable. Iterating over the matrix should return each of its columns as vectors.
```
>>> v1 = Vector(1, 2, 3)
>>> v2 = Vector(4, 5, 6)
>>> M = Matrix(v1, v2)
>>> for col in M: print(col)
(1, 2, 3)
(4, 5, 6)
```

In [41]:
grader.check("q1d")

**1(e)** (2 pts) Two matrices of the same dimensions can be added together. 
```
>>> v1 = Vector(1, 2, 3)
>>> v2 = Vector(4, 5, 6)
>>> M = Matrix(v1, v2)
>>> print(M + M)
(2, 8)
(4, 10)
(6, 12)
```
However, matrices of different dimensions cannot be added together:
```
>>> v1 = Vector(1, 2, 3)
>>> v2 = Vector(4, 5, 6)
>>> Matrix(v1) + Matrix(v1, v2)
ValueError: Cannot add matrices of different dimensions
```

In [42]:
grader.check("q1e")

**1(f)** (2 pts) Matrices can be multiplied by a numerical constant:
```
>>> v1 = Vector(1, 2, 3)
>>> v2 = Vector(4, 5, 6)
>>> M = Matrix(v1, v2)
>>> print(M * 2.5)
(2.5, 10.0)
(5.0, 12.5)
(7.5, 15.0)
>>> print("a" * M)
TypeError: ...
```

In [43]:
grader.check("q1f")

**1(g)** (2 pts) Individual entries of a matrix can be accessed using the notation `M[i,j]` (row `i`, column `j`, both zero-based):
```
>>> v1 = Vector(1, 2, 3)
>>> v2 = Vector(4, 5, 6)
>>> M = Matrix(v1, v2)
>>> print(M)
(1, 4)
(2, 5)
(3, 6)
>>> M[0, 0]
1
>>> M[2, 1]
6
>>> M[3, 3]
IndexError: ...
```

In [44]:
grader.check("q1g")

**1(h)** (3 pts) A matrix can be transposed:
```
>>> v1 = Vector(1, 2, 3)
>>> v2 = Vector(4, 5, 6)
>>> M = Matrix(v1, v2)
>>> print(M)
(1, 4)
(2, 5)
(3, 6)
>>> print(M.transpose())
(1, 2, 3)
(4, 5, 6)
```

In [45]:
grader.check("q1h")

**1(i)** (2 pts) Two matrices of conformable dimensions can be multiplied using the matrix multiplication operator (`@`). Hint: which dunder method should you implement for this symbol?
```
>>> v1 = Vector(1, 2, 3)
>>> v2 = Vector(4, 5, 6)
>>> M = Matrix(v1, v2)
>>> print(M @ M.transpose())
(17, 22, 27)
(22, 29, 36)
(27, 36, 45)
>>> M @ M
ValueError: Incompatible matrix dimensions
```

In [46]:
grader.check("q1i")

**1(j)** (2 pts) Matrices can be tested for equality:
```
>>> v1 = Vector(1, 2, 3)
>>> v2 = Vector(4, 5, 6)
>>> M = Matrix(v1, v2)
>>> M == M
True
>>> M == Matrix(v1)
False
>>> M == 1.0
False
>>> M == "matrix"
False
```

In [47]:
grader.check("q1j")

### Question 2
This question will use the MapReduce concepts for data processing.

In [48]:
from functools import reduce
import itertools
import gzip


class MapReduce:
    """Base class for a map -> reduce -> postprocess pipeline.

    Subclasses provide ``mapper`` and ``reducer`` and may override
    ``reduce_init`` and ``postprocess``.
    """

    @property
    def reduce_init(self):
        """Initial accumulator passed to ``reduce``; override as necessary."""
        return None

    def mapper(self, x):
        """Transform one input item. Must be overridden."""
        raise NotImplementedError()

    def reducer(self, accum, x):
        """Fold one mapped item ``x`` into ``accum``. Must be overridden."""
        raise NotImplementedError()

    def postprocess(self, reduced):
        """Final hook applied to the reduced value; identity unless overridden."""
        return reduced

    def run(self, iterable):
        """Map every item, fold the results, then post-process the fold."""
        mapped_items = map(self.mapper, iterable)
        folded = reduce(self.reducer, mapped_items, self.reduce_init)
        return self.postprocess(folded)

(To make things more flexible, we have also added an optional `.postprocess()` method that can be used to do additional processing after the reduction step.)

Questions 2(a) & 2(b) concern the Enron dataset:

In [49]:
def enron(n=None):
    """Iterate over lines of ``email-Enron.txt.gz`` after its 4-line header.

    Args:
        n: maximum number of lines to yield; ``None`` (the default) yields
            every remaining line.

    Returns:
        An iterator of text lines. The underlying file is closed once the
        requested lines have been consumed or the iterator is discarded.

    Raises:
        ValueError: if ``n`` is not ``None`` or a non-negative integer.
    """
    handle = gzip.open("email-Enron.txt.gz", "rt")
    try:
        # Built eagerly so an invalid ``n`` still fails at call time.
        body = itertools.islice(handle, 4, None)  # slice off header
        wanted = itertools.islice(body, n)
    except Exception:
        handle.close()
        raise

    def _lines():
        # The ``with`` block closes the file when iteration finishes
        # or the generator is closed early.
        with handle:
            yield from wanted

    return _lines()


For each question below, implement a subclass of MapReduce such that calling `.run(enron(n))` produces the desired output. For example, if the question asked you to calculate the total number of e-mails, your solution could be:

In [50]:
class NumEmails(MapReduce):
    """Count input lines: each line maps to 1 and the ones are summed."""

    @property
    def reduce_init(self):
        """The running total starts at zero."""
        return 0

    def mapper(self, x):
        """Every line contributes exactly one, whatever its content."""
        return 1

    def reducer(self, accum, x):
        """Add the mapped value to the running total."""
        running_total = accum + x
        return running_total


NumEmails().run(enron(100))

100

**2(a)** (3 pts) Define a user's *importance* to be the number of unique people who e-mailed them (not including themself). Write a MapReduce class that returns a `collections.Counter` mapping each user ID to their importance when run.

In [51]:
class Importance(MapReduce):
    '''
    Importance of a user = number of distinct other users who e-mailed them.

    Running the job returns a ``collections.Counter`` keyed by user ID.
    '''

    @property
    def reduce_init(self):
        '''Fresh mapping from recipient ID to the set of sender IDs.'''
        import collections
        return collections.defaultdict(set)

    def mapper(self, x):
        '''Parse one whitespace-separated line into a tuple of ints.'''
        return tuple(int(field) for field in x.split())

    def reducer(self, accum, x):
        '''Record the sender (x[0]) under the recipient (x[1]); skip self-mail.'''
        sender, recipient = x[0], x[1]
        if sender == recipient:
            return accum
        accum[recipient].add(sender)
        return accum

    def postprocess(self, reduced):
        '''Collapse each sender set to its size.'''
        import collections

        tally = collections.Counter()
        for user, senders in reduced.items():
            tally[user] = len(senders)
        return tally

In [52]:
grader.check("q2a")

**2(b)** (4 pts) Define a user's *forgetfulness* to be the number of times they e-mailed themself. Write a MapReduce class that returns a `Counter` that maps each user who e-mailed themself at least once to their forgetfulness score.

In [53]:
class Forgetful(MapReduce):
    '''Forgetfulness of a user = number of times they e-mailed themself.'''

    @property
    def reduce_init(self):
        '''Empty mapping from user ID to self-mail count.'''
        return {}

    def mapper(self, x):
        '''Parse a line into ints and prepend a "sender equals receiver" flag.'''
        ids = tuple(int(token) for token in x.split())
        return (ids[0] == ids[1],) + ids

    def reducer(self, accum, x):
        '''x = (flag, sender, receiver); count the sender when the flag is set.'''
        if not x[0]:
            return accum
        sender = x[1]
        accum[sender] = accum.get(sender, 0) + 1
        return accum

    def postprocess(self, reduced):
        '''Wrap the counts in a ``collections.Counter``.'''
        import collections

        return collections.Counter(reduced)

In [54]:
grader.check("q2b")

## Submission

Make sure you have run all cells in your notebook in order before running the cell below, so that all images/graphs appear in the output. The cell below will generate a zip file for you to submit. **Please save before exporting!**

Upload this .zip file to Gradescope for grading.

In [55]:
# Save your notebook first, then run this cell to export your submission.
grader.export(pdf=False)