## Preliminaries

In [1]:
# import numpy as np
import pandas as pd
import mxnet
import matplotlib.pyplot as plt
import seaborn

import os

### 2.1.1 Getting Started

In [2]:
from mxnet import np, npx
# When using ndarray we almost always invoke the set_np function: 
# this is for compatibility of ndarray processing by other components of MXNet
npx.set_np()

In [3]:
x = np.arange(12)
x

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.])

In [4]:
x.shape

(12,)

In [5]:
x.size

12

In [6]:
np.empty((3,4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [7]:
np.zeros((2,3,4)) # <- a tensor

array([[[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]])

In [8]:
np.ones((2,3,4))

array([[[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]],

       [[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]]])

When we construct arrays to serve as parameters in a neural network, we will typically initialize their values randomly.

In [9]:
# Draw a 3x4 array of samples from a normal distribution with mean 0 and
# standard deviation 1. No random seed is set in the cells shown here, so the
# values displayed below will presumably differ from run to run.
np.random.normal(0,1, size = (3,4))

array([[ 1.1630787 ,  2.2122064 ,  0.4838046 ,  0.7740038 ],
       [ 0.29956347,  1.0434403 ,  0.15302546,  1.1839255 ],
       [-1.1688148 ,  1.8917114 ,  1.5580711 , -1.2347414 ]])

In [10]:
np.array([[2,1,4,3], [1,2,3,4], [4,3,2,1]])

array([[2., 1., 4., 3.],
       [1., 2., 3., 4.],
       [4., 3., 2., 1.]])

In [11]:
x = np.array([1,2,4,8])
y = np.array([2,2,2,2])
# In MXNet, common standard arithmetic operators have been lifted to ELEMENTWISE Operations
x + y, x - y, x * y, x / y, x ** y 

(array([ 3.,  4.,  6., 10.]),
 array([-1.,  0.,  2.,  6.]),
 array([ 2.,  4.,  8., 16.]),
 array([0.5, 1. , 2. , 4. ]),
 array([ 1.,  4., 16., 64.]))

#### Concatenating the arrays

In [12]:
x = np.arange(12).reshape(3,4)
y = np.array([[2,1,4,3], [1,2,3,4], [4,3,2,1]])

np.concatenate([x,y], axis = 0), np.concatenate([x,y], axis = 1)

(array([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [ 2.,  1.,  4.,  3.],
        [ 1.,  2.,  3.,  4.],
        [ 4.,  3.,  2.,  1.]]),
 array([[ 0.,  1.,  2.,  3.,  2.,  1.,  4.,  3.],
        [ 4.,  5.,  6.,  7.,  1.,  2.,  3.,  4.],
        [ 8.,  9., 10., 11.,  4.,  3.,  2.,  1.]]))

In [13]:
x.sum()

array(66.)

In [14]:
np.sum(x)

array(66.)

### 2.1.3 Broadcasting Mechanism

In [15]:
a = np.arange(3).reshape(3,1)
b = np.arange(2).reshape(1,2)
a,b

(array([[0.],
        [1.],
        [2.]]),
 array([[0., 1.]]))

In [16]:
a + b

array([[0., 1.],
       [1., 2.],
       [2., 3.]])

Since a and b are 3 × 1 and 1 × 2 matrices respectively, their shapes do not match up if we want to add them. We broadcast the entries of both matrices into a larger 3 × 2 matrix as follows: for matrix a it replicates the columns and for matrix b it replicates the rows before adding up both elementwise.

In [17]:
x

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])

### 2.1.4 Indexing and Slicing

In [18]:
x[-1], x[1:3]

(array([ 8.,  9., 10., 11.]),
 array([[ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]]))

In [19]:
x[1,2] = 9

If we want to assign multiple elements the same value, we simply index all of them and then assign them the value. For instance, [0:2, :] accesses the first and second rows, where : takes all the elements along axis 1 (column). While we discussed indexing for matrices, this obviously also works for vectors and for tensors of more than 2 dimensions.

In [20]:
x[0:2, :] = 12

### 2.1.5 Saving Memory

In [21]:
# Remember the identity of the object currently bound to `y`.
before = id(y)
# `y + x` produces a new array and `y` is rebound to it, so the identity
# changes (the comparison below evaluates to False).
y = y + x
id(y) == before

False

So, how can we save the memory? In MXNet it is easy:

In [22]:
# Allocate a zero-filled array with the same shape as `y` to receive the result.
z = np.zeros_like(y)
print(f'id(z): {id(z)}')
# Slice assignment stores the result into the object `z` already refers to
# instead of rebinding the name, so both prints show the same id.
z[:] = x + y
print(f'id(z): {id(z)}')

id(z): 5042467696
id(z): 5042467696


### 2.1.6 Conversion to Other Python Objects

In [23]:
a = x.asnumpy()
b = np.array(a)
type(a), type(b)

(numpy.ndarray, mxnet.numpy.ndarray)

In [24]:
t1 = np.ones((2,3,4))
t2 = np.ones((2,3,1))

t2 += 3

In [25]:
t1, t2

(array([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],
 
        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]]),
 array([[[4.],
         [4.],
         [4.]],
 
        [[4.],
         [4.],
         [4.]]]))

In [26]:
t1 + t2

array([[[5., 5., 5., 5.],
        [5., 5., 5., 5.],
        [5., 5., 5., 5.]],

       [[5., 5., 5., 5.],
        [5., 5., 5., 5.],
        [5., 5., 5., 5.]]])

## 2.2 Data Preprocessing

In [27]:
def mkdir_if_not_exist(path):
    """Create the directory ``path``, including missing parents, if it is absent.

    Args:
        path: Either a single path (``str`` or ``os.PathLike``) or an iterable
            of path components, which are combined with ``os.path.join``.

    Raises:
        FileExistsError: If ``path`` already exists but is not a directory.
    """
    # Only iterables of components need joining; str and path-like objects
    # (e.g. pathlib.Path) are already complete paths.
    if not isinstance(path, (str, os.PathLike)):
        path = os.path.join(*path)
    # exist_ok=True replaces the separate exists() check, which could race with
    # another process creating the directory between the check and makedirs().
    os.makedirs(path, exist_ok=True)

In [28]:
data_file = '../data/house_tiny.csv'
mkdir_if_not_exist('../data')

In [29]:
# Tiny housing dataset: a header row followed by four samples. The literal
# "NA" entries are read back as missing values by pd.read_csv.
csv_lines = (
    'NumRooms,Alley,Price\n',
    'NA,Pave,127500\n',
    '2,NA,106000\n',
    '4,NA,178100\n',
    'NA,NA,140000\n',
)
with open(data_file, 'w') as csv_out:
    csv_out.writelines(csv_lines)

In [30]:
data = pd.read_csv(data_file)

In [31]:
print(data)

   NumRooms Alley   Price
0       NaN  Pave  127500
1       2.0   NaN  106000
2       4.0   NaN  178100
3       NaN   NaN  140000


In [32]:
inputs, outputs = data.iloc[:, 0:2], data.iloc[:, 2]

In [33]:
# Impute missing numeric entries with their column mean. numeric_only=True
# restricts the mean to numeric columns; without it the string-valued Alley
# column makes DataFrame.mean() raise a TypeError on pandas >= 2.0.
inputs = inputs.fillna(inputs.mean(numeric_only=True))

In [34]:
print(inputs)

   NumRooms Alley
0       3.0  Pave
1       2.0   NaN
2       4.0   NaN
3       3.0   NaN


For categorical or discrete values in `inputs`, we treat “NaN” as a category of its own.

In [35]:
# One-hot encode the categorical column; dummy_na=True adds an explicit
# indicator column for missing values. The dtype is pinned because
# pandas >= 2.0 defaults the indicator columns to bool, which would turn
# `inputs.values` into an object array alongside the float NumRooms column.
inputs = pd.get_dummies(inputs, dummy_na=True, dtype='uint8')
print(inputs)

   NumRooms  Alley_Pave  Alley_nan
0       3.0           1          0
1       2.0           0          1
2       4.0           0          1
3       3.0           0          1


### 2.2.3 Conversion to the `ndarray` format

In [36]:
# `np` is mxnet.numpy here (imported in an earlier cell), so the pandas
# values are converted into MXNet ndarrays rather than plain NumPy arrays.
X, y = np.array(inputs.values), np.array(outputs.values)
X,y

(array([[3., 1., 0.],
        [2., 0., 1.],
        [4., 0., 1.],
        [3., 0., 1.]], dtype=float64),
 array([127500, 106000, 178100, 140000], dtype=int64))

#### Exercise 1 and 2 

In [37]:
# Larger dataset for the exercises. Opening data_file with mode 'w' replaces
# whatever the file previously contained.
exercise_csv_lines = (
    'NumRooms,Alley,No.Bedrooms,Sq.M,Year,Price\n',
    'NA,Pave,4,120,2008,127500\n',
    '2,NA,3,100,2007,106000\n',
    '4,NA,6,220,2009,178100\n',
    'NA,NA,3,105,2009,121000\n',
    'NA,NA,2,95,NA,110000\n',
    '8,Gravel,7,450,NA,240000\n',
    '9,NA,9,1000,2016,840000\n',
)
with open(data_file, 'w') as csv_out:
    csv_out.writelines(exercise_csv_lines)

In [38]:
data = pd.read_csv(data_file)

In [39]:
data

Unnamed: 0,NumRooms,Alley,No.Bedrooms,Sq.M,Year,Price
0,,Pave,4,120,2008.0,127500
1,2.0,,3,100,2007.0,106000
2,4.0,,6,220,2009.0,178100
3,,,3,105,2009.0,121000
4,,,2,95,,110000
5,8.0,Gravel,7,450,,240000
6,9.0,,9,1000,2016.0,840000


In [40]:
inputs, outputs = data.iloc[:, 0:5], data.iloc[:, 5]

In [41]:
# Keep only the columns that have at least 4 non-missing values. Reassigning
# (instead of inplace=True) avoids mutating a frame that was sliced out of
# `data`, and keeps the cell safe to re-run.
inputs = inputs.dropna(thresh=4, axis=1)

# Fill the remaining gaps with column means. A year must be a whole number, so
# the Year mean is rounded before filling rather than patched afterwards with
# a float-equality test against a hard-coded value.
fill_values = inputs.mean(numeric_only=True)
fill_values['Year'] = fill_values['Year'].round()
inputs = inputs.fillna(fill_values)

X, y = np.array(inputs.values), np.array(outputs.values)

## 2.3 Linear Algebra

In [42]:
A = np.arange(20).reshape(5,4)
A

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.],
       [12., 13., 14., 15.],
       [16., 17., 18., 19.]])

In [43]:
A.T

array([[ 0.,  4.,  8., 12., 16.],
       [ 1.,  5.,  9., 13., 17.],
       [ 2.,  6., 10., 14., 18.],
       [ 3.,  7., 11., 15., 19.]])

In [44]:
# symmetric matrix
B = np.array([[1,2,3], [2,0,4], [3,4,5]])
B

array([[1., 2., 3.],
       [2., 0., 4.],
       [3., 4., 5.]])

In [45]:
B == B.T

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

#### 2.3.5 Basic Properties of Tensor Arithmetic

In [46]:
A = np.arange(20).reshape(5,4)
B = A.copy()
A, A + B

(array([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [12., 13., 14., 15.],
        [16., 17., 18., 19.]]),
 array([[ 0.,  2.,  4.,  6.],
        [ 8., 10., 12., 14.],
        [16., 18., 20., 22.],
        [24., 26., 28., 30.],
        [32., 34., 36., 38.]]))

Any elementwise unary operation does not change the shape of its operand.

Elementwise multiplication of two matrices is called their *Hadamard product*

In [47]:
A * B

array([[  0.,   1.,   4.,   9.],
       [ 16.,  25.,  36.,  49.],
       [ 64.,  81., 100., 121.],
       [144., 169., 196., 225.],
       [256., 289., 324., 361.]])

In [48]:
A.sum(axis = 0)

array([40., 45., 50., 55.])

In [49]:
A

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.],
       [12., 13., 14., 15.],
       [16., 17., 18., 19.]])

In [50]:
A.mean(axis = 0)

array([ 8.,  9., 10., 11.])

#### Non-Reduction Sum

In [51]:
sum_A = A.sum(axis = 1, keepdims=True)

In [52]:
sum_A

array([[ 6.],
       [22.],
       [38.],
       [54.],
       [70.]])

In [53]:
A,A.cumsum(axis = 0)

(array([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [12., 13., 14., 15.],
        [16., 17., 18., 19.]]),
 array([[ 0.,  1.,  2.,  3.],
        [ 4.,  6.,  8., 10.],
        [12., 15., 18., 21.],
        [24., 28., 32., 36.],
        [40., 45., 50., 55.]]))

#### 2.3.7 Dot Products

Dot product of two vectors **x** and **y** is a **sum over the products of the elements at the same position**.

In [54]:
x = np.array([0.,1.,2.,3.])
y = np.ones(4)

x, y, np.dot(x,y)

(array([0., 1., 2., 3.]), array([1., 1., 1., 1.]), array(6.))

In [55]:
# Equivalent expression
np.sum(x * y)

array(6.)

In [56]:
A.shape, x.shape, np.dot(A, x)

((5, 4), (4,), array([ 14.,  38.,  62.,  86., 110.]))

In [57]:
B = np.ones(shape = (4,3))

In [58]:
np.dot(A,B)

array([[ 6.,  6.,  6.],
       [22., 22., 22.],
       [38., 38., 38.],
       [54., 54., 54.],
       [70., 70., 70.]])

In [59]:
A,B

(array([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [12., 13., 14., 15.],
        [16., 17., 18., 19.]]),
 array([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]]))

#### 2.3.10 Norms

In [60]:
u = np.array([3, -4])
np.linalg.norm(u)

array(5.)

In [61]:
np.abs(u).sum()

array(7.)

Frobenius norm (matrix norm)

In [62]:
np.linalg.norm(np.ones((4,9)))

array(6.)

In [63]:
np.ones((4,9))

array([[1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1.]])

In [64]:
A.T.T

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.],
       [12., 13., 14., 15.],
       [16., 17., 18., 19.]])

Given two matrices **A** and **B**, show that the sum of transposes is equal to the transpose of a sum

i.e. A.T + B.T = (A + B).T

In [65]:
B = A.copy()

In [66]:
# Print both sides of the identity A.T + B.T == (A + B).T for visual comparison.
print(f"Sum of transposes: \n{A.T + B.T}")
print('\n')
print(f"Shape of the matrices: {A.shape, B.shape} \n")
print(f"Transpose of sum: \n{(A + B).T}")
# `==` compares elementwise, so this prints a boolean array rather than a
# single True/False value.
print(f"A.T + B.T = (A + B).T == {A.T + B.T == (A + B).T}")

Sum of transposes: 
[[ 0.  8. 16. 24. 32.]
 [ 2. 10. 18. 26. 34.]
 [ 4. 12. 20. 28. 36.]
 [ 6. 14. 22. 30. 38.]]


Shape of the matrices: ((5, 4), (5, 4)) 

Transpose of sum: 
[[ 0.  8. 16. 24. 32.]
 [ 2. 10. 18. 26. 34.]
 [ 4. 12. 20. 28. 36.]
 [ 6. 14. 22. 30. 38.]]
A.T + B.T = (A + B).T == [[ True  True  True  True  True]
 [ True  True  True  True  True]
 [ True  True  True  True  True]
 [ True  True  True  True  True]]


In [67]:
sq_mat = np.array([[1,2,3], [2,0,4], [3,4,5]])
sq_mat

array([[1., 2., 3.],
       [2., 0., 4.],
       [3., 4., 5.]])

Given any square matrix A, is A + A⊤ always symmetric? Why?

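Yes. For any square matrix A, (A + A⊤)⊤ = A⊤ + (A⊤)⊤ = A⊤ + A = A + A⊤, so A + A⊤ equals its own transpose and is therefore symmetric. Note that a square matrix is not, in general, equal to its transpose; the example `sq_mat` used here happens to be symmetric already (A == A.T), which is why A + A.T coincides with A + A in this particular case.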

In [68]:
sq_mat == sq_mat.T

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

In [69]:
sq_mat + sq_mat.T

array([[ 2.,  4.,  6.],
       [ 4.,  0.,  8.],
       [ 6.,  8., 10.]])

In [229]:
sq_mat + sq_mat

array([[ 2.,  4.,  6.],
       [ 4.,  0.,  8.],
       [ 6.,  8., 10.]])