# vector arithmetic
- unlike languages like Matlab, Mathematica and C++, Python's built-in lists do not provide
vector 'arithmetic', which is extremely useful in:
    - machine learning
    - statistics 
    - big data
    - parallel processing
    - science and engineering in general


In [2]:
# first time i saw this i was surprised
# expected to get [3,6,9]

[1,2,3]*3

[1, 2, 3, 1, 2, 3, 1, 2, 3]

In [3]:
# this doesn't work at all,
# i expected [4,10,18]

[1,2,3]*[4,5,6]

TypeError: can't multiply sequence by non-int of type 'list'

In [4]:
# this doesn't work either

import math

math.sin([1.,2.,3.])

TypeError: a float is required

In [5]:
#  more concatenating
# expected [5,7,9]

[1,2,3]+[4,5,6]

[1, 2, 3, 4, 5, 6]

# numpy - numerical python
- almost all packages and people dealing with non trivial amounts of data use numpy arrays
- much more time and space efficient than python lists 
- provides vector arithmetic
- many convenient methods for modifying arrays
- usually elements of a numpy array are all the same type
- array elements are "unboxed"
- array indexing techniques are almost identical to Matlab, but:
    - Python 1st index is 0
    - Matlab 1st index is 1


In [6]:
# builtin to anaconda

from numpy import *


In [7]:
# note a.nbytes = 24 = 3 * 8
# no per float overhead

a= array([1.,2.,3.])
b= array([4.,5.,6.])
[a, type(a), a.size, a.shape, a.itemsize, a.dtype, a.nbytes]

[array([ 1.,  2.,  3.]), numpy.ndarray, 3, (3,), 8, dtype('float64'), 24]

In [8]:
a

array([ 1.,  2.,  3.])

In [9]:
b

array([ 4.,  5.,  6.])

In [10]:
# vector arithmetic - numpy objects define __add__, __mul__, etc

[a + b, a-b, a*b, b/a, a+10, a*10, a.min(), a.max(), a.mean(), a.dot(b)]

[array([ 5.,  7.,  9.]),
 array([-3., -3., -3.]),
 array([  4.,  10.,  18.]),
 array([ 4. ,  2.5,  2. ]),
 array([ 11.,  12.,  13.]),
 array([ 10.,  20.,  30.]),
 1.0,
 3.0,
 2.0,
 32.0]

In [11]:
# vector functions
# numpy.sin acts on each element of a numpy array, and returns a new array

ls=linspace(0, 6.28, 10)
[ls, sin(ls)]

[array([ 0.        ,  0.69777778,  1.39555556,  2.09333333,  2.79111111,
         3.48888889,  4.18666667,  4.88444444,  5.58222222,  6.28      ]),
 array([ 0.        ,  0.64251645,  0.98468459,  0.8665558 ,  0.34335012,
        -0.34035671, -0.86496168, -0.98523494, -0.644954  , -0.0031853 ])]

In [12]:
# make arrays

a = array([1,2,3])
b = array([[1,2],[3,4]])
[a, type(a),b]

[array([1, 2, 3]), numpy.ndarray, array([[1, 2],
        [3, 4]])]

In [13]:
# copy an array 

d = b.copy()
e = b.copy()

[b, d, b is d]

[array([[1, 2],
        [3, 4]]), array([[1, 2],
        [3, 4]]), False]

In [14]:
# element by element compare

d[1,1] = 40

b == d


array([[ True,  True],
       [ True, False]], dtype=bool)

In [15]:
# compare all elements

[array_equal(b, d), array_equal(b, e)]

[False, True]

In [16]:
# make a square matrix

def sqmat(n):
    """Return an n-by-n integer array holding 0 .. n*n-1 in row-major order.

    n -- number of rows (and columns) of the square array
    """
    # arange builds the 1D integer array directly, without iterating a
    # Python range, and keeps an integer dtype even when n == 0;
    # reshape then views that same data as 2D
    return arange(n * n).reshape(n, n)

sqmat(4)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [17]:
sqmat(4).transpose()

array([[ 0,  4,  8, 12],
       [ 1,  5,  9, 13],
       [ 2,  6, 10, 14],
       [ 3,  7, 11, 15]])

In [18]:
# other ways to make matrix
# familiar from matlab

zs = zeros((3,3))
zs

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [19]:
zs[1,1] = 1
zs

array([[ 0.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  0.]])

In [20]:
ones((2,2))

array([[ 1.,  1.],
       [ 1.,  1.]])

In [21]:
# 2d slices

f = sqmat(5)
f

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [22]:
# pull out a sub matrix

f[2:4, 2:]

array([[12, 13, 14],
       [17, 18, 19]])

In [23]:
# iterate by row

for r in f:
    print(r)

[0 1 2 3 4]
[5 6 7 8 9]
[10 11 12 13 14]
[15 16 17 18 19]
[20 21 22 23 24]


In [24]:
# iterate by element

for e in f.flat:
    print(e)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24


# Reshaping arrays
- often very useful to change the 'view' or 'interpretation' of an array


In [25]:
a = array(range(12))
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [26]:
b,c = a.reshape((4,3)), a.reshape((2,6))
b

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [27]:
c

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]])

In [28]:
# not the same object

[a is b , a is c]

[False, False]

In [29]:
# but...

a[0] = 55
a

array([55,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [30]:
b

array([[55,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [31]:
# a,b,c are looking at the SAME chunk of data
# so reshape is a cheap operation - it doesn't copy the data

c

array([[55,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]])

In [32]:
# the transpose is being done by swapping indexes,
# not moving data

b.transpose()

array([[55,  3,  6,  9],
       [ 1,  4,  7, 10],
       [ 2,  5,  8, 11]])

In [33]:
# raw data is unchanged by transpose

a

array([55,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [34]:
c

array([[55,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]])

# Joining arrays

In [35]:
f

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [36]:
vstack((f,f))

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24],
       [ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [37]:
hstack((f,f))

array([[ 0,  1,  2,  3,  4,  0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 20, 21, 22, 23, 24]])

# Splitting arrays

In [38]:
g = sqmat(3)
g

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [39]:
# split into column arrays

hsplit(g,3)

[array([[0],
        [3],
        [6]]), array([[1],
        [4],
        [7]]), array([[2],
        [5],
        [8]])]

In [40]:
# split into row arrays

vsplit(g, 3)

[array([[0, 1, 2]]), array([[3, 4, 5]]), array([[6, 7, 8]])]

In [41]:
# range only accepts ints

range(5.8, 10.2)

TypeError: 'float' object cannot be interpreted as an integer

In [42]:
# arange takes floats and already returns a numpy array,
# so no extra array() wrapper (which would only copy it) is needed

m = arange(5.7,1,-.7)
m

array([ 5.7,  5. ,  4.3,  3.6,  2.9,  2.2,  1.5])

In [43]:
# list index

m[ [1, 3, 2] ]

array([ 5. ,  3.6,  4.3])

In [44]:
# make a boolean array

b = m > 4.2
b

array([ True,  True,  True, False, False, False, False], dtype=bool)

In [45]:
# boolean indexing - pick out elements that are true

m[b]

array([ 5.7,  5. ,  4.3])

# Numpy also has a 'matrix' type, in the linear algebra sense
- many functions available
    - inverse
    - equation solving
    - eigenvalues and eigenvectors

In [46]:
a = mat("2 4 6;4 2 6;10 -4 18")
a

matrix([[ 2,  4,  6],
        [ 4,  2,  6],
        [10, -4, 18]])

In [47]:
ai = linalg.inv(a)
ai

matrix([[-0.41666667,  0.66666667, -0.08333333],
        [ 0.08333333,  0.16666667, -0.08333333],
        [ 0.25      , -0.33333333,  0.08333333]])

In [48]:
im = ai * a
im

matrix([[  1.00000000e+00,   1.11022302e-16,  -2.22044605e-16],
        [ -2.22044605e-16,   1.00000000e+00,  -2.22044605e-16],
        [  0.00000000e+00,   5.55111512e-17,   1.00000000e+00]])

In [49]:
im.shape


(3, 3)

In [50]:
# clean up floating point noise

import math

# vectorized cleanup: the comparison yields a boolean mask of the entries
# whose magnitude is below the tolerance, and the masked assignment
# overwrites exactly those entries in place (no Python-level loops)
im[abs(im) < .00001] = 0.0
im

matrix([[ 1.,  0.,  0.],
        [ 0.,  1.,  0.],
        [ 0.,  0.,  1.]])