# 1. Numpy

In [1]:
import numpy as np

In [2]:
import warnings 
warnings.filterwarnings('ignore')

### 1.1 Creating Arrays

In [44]:
'''
Mixing types in one list forces NumPy to coerce every element to a single
common dtype (strings here), so avoid object or otherwise unwanted datatypes.
'''
reg_python_list = ['a', True, 3, 4, 5]

x = np.array(reg_python_list)
x

array(['a', 'True', '3', '4', '5'], 
      dtype='|S4')

In [45]:
'''A list that holds a single type (ints) produces a clean integer array.'''
reg_python_list = [1, 2, 3, 4, 5]

x = np.array(reg_python_list)
x

array([1, 2, 3, 4, 5])

In [46]:
y = np.array([6,7,8,9,10])
y

array([ 6,  7,  8,  9, 10])

In [47]:
'''Lists of equal length become the rows of a multi-dimensional array.'''
reg_list1 = [1, 2, 3]
reg_list2 = [4, 5, 6]

z = np.array([reg_list1, reg_list2])
z

array([[1, 2, 3],
       [4, 5, 6]])

In [7]:
'''Similarly, you can make multi-dimensional arrays with other arrays'''
z = np.array([x,y])
z

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])

In [7]:
'''Repeat a whole sequence by multiplying a Python list (multiplying an ndarray scales its elements instead)'''
np.array([1,2,3]*3)

array([1, 2, 3, 1, 2, 3, 1, 2, 3])

In [8]:
'''Repeat each element a given number of times, returning a new array'''
np.repeat([1,2,3],3)

array([1, 1, 1, 2, 2, 2, 3, 3, 3])

In [9]:
'''numpy's arange returns evenly spaced values using a fixed step size (the stop value itself is excluded)'''
n = np.arange(0, 30, 2)
n

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28])

In [11]:
'''
numpy's linspace lets you choose how many points you want rather than the step size.
NOTE: linspace includes the last value of the range, while arange does not.
'''
o = np.linspace(0, 30, num=16)
o

array([  0.,   2.,   4.,   6.,   8.,  10.,  12.,  14.,  16.,  18.,  20.,
        22.,  24.,  26.,  28.,  30.])

In [12]:
'''Change the shape of an array (reshape returns the reshaped array, so assign the result)'''
n = n.reshape(5,3)
n

array([[ 0,  2,  4],
       [ 6,  8, 10],
       [12, 14, 16],
       [18, 20, 22],
       [24, 26, 28]])

In [13]:
'''The main difference between resize and reshape is that resize changes the original numpy array in place, while reshape returns a new array object'''
o.resize(4,4)
o

array([[  0.,   2.,   4.,   6.],
       [  8.,  10.,  12.,  14.],
       [ 16.,  18.,  20.,  22.],
       [ 24.,  26.,  28.,  30.]])

### 1.2 Special Arrays

In [14]:
'''Array of ones'''
np.ones((3,2))

array([[ 1.,  1.],
       [ 1.,  1.],
       [ 1.,  1.]])

In [15]:
'''Array of zeros'''
np.zeros((2,3))

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [16]:
'''Array of ones on the diagonal'''
np.eye(3)

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

In [17]:
'''y is the numpy array [6, 7, 8, 9, 10]: numpy's diag places those values on the diagonal of a square matrix'''
np.diag(y)

array([[ 6,  0,  0,  0,  0],
       [ 0,  7,  0,  0,  0],
       [ 0,  0,  8,  0,  0],
       [ 0,  0,  0,  9,  0],
       [ 0,  0,  0,  0, 10]])

In [18]:
'''Pass a dtype to get integer ones instead of the default floats'''
p = np.ones((2, 3), dtype=int)
p

array([[1, 1, 1],
       [1, 1, 1]])

In [19]:
'''Vertical stack your arrays (column dimensions must match)'''
np.vstack([p,2*p])

array([[1, 1, 1],
       [1, 1, 1],
       [2, 2, 2],
       [2, 2, 2]])

In [20]:
'''Horizontally stack your arrays (row dimensions must match)'''
np.hstack([p,2*p])

array([[1, 1, 1, 2, 2, 2],
       [1, 1, 1, 2, 2, 2]])

### 1.3 Operations and Vectorization

In [21]:
'''
In case you forgot: 

'''
x, y

(array([1, 2, 3, 4, 5]), array([ 6,  7,  8,  9, 10]))

In [22]:
'''
x + y = [1 + 6, 2 + 7, 3 + 8, 4 + 9, 5 + 10]
'''
x + y

array([ 7,  9, 11, 13, 15])

In [23]:
[i + j for i, j in zip(x, y)]

[7, 9, 11, 13, 15]

In [24]:
'''
x * y = [1 * 6, 2 * 7, 3 * 8, 4 * 9, 5 * 10]
'''
x * y

array([ 6, 14, 24, 36, 50])

In [25]:
'''
x ** 2 = [1 ** 2, 2 ** 2, 3 ** 2, 4 ** 2, 5 ** 2]
'''
x ** 2

array([ 1,  4,  9, 16, 25])

In [26]:
'''
x.dot(y) = (1*6) + (2*7) + (3*8) + (4*9) + (5*10)
'''
x.dot(y)

130

In [27]:
dice = np.array([1, 2, 3, 4, 5, 6])
dice

array([1, 2, 3, 4, 5, 6])

In [28]:
dice.sum()

21

In [29]:
dice.max()

6

In [30]:
dice.min()

1

In [31]:
dice.mean()

3.5

In [32]:
dice.std()

1.707825127659933

### 1.4 Indexing / Slicing

In [33]:
a = np.arange(13)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

In [34]:
'''Python uses 0-based indexing, so index 0 is the first element'''
a[0]

0

In [36]:
'''Gets the 3rd and 4th elements: the slice starts at index 2 and stops before index 4'''
a[2:4]

array([2, 3])

In [35]:
'''Gets every other element, starting from the 6th element counted from the end'''
a[-6::2]

array([ 7,  9, 11])

In [37]:
'''I prefer reshape because it returns the array, which allows method chaining like this'''
b = np.arange(36).reshape((6, 6))
b

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

In [38]:
'''Select elements with a boolean mask (a mask with the same shape as the array always returns a 1-D array)'''
b[b >= 30]

array([30, 31, 32, 33, 34, 35])

### 1.5 Broadcasting

In [48]:
# Rebind x to a 2-D array made of two copies of the old 1-D x; y stays 1-D.
x = np.array([x,x])
x,y

(array([[1, 2, 3, 4, 5],
        [1, 2, 3, 4, 5]]), array([ 6,  7,  8,  9, 10]))

In [49]:
# Broadcasting: the 1-D y is added to every row of the 2-D x.
x + y

array([[ 7,  9, 11, 13, 15],
       [ 7,  9, 11, 13, 15]])

In [50]:
# Adding a scalar broadcasts it to every element of x.
# Each print call receives one pre-formatted string, so the cell runs and
# prints the same text on both Python 2 and Python 3 (the original print
# statement is a SyntaxError on Python 3).
print('Current x: \n%s' % (x,))
print('x + 1: \n%s' % (x + 1,))

Current x: 
[[1 2 3 4 5]
 [1 2 3 4 5]]
x + 1: 
[[2 3 4 5 6]
 [2 3 4 5 6]]


In [57]:
x = x.T

In [58]:
x

array([[1, 1],
       [2, 2],
       [3, 3],
       [4, 4],
       [5, 5]])

Without broadcasting, we would have to reshape x to the correct size and use repeat to perform the same operation.

### 1.6 Iterating Over Arrays (Should You Actually Need It)

In [59]:
'''Random integers from 0 up to (but not including) 10, arranged as 4 rows by 3 columns'''
c = np.random.randint(0, 10, size=(4, 3))
c

array([[0, 6, 5],
       [4, 7, 5],
       [6, 1, 4],
       [6, 5, 5]])

In [60]:
# Iterating over a 2-D array yields one row at a time.
# print(...) with a single argument behaves the same on Python 2 and 3.
for row in c:
    print(row)

[0 6 5]
[4 7 5]
[6 1 4]
[6 5 5]


In [61]:
# Index-based alternative, shown for comparison: len(c) is the number of rows.
# Direct iteration or enumerate is usually the cleaner choice.
# print(...) with a single argument behaves the same on Python 2 and 3.
for i in range(len(c)):
    print(c[i])

[0 6 5]
[4 7 5]
[6 1 4]
[6 5 5]


In [46]:
'''Enumerate gives indices and values'''
# The line is formatted into a single string first so that print(...) emits
# the same text on Python 2 and Python 3.
for i, row in enumerate(c):
    print('row %d is %s' % (i, row))

row 0 is [0 6 5]
row 1 is [4 7 5]
row 2 is [6 1 4]
row 3 is [6 5 5]


In [62]:
c2 = c**2
c2

array([[ 0, 36, 25],
       [16, 49, 25],
       [36,  1, 16],
       [36, 25, 25]])

In [48]:
'''zip pairs up the rows of two arrays (or the items of two lists) so you can loop over them together.'''
# The line is formatted into a single string first so that print(...) emits
# the same text on Python 2 and Python 3.
for i, j in zip(c, c2):
    print('%s + %s = %s' % (i, j, i + j))

[0 6 5] + [ 0 36 25] = [ 0 42 30]
[4 7 5] + [16 49 25] = [20 56 30]
[6 1 4] + [36  1 16] = [42  2 20]
[6 5 5] + [36 25 25] = [42 30 30]


# Summary

Important takeaways:
- NumPy arrays care about which data type they hold.
- Vectorization makes NumPy fast; don't iterate when you don't have to.
- Broadcasting is useful and allows operations between arrays of different (but compatible) shapes.