# NumPy Introduction

### This notebook covers the basic NumPy tools for working with arrays: creating single- and multi-dimensional arrays, reshaping, slicing, iterating, and computing aggregates with built-in functions

In [1]:
import numpy as np

### Creating Arrays

In [2]:
#create an array by passing an existing Python list to np.array()
mylist = [2, 3, 4]
array1 = np.array(mylist)
#a bare expression on the last line of a cell is displayed as the cell's output
array1

array([2, 3, 4])

In [4]:
#the list can also be written inline instead of being stored in a variable first
array2 = np.array([4, 5, 6])
array2

array([4, 5, 6])

In [6]:
#feed np.array() a list of lists to create a multi-dimensional array
#each inner list becomes one row, so two lists of three items give 2 rows x 3 columns
array3 = np.array([[1, 2,3], [4,5,6]])
array3

array([[1, 2, 3],
       [4, 5, 6]])

In [65]:
#.shape is an attribute (no parentheses), not a function
#it holds the dimensions of the array as a tuple: (rows, columns) for a 2-D array
array3.shape

(2, 3)

In [9]:
#np.arange() builds evenly spaced, sequential numbers from start up to (but not including) stop
#here: 0, 10, 20, ... 90 -- the stop value 100 itself is left out
x = np.arange(start=0, stop=100, step=10)
x

array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [10]:
#use .reshape() to arrange the same 10 values with different dimensions (5 rows x 2 columns)
#reshape() returns the reshaped array, which is why the result is assigned to y
y = x.reshape(5, 2)
y

array([[ 0, 10],
       [20, 30],
       [40, 50],
       [60, 70],
       [80, 90]])

In [15]:
#change the shape further: 10 rows of 1 column each
#the new shape must hold the same number of elements (10 x 1 = 5 x 2 = 10)
y = y.reshape(10, 1)
y

array([[ 0],
       [10],
       [20],
       [30],
       [40],
       [50],
       [60],
       [70],
       [80],
       [90]])

In [16]:
#linspace creates an array from a start value, a stop value, and how many numbers we want in that range
#here: 20 evenly spaced values from 0 to 15 with both endpoints included,
#so there are 19 equal steps of 15/19 (about 0.789) between neighbours
o = np.linspace(0, 15, 20)
o

array([  0.        ,   0.78947368,   1.57894737,   2.36842105,
         3.15789474,   3.94736842,   4.73684211,   5.52631579,
         6.31578947,   7.10526316,   7.89473684,   8.68421053,
         9.47368421,  10.26315789,  11.05263158,  11.84210526,
        12.63157895,  13.42105263,  14.21052632,  15.        ])

In [19]:
#change the shape in place: resize() modifies o itself, so nothing is assigned
#the 20 values are laid out row by row as 4 rows x 5 columns
o.resize(4, 5)
o

array([[  0.        ,   0.78947368,   1.57894737,   2.36842105,
          3.15789474],
       [  3.94736842,   4.73684211,   5.52631579,   6.31578947,
          7.10526316],
       [  7.89473684,   8.68421053,   9.47368421,  10.26315789,
         11.05263158],
       [ 11.84210526,  12.63157895,  13.42105263,  14.21052632,  15.        ]])

In [27]:
#np.ones(), np.zeros() and np.eye() build arrays filled with ones, filled with zeros,
#and an identity matrix, respectively, for the dimensions given
#ones and zeros take the shape as a tuple; eye takes a single integer
one = np.ones(shape=(3, 3))
one

array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

In [28]:
#4 rows x 3 columns of floating-point zeros; the shape is passed as a tuple
zero = np.zeros(shape=(4, 3))
zero

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [29]:
#5 x 5 identity matrix: ones on the main diagonal, zeros everywhere else
#eye takes one integer (the matrix is square), not a shape tuple
i = np.eye(5)
i

array([[ 1.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  1.]])

In [35]:
#np.diag() takes a list and builds a square array with the list's items on the main diagonal
#and zeros everywhere else
diag = np.diag(mylist)
diag

array([[2, 0, 0],
       [0, 3, 0],
       [0, 0, 4]])

In [37]:
#repeat a whole sequence by multiplying the Python list before converting it:
#[1,2,3] * 3 is plain list repetition, giving 1,2,3,1,2,3,1,2,3
#(np.repeat(), shown in the next cell, instead repeats each item several times in a row)
r = np.array([1,2,3] * 3)
r

array([1, 2, 3, 1, 2, 3, 1, 2, 3])

In [38]:
#np.repeat() repeats each item in turn: every value appears 3 times before the next one starts
r = np.repeat([2, 3, 4], repeats=3)
r

array([2, 2, 2, 3, 3, 3, 4, 4, 4])

In [48]:
#stack arrays vertically or horizontally using vstack and hstack
#note that input is a list of numpy arrays
#first build the two arrays to stack: p is 2x3 of ones, q is p with every element doubled
p = np.ones((2,3))
q = p * 2
#a tuple as the last expression displays both arrays at once
p, q

(array([[ 1.,  1.,  1.],
        [ 1.,  1.,  1.]]), array([[ 2.,  2.,  2.],
        [ 2.,  2.,  2.]]))

In [42]:
#stack p on top of (p multiplied by 2): rows are appended, so two 2x3 arrays give a 4x3 array
vert = np.vstack([p, 2*p])
vert

array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 2.,  2.,  2.],
       [ 2.,  2.,  2.]])

In [49]:
#stack p next to q: columns are appended, so two 2x3 arrays give a 2x6 array
horiz = np.hstack([p, q])
horiz

array([[ 1.,  1.,  1.,  2.,  2.,  2.],
       [ 1.,  1.,  1.,  2.,  2.,  2.]])

### Element-wise operations

In [50]:
#arithmetic operators work element by element: each entry of p is added to the matching entry of q
p + q

array([[ 3.,  3.,  3.],
       [ 3.,  3.,  3.]])

In [51]:
#* multiplies matching entries (element-wise); it is not matrix multiplication
p * q

array([[ 2.,  2.,  2.],
       [ 2.,  2.,  2.]])

In [54]:
#** raises every element to the given power (2 cubed = 8 for each entry)
q**3

array([[ 8.,  8.,  8.],
       [ 8.,  8.,  8.]])

In [71]:
#use the dot() function to compute the dot product of p and q
#q is already 2x3, so it needs no change (and reshape() returns a new array
#rather than modifying q, so a bare q.reshape(2,3) call would have no effect anyway)
#give p the shape 3x2 so that its column count matches q's row count
p = p.reshape(3, 2)
#view q and its shape
q, q.shape

(array([[ 2.,  2.,  2.],
        [ 2.,  2.,  2.]]), (2, 3))

In [74]:
#view p and its shape (now 3 rows x 2 columns)
p, p.shape

(array([[ 1.,  1.],
        [ 1.,  1.],
        [ 1.,  1.]]), (3, 2))

In [76]:
#note that this is taking shapes (3x2) x (2x3) to produce shape (3x3)
#each entry is a row of p times a column of q: 1*2 + 1*2 = 4
p.dot(q), p.dot(q).shape

(array([[ 4.,  4.,  4.],
        [ 4.,  4.,  4.],
        [ 4.,  4.,  4.]]), (3, 3))

In [87]:
#create a 2-row array: the first row is y, the second row is y squared element-wise
#note: y and q are rebound here and no longer refer to the arrays used in the cells above
y = np.array(mylist)
q = np.array([y, y**2])
q

array([[ 2,  3,  4],
       [ 4,  9, 16]])

In [88]:
#2 rows (y and y squared) x 3 columns (one per item of mylist)
q.shape

(2, 3)

In [89]:
#use .T to transpose the array: rows become columns and columns become rows
q.T

array([[ 2,  4],
       [ 3,  9],
       [ 4, 16]])

In [90]:
#the transposed shape is the original (2, 3) reversed
q.T.shape

(3, 2)

In [92]:
#use .dtype to get the data type of the array's elements (every element shares one type)
q.dtype

dtype('int64')

In [95]:
#cast an array to another element type with .astype(); the converted array is returned, so assign it
#'f' is the type code for 32-bit floats (confirmed by q.dtype in the next cell)
q = q.astype('f')

In [96]:
#check the element type again after the cast
q.dtype

dtype('float32')

### Aggregate functions of arrays

In [97]:
#small sample array used by the aggregate examples below
a = np.array([-4, -2, 1, 3, 5])

In [98]:
#sum of all elements
a.sum()

3

In [99]:
#largest value in the array
a.max()

5

In [100]:
#smallest value in the array
a.min()

-4

In [104]:
#arithmetic mean: sum / count = 3 / 5 = 0.6 (shown with floating-point rounding)
a.mean()

0.59999999999999998

In [105]:
#standard deviation; with no arguments this is the population form (divides by N, not N-1)
a.std()

3.2619012860600183

In [106]:
#argmax()/argmin() return the index of the largest/smallest value, not the value itself
#the maximum (5) sits at index 4
a.argmax()

4

In [107]:
#the minimum (-4) sits at index 0
a.argmin()

0

### Indexing and slicing arrays

In [108]:
#array holding the squares of the integers 0 through 12
s = np.square(np.arange(13))
s

array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100, 121, 144])

In [109]:
#a single integer picks one element; a slice is written start:stop:step
#the stop index is excluded, so 1:12:2 selects indices 1, 3, 5, 7, 9 and 11
s[4], s[0], s[1:12:2]

(16, 0, array([  1,   9,  25,  49,  81, 121]))

In [110]:
#a negative start counts from the end: the last four elements
s[-4:]

array([ 81, 100, 121, 144])

In [112]:
#start 5th from the end and walk back to the beginning, taking every other item (step of -2)
s[-5::-2]

array([64, 36, 16,  4,  0])

#### In multi-dimensional arrays

In [113]:
#the integers 0..35 laid out row by row as a 6x6 grid
r = np.arange(36).reshape(6, 6)
r

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

In [115]:
#index a 2-D array as [row, column]; both are zero-indexed, so this is the 4th row, 2nd column
r[3,1]

19

In [121]:
#row 3, columns 3 up to (but not including) 6
#the stop index is never included, e.g. 3:5 would only give columns 3 and 4
r[3, 3:6]

array([21, 22, 23])

In [123]:
#get the first two rows (:2) and all of the columns except the last (:-1)
r[:2, :-1]

array([[ 0,  1,  2,  3,  4],
       [ 6,  7,  8,  9, 10]])

In [124]:
#get every second element (::2, starting with the first) from the last row (-1)
r[-1, ::2]

array([30, 32, 34])

In [125]:
#conditional (boolean mask) indexing: r > 30 is an array of True/False values,
#and indexing with it returns the matching elements as a 1-D array
r[r > 30]

array([31, 32, 33, 34, 35])

In [126]:
#assign all elements greater than 30 to be equal to 30
#the same boolean mask can be used on the left-hand side; this changes r in place
r[r > 30] = 30
r

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 30, 30, 30, 30, 30]])

In [128]:
#take the top-left 3x3 corner of r
#a slice is a view onto r's data, not an independent copy (demonstrated in the next cells)
r2 = r[:3, :3]
r2

array([[ 0,  1,  2],
       [ 6,  7,  8],
       [12, 13, 14]])

In [131]:
#r2[:] = 0 writes zeros into the existing elements of r2 (it does not rebind the name r2)
r2[:] = 0
r2

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [133]:
#notice that when changing a slice of the original array, that change persists in the original:
#the top-left 3x3 corner of r is now zero as well
r

array([[ 0,  0,  0,  3,  4,  5],
       [ 0,  0,  0,  9, 10, 11],
       [ 0,  0,  0, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 30, 30, 30, 30, 30]])

In [135]:
#to get an independent array instead of a view, use the .copy() method
r_copy = r.copy()
r_copy

array([[ 0,  0,  0,  3,  4,  5],
       [ 0,  0,  0,  9, 10, 11],
       [ 0,  0,  0, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 30, 30, 30, 30, 30]])

In [136]:
#overwrite the top-left 3x3 corner of the copy only
r_copy[:3, :3] = 10
#show the original and the copy separated by a blank line: r is unaffected by the change
print(r, r_copy, sep='\n\n')

[[ 0  0  0  3  4  5]
 [ 0  0  0  9 10 11]
 [ 0  0  0 15 16 17]
 [18 19 20 21 22 23]
 [24 25 26 27 28 29]
 [30 30 30 30 30 30]]

[[10 10 10  3  4  5]
 [10 10 10  9 10 11]
 [10 10 10 15 16 17]
 [18 19 20 21 22 23]
 [24 25 26 27 28 29]
 [30 30 30 30 30 30]]


### Iterating over arrays

In [137]:
#4x3 array of random integers drawn from 0 up to (but not including) 10
#NOTE: no random seed is set in this notebook, so the values differ on every run
new = np.random.randint(0,10,(4,3))
new

array([[0, 6, 5],
       [6, 9, 9],
       [4, 2, 5],
       [0, 9, 9]])

In [139]:
#iterate over new array: looping over a 2-D array yields one row at a time
for item in new:
    print(item)

[0 6 5]
[6 9 9]
[4 2 5]
[0 9 9]


In [140]:
#the same rows reached by index: len() of a 2-D array is its number of rows
#note: this loop rebinds i, which held the identity matrix created earlier in the notebook
for i in range(len(new)):
    print(new[i])

[0 6 5]
[6 9 9]
[4 2 5]
[0 9 9]


In [144]:
#enumerate returns the index of the row and the row itself
for i, row in enumerate(new):
    print('row', i, 'is', row)

row 0 is [0 6 5]
row 1 is [6 9 9]
row 2 is [4 2 5]
row 3 is [0 9 9]


In [145]:
#square every element of new (element-wise), keeping the same 4x3 shape
new2 = new**2
new2

array([[ 0, 36, 25],
       [36, 81, 81],
       [16,  4, 25],
       [ 0, 81, 81]])

In [148]:
#we can iterate over both arrays in step using zip: i and j are the matching rows of new and new2
#i+j adds the two rows element by element
for i, j in zip(new, new2):
    print(i, ' + ', j, ' = ', i+j)

[0 6 5]  +  [ 0 36 25]  =  [ 0 42 30]
[6 9 9]  +  [36 81 81]  =  [42 90 90]
[4 2 5]  +  [16  4 25]  =  [20  6 30]
[0 9 9]  +  [ 0 81 81]  =  [ 0 90 90]
