# NumPy

## ALWAYS do: import numpy as np
This is a convention that everyone follows. If you do not do this, other people will have a hard time reading your code.

In [1]:
import numpy as np

## Numpy arrays
- arrays can only contain data of the same data type (similar to atomic vectors or matrices in R)

## Array creation
- direct creation with `np.array()`
- Create a list with square brackets, and put that inside `np.array()`

In [2]:
np.array([1,2,3])

array([1, 2, 3])

In [3]:
a = np.array([1, 2, 3])
print(a) # printing an array appears different from the array([]) in ipython

[1 2 3]


- mixed types are upcast (coerced) to the most general type present: integer → float → string

In [3]:
b = np.array([1, 2, 3.0])
print(b) # the 3.0 is a float and will upcast (coerce) other values to floats

[1. 2. 3.]


In [4]:
c = np.array([1, 2, "3"]) # upcast (coerced) to strings
print(c)

['1' '2' '3']


In [5]:
t = np.array([1, 2, "3",3.0]) # upcast (coerced) to strings
print(t)

['1' '2' '3' '3.0']


If you provide a list of lists, you can create a multi-dimensional array. (Like a matrix)

In [5]:
d = np.array([ [1,2,3] , [4,5,6] ])
print(d)
print(d.shape)

[[1 2 3]
 [4 5 6]]
(2, 3)


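But if the inner lists have different lengths, NumPy cannot build a rectangular array. Older versions fall back to a one-dimensional array of Python lists (which is not as useful); NumPy 1.24 and later raise a `ValueError` unless you explicitly pass `dtype=object`.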

In [6]:
# The inner lists have different lengths, so no rectangular 2-D array exists.
# dtype=object asks explicitly for a 1-D array whose elements are the Python
# lists themselves; without it, recent NumPy versions raise a ValueError.
e = np.array([[1, 2, 3], [4, 5]], dtype=object)
print(e)

[list([1, 2, 3]) list([4, 5])]


## Other ways to make arrays

In [13]:
np.zeros(5) # makes an array of 0s. similar to rep(0, 5)

array([0., 0., 0., 0., 0.])

In [8]:
# default type is float, 'dtype' can change the data type
np.zeros(5, dtype = int)

array([0, 0, 0, 0, 0])

In [9]:
np.zeros(5, dtype = str)

array(['', '', '', '', ''], dtype='<U1')

In [15]:
np.zeros([2,4])  # makes an array 2x4

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [19]:
np.zeros([2,3,4]) # 3 dimensional array 2 x 3 x 4...
# notice the order of the dimensions: 2 'sheets', each of 3 rows by 4 columns

array([[[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]])

In [10]:
# block - sheet - row - column
np.zeros([2,3,4,5]) 

array([[[[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]]],


       [[[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]]]])

In [14]:
np.ones(5)  # similar, but inserts ones

array([1., 1., 1., 1., 1.])

In [6]:
# np.full(shape, fill_value)
np.full([2,3], 7)  # like zeros/ones, but you choose the single value that gets repeated

array([[7, 7, 7],
       [7, 7, 7]])

In [9]:
np.full(5,0)

array([0, 0, 0, 0, 0])

## Making arrays of random numbers
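The legacy `np.random.*` functions used below are driven by the Mersenne Twister (MT19937) generator; the newer `np.random.default_rng()` API uses PCG64 by default.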
- All random generator functions begin with `np.random.`

In [51]:
np.random.seed(1)  # seed the generator for reproducibility

In [52]:
np.random.random(5)  # random.random generates random numbers on the half-open interval [0, 1)

array([4.17022005e-01, 7.20324493e-01, 1.14374817e-04, 3.02332573e-01,
       1.46755891e-01])

In [11]:
np.random.randn(5)
# random.randn for random normal from standard normal

array([-2.32221096,  0.52972594,  1.21834369, -1.71497079, -0.82532084])

In [54]:
np.random.normal(10, 3, [2, 4])
# random.normal for random normal from normal with mean 10 and sd 3
# arranged in a 2 x 4 matrix

array([[11.91208544,  7.42028018, 15.31782289,  6.66891084],
       [10.5436428 , 11.6930346 ,  8.30046931, 12.18992679]])

In [12]:
np.random.randint(0, 10, 20)  # select random integers from 0 inclusive to 10 exclusive
# and return 20 values

array([3, 5, 6, 5, 2, 1, 3, 2, 7, 7, 5, 9, 0, 0, 5, 9, 0, 4, 8, 3])

More random generation at: <https://numpy.org/doc/stable/reference/random/index.html>

## Array sequences
make sequences with

- `np.arange(start, stop, step)`  
-  makes an **a**rray **range** from start (inclusive) to stop (exclusive), by step

In [73]:
print(range(0,10, 2)) # range object in regular python

range(0, 10, 2)


In [15]:
a = np.arange(0,101, 5)
print(a)

[  0   5  10  15  20  25  30  35  40  45  50  55  60  65  70  75  80  85
  90  95 100]


- `np.linspace(start, stop, num)`
- makes an array of **lin**early **space**d values beginning with start (inclusive), ending with stop (inclusive), with a length of num

In [16]:
np.linspace(0, 100, 11)

array([  0.,  10.,  20.,  30.,  40.,  50.,  60.,  70.,  80.,  90., 100.])

In [19]:
np.linspace(0, 100, 10)

array([  0.        ,  11.11111111,  22.22222222,  33.33333333,
        44.44444444,  55.55555556,  66.66666667,  77.77777778,
        88.88888889, 100.        ])

In [27]:
np.linspace(0, 100, 10, endpoint = False)  # optional parameter endpoint to exclude the stop value

array([ 0., 10., 20., 30., 40., 50., 60., 70., 80., 90.])

## Array Attributes
- `array.ndim` for number of dimensions
- `array.shape` for the size of each dimension
- `array.dtype` for the data type 

In [29]:
x = np.ones([3,4],dtype=int)
print(x)

[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]]


In [30]:
x.ndim

2

In [31]:
x.shape

(3, 4)

In [32]:
x.dtype

dtype('int32')

In [33]:
y = np.arange(0,12, 1)
print(y)

[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [34]:
y.ndim

1

In [35]:
y.shape # a one dimensional array

(12,)

In [36]:
y.dtype

dtype('int32')

In [38]:
z = np.array(["3","5"])
print(z)
print(z.ndim)
print(z.shape)

['3' '5']
1
(2,)


In [39]:
z.dtype

dtype('<U1')

In [40]:
z = np.ones(5, dtype = str)
print(z)
z.dtype

['1' '1' '1' '1' '1']


dtype('<U1')

In [41]:
np.full(5,2,dtype=str)

array(['2', '2', '2', '2', '2'], dtype='<U1')

## Reshaping Arrays
- `np.reshape(array, [new shape])` is the reshape function
- `array.T` is the transpose attribute; it returns a transposed view of the array and leaves the original array unaffected


In [42]:
# the twelve integers 0..11 as a one-dimensional array
j = np.arange(12)
print(j)

[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [43]:
k = np.reshape(j, [3,4])  # note that it fills row-wise unlike R
print(k)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [44]:
k = j.reshape([3,4])
print(k)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [58]:
l = np.reshape(k,[12])
print(l)

[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [100]:
print(k.T)

[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


In [105]:
print(k) # calling k.T does not modify the original k array

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [59]:
# the previous steps can be chained into a single expression:
# arange (0..11) >> reshape to 3 x 4 >> transpose to 4 x 3
l = np.arange(12).reshape(3, 4).T
print(l)

[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


In [159]:
y = np.arange(0,12, 1)
print(y)

[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [160]:
y.shape

(12,)

In [154]:
print(y.T) # the transpose of a one dimensional array doesn't suddenly give it a second dimension

[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [161]:
y.T.shape

(12,)

In [156]:
z = np.reshape(y, [1,12])
print(z)

[[ 0  1  2  3  4  5  6  7  8  9 10 11]]


In [163]:
z.shape

(1, 12)

In [157]:
print(z.T)

[[ 0]
 [ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [11]]


In [164]:
z.T.shape

(12, 1)

## Subsetting and Slicing Arrays
- very similar to subsetting and slicing lists

In [117]:
y = np.arange(0,12, 1)
print(y)

[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [118]:
y[4]

4

In [144]:
y.shape

(12,)

In [119]:
y[4:6]

array([4, 5])

You can also give a step size with a second colon: the array is subset with `array[start:stop:step]`.

In [120]:
y[1:8:3]

array([1, 4, 7])

Subsetting and slicing higher dimensional arrays is similar, and uses a comma to separate subsetting instructions for each dimension.

In [20]:
z = np.reshape(y, [3,4])
print(z)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [21]:
print(z[1,2]) # returns what is at row index 1, col index 2

6


In [22]:
type(z[1,2]) # a single element comes back as a NumPy integer scalar (e.g. numpy.int32), no longer as an array

numpy.int32

In [23]:
z[0:2, 0:2]

array([[0, 1],
       [4, 5]])

In [24]:
type(z[0:2, 0:2])  # the type remains a numpy array

numpy.ndarray

In [25]:
print(z[2, :]) # returns row at index 2

[ 8  9 10 11]


In [26]:
z[2, :].shape  # the shape is one dimensional

(4,)

In [27]:
print(z[:,2]) # returns column at index 2

[ 2  6 10]


In [28]:
z[:,2].shape # shape is one dimensional

(3,)

## Concatenating Arrays

In [59]:
# two length-4 integer ranges used throughout the concatenation examples
x = np.arange(4)
y = np.arange(100, 104)
print(x, y, sep="\n")

[0 1 2 3]
[100 101 102 103]


In [60]:
np.concatenate([x,y])

array([  0,   1,   2,   3, 100, 101, 102, 103])

In [61]:
np.concatenate([x,y], axis = 0)

array([  0,   1,   2,   3, 100, 101, 102, 103])

### Example: using concatenate and reshape to build the inputs for a linear regression

In [67]:
# Build a 4 x 2 feature matrix whose columns are x1 and x2, then fit an
# ordinary least-squares model to it.
# The matrix and the response get their own names (features, target) so that
# the 1-D arrays x and y defined earlier are NOT overwritten: the cells that
# follow still expect x and y to be the length-4 vectors [0 1 2 3] and
# [100 101 102 103].
x1 = np.arange(0, 4, 1)
x2 = np.arange(100, 104, 1)
# concatenate -> length 8; reshape -> 2 rows (x1, x2); transpose -> 4 rows x 2 columns
features = np.concatenate([x1, x2]).reshape([2, 4]).T
target = np.array([3, 6, 9, 0])

from sklearn import linear_model
model = linear_model.LinearRegression(fit_intercept = True)
model.fit(features, target)

print(x1)
print(x2)
print(features)
# x2 is exactly x1 + 100, so the two columns are perfectly collinear: the
# individual coefficients are not uniquely determined, and the values printed
# here are only one of many equally good solutions.
print(model.coef_)

[0 1 2 3]
[100 101 102 103]
[[  0 100]
 [  1 101]
 [  2 102]
 [  3 103]]
[-0.3 -0.3]


In [58]:
x.shape

(4,)

In [62]:
print(np.vstack([x,y]))

[[  0   1   2   3]
 [100 101 102 103]]


In [63]:
# fold each length-4 vector into a 2 x 2 matrix (filled row by row)
xm = x.reshape(2, 2)
ym = y.reshape(2, 2)
print(xm, ym, sep="\n")

[[0 1]
 [2 3]]
[[100 101]
 [102 103]]


In [81]:
print(np.concatenate([xm,ym]))

[[  0   1]
 [  2   3]
 [100 101]
 [102 103]]


In [87]:
print(np.concatenate([xm,ym], axis = 0))

[[  0   1]
 [  2   3]
 [100 101]
 [102 103]]


In [86]:
print(np.concatenate([xm,ym], axis = 1))

[[  0   1 100 101]
 [  2   3 102 103]]


In [90]:
print(np.vstack([xm, ym]))
print(np.vstack([xm, ym]).ndim)
print(np.vstack([xm, ym]).shape)
print(np.vstack([xm, ym]).dtype)

[[  0   1]
 [  2   3]
 [100 101]
 [102 103]]
2
(4, 2)
int32


In [93]:
print(np.hstack([xm, ym]))
print(np.hstack([xm, ym]).ndim)
print(np.hstack([xm, ym]).shape)
print(np.hstack([xm, ym]).dtype)

[[  0   1 100 101]
 [  2   3 102 103]]
2
(2, 4)
int32


## Math Operators with numpy arrays

In [48]:
print(x)
print(y)
print(xm)
print(ym)

[0 1 2 3]
[100 101 102 103]
[[0 1]
 [2 3]]
[[100 101]
 [102 103]]


In [50]:
x + 5

array([5, 6, 7, 8])

In [52]:
x + y

array([100, 102, 104, 106])

In [53]:
xm + 5

array([[5, 6],
       [7, 8]])

In [54]:
xm + ym

array([[100, 102],
       [104, 106]])

In [56]:
np.dot(x,y)   # 0 * 100 + 1 * 101 + 2 * 102 + 3 * 103

614

In [59]:
print(xm)
print(ym)

[[0 1]
 [2 3]]
[[100 101]
 [102 103]]


In [60]:
np.dot(xm, ym)  # matrix multiplication between xm and ym

array([[102, 103],
       [506, 511]])

In [61]:
x * y 

array([  0, 101, 204, 309])

In [62]:
xm * ym  # * multiplies element by element; it is not matrix multiplication

array([[  0, 101],
       [204, 309]])