# Numpy tutorial

In [2]:
import numpy as np

---
## Creating a numpy array
---

### Numpy array from python list

In [3]:
# Convert a plain Python list into a 1-D ndarray; the integer dtype is inferred.
arr = np.array(list(range(1, 6)))
arr

array([1, 2, 3, 4, 5])

### Use `dtype` to explicitly declare type

In [5]:
# Passing dtype overrides type inference: the integers are stored as 32-bit floats.
arr = np.array([1, 2, 3, 4, 5], dtype=np.float32)
arr

array([1., 2., 3., 4., 5.], dtype=float32)

### Declaring multidimensional arrays

In [8]:
# A list of equal-length rows becomes a 2-D array; each row starts at 1, 4 and 7.
arr = np.array([[first + step for step in range(3)] for first in (1, 4, 7)])
arr

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

### Array of zeros

In [10]:
# 5x5 array filled with 0.0 (float64 is the default dtype).
arr = np.zeros(shape=(5, 5))
arr

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

### Array of ones

In [12]:
# 5x5 array filled with 1.0 (float64 is the default dtype).
arr = np.ones(shape=(5, 5))
arr

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

### Array filled with a given number

In [15]:
# 5x5 array in which every element is the given fill value.
arr = np.full(shape=(5, 5), fill_value=1.5)
arr

array([[1.5, 1.5, 1.5, 1.5, 1.5],
       [1.5, 1.5, 1.5, 1.5, 1.5],
       [1.5, 1.5, 1.5, 1.5, 1.5],
       [1.5, 1.5, 1.5, 1.5, 1.5],
       [1.5, 1.5, 1.5, 1.5, 1.5]])

### Array with linear sequence

In [16]:
# Integers from 1 up to (but not including) 10, in steps of 1.
arr = np.arange(1, 10, step=1)
arr

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

### Array with n evenly spaced values between 2 given points

In [19]:
# 25 evenly spaced samples from 1 to 50; both endpoints are included.
arr = np.linspace(start=1, stop=50, num=25)
arr

array([ 1.        ,  3.04166667,  5.08333333,  7.125     ,  9.16666667,
       11.20833333, 13.25      , 15.29166667, 17.33333333, 19.375     ,
       21.41666667, 23.45833333, 25.5       , 27.54166667, 29.58333333,
       31.625     , 33.66666667, 35.70833333, 37.75      , 39.79166667,
       41.83333333, 43.875     , 45.91666667, 47.95833333, 50.        ])

---
## Creating a numpy array with random numbers
---

### Uniformly distributed random values between 0 and 1

In [22]:
# 5x5 samples drawn uniformly from [0, 1); unseeded, so values differ on every run.
arr = np.random.random(size=(5, 5))
arr

array([[0.88074075, 0.24520379, 0.24809671, 0.41235967, 0.26730426],
       [0.22469928, 0.4214415 , 0.60933476, 0.40638437, 0.11167298],
       [0.36689668, 0.32316038, 0.79650524, 0.11174247, 0.16527083],
       [0.65039171, 0.59325674, 0.03011029, 0.8451291 , 0.24162231],
       [0.23256502, 0.85112494, 0.74277078, 0.14676145, 0.32601934]])

### Normally distributed random values with `mean=0` and standard deviation `1` (so `variance=1`)

In [25]:
# 3x3 samples from a normal distribution.
# loc is the mean; scale is the standard deviation (not the variance).
arr = np.random.normal(loc=0, scale=1, size=(3, 3))
arr

array([[-0.32746217, -1.05883589, -0.63589087],
       [-0.05649182, -1.20489198, -2.34321154],
       [ 1.21301302, -0.60207936, -0.44243781]])

### Identity matrix

In [26]:
# 3x3 identity matrix: ones on the main diagonal, zeros elsewhere.
arr = np.eye(N=3)
arr

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

### Create an uninitialized array (`np.empty` allocates memory but leaves the values arbitrary)

In [27]:
# np.empty only allocates memory; the contents are NOT initialized, so the
# values shown are whatever happened to be in that memory and must be
# overwritten before use.
arr = np.empty(shape=3)
arr

array([1., 1., 1.])

---
## Array attributes
---

In [30]:
# Random integer arrays in [0, 10) with one, two and three dimensions.
# Note: a size of `(3)` is just the integer 3, so it is written as such here.
arr1 = np.random.randint(0, 10, size=3)
arr2 = np.random.randint(0, 10, size=(4, 5))
arr3 = np.random.randint(0, 10, size=(6, 7, 8))

# ndim = number of axes, size = total element count,
# shape = length of each axis, dtype = element type.
print('arr3 ndim:', arr3.ndim)
print('arr3 size:', arr3.size)
print('arr3 shape:', arr3.shape)
print('arr3 dtype:', arr3.dtype)

arr3 ndim: 3
arr3 size: 336
arr3 shape: (6, 7, 8)
arr3 dtype: int64


---
## Array indexing
---

In [32]:
# 3x4 array of random integers in [0, 10) used for the indexing examples.
arr = np.random.randint(low=0, high=10, size=(3, 4))
arr

array([[9, 7, 7, 4],
       [8, 0, 1, 1],
       [5, 9, 0, 4]])

In [34]:
# A single index selects a whole row.
print(arr[0])
# Two comma-separated indices select one element: row 2, column 1.
print(arr[2,1])
# Negative indices count from the end: -1 is the last row.
print(arr[-1])
# Last column of the last row.
print(arr[-1,-1])

[9 7 7 4]
9
[5 9 0 4]
4


---
## Slicing
---

In [36]:
# 5x5 array of random integers in [0, 10) used for the slicing example.
arr = np.random.randint(low=0, high=10, size=(5, 5))
arr

array([[9, 4, 3, 0, 4],
       [8, 5, 1, 3, 6],
       [8, 7, 2, 1, 8],
       [4, 0, 8, 9, 6],
       [0, 2, 7, 8, 0]])

In [38]:
# Rows 0-3 and columns 1-2: the stop index of each slice is exclusive.
print(arr[0:4,1:3])

[[4 3]
 [5 1]
 [7 2]
 [0 8]]


Basic slices always return views, not copies, so any change made to the slice is also reflected in the original array.

In [40]:
# Take rows 2-3 and columns 1-3 of a fresh random array.
# The slice is a view: it shares memory with `arr` instead of copying it.
arr = np.random.randint(low=0, high=10, size=(5, 5))
view1 = arr[2:4, 1:4]
view1

array([[0, 1, 0],
       [9, 5, 5]])

In [41]:
# view1 was taken as arr[2:4,1:4], so this writes through to arr[3,2].
view1[1,1]=100

In [48]:
# Show the slice next to the array it was taken from: the 100 appears in both.
print('view1 = ',view1)
print()
# NOTE(review): the 'view2' label is misleading - this prints the original array `arr`.
print('view2 = ',arr)

view1 =  [[  0   1   0]
 [  9 100   5]]

view2 =  [[  5   1   7   7   0]
 [  1   9   3   2   8]
 [  4   0   1   0   3]
 [  4   9 100   5   1]
 [  0   1   2   2   9]]


### Creating copy of slices

In [50]:
# Same slice as before, but .copy() gives it its own memory,
# so later writes to `view1` cannot reach `arr`.
arr = np.random.randint(low=0, high=10, size=(5, 5))
view1 = arr[2:4, 1:4].copy()
view1

array([[4, 3, 1],
       [7, 1, 0]])

In [51]:
# view1 was created with .copy(), so this write does not change arr.
view1[1,1]=100

In [52]:
# Only the copy contains 100; the source array is unchanged.
print('view1 = ',view1)
print()
# NOTE(review): the 'view2' label is misleading - this prints the original array `arr`.
print('view2 = ',arr)

view1 =  [[  4   3   1]
 [  7 100   0]]

view2 =  [[8 2 6 9 1]
 [4 2 5 9 3]
 [6 4 3 1 1]
 [4 7 1 0 9]
 [2 5 9 3 7]]


---
## Reshaping
---

The number of elements before and after reshaping must be the same.

In [55]:
# Nine consecutive integers laid out row by row in a 3x3 grid.
arr = np.arange(1, 10).reshape(3, 3)
arr

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [57]:
# Indexing with np.newaxis inserts a length-1 leading axis: shape (5,) becomes (1, 5).
arr = np.array([1, 2, 3, 4, 5])[np.newaxis, :]
arr.shape

(1, 5)

---
## Concatenation and splitting
---

In [60]:
# 1-D arrays of different lengths are joined end to end.
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6, 7])
arr = np.concatenate([arr1, arr2])
arr

array([1, 2, 3, 4, 5, 6, 7])

### Concatenating along different axis

In [63]:
# Two 1x3 arrays joined along axis 0 (rows) give a 2x3 array.
arr1 = np.array([[1, 2, 3]])
arr2 = np.array([[4, 5, 6]])
arr = np.concatenate([arr1, arr2], axis=0)
arr

array([[1, 2, 3],
       [4, 5, 6]])

### `hstack` and `vstack`

In [64]:
# hstack joins 1-D arrays end to end.
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])
arr3 = np.hstack([arr1, arr2])
arr3

array([1, 2, 3, 4, 5, 6])

In [65]:
# vstack treats each 1-D array as a row and stacks them into a 2-D array.
arr4 = np.vstack([arr1, arr2])
arr4

array([[1, 2, 3],
       [4, 5, 6]])

---
## Aggregates
---

In [67]:
# reduce applies the ufunc repeatedly along the axis until one value is left:
# 1 + 2 + 3 + 4 + 5.
arr = np.arange(1, 6)
np.add.reduce(arr, axis=0)

15

`accumulate` returns every intermediate result of applying the operation, while `reduce` collapses the array to the single final result.

In [68]:
# Running sums: element i holds the sum of arr[0] through arr[i].
np.add.accumulate(arr)

array([ 1,  3,  6, 10, 15])

In [69]:
# Product of all elements: 1 * 2 * 3 * 4 * 5.
np.multiply.reduce(arr)

120

In [70]:
# Running products: element i holds the product of arr[0] through arr[i].
np.multiply.accumulate(arr)

array([  1,   2,   6,  24, 120])

---
## Broadcasting
---

Broadcasting allows binary operations to be performed on arrays of different sizes. The arrays are stretched (broadcast) so that their shapes match each other.
![Broadcasting](https://www.safaribooksonline.com/library/view/python-data-science/9781491912126/assets/pyds_02in06.png)

In [71]:
# The scalar 5 is broadcast to every element of the length-5 array.
np.arange(5)+5

array([5, 6, 7, 8, 9])

In [72]:
# Shapes (3, 3) and (3,): the 1-D array is added to each row of the matrix.
np.ones((3,3))+np.arange(3)

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

In [73]:
# Shapes (3, 1) and (3,): both operands are stretched, giving a (3, 3) result.
np.ones((3,1))+np.arange(3)

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

---
## Boolean arrays and masks
---

In [74]:
# 5x5 array of random integers in [0, 10) used for the masking examples.
arr = np.random.randint(low=0, high=10, size=(5, 5))
arr

array([[3, 7, 2, 0, 7],
       [9, 2, 9, 0, 6],
       [8, 3, 5, 5, 4],
       [5, 0, 5, 6, 6],
       [1, 4, 3, 5, 0]])

In [75]:
# Element-wise comparison yields a boolean array with the same shape as arr.
arr>5

array([[False,  True, False, False,  True],
       [ True, False,  True, False,  True],
       [ True, False, False, False, False],
       [False, False, False,  True,  True],
       [False, False, False, False, False]])

In [76]:
# Count the True entries, i.e. how many elements are greater than 5.
np.count_nonzero(arr>5)

8

In [77]:
# True counts as 1 and False as 0 when summed, so this gives the same count.
np.sum(arr>5)

8

---
## Fancy indexing
---

Fancy indexing can be used to select a subset of a matrix. For example, it can be used to select random points to plot from a large dataset.

In [78]:
# Parameters of a 2-D normal distribution: a zero mean vector and a 2x2 covariance matrix.
mean = [0, 0]
cov = [[1, 2], [2, 5]]
# A fixed seed makes the draws from this generator repeatable.
rand = np.random.RandomState(seed=42)

In [80]:
# Draw 100 two-dimensional points; each row of X is one sample.
X = rand.multivariate_normal(mean=mean, cov=cov, size=100)
X.shape

(100, 2)