# MA 506 Probability and Statistical Inference

# 1. Basics

## 1.1 Adding collections without using numpy

Data stored in plain Python lists cannot be added pointwise directly: the `+` operator concatenates the two lists instead.

In [1]:
a = [1,2,3,4]
b = [5,6,7,8]
a+b

[1, 2, 3, 4, 5, 6, 7, 8]

To add the lists pointwise, we need to pair up their elements with the `zip()` function.

In [2]:
# Pair up the elements of the two lists and add each pair.
result = [x + y for x, y in zip(a,b)]
result

[6, 8, 10, 12]

NumPy makes this much easier.

## 1.2 Declaring an array

In [3]:
import numpy as np

In [4]:
a = np.array([1,2,3])
a

array([1, 2, 3])

In [5]:
type(a)

numpy.ndarray

In [6]:
a.dtype

dtype('int64')

In [7]:
f = np.array([1.2,1,2])
f.dtype

dtype('float64')

In [8]:
f[1]

1.0

In [9]:
f.ndim

1

In [10]:
f.shape

(3,)

In [11]:
f.size

3

## 1.3 Re-shape of an array

In [12]:
# Two identical 1-D arrays; show each one followed by its shape.
a = np.array([1,2,3])
b = np.array([1,2,3])
for arr in (a, b):
    print(arr)
    print(arr.shape)

[1 2 3]
(3,)
[1 2 3]
(3,)


In [13]:
a.dot(b)

14

In [14]:
a1 = a.reshape(-1,1)
a1

array([[1],
       [2],
       [3]])

In [16]:
# a1 has shape (3,1) and b has shape (3,): the inner dimensions (1 and 3)
# do not match, so dot() raises ValueError. The error is caught and printed
# so that "Restart & Run All" can continue past this demonstration.
try:
    a1.dot(b)
except ValueError as err:
    print("ValueError:", err)

ValueError: shapes (3,1) and (3,) not aligned: 1 (dim 1) != 3 (dim 0)

In [17]:
# Reshaping b into a column as well gives (3,1) . (3,1): the inner
# dimensions (1 and 3) still do not match, so dot() raises ValueError.
# The error is caught and printed so that "Restart & Run All" can continue.
b1 = b.reshape(-1,1)
try:
    a1.dot(b1)
except ValueError as err:
    print("ValueError:", err)

ValueError: shapes (3,1) and (3,1) not aligned: 1 (dim 1) != 3 (dim 0)

In [18]:
b1 = b.reshape(1,-1)
a1.dot(b1)

array([[1, 2, 3],
       [2, 4, 6],
       [3, 6, 9]])

## 1.4 Back to elementwise addition

For the following arrays `a` and `b`, elementwise addition is straightforward.

In [19]:
a = np.array([1,2,3,4])
b = np.array([5,6,7,8])
a+b

array([ 6,  8, 10, 12])

In [20]:
## for concatenating a and b like for the list case
np.concatenate((a,b),axis = 0)

array([1, 2, 3, 4, 5, 6, 7, 8])

## 1.5 Numpy is way faster

In [21]:
# Two plain Python lists holding the integers 0, 1, ..., 999999.
a = list(range(1000000))
f = list(range(1000000))

In [22]:
%%time
summ = 0
for i,j in zip(a,f):
    summ = summ + i*j
summ

CPU times: user 176 ms, sys: 3.89 ms, total: 180 ms
Wall time: 182 ms


333332833333500000

In [23]:
a = np.array([i for i in range(1000000)])
f = np.array([i for i in range(1000000)])

In [24]:
%%time
summ = a.dot(f)
summ

CPU times: user 2.03 ms, sys: 15 µs, total: 2.04 ms
Wall time: 2.09 ms


333332833333500000

## 1.6 Other elementwise operations 

In [25]:
a = np.array([1,2,3])
f = np.array([3,2,3])

In [26]:
# multiplication
a*f

array([3, 4, 9])

In [27]:
## division
a/f  

array([0.33333333, 1.        , 1.        ])

In [28]:
## raising to power
a**f

array([ 1,  4, 27])

In [29]:
## multiplying a scalar
a*10 

array([10, 20, 30])

## 1.7 Universal function

Universal functions (ufuncs) operate on a NumPy array element by element.

In [30]:
a = np.array([1,2,3])
np.sin(a)  ## vectorized operation: faster than a loop

array([0.84147098, 0.90929743, 0.14112001])

In [31]:
np.log(a)

array([0.        , 0.69314718, 1.09861229])

## 1.8 arange vs range 

`range` is a built-in Python function; `arange` is a similar function provided by NumPy that returns an array and, unlike `range`, also accepts a non-integer step.

#### range

In [32]:
range(10)

range(0, 10)

In [33]:
[i for i in range(10)]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [34]:
[i for i in range(2,10)]

[2, 3, 4, 5, 6, 7, 8, 9]

In [35]:
[i for i in range(2,10,2)]

[2, 4, 6, 8]

In [36]:
# range() only accepts integers, so a float step raises TypeError.
# The error is caught and printed so that "Restart & Run All" can continue
# past this demonstration.
try:
    [i for i in range(0,1,0.2)]
except TypeError as err:
    print("TypeError:", err)

TypeError: 'float' object cannot be interpreted as an integer

#### arange

In [37]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [38]:
np.arange(2,10)

array([2, 3, 4, 5, 6, 7, 8, 9])

In [39]:
np.arange(2,10,2)

array([2, 4, 6, 8])

In [40]:
np.arange(0,1,0.2)

array([0. , 0.2, 0.4, 0.6, 0.8])

#### time comparison

In [41]:
%%time
np.sin(np.arange(1000000))

CPU times: user 51.4 ms, sys: 2.92 ms, total: 54.3 ms
Wall time: 52.5 ms


array([ 0.        ,  0.84147098,  0.90929743, ...,  0.21429647,
       -0.70613761, -0.97735203])

In [42]:
%%time
np.sin(range(1000000))

CPU times: user 108 ms, sys: 13 ms, total: 121 ms
Wall time: 121 ms


array([ 0.        ,  0.84147098,  0.90929743, ...,  0.21429647,
       -0.70613761, -0.97735203])

# 2. Moving to 2 dimensions

In [43]:
a = np.array([[1,2,4,4],[4,3,4,5],[5,3,2,5],[53,4,2,5]])
a

array([[ 1,  2,  4,  4],
       [ 4,  3,  4,  5],
       [ 5,  3,  2,  5],
       [53,  4,  2,  5]])

In [44]:
print(a[1,1],a[2,3])

3 5


In [45]:
a.shape

(4, 4)

In [46]:
a.ndim

2

In [47]:
a.size

16

## 2.1 Slicing

Slicing refers to accessing specific sections of an array by giving index ranges.

In [48]:
a = np.array([[1, 2, 3, 4, 5],
       [6, 7, 8, 9, 10]])
a

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])

In [49]:
a.shape

(2, 5)

In [50]:
a[0,1:3]

array([2, 3])

In [51]:
a[0]

array([1, 2, 3, 4, 5])

In [52]:
a[1].shape

(5,)

In [53]:
b = a[1]
b

array([ 6,  7,  8,  9, 10])

In [54]:
## 1 and 3 both indices are with respect to start of the array
b[1:3]

array([7, 8])

In [55]:
## b[lower:upper:step]  -- step is optional
b[0:-1:2]

array([6, 8])

In [114]:
# start at index 0 and keep on going with an increment of 2
b[0::2]  

array([ 6,  8, 10])

In [115]:
## start to end with step of 2
b[::2]

array([ 6,  8, 10])

In [56]:
b[1:]

array([ 7,  8,  9, 10])

In [57]:
 ## 3rd last element to end
b[-3:]

array([ 8,  9, 10])

In [58]:
b[1:-2]

array([7, 8])

## 2.2 Striding while slicing

In [59]:
c = np.array([[12,  3,  4,  5,  5],
       [ 4,  3,  4,  5,  6],
       [ 6,  4,  5,  6,  3],
       [ 6,  4,  6,  7,  4],
       [ 6,  4,  5,  7,  8]])
c

array([[12,  3,  4,  5,  5],
       [ 4,  3,  4,  5,  6],
       [ 6,  4,  5,  6,  3],
       [ 6,  4,  6,  7,  4],
       [ 6,  4,  5,  7,  8]])

In [60]:
c[0:-1:2,:]

array([[12,  3,  4,  5,  5],
       [ 6,  4,  5,  6,  3]])

In [61]:
c[:,0:-2:2]

array([[12,  4],
       [ 4,  4],
       [ 6,  5],
       [ 6,  6],
       [ 6,  5]])

In [62]:
c[0::2,:]

array([[12,  3,  4,  5,  5],
       [ 6,  4,  5,  6,  3],
       [ 6,  4,  5,  7,  8]])

In [63]:
c[0:-1:2,0:-2:2]

array([[12,  4],
       [ 6,  5]])

## 2.3 Reshaping

In [64]:
c.shape

(5, 5)

In [65]:
d = c.reshape(1,25)
d

array([[12,  3,  4,  5,  5,  4,  3,  4,  5,  6,  6,  4,  5,  6,  3,  6,
         4,  6,  7,  4,  6,  4,  5,  7,  8]])

In [66]:
# A reshape must keep the total number of elements: 25 values cannot fill a
# 10 x 2 (= 20 element) array, so reshape() raises ValueError. The error is
# caught and printed so that "Restart & Run All" can continue; d keeps its
# previous value because the assignment never completes.
try:
    d = c.reshape(10,2)
except ValueError as err:
    print("ValueError:", err)

ValueError: cannot reshape array of size 25 into shape (10,2)

## 2.4 Modifying a numpy array

In [67]:
a = np.array([1,2,3])
print(a)
a[0]= -1
print(a)

[1 2 3]
[-1  2  3]


In [68]:
# a was built from integers, so the float 11.2 is truncated to 11 when it is
# stored; the array's dtype does not change on assignment
a[0] = 11.2
print(a)

[11  2  3]


Be careful while modifying arrays: a slice is a view of the original array, so changing the slice also changes the original.

In [69]:
a = np.array([[1,2,3],[4,5,6],[7,8,9]])
a

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [70]:
b = a[0,:]
b

array([1, 2, 3])

In [71]:
b[0] = -10
b

array([-10,   2,   3])

In [72]:
a

array([[-10,   2,   3],
       [  4,   5,   6],
       [  7,   8,   9]])

`a` and `b` are multiple views of the same underlying data. To get an independent copy, use `.copy()` as follows.

In [73]:
a

array([[-10,   2,   3],
       [  4,   5,   6],
       [  7,   8,   9]])

In [74]:
b = a[0,:].copy()
b

array([-10,   2,   3])

In [75]:
b[0] = -20
b

array([-20,   2,   3])

In [76]:
a

array([[-10,   2,   3],
       [  4,   5,   6],
       [  7,   8,   9]])

Editing multiple values together

In [77]:
a

array([[-10,   2,   3],
       [  4,   5,   6],
       [  7,   8,   9]])

In [78]:
a[0::2,0::2] = 1000

In [79]:
a

array([[1000,    2, 1000],
       [   4,    5,    6],
       [1000,    8, 1000]])

## 2.5 Fancy indexing

### 2.5.1 Indexing by position

In [80]:
a = np.array([1,43,55,75,34545,453])
ind = [3,4]
a[ind]

array([   75, 34545])

In [81]:
c = np.array([[12,3,4,5,5],[4,3,4,5,6],[6,4,5,6,3],[6,4,6,7,4],[6,4,5,7,8]])
c

array([[12,  3,  4,  5,  5],
       [ 4,  3,  4,  5,  6],
       [ 6,  4,  5,  6,  3],
       [ 6,  4,  6,  7,  4],
       [ 6,  4,  5,  7,  8]])

In [82]:
# The two index lists are paired up element by element, so this selects
# the individual entries c[1,1], c[3,3] and c[4,4] (not a 3x3 sub-block)
ind1 = [1,3,4]
ind2 = [1,3,4]
c[ind1,ind2]

array([3, 7, 8])

### 2.5.2 Indexing by masking

In [3]:
a = np.arange(5)
a

array([0, 1, 2, 3, 4])

In [4]:
# A boolean mask keeps the positions where the mask is True (here 0 and 3).
# numpy is already imported as np near the top of the notebook, so the
# import is not repeated here.
mask = np.array([1,0,0,1,0],dtype=bool)
a[mask]

array([0, 3])

In [85]:
mask = a>1
mask

array([False, False,  True,  True,  True])

In [86]:
a[mask]

array([2, 3, 4])

We can also assign values

In [87]:
a = np.array([10,20,-10,3,4,5,20])
a

array([ 10,  20, -10,   3,   4,   5,  20])

In [88]:
a[a<0] =0
a

array([10, 20,  0,  3,  4,  5, 20])

### 2.5.3 Multiple dimension masking

In [89]:
c

array([[12,  3,  4,  5,  5],
       [ 4,  3,  4,  5,  6],
       [ 6,  4,  5,  6,  3],
       [ 6,  4,  6,  7,  4],
       [ 6,  4,  5,  7,  8]])

In [90]:
c[c[:,0]>5,:]

array([[12,  3,  4,  5,  5],
       [ 6,  4,  5,  6,  3],
       [ 6,  4,  6,  7,  4],
       [ 6,  4,  5,  7,  8]])

In [91]:
c[(c[:,0]>5) & (c[:,0]<10),:]

array([[6, 4, 5, 6, 3],
       [6, 4, 6, 7, 4],
       [6, 4, 5, 7, 8]])

In [92]:
c>10

array([[ True, False, False, False, False],
       [False, False, False, False, False],
       [False, False, False, False, False],
       [False, False, False, False, False],
       [False, False, False, False, False]])

In [93]:
a = c>10
c[a]

array([12])

In [94]:
c

array([[12,  3,  4,  5,  5],
       [ 4,  3,  4,  5,  6],
       [ 6,  4,  5,  6,  3],
       [ 6,  4,  6,  7,  4],
       [ 6,  4,  5,  7,  8]])

In [95]:
c%3==0

array([[ True,  True, False, False, False],
       [False,  True, False, False,  True],
       [ True, False, False,  True,  True],
       [ True, False,  True, False, False],
       [ True, False, False, False, False]])

In [96]:
c[c%3==0]

array([12,  3,  3,  6,  6,  6,  3,  6,  6,  6])

# 3 Higher dimensional arrays

In [97]:
c = np.array([[[1,2,3],[4,5,6],[7,8,9],[13,14,15]],[[1,2,3],[4,5,6],[10,11,12],[16,176,18]]])
c

array([[[  1,   2,   3],
        [  4,   5,   6],
        [  7,   8,   9],
        [ 13,  14,  15]],

       [[  1,   2,   3],
        [  4,   5,   6],
        [ 10,  11,  12],
        [ 16, 176,  18]]])

In [98]:
c.shape

(2, 4, 3)

In [99]:
c[0]

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [13, 14, 15]])

In [100]:
c[0,1]

array([4, 5, 6])

# 4 Calculation with arrays

## 4.1 Broadcasting

In [101]:
a = np.array([0,10,20,30])
a.shape

(4,)

In [102]:
b = np.array([0,1,2])
b.shape

(3,)

In [103]:
a1 = a.reshape(4,1)
a1.shape

(4, 1)

In [104]:
# a1: (4,1) and b is (3,)
a1+b

array([[ 0,  1,  2],
       [10, 11, 12],
       [20, 21, 22],
       [30, 31, 32]])

In [105]:
b1 = b.reshape(1,3)
b1.shape

(1, 3)

In [106]:
# a1: (4,1) and b1 (1,3)
a1+b1

array([[ 0,  1,  2],
       [10, 11, 12],
       [20, 21, 22],
       [30, 31, 32]])

In [107]:
a1.dot(b1)

array([[ 0,  0,  0],
       [ 0, 10, 20],
       [ 0, 20, 40],
       [ 0, 30, 60]])

In [108]:
## scalar broadcasting over an array
print(a)
print(a+2)

[ 0 10 20 30]
[ 2 12 22 32]


## 4.2 Reduction operations

In [109]:
c = np.array([[12,3,4,5,5],[4,3,4,5,6],[6,4,5,6,3],[6,4,6,7,4]])
c

array([[12,  3,  4,  5,  5],
       [ 4,  3,  4,  5,  6],
       [ 6,  4,  5,  6,  3],
       [ 6,  4,  6,  7,  4]])

In [110]:
## adds all the values in the array
c.sum()  

102

In [111]:
## axis=0: add up along the rows, leaving one total per column
c.sum(axis=0) 

array([28, 14, 19, 23, 18])

In [112]:
## axis=1: add up along the columns, leaving one total per row
c.sum(axis=1)  

array([29, 22, 24, 27])

In [113]:
## axis=-1 means the last axis, which for this 2-D array is the same as axis=1
c.sum(axis=-1)

array([29, 22, 24, 27])

Other reduction operations

In [114]:
c.min()

3

In [115]:
c.max()

12

In [116]:
c.mean()

5.1

In [117]:
# Peak-to-peak range (max - min). The ndarray.ptp() method was removed in
# NumPy 2.0, so use the function form, which works on every NumPy version.
np.ptp(c)

9

And many others....

#### both of the following are okay

In [118]:
## method format
a.sum()  

60

In [119]:
## function format
np.sum(a)  

60

## 4.3 Other important functions

### 4.3.1 argmax and argmin

In [120]:
c

array([[12,  3,  4,  5,  5],
       [ 4,  3,  4,  5,  6],
       [ 6,  4,  5,  6,  3],
       [ 6,  4,  6,  7,  4]])

In [121]:
c[[2,3],[3,4]]

array([6, 4])

In [122]:
c.argmin(axis = 0)

array([1, 0, 0, 0, 2])

In [123]:
c.argmin(axis = 1)

array([1, 1, 4, 1])

In [262]:
c[c.argmin(axis = 0),np.arange(5)]

array([4, 3, 4, 5, 3])

In [146]:
c.argmin(axis = 0),np.arange(5)

(array([1, 0, 0, 0, 2]), array([0, 1, 2, 3, 4]))

In [263]:
c.argmin()

1

In [264]:
c[0,1] = 10
c.argmin()

6

In [265]:
c

array([[12, 10,  4,  5,  5],
       [ 4,  3,  4,  5,  6],
       [ 6,  4,  5,  6,  3],
       [ 6,  4,  6,  7,  4]])

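Without an `axis` argument, `argmin()` returns an index into the flattened array. To convert it back into a (row, column) position: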

In [266]:
np.unravel_index(c.argmin(),c.shape)  ## to unflatten the index in rectangular format
## c.argmin(): index to be resolved
## c.shape: the shape to be resolved into

(1, 1)

#### Still a problem: `argmin` reports only the first of several equal minima

In [281]:
# c[0,1] was overwritten with 10 in an earlier cell; rebuild the original
# array so that it again contains several equal minima (three 3s) and the
# cells below reproduce on a fresh "Restart & Run All".
c = np.array([[12,3,4,5,5],[4,3,4,5,6],[6,4,5,6,3],[6,4,6,7,4]])
c

array([[12,  3,  4,  5,  5],
       [ 4,  3,  4,  5,  6],
       [ 6,  4,  5,  6,  3],
       [ 6,  4,  6,  7,  4]])

In [282]:
np.where(c == c.min())

(array([0, 1, 2]), array([1, 1, 4]))

In [283]:
## np.where(condition) returns a tuple of index arrays, one per dimension of c;
## indexing c with that tuple picks out the matching elements
c[np.where(c == c.min())]

array([3, 3, 3])

Can also use where for many other things

In [284]:
c[np.where(c>=10)]  ## subsetting based on a rule

array([12])

### 4.3.2 Some other functions

In [285]:
np.linspace(0,1,3)

array([0. , 0.5, 1. ])

In [286]:
# argsort returns the indices that would sort the array in ascending order.
# numpy is already imported as np near the top of the notebook, so the
# import is not repeated here.
f1 = np.array([5,3,2,5])
np.argsort(f1)

array([2, 1, 0, 3])

In [289]:
f2 = np.array([[5,3,2,5],[1,2,3,4]])
f2

array([[5, 3, 2, 5],
       [1, 2, 3, 4]])

In [290]:
np.argsort(f2)

array([[2, 1, 0, 3],
       [0, 1, 2, 3]])

In [291]:
np.argsort(f2,axis=1)

array([[2, 1, 0, 3],
       [0, 1, 2, 3]])

In [292]:
np.argsort(f2,axis=0)

array([[1, 1, 0, 1],
       [0, 0, 1, 0]])