# Data Science I Topic 1 - Numpy

In [1]:
# Import numpy with alias np
import numpy as np

## N-dimensional array -- ndarray

<u>Run the cells below and inspect the results</u>

In [2]:
# One-dimensional array whose elements are stored as 8-bit integers
x = np.array((1, 2, 3), dtype='int8')
print(x)
print('Item size:', x.itemsize)    # bytes occupied by a single element
print('Size:', x.size)             # how many elements the array holds
print('Size in bytes:', x.nbytes)  # memory taken by all elements together
print('Data Type: ', x.dtype)
print('Dimension: ', x.ndim)

[1 2 3]
Item size: 1
Size: 3
Size in bytes: 3
Data Type:  int8
Dimension:  1


In [3]:
# Two-dimensional array: three rows, two columns (no dtype given on purpose)
y = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
print(y)
print('Dimension: ', y.ndim)

[[1 2]
 [3 4]
 [5 6]]
Dimension:  2


<u>Answer the following questions</u>:
    
* If you don't specify the data type, what would be the default? 
* Compared with the `int8` array `x` above, how many times more memory does each element of `y` use?
* Print out the shape and the total bytes consumed.

**Ans:**
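* `int32` or `int64`, depending on your platform and NumPy version
* 4 times (`int32`) or 8 times (`int64`) the single byte that an `int8` element uses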

In [4]:
# First line: the (rows, columns) shape; second line: total bytes consumed.
print(y.shape, y.nbytes, sep='\n')

(3, 2)
24


<u>For bigger applications, you often need to mind the data type and size.</u>

### Accessing specific elements, rows, columns

<u>It works much like subsetting lists. Try it out!</u>

<u>**Tip**: Use `[row, column]` indexing.</u>

In [5]:
# Display the whole array for reference before slicing it
y

array([[1, 2],
       [3, 4],
       [5, 6]])

In [6]:
# Print the first column of y
# (":" keeps every row, 0 selects the column at index 0)
y[:,0]

array([1, 3, 5])

In [7]:
# Print the second row of y
# (a single index selects along the first axis, i.e. a whole row)
y[1]

array([3, 4])

In [8]:
# Print the last element of y
# Explicit alternative: y[2,1]. Negative indices count back from the end,
# so the form below works without knowing the shape of y.
y[-1,-1]

6

<u>Run the cells below and answer the questions.</u>

In [9]:
# Integers 1 through 10 (the stop value 11 itself is excluded)
a = np.arange(1, 11)
print(a)

[ 1  2  3  4  5  6  7  8  9 10]


In [10]:
# Slice with start:end:interval to split a into its odd and even values
print(a[0::2])  # every second element starting at index 0 -> odd values
print(a[1::2])  # every second element starting at index 1 -> even values

[1 3 5 7 9]
[ 2  4  6  8 10]


In [11]:
# Print the even elements of a, in decreasing order.
# Both slices step backwards by 2 from the last element; the first one
# spells out start and stop, the second one leaves them to the defaults.
print(a[10:0:-2], a[::-2], sep='\n')

[10  8  6  4  2]
[10  8  6  4  2]


### Initializing Various Types of Arrays.

<u>Follow the instructions below and run the cells.</u>

#### All zeros

In [12]:
print(np.zeros(shape=4))
# the shape of a length-4 vector is the one-element tuple (4,)
np.zeros(shape=4).shape

[0. 0. 0. 0.]


(4,)

#### All ones

In [13]:
print(np.ones(shape=(2, 3)))
# two rows, three columns
np.ones(shape=(2, 3)).shape

[[1. 1. 1.]
 [1. 1. 1.]]


(2, 3)

#### Identity matrix

In [14]:
print(np.identity(n=3))
# an identity matrix is always square: n x n
np.identity(n=3).shape

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


(3, 3)

#### Other numbers

In [15]:
# Three-dimensional array (2 blocks of 3 rows x 4 columns) filled with 5
print(np.full(shape=(2, 3, 4), fill_value=5))

[[[5 5 5 5]
  [5 5 5 5]
  [5 5 5 5]]

 [[5 5 5 5]
  [5 5 5 5]
  [5 5 5 5]]]


#### Random decimal numbers

In [16]:
# 5 samples from uniform distribution over [0, 1)
# (no seed has been set up to this point, so the values change on every run)
np.random.rand(5)

array([0.28175731, 0.58183851, 0.28412434, 0.83923245, 0.3124241 ])

In [17]:
# the average should be close to 0.5
# (0.5 is the midpoint of [0, 1); 1000 samples keep the estimate near it)
np.random.rand(1000).mean() 

0.5014948702167253

<u>Print 5 samples taken from the uniform distribution over [0.1, 0.2).</u>

<u>Check that the average is close to 0.15 when you draw enough samples.</u>

In [18]:
# Scale the [0, 1) samples by the interval width (0.1), then shift them by
# the lower bound (0.1) to land in [0.1, 0.2)
np.random.rand(5)*0.1 + 0.1

array([0.14786106, 0.11938236, 0.19691654, 0.19378627, 0.10550082])

In [19]:
# Same scale-and-shift with 1000 samples; the mean should be close to 0.15
(np.random.rand(1000)*0.1 + 0.1).mean()

0.15112810520347375

#### Random integer numbers

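<u>Run the following cell several times (ctrl/cmd+enter) with the `np.random.seed(123)` line commented out, then uncomment the seed and rerun the cell several times. With the seed set, every run prints the same numbers.</u>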

In [20]:
np.random.seed(123)  # fix the generator state so the draw below is repeatable
# discrete uniform distribution from [low, high)
print(np.random.randint(low=-5, high=5, size=(3, 2)))

[[-3 -3]
 [ 1 -4]
 [-2  4]]


<u>Set the seed to 1234 and create a 3x3 array of random integers from 0 to 100 inclusive.</u>

In [21]:
np.random.seed(1234)
# high is exclusive, so 101 is needed to make 100 a possible value
np.random.randint(low=0, high=101, size=(3, 3))

array([[47, 83, 38],
       [53, 76, 24],
       [15, 49, 23]])

### Copying

<u>Run the cells</u>

In [22]:
a = np.array([1, 2, 3])
b = a      # plain assignment: b is a second name for the very same array
b[1] = 0   # overwrite the second element through b
print('a = ', a)
print('b = ', b)

a =  [1 0 3]
b =  [1 0 3]


<u>Changing `b` changes `a` as well, because `b = a` only creates a second name for the same array! We should instead assign a copy of `a`.</u>

In [23]:
a = np.array([1, 2, 3])
b = a.copy()   # independent array holding its own copy of the data
b[1] = 0       # only the copy is modified; a keeps its original values
print('a = ', a)
print('b = ', b)

a =  [1 2 3]
b =  [1 0 3]


### Repeating

In [24]:
np.random.seed(123)
# 2 x 3 matrix of integers drawn from 0..4 (5 is excluded)
a = np.random.randint(low=0, high=5, size=(2, 3))
print(a, a.shape, sep='\n')

[[2 4 2]
 [1 3 2]]
(2, 3)


In [25]:
# Repeat every row of `a` twice: axis=0 runs down the rows
print(np.repeat(a, repeats=2, axis=0))

[[2 4 2]
 [2 4 2]
 [1 3 2]
 [1 3 2]]


In [26]:
# Repeat every column of `a` twice: axis=1 runs across the columns
print(np.repeat(a, repeats=2, axis=1))

[[2 2 4 4 2 2]
 [1 1 3 3 2 2]]


<u>If instead of repeating per single row/column, we want to repeat the whole block at once, we use `np.tile()`</u>

In [27]:
# reps=(2, 1): two copies of the whole block stacked vertically, one across
print(np.tile(a, reps=(2, 1)))

# reps=(1, 2): one copy vertically, two copies side by side
print(np.tile(a, reps=(1, 2)))

[[2 4 2]
 [1 3 2]
 [2 4 2]
 [1 3 2]]
[[2 4 2 2 4 2]
 [1 3 2 1 3 2]]


### Reshaping

In [28]:
# Run this cell
b = np.array([[1, 2, 3],
              [4, 5, 6]])
print(b.shape)
print(b.reshape((6,)))    # flat vector of 6 elements
print(b.reshape((1, 6)))  # one row, six columns

(2, 3)
[1 2 3 4 5 6]
[[1 2 3 4 5 6]]


<u>Reshape `b` to have 3 rows and 2 columns.</u>

In [29]:
# 3 rows x 2 columns still holds 6 elements, so the reshape is valid
b.reshape(3,2)

array([[1, 2],
       [3, 4],
       [5, 6]])

<u>Can you reshape b to any arbitrary shape, say, (2,4)? Try it below.</u>

In [30]:
# b holds 6 elements, but a (2,4) layout needs 8, so reshape() raises a
# ValueError. The error is the point of this exercise: catch it and print
# its message so that "Restart & Run All" is not interrupted at this cell.
try:
    b.reshape(2,4)
except ValueError as err:
    print("{}: {}".format(type(err).__name__, err))

ValueError: cannot reshape array of size 6 into shape (2,4)

<u>We can specify just one dimension and let the remaining dimension be deduced based on the size of the data by passing -1. Try it below</u>

In [31]:
# -1 asks NumPy to work out that dimension: 6 elements / 2 columns = 3 rows
b.reshape(-1,2)

array([[1, 2],
       [3, 4],
       [5, 6]])

### Stacking

<u>Create the described matrices below.</u>

In [32]:
# 2x2 matrix of ones (floating point, as the printed "1." shows)
a = np.ones(shape=(2, 2))
print(a)

[[1. 1.]
 [1. 1.]]


In [33]:
# 2x2 matrix of twos (integer, because the fill value 2 is an int)
b = np.full(shape=(2, 2), fill_value=2)
print(b)

[[2 2]
 [2 2]]


In [34]:
# Pass a and b as a tuple to np.vstack()
# a (float ones) goes on top of b (integer twos); the stacked result is
# float, which is why the twos are displayed as "2."
np.vstack((a,b))

array([[1., 1.],
       [1., 1.],
       [2., 2.],
       [2., 2.]])

In [35]:
# Stack a and b horizontally, now using np.hstack()
# (b is placed to the right of a, giving a 2 x 4 result)
np.hstack((a,b))

array([[1., 1., 2., 2.],
       [1., 1., 2., 2.]])

### Comparison operator

In [36]:
np.random.seed(123)
# one row of five integers drawn from -5..4
a = np.random.randint(low=-5, high=5, size=(1, 5))
print(a)

# element-wise test against 0: the result is a boolean array shaped like a
print(a > 0)

[[-3 -3  1 -4 -2]]
[[False False  True False False]]


In [37]:
np.random.seed(1234)
# a different seed, so b is a different draw from the same range as a
b = np.random.randint(low=-5, high=5, size=(1, 5))
print(b)

# element-wise test: is each entry of b larger than the matching entry of a?
print(b > a)

[[-2  1  0 -1  3]]
[[ True  True False  True  True]]


### Boolean operators

<u>Still using `a` and `b` above, run the cell below.</u>

In [38]:
# Show both inputs, then where at least one of them is positive
print(a, b, sep='\n')
print(np.logical_or(a > 0, b > 0))

[[-3 -3  1 -4 -2]]
[[-2  1  0 -1  3]]
[[False  True  True False  True]]


<u>Now use `np.logical_and()` to check, element-wise, whether both `a` and `b` are greater than 0.</u>

In [39]:
# True only at positions where a AND b are both positive
print(np.logical_and(a>0, b>0))

[[False False False False False]]


### Looping over

In [40]:
# Run this cell
np.random.seed(123)
# 3-D array: 2 blocks, each with 2 rows and 3 columns
M = np.random.randint(low=-5, high=5, size=(2, 2, 3))
print(M)

[[[-3 -3  1]
  [-4 -2  4]]

 [[ 1 -4 -5]
  [-4  4 -5]]]


<u>Run the cells below. What's the difference?</u>

In [41]:
# Walking M by its first axis: every m is one complete 2-D block of M
for i in range(len(M)):
    m = M[i]
    print("{} :\n {}".format(i, m))

0 :
 [[-3 -3  1]
 [-4 -2  4]]
1 :
 [[ 1 -4 -5]
 [-4  4 -5]]


In [42]:
# np.nditer() visits the individual scalar elements of M one by one, so the
# counter added by enumerate() runs over all 12 values instead of the 2 blocks
for i,m in enumerate(np.nditer(M)):
    print("{} : {}".format(i,m))

0 : -3
1 : -3
2 : 1
3 : -4
4 : -2
5 : 4
6 : 1
7 : -4
8 : -5
9 : -4
10 : 4
11 : -5


## Basic Matrix & Vector Operations

### Matrix addition & subtraction

In [43]:
# Run this cell
np.random.seed(123)
# Two 2 x 5 matrices of integers from 1..9, drawn one after the other from
# the same seeded generator (so A and B differ)
A = np.random.randint(low=1, high=10, size=(2, 5))
B = np.random.randint(low=1, high=10, size=(2, 5))
print(A, B, sep='\n')

[[3 3 7 2 4]
 [7 2 1 2 1]]
[[1 4 5 1 1]
 [5 2 8 4 3]]


In [44]:
# Element-wise sum and difference of A and B via np.add() / np.subtract()
print(np.add(A, B), np.subtract(A, B), sep='\n')

[[ 4  7 12  3  5]
 [12  4  9  6  4]]
[[ 2 -1  2  1  3]
 [ 2  0 -7 -2 -2]]


### Matrix (element-wise) multiplication and division

In [45]:
# Element-wise product and quotient of A and B via np.multiply() / np.divide().
# The quotient is printed as floats even though A and B hold integers.
print(np.multiply(A, B), np.divide(A, B), sep='\n')

[[ 3 12 35  2  4]
 [35  4  8  8  3]]
[[3.         0.75       1.4        2.         4.        ]
 [1.4        1.         0.125      0.5        0.33333333]]


### Outer and inner products

In [46]:
# Run this cell
np.random.seed(123)
a = np.random.randint(low=1, high=5, size=(1, 3))  # row vector, shape (1, 3)
b = np.random.randint(low=1, high=5, size=(3, 1))  # column vector, shape (3, 1)
print(a, b, sep='\n')

[[3 2 3]]
[[3]
 [1]
 [3]]


#### Matrix multiplication 

$C=ab,$ such that $C_{ij} = \sum_{k=1}^{n}a_{ik}b_{kj},$ where $n$ is the inner dimension (the number of columns of $a$, which must equal the number of rows of $b$).

In [47]:
# Set C=ab using np.matmul(). Does the order of the arguments matter? Exchange the order and find out.
# a is (1,3) and b is (3,1): the inner dimensions (3 and 3) match, result is (1,1)
np.matmul(a,b)

array([[20]])

In [48]:
# Swapped order: b is (3,1) and a is (1,3), so the result is (3,3).
# The order of the arguments does matter.
np.matmul(b,a)

array([[9, 6, 9],
       [3, 2, 3],
       [9, 6, 9]])

#### Matrix inner product 

$C = \langle a, b \rangle = a^T b,$ such that $C = \sum_{i=1}^{n} a_i b_i$.

In [49]:
# Set C=<a,b> using np.dot(). Does the order of the arguments matter? Exchange the order and find out.
# For 2-D inputs np.dot() performs matrix multiplication, so the (1,3) by
# (3,1) product is a 1x1 array, the same as np.matmul(a,b)
np.dot(a,b)

array([[20]])

In [50]:
# Swapped order: (3,1) by (1,3) gives a 3x3 array, so the order matters here too
np.dot(b,a)

array([[9, 6, 9],
       [3, 2, 3],
       [9, 6, 9]])

In [51]:
# Now try using np.inner(). What's the problem?
# np.inner() sums products over the LAST axis of both arguments. a is (1,3)
# and b is (3,1), so those axes have lengths 3 and 1 and a ValueError is raised.
# The error is the point of this exercise: catch it and print its message so
# that "Restart & Run All" is not interrupted at this cell.
try:
    np.inner(a,b)
except ValueError as err:
    print("{}: {}".format(type(err).__name__, err))

ValueError: shapes (1,3) and (1,3) not aligned: 3 (dim 1) != 1 (dim 0)

In [52]:
# fix it using np.transpose() or .T
# b.T has shape (1,3), so the last axes of both arguments now have length 3
np.inner(a,b.T)

array([[20]])

In [53]:
# OR, depending on the question
# a.T and b are both (3,1): the shared last axis has length 1, so every
# element of a is paired with every element of b, giving a 3x3 result
np.inner(a.T,b)

array([[9, 3, 9],
       [6, 2, 6],
       [9, 3, 9]])