# Module 2: Introduction to Numpy

NumPy, short for Numerical Python, is a Python library that supports numerical computation. The basic data structure in NumPy is a multi-dimensional array object called `ndarray`. NumPy provides a suite of functions that efficiently manipulate the elements of an ndarray.

## Creating ndarray

An ndarray can be created from a list or tuple object.

In [1]:
import numpy as np


def _describe(arr, show_dtype=True):
    """Print an array, then its rank, shape, element count and (optionally) dtype."""
    print(arr)
    print("#Dimensions =", arr.ndim)
    print("Dimension =", arr.shape)
    print("Size =", arr.size)
    if show_dtype:
        print("Array type =", arr.dtype)


# 1-D array (vector); the single float literal 1.0 makes the whole array float
oneDim = np.array([1.0, 2, 3, 4, 5])
_describe(oneDim)

# 2-D array (matrix) built from a list of equal-length rows
twoDim = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
_describe(twoDim)

# Tuples work as rows too; mixing int, str and float turns every element into a string
arrFromTuple = np.array([(1, 'a', 3.0), (2, 'b', 3.5)])
_describe(arrFromTuple, show_dtype=False)

[1. 2. 3. 4. 5.]
#Dimensions = 1
Dimension = (5,)
Size = 5
Array type = float64
[[1 2]
 [3 4]
 [5 6]
 [7 8]]
#Dimensions = 2
Dimension = (4, 2)
Size = 8
Array type = int64
[['1' 'a' '3.0']
 ['2' 'b' '3.5']]
#Dimensions = 2
Dimension = (2, 3)
Size = 6


NumPy also has several built-in functions that can be used to create ndarrays:

In [2]:
# Five samples drawn uniformly from the half-open interval [0, 1)
print(np.random.rand(5))

# Five samples drawn from the standard normal distribution
print(np.random.randn(5))

# Like range(-10, 10, 2), but the result is an ndarray instead of a list
print(np.arange(-10, 10, 2))

# The integers 0..11 laid out as a 3 x 4 matrix
print(np.arange(12).reshape(3, 4))

# 10 evenly spaced values covering [0, 1], both endpoints included
print(np.linspace(0, 1, num=10))

# 7 values evenly spaced on a log scale: 10^-3, 10^-2, ..., 10^3
print(np.logspace(-3, 3, num=7))

[0.6032457  0.56775259 0.08296586 0.12762583 0.17506763]
[-0.36600545 -0.09693097  0.9547545   1.26981398 -0.9466322 ]
[-10  -8  -6  -4  -2   0   2   4   6   8]
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[0.         0.11111111 0.22222222 0.33333333 0.44444444 0.55555556
 0.66666667 0.77777778 0.88888889 1.        ]
[1.e-03 1.e-02 1.e-01 1.e+00 1.e+01 1.e+02 1.e+03]


In [3]:
# 2 x 3 matrix filled with zeros
print(np.zeros((2, 3)))

# 3 x 2 matrix filled with ones
print(np.ones((3, 2)))

# 3 x 3 identity matrix: ones on the diagonal, zeros elsewhere
print(np.eye(3))

[[0. 0. 0.]
 [0. 0. 0.]]
[[1. 1.]
 [1. 1.]
 [1. 1.]]
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


## Element-wise Operations

You can apply standard operators such as addition and multiplication to each element of an ndarray.

In [4]:
x = np.array([1, 2, 3, 4, 5])

# Each operator below is applied to every element of x in turn.
print(x + 1)     # add 1
print(x - 1)     # subtract 1
print(x * 2)     # double
print(x // 2)    # floor (integer) division by 2
print(x ** 2)    # square
print(x % 2)     # remainder after dividing by 2
print(1 / x)     # reciprocal (true division, so the result is float)


[2 3 4 5 6]
[0 1 2 3 4]
[ 2  4  6  8 10]
[0 1 1 2 2]
[ 1  4  9 16 25]
[1 0 1 0 1]
[1.         0.5        0.33333333 0.25       0.2       ]


In [5]:
x = np.array([2, 4, 6, 8, 10])
y = np.array([1, 2, 3, 4, 5])

# With two arrays of the same shape, each operator pairs x[i] with y[i].
print(x + y)     # element-wise sum
print(x - y)     # element-wise difference
print(x * y)     # element-wise product (not a dot product)
print(x / y)     # true division -> float result
print(x // y)    # floor division -> integer result
print(x ** y)    # x[i] raised to the power y[i]

[ 3  6  9 12 15]
[1 2 3 4 5]
[ 2  8 18 32 50]
[2. 2. 2. 2. 2.]
[2 2 2 2 2]
[     2     16    216   4096 100000]


## Indexing and Slicing

There are various ways to select certain elements within an ndarray.

In [6]:
x = np.arange(-5, 5)
print(x)

# Basic slicing returns a view: y shares its data with x[3:5]
y = x[3:5]
print(y)

# Writing through the view therefore also changes x
y[:] = 1000
print(y)
print(x)

# .copy() gives z its own data, detached from x
z = x[3:5].copy()
print(z)

# Writing to the copy leaves x untouched
z[:] = 500
print(z)
print(x)

[-5 -4 -3 -2 -1  0  1  2  3  4]
[-2 -1]
[1000 1000]
[  -5   -4   -3 1000 1000    0    1    2    3    4]
[1000 1000]
[500 500]
[  -5   -4   -3 1000 1000    0    1    2    3    4]


In [7]:
# A plain nested Python list with 3 rows and 4 columns
my2dlist = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
print(my2dlist)

# Third sublist (i.e. the third "row")
print(my2dlist[2])

# [:] merely copies the outer list, so this is the third sublist again --
# it does NOT pick the third element of each sublist
print(my2dlist[:][2])

# print(my2dlist[:,2])    # raises TypeError: list indices cannot be a tuple

# The same data as an ndarray
my2darr = np.array(my2dlist)
print(my2darr)

# Three equivalent spellings of "third row"
print(my2darr[2][:])
print(my2darr[2, :])
print(my2darr[:][2])      # [:] selects everything first, just as with the list

# Third column, using the comma (multi-axis) form
print(my2darr[:, 2])

# First two rows restricted to the last two columns
print(my2darr[:2, 2:])

[[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
[9, 10, 11, 12]
[9, 10, 11, 12]
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
[ 9 10 11 12]
[ 9 10 11 12]
[ 9 10 11 12]
[ 3  7 11]
[[3 4]
 [7 8]]


ndarray also supports boolean indexing.

In [8]:
my2darr = np.arange(1, 13).reshape(3, 4)
print(my2darr)

# A boolean mask over the whole array keeps the matching elements and
# returns them as a 1-D ndarray
divBy3 = my2darr[my2darr % 3 == 0]
print(divBy3, type(divBy3))

# The row slice 2: keeps the result 2-D; the mask is built from the last row
# only and selects which columns to keep
divBy3LastRow = my2darr[2:, my2darr[2, :] % 3 == 0]
print(divBy3LastRow)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
[ 3  6  9 12] <class 'numpy.ndarray'>
[[ 9 12]]


More indexing examples.

In [9]:
my2darr = np.arange(1, 13).reshape(4, 3)
print(my2darr)

# An index list picks rows in the listed order, so this reorders the rows
indices = [2, 1, 0, 3]
print(my2darr[indices, :])

# Paired index lists select individual elements:
# result[k] = my2darr[rowIndex[k], columnIndex[k]]
rowIndex = [0, 0, 1, 2, 3]
columnIndex = [0, 2, 0, 1, 2]
print(my2darr[rowIndex, columnIndex])

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
[[ 7  8  9]
 [ 4  5  6]
 [ 1  2  3]
 [10 11 12]]
[ 1  3  4  8 12]


## Numpy Arithmetic and Statistical Functions

There are many built-in mathematical functions available for manipulating the elements of an ndarray.

In [3]:
import numpy as np

# A fixed example vector with mixed signs (the values are hard-coded, not random)
y = np.array([-1.4, 0.4, -3.2, 2.5, 3.4])
print(y)

print(np.abs(y))            # absolute value of each element
print(np.sqrt(np.abs(y)))   # square root of each magnitude (abs first: y has negative entries)
print(np.sign(y))           # sign of each element: -1. or 1. for these values
print(np.exp(y))            # e raised to each element
print(np.sort(y))           # ascending sort; returns a new array, y itself is unchanged

[-1.4  0.4 -3.2  2.5  3.4]
[1.4 0.4 3.2 2.5 3.4]
[1.18321596 0.63245553 1.78885438 1.58113883 1.84390889]
[-1.  1. -1.  1.  1.]
[ 0.24659696  1.4918247   0.0407622  12.18249396 29.96410005]
[-3.2 -1.4  0.4  2.5  3.4]


In [4]:
import numpy as np

x = np.arange(-2, 3)        # the integers -2 .. 2
y = np.random.randn(5)      # five samples from a normal distribution
print(x)
print(y)

# Function forms of the element-wise operators
print(np.add(x, y))         # same as x + y
print(np.subtract(x, y))    # same as x - y
print(np.multiply(x, y))    # same as x * y
print(np.divide(x, y))      # same as x / y
print(np.maximum(x, y))     # larger of x[i] and y[i] at each position

[-2 -1  0  1  2]
[-0.46974947  0.75972157  0.23113661  1.66680485 -0.60313692]
[-2.46974947 -0.24027843  0.23113661  2.66680485  1.39686308]
[-1.53025053 -1.75972157 -0.23113661 -0.66680485  2.60313692]
[ 0.93949893 -0.75972157  0.          1.66680485 -1.20627385]
[ 4.25758865 -1.31627169  0.          0.59995026 -3.31599662]
[-0.46974947  0.75972157  0.23113661  1.66680485  2.        ]


In [5]:
# A fixed (hard-coded) sample vector
y = np.array([-3.2, -1.4, 0.4, 2.5, 3.4])
print(y)

# Reductions: each call collapses the whole array to a single number
print("Min =", np.min(y))             # smallest element
print("Max =", np.max(y))             # largest element
print("Average =", np.mean(y))        # arithmetic mean
print("Std deviation =", np.std(y))   # standard deviation (divides by N by default)
print("Sum =", np.sum(y))             # total of all elements

[-3.2 -1.4  0.4  2.5  3.4]
Min = -3.2
Max = 3.4
Average = 0.34000000000000014
Std deviation = 2.432776191925595
Sum = 1.7000000000000006


## Numpy linear algebra

Numpy provides many functions to support linear algebra operations.

In [6]:
# Random 2 x 3 matrix
X = np.random.randn(2, 3)
print(X)

# Transpose X^T has shape 3 x 2
print(X.T)

# Random vector of length 3
y = np.random.randn(3)
print(y)

print(X.dot(y))        # matrix-vector product  X y     -> length-2 vector
print(X.dot(X.T))      # matrix-matrix product  X X^T   -> 2 x 2
print(X.T.dot(X))      # matrix-matrix product  X^T X   -> 3 x 3

[[ 0.59876307 -0.24802821  1.51431636]
 [-1.05586653 -1.72686204 -0.41968883]]
[[ 0.59876307 -1.05586653]
 [-0.24802821 -1.72686204]
 [ 1.51431636 -0.41968883]]
[-0.11042169 -0.72592192 -0.66797672]
[-0.89759539  1.65049994]
[[ 2.71318924 -0.83944505]
 [-0.83944505  4.27304534]]
[[1.47337134 1.67482569 1.3498521 ]
 [1.67482569 3.04357049 0.34915153]
 [1.3498521  0.34915153 2.46929275]]


In [7]:
X = np.random.randn(5, 3)
print(X)

# C = X^T X is a square (3 x 3), symmetric matrix
C = X.T.dot(X)

# Inverse of the square matrix
invC = np.linalg.inv(C)
print(invC)

# Determinant of the square matrix
detC = np.linalg.det(C)
print(detC)

# Eigen-decomposition: S holds the eigenvalues, and column U[:, i] is the
# eigenvector belonging to S[i]
S, U = np.linalg.eig(C)
print(S)
print(U)

[[ 0.62794064 -0.04845289  1.45568782]
 [-0.05663888 -0.44882636 -0.44292357]
 [-0.96777971  0.10352524 -0.05074279]
 [ 0.68042218  0.36585902 -0.49887718]
 [-0.63271254  0.49848174 -0.61339902]]
[[ 0.54683507  0.04258199 -0.18739211]
 [ 0.04258199  1.81652345  0.21046397]
 [-0.18739211  0.21046397  0.4319491 ]]
2.9680728620510206
[3.72266053 1.47338806 0.54113295]
[[ 0.56740429 -0.8233602   0.0114171 ]
 [-0.12624639 -0.07328255  0.98928839]
 [ 0.81370402  0.56276784  0.14552709]]


## Iterating over array elements

The Python `for` loop is the most convenient way to iterate over an array:

In [3]:
v = np.array([1, 2, 3, 4])

# Iterating over a 1-D array yields its elements one at a time
for element in v:
    print(element)

1
2
3
4


In [4]:
M = np.array([[1, 2], [3, 4]])

# Iterating over a 2-D array yields one row (a 1-D array) per step
for row in M:
    print("row", row)

    # Each row can in turn be iterated element by element
    for element in row:
        print(element)

row [1 2]
1
2
row [3 4]
3
4


Use the `enumerate` function to obtain both the element and its index in the `for` loop: 

In [5]:
# enumerate pairs every row with its row index ...
for row_idx, row in enumerate(M):
    print("row_idx", row_idx, "row", row)

    # ... and every element of that row with its column index
    for col_idx, element in enumerate(row):
        print("col_idx", col_idx, "element", element)

        # Write the square back into M in place, addressed by (row, column)
        M[row_idx, col_idx] = element ** 2

row_idx 0 row [1 2]
col_idx 0 element 1
col_idx 1 element 2
row_idx 1 row [3 4]
col_idx 0 element 3
col_idx 1 element 4


In [6]:
# Each element of M is now squared: the nested loop above wrote the squares back in place.
# (The bare expression on the cell's last line is what the notebook displays.)
M

array([[ 1,  4],
       [ 9, 16]])

## Type casting

Since NumPy arrays are *statically typed*, the type of an array does not change once it is created. However, we can explicitly cast an array of one type to another using the `astype` method. This always creates a new array of the new type:

In [7]:
# Element type of the integer array M.
# NOTE: the default integer width varies by platform (this run reports int32).
M.dtype

dtype('int32')

In [8]:
# astype returns a new array with the requested element type; M itself keeps its type
M2 = M.astype(float)

# Display the converted array (values now carry a decimal point)
M2

array([[  1.,   4.],
       [  9.,  16.]])

In [9]:
# Python's built-in float was mapped to NumPy's float64
M2.dtype

dtype('float64')

In [10]:
# Cast to boolean: every entry of M is nonzero here, so every entry becomes True
M3 = M.astype(bool)

# Display the boolean array
M3

array([[ True,  True],
       [ True,  True]], dtype=bool)

# Scipy:  a library for scientific computing
#### It provides many user-friendly and efficient numerical routines for tasks such as statistics, numerical integration, and optimization.

In [11]:
import numpy as np
import scipy.spatial.distance as sp_dist

# Two random binary vectors: randint draws integers from low (inclusive)
# to high (exclusive), so every entry is 0 or 1
x = np.random.randint(0, 2, size=5)
y = np.random.randint(0, 2, size=5)
print(x)
print(y)

print(sp_dist.cosine(x, y))       # cosine distance (1 - cosine similarity)
print(sp_dist.euclidean(x, y))    # Euclidean (straight-line) distance
print(sp_dist.jaccard(x, y))      # Jaccard dissimilarity
print(sp_dist.hamming(x, y))      # Hamming distance: fraction of positions that differ

[0 1 1 1 0]
[1 1 1 1 0]
0.133974596216
1.0
0.25
0.2


In [12]:
from scipy.stats import linregress

# Paired observations: a[i] is the x-value and b[i] the y-value of sample i
a = [15, 12, 8, 8, 7, 7, 7, 6, 5, 3]
b = [10, 25, 17, 11, 13, 17, 20, 13, 9, 15]

# Least-squares line fit of b against a; the displayed result bundles the slope,
# intercept, correlation coefficient (rvalue), p-value and standard error (stderr)
linregress(a, b)

LinregressResult(slope=0.20833333333333331, intercept=13.375, rvalue=0.14499815458068518, pvalue=0.68940144811669501, stderr=0.50261704627083637)