# NumPy 

In [1]:
import numpy as np

In [2]:
np.array([3.14, 4, 2, 3])

array([3.14, 4.  , 2.  , 3.  ])

In [3]:
np.array([1, 2, 3, 4], dtype='float32')

array([1., 2., 3., 4.], dtype=float32)

In [6]:
np.ones((3,5),dtype = int)

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [10]:
np.full((2,4),'M')
# Create a 2x4 array filled with 'M'

array([['M', 'M', 'M', 'M'],
       ['M', 'M', 'M', 'M']], dtype='<U1')

In [11]:
np.random.random((3,3))

array([[0.75257318, 0.74983253, 0.25197221],
       [0.52277962, 0.09431284, 0.28271822],
       [0.81970043, 0.04599751, 0.98800446]])

In [13]:
np.random.normal(0,10,(2,2))

array([[12.49048155, -0.04066988],
       [ 8.88455934,  7.84696974]])

In [25]:
np.random.randint(1,11,(4,4))

array([[ 1,  6,  5,  7],
       [ 5,  6,  4,  6],
       [ 3, 10,  7,  4],
       [ 1,  4,  1,  3]])

In [35]:
np.arange(1,120,5,dtype=np.int8)

array([  1,   6,  11,  16,  21,  26,  31,  36,  41,  46,  51,  56,  61,
        66,  71,  76,  81,  86,  91,  96, 101, 106, 111, 116], dtype=int8)

# The Basics of NumPy Arrays

In [1]:
import numpy as np
x1 = np.random.randint(0,10,size=6)
x1

array([5, 0, 7, 5, 3, 0])

In [2]:
x2 = np.random.randint(0,10,(3,3))
x2

array([[8, 3, 2],
       [1, 9, 5],
       [9, 2, 1]])

In [3]:
#the number of dimensions
x1.ndim

1

In [4]:
#the number of dimensions
x2.ndim

2

In [5]:
x2.shape

(3, 3)

In [6]:
x2.size

9

In [7]:
x2.dtype

dtype('int64')

In [8]:
#size of each item
x2.itemsize

8

In [9]:
#size of each array   8 x 9 = 72
x2.nbytes

72

In [10]:
x1[-1]
#select last element

4

In [11]:
x2[1][1]

4

In [12]:
x2[1,1]

4

In [13]:
#the value will be silently truncated.
x2[0][0]=2.99216669
x2

array([[2, 4, 8],
       [9, 4, 5],
       [5, 4, 1]])

In [14]:
arr = np.arange(0,10)
x1

array([4, 2, 3, 9, 0, 4])

In [15]:
arr[::2]
# every two numbers has been deleted
# 1 , 3 , 5 , 7 , 9

array([0, 2, 4, 6, 8])

In [16]:
# every two numbers has been deleted starting from index 3
arr[3::2]

array([3, 5, 7, 9])

In [17]:
arr[::-1]
# reverse array

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [18]:
arr[7::-2]

array([7, 5, 3, 1])

In [19]:
x2[:2,(0,2)]

array([[2, 8],
       [9, 5]])

In [20]:
x2[::-1]

array([[5, 4, 1],
       [9, 4, 5],
       [2, 4, 8]])

In [21]:
#select last column
x2[:,2]

array([8, 5, 1])

In [22]:
#select the middle row
x2[1,:]

array([9, 4, 5])

## reshape

In [30]:
grid = np.arange(1,10)
grid.reshape(3,3)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [44]:
# convert 2d array to 1d.
grid.reshape(1,9)

array([[1, 2, 3, 4, 5, 6, 7, 8, 9]])

In [40]:
# convert 2d array to 1d.
grid[:,np.newaxis]

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])

# Array Concatenation and Splitting

In [7]:
x = np.array([1,2,3,4])
y = np.array([6,7,8,9])

In [8]:
np.concatenate([x,y])

array([1, 2, 3, 4, 6, 7, 8, 9])

In [54]:
z = np.zeros(2,dtype=int)
np.concatenate([x,y,z])

array([1, 2, 3, 4, 6, 7, 8, 9, 0, 0])

In [61]:
a  = np.array([1,2,3,4,5,6,7,8,9,11])

In [62]:
sub1 , sub2  = np.split(a,2)

In [63]:
sub1

array([1, 2, 3, 4, 5])

In [64]:
sub2

array([ 6,  7,  8,  9, 11])

In [72]:
a_2d = np.arange(24).reshape(4,6)
a_2d

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [75]:
np.vsplit(a_2d,4)
#splite the array into 4 arrays, divide number of raws into 4/.

[array([[0, 1, 2, 3, 4, 5]]),
 array([[ 6,  7,  8,  9, 10, 11]]),
 array([[12, 13, 14, 15, 16, 17]]),
 array([[18, 19, 20, 21, 22, 23]])]

In [88]:
left , right = np.hsplit(a_2d,2)
left
#splite the array into 2 arrays , divide number of cloumns into 2.

array([[ 0,  1,  2],
       [ 6,  7,  8],
       [12, 13, 14],
       [18, 19, 20]])

In [89]:
right

array([[ 3,  4,  5],
       [ 9, 10, 11],
       [15, 16, 17],
       [21, 22, 23]])

In [81]:
np.hsplit(a_2d,[4])
#splite the array into 2 arrays , divide number of cloumns into 2.

[array([[ 0,  1,  2,  3],
        [ 6,  7,  8,  9],
        [12, 13, 14, 15],
        [18, 19, 20, 21]]), array([[ 4,  5],
        [10, 11],
        [16, 17],
        [22, 23]])]

In [86]:
np.hsplit(a_2d,[2,4])
#splite the array into 2 arrays , divide number of cloumns into 2.

[array([[ 0,  1],
        [ 6,  7],
        [12, 13],
        [18, 19]]), array([[ 2,  3],
        [ 8,  9],
        [14, 15],
        [20, 21]]), array([[ 4,  5],
        [10, 11],
        [16, 17],
        [22, 23]])]

# Universal Functions

In [1]:
import numpy as np
arr = np.arange(5)

In [3]:
arr + 5

array([5, 6, 7, 8, 9])

In [3]:
arr = arr*-1
arr

array([ 0, -1, -2, -3, -4])

In [4]:
abs(arr)

array([0, 1, 2, 3, 4])

In [5]:
np.absolute(arr)

array([0, 1, 2, 3, 4])

In [6]:
np.abs(arr)

array([0, 1, 2, 3, 4])

In [7]:
theta = np.linspace(0,np.pi,5)
theta

array([0.        , 0.78539816, 1.57079633, 2.35619449, 3.14159265])

In [8]:
np.sin(theta)

array([0.00000000e+00, 7.07106781e-01, 1.00000000e+00, 7.07106781e-01,
       1.22464680e-16])

In [9]:
np.tan(theta)

array([ 0.00000000e+00,  1.00000000e+00,  1.63312394e+16, -1.00000000e+00,
       -1.22464680e-16])

In [10]:
alpha = np.array([-1,0,1])
np.arcsin(alpha)

array([-1.57079633,  0.        ,  1.57079633])

In [11]:
np.arctan(alpha)

array([-0.78539816,  0.        ,  0.78539816])

In [12]:
#natural logarithm
arr2 = np.array([1,2,4,10])
np.log(arr2)

array([0.        , 0.69314718, 1.38629436, 2.30258509])

In [13]:
#log 10
np.log10(arr2)

array([0.        , 0.30103   , 0.60205999, 1.        ])

In [14]:
#e^x
np.exp(arr2)

array([2.71828183e+00, 7.38905610e+00, 5.45981500e+01, 2.20264658e+04])

In [15]:
#natural logarithm
np.power(2,arr2)

array([   2,    4,   16, 1024])

In [16]:
x = np.arange(5)
y = np.empty(5)
y

array([6.94510592e-310, 6.94510592e-310, 0.00000000e+000, 4.65800579e-310,
       2.37151510e-322])

In [17]:
np.multiply(x, 10, out=y)
print(y)

[ 0. 10. 20. 30. 40.]


In [18]:
y=np.zeros(10)
np.multiply(x,10,out=y[::2])

array([ 0., 10., 20., 30., 40.])

In [19]:
y

array([ 0.,  0., 10.,  0., 20.,  0., 30.,  0., 40.,  0.])

## reduce

In [20]:
x=np.arange(1,9)

In [21]:
np.add.reduce(x)

36

In [22]:
np.multiply.reduce(x)

40320

In [23]:
np.subtract.reduce(x)

-34

In [24]:
np.add.accumulate(x)

array([ 1,  3,  6, 10, 15, 21, 28, 36])

In [25]:
# create timetable
np.multiply.outer(x,x)

array([[ 1,  2,  3,  4,  5,  6,  7,  8],
       [ 2,  4,  6,  8, 10, 12, 14, 16],
       [ 3,  6,  9, 12, 15, 18, 21, 24],
       [ 4,  8, 12, 16, 20, 24, 28, 32],
       [ 5, 10, 15, 20, 25, 30, 35, 40],
       [ 6, 12, 18, 24, 30, 36, 42, 48],
       [ 7, 14, 21, 28, 35, 42, 49, 56],
       [ 8, 16, 24, 32, 40, 48, 56, 64]])

In [26]:
# create timetable
np.add.outer(x[:5],x[:5])

array([[ 2,  3,  4,  5,  6],
       [ 3,  4,  5,  6,  7],
       [ 4,  5,  6,  7,  8],
       [ 5,  6,  7,  8,  9],
       [ 6,  7,  8,  9, 10]])

### Aggregations: Min, Max, and Everything in Between

In [110]:
L = np.random.randint(1,20,size=10)
print(L)
print('The summation of the array is',sum(L))
print('The minimum number in the array is ',min(L))
print('The maximum number in the array is ',np.max(L))
print('The product numbers in the array is ',np.product(L))
print('The standard deviation is',np.std(L))
print('The variance is',np.var(L))
print('The index of the maximum number is ',np.argmax(L))
print('The index of the minimum number is ',np.argmin(L))
print('The median is ',np.median(L))
print('The mean is ',np.mean(L))
print('evaluate whether any elements are true: ',np.any(L))
print('evaluate whether all elements are true: ',np.all(L))

# np.max() is much faster than max()

[19  9 10  3 14  1 15 14  5  8]
The summation of the array is 98
The minimum number in the array is  1
The maximum number in the array is  19
The product numbers in the array is  603288000
The standard deviation is 5.455272678794342
The variance is 29.76
The index of the maximum number is  0
The index of the minimum number is  5
The median is  9.5
The mean is  9.8
evaluate whether any elements are true:  True
evaluate whether all elements are true:  True


In [111]:
print('first quartile', np.percentile(L, 25))
print('third quartile', np.percentile(L, 75))



first quartile 5.75
third quartile 14.0


In [5]:
LL= np.random.randint(1,15,size=(3,4))
LL

array([[ 2,  1,  4, 11],
       [ 8,  9,  6,  4],
       [14,  6, 13,  1]])

In [6]:
np.min(LL,axis=0)
# find the minimum in each column

array([2, 1, 4, 1])

In [7]:
np.max(LL,axis=1)
# find the maximum in each row

array([11,  9, 14])

In [8]:
np.product(LL)

166053888

# broadcasting

In [3]:
a = np.array([1,2,3])
b = np.array([15,16,17])
a+b

array([16, 18, 20])

In [4]:
a + 3

array([4, 5, 6])

In [5]:
M = np.ones((2,3))
M

array([[1., 1., 1.],
       [1., 1., 1.]])

In [6]:
a+M

array([[2., 3., 4.],
       [2., 3., 4.]])

In [7]:
y = np.arange(1,4).reshape(1,3)
z = np.arange(1,4).reshape(3,1)

In [8]:
print(y)
print(z)

[[1 2 3]]
[[1]
 [2]
 [3]]


In [9]:
y + z

array([[2, 3, 4],
       [3, 4, 5],
       [4, 5, 6]])

In [19]:
xx=np.linspace(0,50,6)
xxx=np.linspace(0,100,5).reshape(5,1)
print(xx)
print(xxx)

[ 0. 10. 20. 30. 40. 50.]
[[  0.]
 [ 25.]
 [ 50.]
 [ 75.]
 [100.]]


In [20]:
z=xx+xxx
z

array([[  0.,  10.,  20.,  30.,  40.,  50.],
       [ 25.,  35.,  45.,  55.,  65.,  75.],
       [ 50.,  60.,  70.,  80.,  90., 100.],
       [ 75.,  85.,  95., 105., 115., 125.],
       [100., 110., 120., 130., 140., 150.]])

### Boolean

In [2]:
a=np.array([1,2,3,4,5,6])

In [4]:
a>4

array([False, False, False, False,  True,  True])

In [5]:
a!=5

array([ True,  True,  True,  True, False,  True])

In [11]:
x = np.random.randint(10, size=(3, 4))
x

array([[3, 1, 3, 4],
       [3, 1, 4, 9],
       [8, 4, 7, 7]])

In [26]:
result = x>5
result

array([[False, False,  True, False],
       [ True, False,  True,  True],
       [False,  True,  True, False]])

In [27]:
np.count_nonzero(result)

6

In [29]:
#another way to count nonzero entries
np.sum(x>5)

6

In [30]:
np.sum(x>5,axis=1)

array([1, 3, 2])

In [39]:
print(np.any(x>8))
print(np.all(x>3))
print(np.any(x<0))
print(np.all(x>4,axis=0))

True
False
False
[False False  True False]


In [3]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [23]:
np.sum((arr>4) & (arr<8))
# 5,6,7

3

In [8]:
np.sum((arr>7) | (arr<4))
# 0,1,2,3,8,9

6

In [33]:
s=np.sum(~((8>arr) & (arr>4)))
# 0,1,2,3,9
s

7

In [30]:
arr[~((8>arr) & (arr>4))]

array([0, 1, 2, 3, 4, 8, 9])

In [41]:
A = np.array([1,1,0,0])
B = np.array([1,0,0,1])
A | B

array([1, 1, 0, 1])

### fancy indexing

In [2]:
x = np.random.randint(1,10,size=10)
x

array([7, 6, 1, 6, 8, 9, 1, 6, 3, 3])

In [3]:
x[[1,3,5]]
#select the numbers in the following position. 1 , 3 , 5.

array([6, 6, 9])

In [6]:
y = np.arange(12).reshape((3, 4))
y

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [7]:
row = np.array([0, 1, 2])
col = np.array([2, 1, 3])
y[row,col]

array([ 2,  5, 11])

In [8]:
row = np.array([0, 1, 2]).reshape(3,1)
col = np.array([2, 1, 3])
y[row,col]

array([[ 2,  1,  3],
       [ 6,  5,  7],
       [10,  9, 11]])

In [9]:
y[2, [2, 0, 1]]

array([10,  8,  9])

In [15]:
mask = np.array([1, 0, 1, 0], dtype=bool)
y[row.reshape(3,1), mask]

array([[ 0,  2],
       [ 4,  6],
       [ 8, 10]])

### Example: Selecting Random Points

In [16]:
mean = [0, 0]

In [18]:
cov = [[1, 2],[2, 5]]

In [19]:
rand = np.random.RandomState(42)
X = rand.multivariate_normal(mean, cov, 100)
X.shape

(100, 2)

In [20]:
indices = np.random.choice(X.shape[0], 20, replace=False)
indices

array([85, 86, 55, 42, 22, 37, 71, 38, 11, 54,  7, 82, 20, 24,  8, 70, 75,
       76, 56, 39])

### Modifying Values with Fancy Indexing

In [21]:
a = np.arange(10)
i=[1,2,3,8,9,1,1,1,1]
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [22]:
a[i]=55
a

array([ 0, 55, 55, 55,  4,  5,  6,  7, 55, 55])

In [24]:
a[i]-=10
a

array([ 0, 35, 35, 35,  4,  5,  6,  7, 35, 35])

In [25]:
x = np.zeros(10)
np.add.at(x, i, 5)
print(x)

[ 0. 25.  5.  5.  0.  0.  0.  0.  5.  5.]


# Sorting Arrays

In [4]:
arr = np.array([9,4,1,6,7,1,2,8,4])
arr.sort()
arr

array([1, 1, 2, 4, 4, 6, 7, 8, 9])

In [5]:
arr2 = np.array([3,1,2,5,8,6])
np.sort(arr2)

array([1, 2, 3, 5, 6, 8])

In [7]:
two_d = np.random.randint(1,10,size=(5,6))
two_d

array([[7, 9, 9, 4, 4, 3],
       [3, 9, 3, 1, 5, 7],
       [4, 1, 6, 3, 2, 6],
       [9, 2, 7, 8, 6, 8],
       [7, 2, 4, 3, 7, 7]])

In [9]:
np.sort(two_d,axis=0)
#Sorting along coumns

array([[3, 1, 3, 1, 2, 3],
       [4, 2, 4, 3, 4, 6],
       [7, 2, 6, 3, 5, 7],
       [7, 9, 7, 4, 6, 7],
       [9, 9, 9, 8, 7, 8]])

In [10]:
np.sort(two_d,axis=1)
#Sorting along raws

array([[3, 4, 4, 7, 9, 9],
       [1, 3, 3, 5, 7, 9],
       [1, 2, 3, 4, 6, 6],
       [2, 6, 7, 8, 8, 9],
       [2, 3, 4, 7, 7, 7]])

In [12]:
a = np.array([20,55,12,10,66,11,40])
np.argsort(a)
#returns the indices of the sorted elements

array([3, 5, 2, 0, 6, 1, 4])

# Structured Data: NumPy’s Structured Arrays