# index
[1. NUMPY ARRAY GENERATION](#NUMPY-ARRAY-GENERATION "Goto NUMPY-ARRAY-GENERATION")<br>
[2. STATISTICAL OPERATIONS](#STATISTICAL-OPERATIONS "Goto STATISTICAL-OPERATIONS")<br>
[3. SORTING](#SORTING "Goto SORTING")<br>
[4. APPLYING CONDITIONS](#APPLYING-CONDITIONS "Goto APPLYING-CONDITIONS")<br>
[5. MISSING VALUES](#MISSING-VALUES "Goto MISSING-VALUES")<br>
[6. RESHAPE/REARRANGE](#RESHAPE/REARRANGE "Goto RESHAPE/REARRANGE")<br>
[7. NUMPY REPLACE](#NUMPY-REPLACE "Goto NUMPY-REPLACE")<br>
[8. JOINING TWO ARRAYS](#JOINING-TWO-ARRAYS "Goto JOINING-TWO-ARRAYS")<br>
[9. INDEX BASED](#INDEX-BASED "Goto INDEX-BASED")<br>
[10. USING SCALAR FUNCTIONS ON NUMPY ARRAY](#USING-SCALAR-FUNCTIONS-ON-NUMPY-ARRAY "Goto USING-SCALAR-FUNCTIONS-ON-NUMPY-ARRAY")<br>
[11. SETTING NUMPY OPTIONS](#SETTING-NUMPY-OPTIONS "Goto SETTING-NUMPY-OPTIONS")<br>
[12. OTHERS](#OTHERS "Goto OTHERS")<br>

In [1]:
import numpy as np

In [2]:
np.__version__

'1.18.1'

# NUMPY ARRAY GENERATION
[index](#index "Goto index")

### basic numpy array

In [3]:
# method 1
print(np.array([1,2,3,4,5,6]))

# method 2
print(np.arange(start=1, stop=7, step=1))

[1 2 3 4 5 6]
[1 2 3 4 5 6]


### basic numpy array with equally distant elements in a given range and size

In [4]:
np.linspace(start=0, stop=8, num=9)

array([0., 1., 2., 3., 4., 5., 6., 7., 8.])

### numpy boolean array

In [5]:
np.full((3,3), True), np.ones((3,3), dtype=bool)

(array([[ True,  True,  True],
        [ True,  True,  True],
        [ True,  True,  True]]),
 array([[ True,  True,  True],
        [ True,  True,  True],
        [ True,  True,  True]]))

### generating random numbers between 0 and 1

In [6]:
# method 1
print(np.random.rand(5, 3))

# method 2
print(np.random.random((5,3)))

[[0.0163871  0.06146832 0.10935033]
 [0.88083196 0.236081   0.18714134]
 [0.78481824 0.54764501 0.78559238]
 [0.742706   0.76973561 0.82004773]
 [0.23137507 0.77005119 0.52889394]]
[[0.52769145 0.70066254 0.41100249]
 [0.86937168 0.02095097 0.51553811]
 [0.38470419 0.63374695 0.02615258]
 [0.92250213 0.94519449 0.89399823]
 [0.91183218 0.16231118 0.30108379]]


### generating random float/int with a given range

In [7]:
# integer between 5 and 10 of shape (5, 3)
print(np.random.randint(5, 10, (5,3)))

# float between 5 and 10 of shape (5, 3)
# method 1
print(np.random.randint(5, 10, (5,3)) + np.random.random((5, 3)))

# method 2
print(np.random.uniform(5, 10, (5, 3)))

[[7 5 5]
 [7 8 5]
 [9 6 7]
 [9 6 8]
 [5 8 8]]
[[7.92644163 8.82776866 8.55219481]
 [8.06735007 5.24182329 7.12476472]
 [8.19505305 6.71719634 5.78486335]
 [9.92246191 9.1808777  9.41748807]
 [7.3711558  7.80691647 8.02777537]]
[[6.07335064 5.70720964 5.25584369]
 [7.30092102 8.94885878 5.54607046]
 [8.70098126 6.64070162 6.88560315]
 [5.86604157 5.6230472  7.77945769]
 [9.90409784 8.38849234 5.356277  ]]


### generating duplicates

In [8]:
arr = np.array([1, 2, 3])

# method 1
print(np.repeat(arr, 3))

# method 2
print(np.tile(arr, 3))

[1 1 1 2 2 2 3 3 3]
[1 2 3 1 2 3 1 2 3]


### get open source data

In [9]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')
iris[:5]

array([[b'5.1', b'3.5', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.9', b'3.0', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.7', b'3.2', b'1.3', b'0.2', b'Iris-setosa'],
       [b'4.6', b'3.1', b'1.5', b'0.2', b'Iris-setosa'],
       [b'5.0', b'3.6', b'1.4', b'0.2', b'Iris-setosa']], dtype=object)

# STATISTICAL OPERATIONS
[index](#index "Goto index")

### max and min

In [10]:
arr = np.array([1, 5, 3, 2, 9, 1, 2])
print('array:', arr)
print('max:', np.max(arr))
print('max index:', np.argmax(arr))

print('min:', np.min(arr))
print('min index:', np.argmin(arr))

array: [1 5 3 2 9 1 2]
max: 9
max index: 4
min: 1
min index: 0


### percentile

In [11]:
np.percentile(arr, q=[5, 50, 95])

array([1. , 2. , 7.8])

### unique count

In [12]:
np.unique(arr, return_counts=True, return_index=True)

(array([1, 2, 3, 5, 9]),
 array([0, 3, 2, 1, 4], dtype=int64),
 array([2, 2, 1, 1, 1], dtype=int64))

### mean, median, standard deviation

In [13]:
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0])
np.mean(arr), np.median(arr), np.std(arr)

(4.090909090909091, 4.0, 3.0287874998104876)

### mode

In [14]:
print(arr)
val, counts = np.unique(arr, return_counts=True)
val[np.argmax(counts)]

[0 1 2 3 4 5 6 7 8 9 0]


0

### euclidean distance between two arrays

In [15]:
a = np.array([1,2,3,4,5])
b = np.array([4,5,6,7,8])

np.linalg.norm(a-b)

6.708203932499369

### difference of array elements

In [16]:
arr = np.array([3, 5, 1, 7, 2, 4])
np.diff(arr)

array([ 2, -4,  6, -5,  2])

### sum of array elements

In [17]:
print(arr)
np.cumsum(arr)

[3 5 1 7 2 4]


array([ 3,  8,  9, 16, 18, 22], dtype=int32)

### get sum of 2 consecutive terms

In [18]:
x = np.array([1, 2, 1, 1, 3, 4, 3, 1, 1, 2, 1, 1, 2])
print(x)
np.concatenate([np.cumsum(x)[1:2], np.cumsum(x)[2:]-np.cumsum(x)[:-2]])

[1 2 1 1 3 4 3 1 1 2 1 1 2]


array([3, 3, 2, 4, 7, 7, 4, 2, 3, 3, 2, 3], dtype=int32)

# SORTING
[index](#index "Goto index")

In [19]:
arr = np.array([1, 5, 2, 9, 2, 5, 7])
print('array:', arr)
# method 1
print(np.sort(arr))

# method 2
# np.argsort gives arguments of sorted numpy array
indexes = np.argsort(arr)
print('indexes:', indexes)
print(arr[indexes])

array: [1 5 2 9 2 5 7]
[1 2 2 5 5 7 9]
indexes: [0 2 4 1 5 6 3]
[1 2 2 5 5 7 9]


# APPLYING CONDITIONS
[index](#index "Goto index")

### remove elements divisible by 2

In [20]:
arr = np.array([0, 1, 1, 2, 5, 6, 3, 4, 5, 5, 6, 7, 4, 3, 8, 9])

# method 1
print(arr[arr%2!=0])

# method 2
print(arr[np.argwhere(arr%2!=0)])

[1 1 5 3 5 5 7 3 9]
[[1]
 [1]
 [5]
 [3]
 [5]
 [5]
 [7]
 [3]
 [9]]


### remove elements >5 and <10

In [21]:
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
arr[(arr>5) & (arr<10)]

array([6, 7, 8, 9])

# MISSING VALUES
[index](#index "Goto index")

### find missing values

In [22]:
# method 1
print(np.isnan(arr).sum())

# method 2
print(np.where(arr==np.nan))

# method 3
print(np.any(np.isnan(arr)))

# method 4
print(np.isnan(arr).any())

0
(array([], dtype=int64),)
False
False


# RESHAPE/REARRANGE
[index](#index "Goto index")

### reshape

In [23]:
arr = np.arange(10)
print(arr)

# method 1
print(arr.reshape((2, 5)))

# method 2
print(arr.reshape((2, -1))) # setting -1 will automatically decide the number of columns

[0 1 2 3 4 5 6 7 8 9]
[[0 1 2 3 4]
 [5 6 7 8 9]]
[[0 1 2 3 4]
 [5 6 7 8 9]]


### rearrange columns

In [24]:
arr = np.arange(9).reshape((3,3))
print(arr)
arr[:,[1, 0, 2]]

[[0 1 2]
 [3 4 5]
 [6 7 8]]


array([[1, 0, 2],
       [4, 3, 5],
       [7, 6, 8]])

### rearrange rows

In [25]:
arr[[1, 2, 0], :]

array([[3, 4, 5],
       [6, 7, 8],
       [0, 1, 2]])

### reverse all columns

In [26]:
arr[:, ::-1]

array([[2, 1, 0],
       [5, 4, 3],
       [8, 7, 6]])

### reverse all rows

In [27]:
arr[::-1, :]

array([[6, 7, 8],
       [3, 4, 5],
       [0, 1, 2]])

# NUMPY REPLACE
[index](#index "Goto index")

### Replace all odd numbers in arr with -1

In [28]:
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
arr[arr%2==1] = -1
arr

array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

### Replacing without affecting the original array

In [29]:
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
new_arr = np.where(arr%2==1, -1, arr)
new_arr, arr

(array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1]),
 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))

### Replace values >30 to 30 and <10 to 10

In [30]:
arr = np.arange(0, 50, 2)
print(arr)

# method 1
print(np.clip(arr, a_min=10, a_max=30))

# method 2
print(np.where(arr>30, 30, np.where(arr<10, 10, arr)))

[ 0  2  4  6  8 10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40 42 44 46
 48]
[10 10 10 10 10 10 12 14 16 18 20 22 24 26 28 30 30 30 30 30 30 30 30 30
 30]
[10 10 10 10 10 10 12 14 16 18 20 22 24 26 28 30 30 30 30 30 30 30 30 30
 30]


# JOINING TWO ARRAYS
[index](#index "Goto index")

### vertical stacking

In [31]:
a = np.arange(10).reshape(2,-1)
b = np.repeat(1, 10).reshape(2,-1)

# method 1
print(np.concatenate((a, b), axis=0))

# method 2
np.vstack([a, b])

[[0 1 2 3 4]
 [5 6 7 8 9]
 [1 1 1 1 1]
 [1 1 1 1 1]]


array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

### horizontal stacking

In [32]:
a = np.arange(10).reshape(2,-1)
b = np.repeat(1, 10).reshape(2,-1)

# method 1
print(np.concatenate((a, b), axis=1))

# method 2
np.hstack([a, b])

[[0 1 2 3 4 1 1 1 1 1]
 [5 6 7 8 9 1 1 1 1 1]]


array([[0, 1, 2, 3, 4, 1, 1, 1, 1, 1],
       [5, 6, 7, 8, 9, 1, 1, 1, 1, 1]])

# INDEX BASED
[index](#index "Goto index")

### get index of an element of array

In [33]:
a = np.array([1,2,3,2,3,4,3,4,5,6])
np.where(a==5)

(array([8], dtype=int64),)

### get index where a matches b

In [34]:
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])
np.where(a==b)

(array([1, 3, 5, 7], dtype=int64),)

# USING SCALAR FUNCTIONS ON NUMPY ARRAY
[index](#index "Goto index")

In [35]:
print(max(5, 8))

a = np.array([5, 7, 9, 8, 6, 4, 5])
b = np.array([6, 3, 4, 8, 9, 7, 1])

pair_max = np.vectorize(max, otypes=[float])
pair_max(a, b)

8


array([6., 7., 9., 8., 9., 7., 5.])

# SETTING NUMPY OPTIONS
[index](#index "Goto index")

### set seed value

In [36]:
np.random.seed(5)

### set precision value to 3 or print numpy value upto 3 decimal places

In [37]:
np.set_printoptions(precision=3)
print(np.random.uniform(5, 10, (5,3)))

[[6.11  9.354 6.034]
 [9.593 7.442 8.059]
 [8.83  7.592 6.484]
 [5.939 5.404 8.692]
 [7.207 5.792 9.4  ]]


### suppressing scientific notifications

In [38]:
np.random.seed(100)
rand_arr = np.random.random([3,3])/1e3
print(rand_arr)

np.set_printoptions(suppress=True, precision=6)
print(rand_arr)

[[5.434e-04 2.784e-04 4.245e-04]
 [8.448e-04 4.719e-06 1.216e-04]
 [6.707e-04 8.259e-04 1.367e-04]]
[[0.000543 0.000278 0.000425]
 [0.000845 0.000005 0.000122]
 [0.000671 0.000826 0.000137]]


### set number of elements to be printed for a numpy array

In [39]:
np.set_printoptions(threshold=6)
print(np.arange(0, 20))

# print all the elements of numpy array
import sys
np.set_printoptions(threshold=sys.maxsize)
print(np.arange(0, 20))

[ 0  1  2 ... 17 18 19]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]


# OTHERS
[index](#index "Goto index")

### intersection

In [40]:
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])
np.intersect1d(a,b)

array([2, 4])

### From 'a' remove all of 'b'

In [41]:
a = np.array([1,2,3,4,5])
b = np.array([5,6,7,8,9])

np.setdiff1d(a,b)

array([1, 2, 3, 4])

### discretizing array

In [42]:
arr = np.arange(-5, 20, 2)
print('array:',arr)
# method 1
bins = [0, 3, 5, 9]
print('bins:', bins)
print(np.digitize(arr, bins=bins))

# method 2
bins = np.arange(-4,5,2)
print('bins:', bins)
print(np.digitize(arr, bins=bins))

# method 3
bins = np.linspace(-5, 20, 5)
print('bins:', bins)
print(np.digitize(arr, bins=bins))

array: [-5 -3 -1  1  3  5  7  9 11 13 15 17 19]
bins: [0, 3, 5, 9]
[0 0 0 1 2 3 3 4 4 4 4 4 4]
bins: [-4 -2  0  2  4]
[0 1 2 3 4 5 5 5 5 5 5 5 5]
bins: [-5.    1.25  7.5  13.75 20.  ]
[1 1 1 1 2 2 2 3 3 3 4 4 4]


### randomly selecting elements from array with required proportion

In [43]:
random = np.random.choice([1, 2, 3], 10, p=[0.2, 0.2, 0.6])
print(np.unique(random, return_counts=True))

(array([1, 2, 3]), array([3, 2, 5], dtype=int64))


### rounding numpy to a given decimal digits

In [44]:
print(np.random.uniform(0, 10, 10))
print(np.around(np.random.uniform(0, 10, 10), 2))

[2.740737 4.317042 9.400298 8.176494 3.36112  1.754105 3.72832  0.056885
 2.524264 7.956625]
[0.15 5.99 6.04 1.05 3.82 0.36 8.9  9.81 0.6  8.91]


### convert multidimensional numpy array to 1d array

In [45]:
arr = np.arange(0, 25).reshape(5,5)
print(arr)
print(np.ravel(arr))

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24]


### applying a function on numpy array

In [46]:
arr = np.arange(0, 25).reshape(5,5)
print(arr)
np.apply_along_axis(lambda x: x*2, arr=arr, axis=1)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]


array([[ 0,  2,  4,  6,  8],
       [10, 12, 14, 16, 18],
       [20, 22, 24, 26, 28],
       [30, 32, 34, 36, 38],
       [40, 42, 44, 46, 48]])