<a href="https://colab.research.google.com/github/Rohan-1103/Data-Science/blob/main/session_15_numpy_tricks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### np.sort(iterable)

- The default `kind` for `np.sort` is `'quicksort'`. NumPy implements it as introsort, so the worst case is O(n log n), not the O(n**2) of a textbook quicksort.

Return a sorted copy of an array.

https://numpy.org/doc/stable/reference/generated/numpy.sort.html

In [1]:
# code
import numpy as np
a = np.random.randint(1,100,15)
a

array([48, 32, 21, 98, 75, 87, 14, 21, 27, 78,  6, 72, 76, 68, 88])

In [2]:
b = np.random.randint(1,100,24).reshape(6,4)
b

array([[82, 40, 47, 44],
       [56, 84,  1, 70],
       [69, 97, 17, 44],
       [65,  2, 13, 69],
       [50, 29, 81, 74],
       [15, 89, 72, 74]])

In [3]:
np.sort(a)[::-1]

array([98, 88, 87, 78, 76, 75, 72, 68, 48, 32, 27, 21, 21, 14,  6])

In [5]:
print(np.sort(b,axis=0))    # Column wise sorting
print()
print(np.sort(b))           # Row wise sorting (default)

[[15  2  1 44]
 [50 29 13 44]
 [56 40 17 69]
 [65 84 47 70]
 [69 89 72 74]
 [82 97 81 74]]

[[40 44 47 82]
 [ 1 56 70 84]
 [17 44 69 97]
 [ 2 13 65 69]
 [29 50 74 81]
 [15 72 74 89]]


### np.append(iterable, value)

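`numpy.append()` returns a new array with the values appended at the end along the given axis; the original array is left unchanged. If no axis is given, both inputs are flattened first.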

https://numpy.org/doc/stable/reference/generated/numpy.append.html

In [6]:
# code
np.append(a,200)

array([ 48,  32,  21,  98,  75,  87,  14,  21,  27,  78,   6,  72,  76,
        68,  88, 200])

In [14]:
b

array([[82, 40, 47, 44],
       [56, 84,  1, 70],
       [69, 97, 17, 44],
       [65,  2, 13, 69],
       [50, 29, 81, 74],
       [15, 89, 72, 74]])

In [15]:
b.shape[0]

6

In [18]:
np.append(b,np.random.random((b.shape[0],1)),axis=1)
# If no axis provided, converts to 1D

array([[82.        , 40.        , 47.        , 44.        ,  0.19782687],
       [56.        , 84.        ,  1.        , 70.        ,  0.67991572],
       [69.        , 97.        , 17.        , 44.        ,  0.97997791],
       [65.        ,  2.        , 13.        , 69.        ,  0.89375576],
       [50.        , 29.        , 81.        , 74.        ,  0.27759582],
       [15.        , 89.        , 72.        , 74.        ,  0.47333421]])

### np.concatenate(iterable(s), axis)

The numpy.concatenate() function joins a sequence of arrays along an existing axis.

https://numpy.org/doc/stable/reference/generated/numpy.concatenate.html

In [19]:
# Build two 2x3 integer grids: `c` holds 0..5 and `d` holds 6..11.
c = np.arange(0, 6).reshape((2, 3))
d = np.arange(6, 12).reshape((2, 3))

# Show both grids, one after the other.
for grid in (c, d):
    print(grid)

[[0 1 2]
 [3 4 5]]
[[ 6  7  8]
 [ 9 10 11]]


In [21]:
print(np.concatenate((c,d)))               # Similar to vstack
print(np.concatenate((c,d),axis=1))        # Similar to hstack

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]
[[ 0  1  2  6  7  8]
 [ 3  4  5  9 10 11]]


### np.unique(iterable)

`np.unique()` returns the sorted unique values of the array passed to it.

https://numpy.org/doc/stable/reference/generated/numpy.unique.html

In [22]:
# code
e = np.array([1,1,2,2,7,7,7,3,3,4,4,5,5,6,6])

In [23]:
np.unique(e)

array([1, 2, 3, 4, 5, 6, 7])

### np.expand_dims()

`numpy.expand_dims()` inserts a new axis of length 1 at the given position, so the result has one more dimension than the input.

https://numpy.org/doc/stable/reference/generated/numpy.expand_dims.html


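- ML/Deep Learning: used to turn a 1D array into a row vector (`axis=0`) or a column vector (`axis=1`), e.g. to add the batch dimension that ML/DL models expect, since they work on batches.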

In [24]:
# code
a.shape

(15,)

In [25]:
np.expand_dims(a,axis=0).shape

(1, 15)

In [26]:
np.expand_dims(a,axis=1)

array([[48],
       [32],
       [21],
       [98],
       [75],
       [87],
       [14],
       [21],
       [27],
       [78],
       [ 6],
       [72],
       [76],
       [68],
       [88]])

### np.where(condition, True, False)

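With only a condition, numpy.where() returns the <u>indices</u> of the elements that satisfy it. With all three arguments, it returns an array that takes the second argument where the condition is True and the third argument where it is False.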

https://numpy.org/doc/stable/reference/generated/numpy.where.html

In [27]:
a

array([48, 32, 21, 98, 75, 87, 14, 21, 27, 78,  6, 72, 76, 68, 88])

In [28]:
# find all *indices* with value greater than 50
np.where(a>50)

(array([ 3,  4,  5,  9, 11, 12, 13, 14]),)

In [29]:
# replace all values > 50 with 0
np.where(a>50,0,a)

array([48, 32, 21,  0,  0,  0, 14, 21, 27,  0,  6,  0,  0,  0,  0])

In [30]:
a

array([48, 32, 21, 98, 75, 87, 14, 21, 27, 78,  6, 72, 76, 68, 88])

In [31]:
np.where(a%2 == 0,0,a)

array([ 0,  0, 21,  0, 75, 87,  0, 21, 27,  0,  0,  0,  0,  0,  0])

### np.argmax()

The numpy.argmax() function returns <u>indices of the max element of the array</u> in a particular axis.

https://numpy.org/doc/stable/reference/generated/numpy.argmax.html

In [32]:
# code
a

array([48, 32, 21, 98, 75, 87, 14, 21, 27, 78,  6, 72, 76, 68, 88])

In [33]:
np.argmax(a)

np.int64(3)

In [34]:
b

array([[82, 40, 47, 44],
       [56, 84,  1, 70],
       [69, 97, 17, 44],
       [65,  2, 13, 69],
       [50, 29, 81, 74],
       [15, 89, 72, 74]])

In [35]:
np.argmax(b,axis=0)

array([0, 2, 4, 4])

In [36]:
np.argmax(b,axis=1)

array([0, 1, 1, 3, 2, 1])

### np.argmin()

In [37]:
# np.argmin
np.argmin(a)

np.int64(10)

### np.cumsum()

numpy.cumsum() function is used when we want to compute the <u>cumulative sum of array elements</u> over a given axis.

https://numpy.org/doc/stable/reference/generated/numpy.cumsum.html

In [38]:
a

array([48, 32, 21, 98, 75, 87, 14, 21, 27, 78,  6, 72, 76, 68, 88])

In [39]:
np.cumsum(a)

array([ 48,  80, 101, 199, 274, 361, 375, 396, 423, 501, 507, 579, 655,
       723, 811])

In [40]:
b

array([[82, 40, 47, 44],
       [56, 84,  1, 70],
       [69, 97, 17, 44],
       [65,  2, 13, 69],
       [50, 29, 81, 74],
       [15, 89, 72, 74]])

In [41]:
print(np.cumsum(b,axis=1))         # axis=1 -> running total across each row
print(np.cumsum(b,axis=0))         # axis=0 -> running total down each column (with no axis the result is a flattened 1D array)

[[ 82 122 169 213]
 [ 56 140 141 211]
 [ 69 166 183 227]
 [ 65  67  80 149]
 [ 50  79 160 234]
 [ 15 104 176 250]]
[[ 82  40  47  44]
 [138 124  48 114]
 [207 221  65 158]
 [272 223  78 227]
 [322 252 159 301]
 [337 341 231 375]]


In [42]:
np.cumsum(b)

array([  82,  122,  169,  213,  269,  353,  354,  424,  493,  590,  607,
        651,  716,  718,  731,  800,  850,  879,  960, 1034, 1049, 1138,
       1210, 1284])

### np.cumprod()

In [43]:
# np.cumprod
# NOTE: the running product of these 15 values outgrows the array's fixed-width
# integer dtype (apparently 64-bit, judging by the magnitudes shown), so the
# tail of the result silently wraps around -- hence the negative entries below.
# Presumably passing a wider dtype (e.g. dtype=float) avoids this -- verify.
np.cumprod(a)

array([                  48,                 1536,                32256,
                    3161088,            237081600,          20626099200,
               288765388800,        6064073164800,      163729975449600,
          12770938085068800,    76625628510412800,  5517045252749721600,
       -4979674486340845568, -6576471744405569536, -6880447222694019072])

In [44]:
a

array([48, 32, 21, 98, 75, 87, 14, 21, 27, 78,  6, 72, 76, 68, 88])

### np.percentile()

The numpy.percentile() function computes the q-th percentile of the given data (array elements) along the specified axis.

https://numpy.org/doc/stable/reference/generated/numpy.percentile.html

In [45]:
a

array([48, 32, 21, 98, 75, 87, 14, 21, 27, 78,  6, 72, 76, 68, 88])

In [49]:
print(np.percentile(a,100))         # Max
print(np.percentile(a,50))          # Median
print(np.percentile(a,0))           # Min

98.0
68.0
6.0


In [50]:
np.median(a)

np.float64(68.0)

### np.histogram()

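NumPy's built-in numpy.histogram() function <u>computes the frequency distribution of the data</u>: it returns the count of values in each bin together with the bin edges. It does not draw anything; pass the result to a plotting library to get a graphical histogram.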

https://numpy.org/doc/stable/reference/generated/numpy.histogram.html

In [51]:
# code
a

array([48, 32, 21, 98, 75, 87, 14, 21, 27, 78,  6, 72, 76, 68, 88])

In [52]:
print(np.histogram(a,bins=[0,50,100]))
print(np.histogram(a,bins=[0,10, 20, 30, 40, 50, 60, 70, 80, 90, 100]))

(array([7, 8]), array([  0,  50, 100]))
(array([1, 1, 3, 1, 1, 0, 1, 4, 2, 1]), array([  0,  10,  20,  30,  40,  50,  60,  70,  80,  90, 100]))


### np.corrcoef()

Return Pearson product-moment correlation coefficients.

https://numpy.org/doc/stable/reference/generated/numpy.corrcoef.html

In [53]:
# Five paired observations: a salary and the matching experience value.
salary = np.array([20000, 40000, 25000, 35000, 60000])
experience = np.array([1, 3, 2, 4, 2])

# For two 1D inputs corrcoef yields a 2x2 matrix; the off-diagonal entries
# hold the Pearson correlation between salary and experience.
pearson_matrix = np.corrcoef(salary, experience)
print(pearson_matrix)

[[1.         0.25344572]
 [0.25344572 1.        ]]


### np.isin

numpy.isin() tests, element by element, whether each <u>value</u> of one array is present in a second collection of values (which may have a different size). It returns a boolean array with the same shape as the first array.

https://numpy.org/doc/stable/reference/generated/numpy.isin.html

In [54]:
# code
a

array([48, 32, 21, 98, 75, 87, 14, 21, 27, 78,  6, 72, 76, 68, 88])

In [56]:
items = [10,20,30,40,50,60,75,80,90,100]

a[np.isin(a,items)]

array([75])

### np.flip

The numpy.flip() function reverses the order of array elements along the specified axis, preserving the shape of the array.

https://numpy.org/doc/stable/reference/generated/numpy.flip.html

In [57]:
# code
a

array([48, 32, 21, 98, 75, 87, 14, 21, 27, 78,  6, 72, 76, 68, 88])

In [58]:
np.flip(a)

array([88, 68, 76, 72,  6, 78, 27, 21, 14, 87, 75, 98, 21, 32, 48])

In [59]:
b

array([[82, 40, 47, 44],
       [56, 84,  1, 70],
       [69, 97, 17, 44],
       [65,  2, 13, 69],
       [50, 29, 81, 74],
       [15, 89, 72, 74]])

In [60]:
print(np.flip(b,axis=1))
print(np.flip(b,axis=0))
print(np.flip(b))

[[44 47 40 82]
 [70  1 84 56]
 [44 17 97 69]
 [69 13  2 65]
 [74 81 29 50]
 [74 72 89 15]]
[[15 89 72 74]
 [50 29 81 74]
 [65  2 13 69]
 [69 97 17 44]
 [56 84  1 70]
 [82 40 47 44]]
[[74 72 89 15]
 [74 81 29 50]
 [69 13  2 65]
 [44 17 97 69]
 [70  1 84 56]
 [44 47 40 82]]


### np.put

The numpy.put() function <u>replaces specific elements</u> of an array, in place, with the given values. The indices refer to positions in the flattened array.

https://numpy.org/doc/stable/reference/generated/numpy.put.html

In [61]:
# code
a

array([48, 32, 21, 98, 75, 87, 14, 21, 27, 78,  6, 72, 76, 68, 88])

In [62]:
np.put(a,[0,1],[110,530])               # Permanent change: `a` itself is modified in place, so the call has no direct output
a

array([110, 530,  21,  98,  75,  87,  14,  21,  27,  78,   6,  72,  76,
        68,  88])

### np.delete

The numpy.delete() function returns a new array with the deletion of sub-arrays along with the mentioned axis.

https://numpy.org/doc/stable/reference/generated/numpy.delete.html

In [63]:
# code
a

array([110, 530,  21,  98,  75,  87,  14,  21,  27,  78,   6,  72,  76,
        68,  88])

In [64]:
np.delete(a,[0,2,4])

array([530,  98,  87,  14,  21,  27,  78,   6,  72,  76,  68,  88])

- `np.union1d(ar1, ar2)`: Returns the **sorted, unique union** of elements from both input arrays.
- `np.intersect1d(ar1, ar2)`: Returns the **sorted, unique intersection** of elements present in both arrays.
- `np.setdiff1d(ar1, ar2)`: Returns the **sorted, unique elements** in `ar1` that are **not in** `ar2`.
- `np.setxor1d(ar1, ar2)`: Returns the **sorted, unique elements** that are in **either** of the arrays but **not in both**.
- `np.in1d(ar1, ar2)`: Returns a **boolean array** indicating whether each element of `ar1` is **present in** `ar2`. Deprecated since NumPy 2.0; prefer `np.isin(ar1, ar2)`.


In [65]:
m = np.array([1,2,3,4,5])
n = np.array([3,4,5,6,7])

np.union1d(m,n)

array([1, 2, 3, 4, 5, 6, 7])

In [66]:
np.intersect1d(m,n)

array([3, 4, 5])

In [67]:
print(np.setdiff1d(n,m))
print(np.setdiff1d(m,n))

[6 7]
[1 2]


In [68]:
np.setxor1d(m,n)

array([1, 2, 6, 7])

In [69]:
# np.in1d is deprecated as of NumPy 2.0 and warns when called; np.isin is the
# supported replacement and gives the same boolean membership mask for this
# 1-D input, so the selected elements are unchanged.
m[np.isin(m, 1)]

  m[np.in1d(m,1)]


array([1])

### np.clip

numpy.clip() function is used to Clip (limit)/(range bound) the values in an array.

https://numpy.org/doc/stable/reference/generated/numpy.clip.html

In [70]:
# code
a

array([110, 530,  21,  98,  75,  87,  14,  21,  27,  78,   6,  72,  76,
        68,  88])

In [72]:
np.clip(a,a_min=25,a_max=85)

array([85, 85, 25, 85, 75, 85, 25, 25, 27, 78, 25, 72, 76, 68, 85])

In [73]:
# 17. np.swapaxes

In [77]:
b

array([[82, 40, 47, 44],
       [56, 84,  1, 70],
       [69, 97, 17, 44],
       [65,  2, 13, 69],
       [50, 29, 81, 74],
       [15, 89, 72, 74]])

In [76]:
np.swapaxes(b, 1, 0)

array([[82, 56, 69, 65, 50, 15],
       [40, 84, 97,  2, 29, 89],
       [47,  1, 17, 13, 81, 72],
       [44, 70, 44, 69, 74, 74]])

In [78]:
# 18. np.random.uniform

In [79]:
# 19. np.count_nonzero

In [80]:
np.count_nonzero(b)

24

`np.tile(A, reps)`
Construct an array by repeating A the number of times given by reps.



In [81]:
# 21. np.tile
# https://www.kaggle.com/code/abhayparashar31/best-numpy-functions-for-data-science-50?scriptVersionId=98816580

In [85]:
np.tile(3, 4)                   # 1-D repeat of the scalar; not echoed because it is not the cell's last expression
# The same 4x3 result could also be built as np.tile(3, 12).reshape(4, 3)
np.tile(3, (4, 3))

array([[3, 3, 3],
       [3, 3, 3],
       [3, 3, 3],
       [3, 3, 3]])

In [87]:
np.tile(a, 3)

array([110, 530,  21,  98,  75,  87,  14,  21,  27,  78,   6,  72,  76,
        68,  88, 110, 530,  21,  98,  75,  87,  14,  21,  27,  78,   6,
        72,  76,  68,  88, 110, 530,  21,  98,  75,  87,  14,  21,  27,
        78,   6,  72,  76,  68,  88])

In [None]:
# 22. np.repeat
# https://towardsdatascience.com/10-numpy-functions-you-should-know-1dc4863764c5

In [86]:
np.repeat(a, 3)

array([110, 110, 110, 530, 530, 530,  21,  21,  21,  98,  98,  98,  75,
        75,  75,  87,  87,  87,  14,  14,  14,  21,  21,  21,  27,  27,
        27,  78,  78,  78,   6,   6,   6,  72,  72,  72,  76,  76,  76,
        68,  68,  68,  88,  88,  88])

In [88]:
# 25. np.allclose and np.equal

In [92]:
# Each pair is compared twice: np.allclose (tolerance-based, one bool for the
# whole pair) and np.equal (exact, element-by-element boolean array).
array1 = np.array([1.0, 2.0, 3.0])
array2 = np.array([1.00000001, 2.00000002, 3.00000003])   # off by ~1e-8 per element
array3 = np.array([1.0, 2.0, 3.0])
array4 = np.array([1.1, 2.0, 3.0])                        # first element off by 0.1
array5 = np.array([1.0, np.nan])
array6 = np.array([1.0, np.nan])                          # NaN in the same slot as array5

comparisons = (
    (array1, array2, {}),                    # close under the default tolerances
    (array3, array4, {}),                    # not close under the default tolerances
    (array5, array6, {}),                    # False by default because equal_nan is False
    (array5, array6, {"equal_nan": True}),   # True once NaNs are treated as equal
)
for lhs, rhs, allclose_options in comparisons:
    print(np.allclose(lhs, rhs, **allclose_options))
    # np.equal is called without any NaN handling, so a NaN slot compares
    # False in every round, including the equal_nan=True one.
    print(np.equal(lhs, rhs))

True
[False False False]
False
[False  True  True]
False
[ True False]
True
[ True False]
