In [1]:
import numpy as np

# A. Filtering data
#### Sometimes our data contains values we don't want to use, so we filter it down to only the values we want.

In [2]:
# 3x3 integer matrix used to demonstrate element-wise comparisons.
arr = np.array([
    [0, 2, 3],
    [1, 3, -6],
    [-3, -2, 1],
])

# Every comparison against a scalar returns a boolean mask with arr's shape.
print("\n".join(
    repr(mask)
    for mask in (
        arr == 3,     # True where the element equals 3
        arr > 0,      # True where the element is strictly positive
        arr != 1,     # True everywhere except where the element is 1
        ~(arr == 3),  # ~ inverts a boolean mask: the negation of the first mask above
    )
))

array([[False, False,  True],
       [False,  True, False],
       [False, False, False]])
array([[False,  True,  True],
       [ True,  True, False],
       [False, False,  True]])
array([[ True,  True,  True],
       [False,  True,  True],
       [ True,  True, False]])
array([[ True,  True, False],
       [ True, False,  True],
       [ True,  True,  True]])


#### (np.nan) cannot be located with ordinary relational operators: `np.nan == np.nan` is False, so an equality test never matches it. Instead we use (np.isnan) to determine which locations of the array contain (np.nan)

In [3]:
# Matrix with np.nan placed on the anti-diagonal.
arr = np.array([
    [0, 2, np.nan],
    [1, np.nan, -6],
    [np.nan, -2, 1],
])

# np.isnan returns a boolean mask that is True exactly where arr holds np.nan.
print(repr(np.isnan(arr)))

array([[False, False,  True],
       [False,  True, False],
       [ True, False, False]])


# B. Filtering in NumPy
#### (np.where) is used to find the positions (indices) of the elements we are interested in

In [5]:
# With a single argument, np.where returns a tuple of index arrays (one per
# dimension) giving the positions of the True entries.
print(repr(np.where([True, False, True, False, False, True])))

# 1-D case: positions at which the value 3 occurs.
arr = np.array([0, 3, 5, 3, 1])
print(repr(np.where(arr == 3)))

# 2-D case: the tuple unpacks into one index array per axis.
arr = np.array([
    [0, 2, 3],
    [1, 0, 0],
    [-3, 0, 0],
])
x_ind, y_ind = np.where(arr != 0)
print("\n".join(map(repr, (
    x_ind,              # row (axis 0) indices of the non-zero elements
    y_ind,              # column (axis 1) indices of the non-zero elements
    arr[x_ind, y_ind],  # pairing both index arrays fetches the non-zero values themselves
))))

(array([0, 2, 5], dtype=int64),)
(array([1, 3], dtype=int64),)
array([0, 0, 1, 2], dtype=int64)
array([1, 2, 0, 0], dtype=int64)
array([ 2,  3,  1, -3])


##### The interesting thing about (np.where) is that it must be applied with exactly 1 or 3 arguments. When we use 3 arguments, the first argument is still the boolean array. However, the next two arguments represent the (True) replacement values and the (False) replacement values, respectively

In [2]:
# Source arrays for the three-argument form np.where(condition, x, y):
# the result takes x[i, j] where condition[i, j] is True and y[i, j] where it is False.
positives = np.array([[1, 2], [3, 4]])
negatives = np.array([[-2, -5], [-1, -8]])

# Three conditions are tried in turn; np_filter is rebound on every pass and
# ends up holding the last one (positives > 0).
for np_filter in (
    # Explicit mask: 1 and 4 come from positives at (0,0) and (1,1),
    # -5 and -1 come from negatives at (0,1) and (1,0).
    np.array([[True, False], [False, True]]),
    # Mask derived from the data: True only where positives is greater than 2.
    positives > 2,
    # True wherever positives is greater than 0, which here is every position.
    positives > 0,
):
    print(repr(np.where(np_filter, positives, negatives)))

array([[ 1, -5],
       [-1,  4]])
array([[-2, -5],
       [ 3,  4]])
array([[1, 2],
       [3, 4]])


#### Note that our second and third arguments had the same shape as the first argument. However, if we want to use a constant replacement value, e.g. -1, we can rely on broadcasting: rather than building an entire array filled with the same value, we can pass the value itself as an argument.

In [3]:
# Boolean mask selecting the main diagonal.
np_filter = np.array([
    [True, False],
    [False, True],
])
positives = np.array([
    [1, 2],
    [3, 4],
])

# The scalar -1 is broadcast to the mask's shape, so it fills every False position.
print(repr(np.where(np_filter, positives, -1)))

array([[ 1, -1],
       [-1,  4]])


# C. Axis-wise filtering

In [5]:
arr = np.array([
    [-2, -1, -3],
    [4, 5, -6],
    [3, 9, 1],
])

# Boolean mask: True where the element is strictly positive, False otherwise.
print(repr(arr > 0))

# np.any acts like a logical OR over the whole mask (True if at least one entry is True);
# np.all acts like a logical AND (True only if every entry is True).
print(np.any(arr > 0), np.all(arr > 0), sep="\n")

array([[False, False, False],
       [ True,  True, False],
       [ True,  True,  True]])
True
False


##### The code below is only meant to build intuition for how the (axis) argument works

In [6]:
# 3-D array of shape (2, 2, 3): two blocks, each with two rows of three values.
arr = np.array([
    [[5, 8, 2], [1, 9, 4]],
    [[3, 7, 6], [0, 12, 10]],
])
print(arr)

# np.argmin(arr, axis=k) collapses axis k and reports, for every remaining position,
# the index along axis k at which the minimum sits:
#   axis=0 -> compares the two blocks element by element (result shape (2, 3))
#   axis=1 -> compares the two rows inside each block    (result shape (2, 3))
#   axis=2 -> scans along each row of three values       (result shape (2, 2))
print(*(np.argmin(arr, axis=k) for k in range(arr.ndim)), sep="\n")

[[[ 5  8  2]
  [ 1  9  4]]

 [[ 3  7  6]
  [ 0 12 10]]]
[[1 1 0]
 [1 0 0]]
[[1 0 0]
 [1 0 0]]
[[2 0]
 [0 0]]


In [7]:
arr = np.array([
    [-2, -1, -3],
    [4, 5, -6],
    [3, 9, 1],
])

# Reducing the boolean mask along one axis gives one answer per column or per row.
print("\n".join(map(repr, (
    arr > 0,                  # the full element-wise mask
    np.any(arr > 0, axis=0),  # per column: is any element > 0?
    np.any(arr > 0, axis=1),  # per row: is any element > 0?
    np.all(arr > 0, axis=1),  # per row: are all elements > 0?
))))

array([[False, False, False],
       [ True,  True, False],
       [ True,  True,  True]])
array([ True,  True,  True])
array([False,  True,  True])
array([False, False,  True])


##### We use (np.any) to obtain a boolean array representing the rows that have at least one positive number. We then use the boolean array as the input to (np.where), which gives us the actual indices of the rows with at least one positive number.

In [4]:
arr = np.array([
    [-2, -1, -3],
    [4, 5, -6],
    [3, 9, 1],
])

# One boolean per row: True when that row contains at least one positive value.
has_positive = np.any(arr > 0, axis=1)

print(
    has_positive,
    # np.where turns the boolean vector into row indices; indexing arr with them
    # keeps only the rows flagged True in has_positive.
    repr(arr[np.where(has_positive)]),
    sep="\n",
)

[False  True  True]
array([[ 4,  5, -6],
       [ 3,  9,  1]])
