# Filtering arrays

## Two ways to filter 
- Masks and fancy indexing
- np.where()

## Boolean masks 

In [8]:
import numpy as np 
# Integers 1 through 5; the stop value (6) is exclusive.
one_to_five = np.arange(1, 6)
one_to_five

array([1, 2, 3, 4, 5])

In [9]:
# The comparison is applied elementwise, producing a boolean array that is
# True wherever the value is even.
mask = one_to_five % 2 == 0
mask

array([False,  True, False,  True, False])

- Filtering with fancy indexing 

In [10]:
# Indexing with the boolean mask keeps only the elements where the mask is True.
one_to_five[mask]

array([2, 4])

In [6]:
# One row per classroom: column 0 is the class id, column 1 is the class size.
classroom_ids_and_sizes = np.array([
    [1, 22],
    [2, 21],
    [3, 27],
    [4, 26],
])
classroom_ids_and_sizes

array([[ 1, 22],
       [ 2, 21],
       [ 3, 27],
       [ 4, 26]])

- Let's say we are assigning partners in a school, and we want to know which class ids have an even number of students. Class ids are in the left column and class sizes are in the right column.

In [11]:
# Column 1 holds the class sizes; the comparison marks the rows whose size is even.
classroom_ids_and_sizes[:,1] % 2 == 0

array([ True, False, False,  True])

In [12]:
# Pick column 0 (class ids) for the rows whose size in column 1 is even,
# using one combined index rather than two chained ones.
classroom_ids_and_sizes[classroom_ids_and_sizes[:, 1] % 2 == 0, 0]

array([1, 4])

In [13]:
# Called with only a condition, np.where() returns a tuple holding one index
# array per dimension; the condition here is 1-D, so the tuple has one array.
np.where(classroom_ids_and_sizes[:,1] % 2 == 0)

(array([0, 3]),)

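- Using np.where() in the classroom example returns a tuple of index arrays indicating where the condition is True: here, rows 0 and 3 are the classes with an even number of students.
- For a 2-D array, such as the sudoku grid below, np.where() returns one array of row indices and one array of column indices.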

# 9x9 sudoku grid; a 0 marks a cell that has not been filled in yet.
sudoku_game = np.array(
    [
        [0, 0, 4, 3, 0, 0, 2, 0, 9],
        [0, 0, 5, 0, 0, 9, 0, 0, 1],
        [0, 7, 0, 0, 6, 0, 0, 4, 3],
        [0, 0, 6, 0, 0, 2, 0, 8, 7],
        [1, 9, 0, 0, 0, 7, 4, 0, 0],
        [0, 5, 0, 0, 8, 3, 0, 0, 0],
        [6, 0, 0, 0, 0, 0, 1, 0, 5],
        [0, 0, 3, 5, 0, 8, 6, 9, 0],
        [0, 4, 2, 9, 1, 0, 3, 0, 0],
    ]
)

In [23]:
# On a 2-D array, np.where() with only a condition returns one index array per
# axis; unpack them into the row and column positions of the empty (0) cells.
row_ind, column_ind = np.where(sudoku_game == 0)
row_ind, column_ind

(array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4,
        4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8,
        8, 8]),
 array([0, 1, 4, 5, 7, 0, 1, 3, 4, 6, 7, 0, 2, 3, 5, 6, 0, 1, 3, 4, 6, 2,
        3, 4, 7, 8, 0, 2, 3, 6, 7, 8, 1, 2, 3, 4, 5, 7, 0, 1, 4, 8, 0, 5,
        7, 8]))

## Find and replace

In [24]:
# Three-argument form: use "" wherever the cell is 0, otherwise keep the
# original value. Combining strings with integers yields a string array.
np.where(sudoku_game == 0, "", sudoku_game)

array([['', '', '4', '3', '', '', '2', '', '9'],
       ['', '', '5', '', '', '9', '', '', '1'],
       ['', '7', '', '', '6', '', '', '4', '3'],
       ['', '', '6', '', '', '2', '', '8', '7'],
       ['1', '9', '', '', '', '7', '4', '', ''],
       ['', '5', '', '', '8', '3', '', '', ''],
       ['6', '', '', '', '', '', '1', '', '5'],
       ['', '', '3', '5', '', '8', '6', '9', ''],
       ['', '4', '2', '9', '1', '', '3', '', '']], dtype='<U21')

# Adding and removing data

- Concatenating rows

In [25]:
# Existing classrooms, one row each: (class id, class size).
classroom_ids_and_sizes = np.array([
    [1, 22],
    [2, 21],
    [3, 27],
    [4, 26],
])
# Two additional classrooms in the same (id, size) layout.
# NOTE(review): both new rows use class id 5 -- confirm whether the second
# one was meant to be id 6.
new_classrooms = np.array([[5, 30], [5, 17]])
# axis=0 (the default) stacks the new rows underneath the existing ones.
np.concatenate((classroom_ids_and_sizes, new_classrooms), axis=0)

array([[ 1, 22],
       [ 2, 21],
       [ 3, 27],
       [ 4, 26],
       [ 5, 30],
       [ 5, 17]])

- Concatenating columns 

In [30]:
# Left half of the combined table: (class id, class size) per classroom.
classroom_ids_and_sizes = np.array([
    [1, 22],
    [2, 21],
    [3, 27],
    [4, 26],
])
# Right half: (grade level, teacher name). Because each row mixes an integer
# with a string, NumPy stores every element of this array as a string.
grade_levels_and_teachers = np.array([
    [1, "James"],
    [1, "George"],
    [3, "Amy"],
    [3, "Meehir"],
])
# axis=1 joins the arrays side by side, adding columns instead of rows; the
# combined array is a string array as well.
classroom_data = np.concatenate(
    (classroom_ids_and_sizes, grade_levels_and_teachers),
    axis=1,
)
classroom_data

array([['1', '22', '1', 'James'],
       ['2', '21', '1', 'George'],
       ['3', '27', '3', 'Amy'],
       ['4', '26', '3', 'Meehir']], dtype='<U21')

# Deleting with np.delete()

In [31]:
# Show the combined array again as the starting point for the deletions below.
classroom_data

array([['1', '22', '1', 'James'],
       ['2', '21', '1', 'George'],
       ['3', '27', '3', 'Amy'],
       ['4', '26', '3', 'Meehir']], dtype='<U21')

- To delete the row at index 1, pass `axis=0`:

In [32]:
# Remove the row at index 1. np.delete() returns a new array; classroom_data
# itself is left unchanged.
np.delete(classroom_data, 1, axis=0)

array([['1', '22', '1', 'James'],
       ['3', '27', '3', 'Amy'],
       ['4', '26', '3', 'Meehir']], dtype='<U21')

- To delete the column at index 1, pass `axis=1`:

In [33]:
# Remove the column at index 1 (the class sizes) from every row.
np.delete(classroom_data, 1, axis=1)

array([['1', '1', 'James'],
       ['2', '1', 'George'],
       ['3', '3', 'Amy'],
       ['4', '3', 'Meehir']], dtype='<U21')

- If no axis is specified, NumPy deletes the indicated index or indices along a flattened version of the array.

In [34]:
# No axis given: the array is flattened first, so only the single element at
# flat index 1 ('22') is removed and the result is one-dimensional.
np.delete(classroom_data, 1)

array(['1', '1', 'James', '2', '21', '1', 'George', '3', '27', '3', 'Amy',
       '4', '26', '3', 'Meehir'], dtype='<U21')