[Reference](https://towardsdatascience.com/5-string-based-filtering-methods-every-pandas-user-should-know-48021938412e)

In [16]:
import pandas as pd
import numpy as np

In [6]:
# Demo frame built column-by-column: labels in col1, whole numbers in col2.
data = pd.DataFrame(
    {
        "col1": ["A", "A", "B", "C", "A", "B"],
        "col2": [1, 2, 1, 4, 10, 7],
    }
)

# Report the dtype pandas inferred for each column.
print("Data type of col1 is: ", data["col1"].dtype)
print("Data type of col2 is: ", data["col2"].dtype)

Data type of col1 is:  object
Data type of col2 is:  int64


In [7]:
# Same frame as before, except the first col2 value is the string "1",
# so col2 now mixes a string with integers.
data = pd.DataFrame(
    {
        "col1": ["A", "A", "B", "C", "A", "B"],
        "col2": ["1", 2, 1, 4, 10, 7],
    }
)

# Report the dtype pandas inferred for each column.
print("Data type of col1 is: ", data["col1"].dtype)
print("Data type of col2 is: ", data["col2"].dtype)

Data type of col1 is:  object
Data type of col2 is:  object


# String-based Filtering Methods


In [8]:
data = pd.DataFrame(
    {
        "col1": ["A", "A", "B", "C", "A", "B"],
        "col2": [1, 2, 1, 4, 10, 7],
    }
)

# Boolean mask: True on the rows whose col1 value equals "A".
df_filtered = data.loc[data["col1"].eq("A")]
print(df_filtered)

  col1  col2
0    A     1
1    A     2
4    A    10


In [9]:
data = pd.DataFrame(
    {
        "col1": ["A", "A", "B", "C", "A", "B"],
        "col2": [1, 2, 1, 4, 10, 7],
    }
)

# Same filter as an equality mask, written as a query expression string.
df_filtered = data.query("col1 == 'A'")
print(df_filtered)

  col1  col2
0    A     1
1    A     2
4    A    10


In [10]:
data = pd.DataFrame(
    {
        "col1": ["A", "A", "B", "C", "A", "B"],
        "col2": [1, 2, 1, 4, 10, 7],
    }
)

# One mask per accepted value, combined element-wise with OR.
is_a = data["col1"] == "A"
is_b = data["col1"] == "B"
df_filtered = data[is_a | is_b]
print(df_filtered)

  col1  col2
0    A     1
1    A     2
2    B     1
4    A    10
5    B     7


In [11]:
data = pd.DataFrame(
    {
        "col1": ["A", "A", "B", "C", "A", "B"],
        "col2": [1, 2, 1, 4, 10, 7],
    }
)

# isin() tests membership in a list of accepted values in a single call.
in_accepted = data["col1"].isin(["A", "B"])
df_filtered = data.loc[in_accepted]
print(df_filtered)

  col1  col2
0    A     1
1    A     2
2    B     1
4    A    10
5    B     7


In [12]:
data = pd.DataFrame(
    {
        "col1": ["A", "A", "B", "C", "A", "B"],
        "col2": [1, 2, 1, 4, 10, 7],
    }
)

# Two equality conditions joined with | inside one query expression string.
df_filtered = data.query("col1 == 'A' | col1 == 'B'")
print(df_filtered)

  col1  col2
0    A     1
1    A     2
2    B     1
4    A    10
5    B     7


In [13]:
data = pd.DataFrame(
    {
        "col1": ["Drake", "John", "Ben", "Charlie", "Andrew", "Rachel"],
        "col2": [1, 2, 1, 4, 10, 7],
    }
)

# Character count of every col1 entry; keep the rows longer than 4 characters.
name_lengths = data["col1"].str.len()
df_filtered = data[name_lengths > 4]
print(df_filtered)

      col1  col2
0    Drake     1
3  Charlie     4
4   Andrew    10
5   Rachel     7


In [14]:
data = pd.DataFrame(
    {
        "col1": ["Drake", "John", "Ben", "Joy", "Joseph", "Rachel"],
        "col2": [1, 2, 1, 4, 10, 7],
    }
)

# Keep the rows whose col1 value begins with the prefix "Jo".
has_prefix = data["col1"].str.startswith("Jo")
df_filtered = data.loc[has_prefix]
print(df_filtered)

     col1  col2
1    John     2
3     Joy     4
4  Joseph    10


In [19]:
data = pd.DataFrame(
    {
        "col1": ["Drake", "John", "Ben", "Joy", "Joseph", "Branden"],
        "col2": [1, 2, 1, 4, 10, 7],
    }
)

# Prefix filter: only entries starting with "Jo" are retained.
starts_with_jo = data["col1"].str.startswith("Jo")
df_filtered = data[starts_with_jo]
print(df_filtered)

     col1  col2
1    John     2
3     Joy     4
4  Joseph    10


In [20]:
# The last col1 entry is missing (NaN) on purpose.
data = pd.DataFrame(
    {
        "col1": ["Drake", "John", "Ben", "Joy", "Joseph", np.nan],
        "col2": [1, 2, 1, 4, 10, 7],
    }
)

# na=False makes the missing entry count as "no match", so the mask is
# all True/False and can be used directly for row selection.
starts_with_jo = data["col1"].str.startswith("Jo", na=False)
df_filtered = data.loc[starts_with_jo]
print(df_filtered)

     col1  col2
1    John     2
3     Joy     4
4  Joseph    10


In [21]:
data = pd.DataFrame(
    {
        "col1": ["Drake", "John", "Ben", "Joy", "Joseph", "Rachel"],
        "col2": [1, 2, 1, 4, 10, 7],
    }
)

# Keep the rows whose col1 value ends with the suffix "n".
has_suffix = data["col1"].str.endswith("n")
df_filtered = data.loc[has_suffix]
print(df_filtered)

   col1  col2
1  John     2
2   Ben     1


In [22]:
data = pd.DataFrame(
    {
        "col1": ["Drake", "John", "Ben", "Joy", "Joseph", "Rachel"],
        "col2": [1, 2, 1, 4, 10, 7],
    }
)

# Suffix filter: only entries ending in "n" are retained.
ends_with_n = data["col1"].str.endswith("n")
df_filtered = data[ends_with_n]
print(df_filtered)

   col1  col2
1  John     2
2   Ben     1


In [23]:
data = pd.DataFrame([["Cake", 1], ["Shake", 2], ["Lake", 1], 
                     ["Pizza", 4], ["Chocolate", 10], ["Bake", 7]], 
                     columns = ["col1", "col2"])

# Keep the rows whose col1 value contains the substring "ak" anywhere.
# str.contains() treats its pattern as a regular expression by default;
# regex=False makes this a plain literal substring search, so a search term
# containing characters such as "." or "(" would still be matched verbatim.
df_filtered = data[data.col1.str.contains("ak", regex = False)]
print(df_filtered)

    col1  col2
0   Cake     1
1  Shake     2
2   Lake     1
5   Bake     7


In [24]:
data = pd.DataFrame([["CaKe", 1], ["Shake", 2], ["LAKE", 1], 
                     ["Pizza", 4], ["Chocolate", 10], ["BAke", 7]], 
                     columns = ["col1", "col2"])

# Case-insensitive substring search: case=False lets "ak" match "aK", "AK", ...
# str.contains() treats its pattern as a regular expression by default;
# regex=False keeps this a plain literal search, so a search term containing
# regex metacharacters would still be matched verbatim.
df_filtered = data[data.col1.str.contains("ak", case = False, regex = False)]
print(df_filtered)

    col1  col2
0   CaKe     1
1  Shake     2
2   LAKE     1
5   BAke     7


In [25]:
data = pd.DataFrame(
    {
        "col1": ["123", "Shake", "42", "Pizza", "273.19", "Bake121"],
        "col2": [1, 2, 1, 4, 10, 7],
    }
)

# isalnum() is True only when every character is a letter or a digit,
# so "273.19" (which contains a ".") is dropped.
only_letters_or_digits = data["col1"].str.isalnum()
df_filtered = data.loc[only_letters_or_digits]
print(df_filtered)

      col1  col2
0      123     1
1    Shake     2
2       42     1
3    Pizza     4
5  Bake121     7


In [26]:
data = pd.DataFrame(
    {
        "col1": ["123", "shake", "42", "PIZZA", "273.19", "Bake121"],
        "col2": [1, 2, 1, 4, 10, 7],
    }
)

# isnumeric() is True only when every character is numeric, so entries with
# letters or a "." (such as "273.19") are dropped.
only_numeric = data["col1"].str.isnumeric()
df_filtered = data.loc[only_numeric]
print(df_filtered)

  col1  col2
0  123     1
2   42     1
