[Reference](https://medium.com/@akaivdo/how-to-select-rows-containing-specified-string-7cbba8ffcac4)

# Preparing data


In [1]:
import pandas as pd

# Sample data: six students, one tuple per row in the column order given below.
# The one-letter index labels are used to identify the rows in later cells.
df = pd.DataFrame(
    data=[
        ("Kevin", 80, "A", "Writing, Sports"),
        ("Jack", 90, "B", "Sports, Traveling"),
        ("Mary", 95, "A", "Violin, Reading"),
        ("Bob", 93, "A", "Reading, Blogging"),
        ("Robert", 88, "B", "Learning, Reading, Writing"),
        ("Amy", 81, "B", "Volunteering, Piano"),
    ],
    columns=["name", "score", "class", "Interests"],
    index=["K", "J", "M", "B", "R", "A"],
)

df

Unnamed: 0,name,score,class,Interests
K,Kevin,80,A,"Writing, Sports"
J,Jack,90,B,"Sports, Traveling"
M,Mary,95,A,"Violin, Reading"
B,Bob,93,A,"Reading, Blogging"
R,Robert,88,B,"Learning, Reading, Writing"
A,Amy,81,B,"Volunteering, Piano"


# How to select rows from a DataFrame


In [2]:
# Boolean-mask selection: keep the rows whose "class" value is exactly "A".
df.loc[df["class"].eq("A")]

Unnamed: 0,name,score,class,Interests
K,Kevin,80,A,"Writing, Sports"
M,Mary,95,A,"Violin, Reading"
B,Bob,93,A,"Reading, Blogging"


In [3]:
# Keep the rows whose "score" is strictly greater than 80 (a score of 80 is excluded).
df.loc[df["score"].gt(80)]

Unnamed: 0,name,score,class,Interests
J,Jack,90,B,"Sports, Traveling"
M,Mary,95,A,"Violin, Reading"
B,Bob,93,A,"Reading, Blogging"
R,Robert,88,B,"Learning, Reading, Writing"
A,Amy,81,B,"Volunteering, Piano"


In [4]:
# Combine two conditions element-wise with `&`: class "A" AND score above 80.
df.loc[df["class"].eq("A") & df["score"].gt(80)]

Unnamed: 0,name,score,class,Interests
M,Mary,95,A,"Violin, Reading"
B,Bob,93,A,"Reading, Blogging"


In [5]:
# Equality compares the whole string, so only a row whose "Interests" is
# exactly "Reading, Blogging" is selected; a partial match is not enough.
df.loc[df["Interests"].eq("Reading, Blogging")]

Unnamed: 0,name,score,class,Interests
B,Bob,93,A,"Reading, Blogging"


# Select rows whose column contains a specified string (str.contains())

In [6]:
# Substring search: keep the rows whose "Interests" text contains "Reading"
# anywhere in the string (the search is case-sensitive here).
df.loc[df["Interests"].str.contains("Reading")]

Unnamed: 0,name,score,class,Interests
M,Mary,95,A,"Violin, Reading"
B,Bob,93,A,"Reading, Blogging"
R,Robert,88,B,"Learning, Reading, Writing"


In [7]:
import numpy as np
# Build a one-row frame for a seventh student, "Tom", whose "Interests" value
# is missing (NaN), and stack it under the original data. pd.concat returns a
# new frame, so df itself is left untouched.
new_row = pd.DataFrame(
    data={"name": ["Tom"], "score": [85], "class": ["C"], "Interests": [np.nan]},
    index=["T"],
)
df2 = pd.concat([df, new_row])  # rows are stacked: axis=0 is the default
df2

Unnamed: 0,name,score,class,Interests
K,Kevin,80,A,"Writing, Sports"
J,Jack,90,B,"Sports, Traveling"
M,Mary,95,A,"Violin, Reading"
B,Bob,93,A,"Reading, Blogging"
R,Robert,88,B,"Learning, Reading, Writing"
A,Amy,81,B,"Volunteering, Piano"
T,Tom,85,C,


In [8]:
# "Tom" has NaN in "Interests", so the result of str.contains() is not a pure
# True/False mask for that row, and using it to index df2 raises ValueError.
# The error is the point of this cell, so it is caught and printed rather than
# left uncaught: an uncaught exception would stop "Restart & Run All" here and
# the cells that follow would never execute.
try:
    df2[df2["Interests"].str.contains("Reading")]
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: ignored

In [9]:
# Replace the missing "Interests" value with an empty string so that
# str.contains() returns a clean boolean mask. The filled values are kept in a
# separate Series instead of being written back into df2, so df2 stays exactly
# as it was displayed earlier and this cell gives the same result however
# often (and in whatever order) the cells are re-run.
interests_filled = df2["Interests"].fillna("")
df2[interests_filled.str.contains("Reading")]

Unnamed: 0,name,score,class,Interests
M,Mary,95,A,"Violin, Reading"
B,Bob,93,A,"Reading, Blogging"
R,Robert,88,B,"Learning, Reading, Writing"


In [10]:
# case=False makes the search case-insensitive, so the lowercase "reading"
# still finds the rows that contain "Reading".
df.loc[df["Interests"].str.contains("reading", case=False)]

Unnamed: 0,name,score,class,Interests
M,Mary,95,A,"Violin, Reading"
B,Bob,93,A,"Reading, Blogging"
R,Robert,88,B,"Learning, Reading, Writing"


In [11]:
# The pattern is interpreted as a regular expression by default:
# ".*V.*" matches any text that contains an uppercase "V".
df.loc[df["Interests"].str.contains(".*V.*")]

Unnamed: 0,name,score,class,Interests
M,Mary,95,A,"Violin, Reading"
A,Amy,81,B,"Volunteering, Piano"


In [12]:
# regex=False turns the pattern into a literal string. No "Interests" value
# contains the literal characters ".*V.*", so the result is empty.
df.loc[df["Interests"].str.contains(".*V.*", regex=False)]

Unnamed: 0,name,score,class,Interests


In [13]:
# Prefix test: keep the rows whose "Interests" text begins with "Learning".
df.loc[df["Interests"].str.startswith("Learning")]

Unnamed: 0,name,score,class,Interests
R,Robert,88,B,"Learning, Reading, Writing"


In [14]:
# Suffix test: keep the rows whose "Interests" text ends with "ing". Only the
# end of the whole string is checked, not the end of each listed interest.
df.loc[df["Interests"].str.endswith("ing")]

Unnamed: 0,name,score,class,Interests
J,Jack,90,B,"Sports, Traveling"
M,Mary,95,A,"Violin, Reading"
B,Bob,93,A,"Reading, Blogging"
R,Robert,88,B,"Learning, Reading, Writing"


In [15]:
# str.match() applies the regular expression from the start of each string.
# The pattern needs three comma-separated, non-empty parts, so only rows that
# list at least three interests are kept.
df.loc[df["Interests"].str.match("[^,]+,[^,]+,[^,]+")]

Unnamed: 0,name,score,class,Interests
R,Robert,88,B,"Learning, Reading, Writing"
