In [None]:
import pandas as pd

# Load the NPS parks table from the repository's data directory.
parks_df = pd.read_parquet(path="../../data/nps/nps_public_data_parks.parquet")

# Preview the first five rows as the cell's output.
parks_df.head(5)

We've already gotten pretty fancy with our pandas filtering, so here are a few more helpful techniques for filtering data.

The `isin()` method filters rows based on a list of values. You can also use string methods like `str.contains()` for filtering strings, similar to `LIKE` in SQL. Here's an example using both, with data we're already familiar with:

In [None]:
# Keep only rows whose designation mentions "National Park". `na=False` makes a
# missing designation count as a non-match, so the boolean mask never contains
# NaN (which pandas refuses to index with).
# `.assign()` builds a new DataFrame, so `states_list` is never written onto a
# filtered slice of `parks_df` (the pattern behind SettingWithCopyWarning), and
# the split is computed from the filtered rows themselves.
national_parks_df = parks_df.loc[
    parks_df["designation"].str.contains("National Park", na=False)
].assign(states_list=lambda parks: parks["states"].str.split(","))

# One output row per element of each park's `states_list`.
states_exploded = national_parks_df.explode("states_list")

# Rows whose state is one of the six listed codes; show only name and state.
states_exploded.loc[
    states_exploded["states_list"].isin(["CA", "NV", "AZ", "UT", "CO", "NM"]),
    ["name", "states_list"],
]

We can also use the `isnull()` and `notnull()` methods to filter rows based on null (NaN) values in a column.

In [None]:
import pandas as pd

# Load the alerts table and, in the same chain, derive `alert_date`: the
# calendar-date part of `lastIndexedDate` after parsing it as a datetime.
alerts_df = pd.read_parquet(
    "../../data/nps/nps_public_data_alerts.parquet"
).assign(
    alert_date=lambda alerts: pd.to_datetime(alerts["lastIndexedDate"]).dt.date
)

In [None]:
# Count the non-null descriptions for every (alert_date, category) pair and
# flatten the grouped result into a regular DataFrame, naming the count column
# `num_alerts` as part of the same chain.
num_alerts = (
    alerts_df.groupby(["alert_date", "category"])["description"]
    .count()
    .reset_index(name="num_alerts")
)

num_alerts.head()

But empty strings are _not_ considered null by pandas, so we'll first have to convert them to NaN!

In [None]:
import numpy as np

# Turn empty-string categories into NaN so that `notnull()` can filter them out.
# The result is assigned back to the column instead of calling
# `replace(..., inplace=True)` on `num_alerts["category"]`: an in-place call on a
# selected column raises a FutureWarning in pandas 2.x and does not update
# `num_alerts` at all once Copy-on-Write is enabled.
num_alerts["category"] = num_alerts["category"].replace("", np.nan)

# Keep only the rows that have a real (non-null) category.
num_alerts[num_alerts["category"].notnull()]