# Python Pandas Notes

## Sorting

In [None]:
# Sort by a single column, largest values first.
coffee.sort_values("Units Sold", ascending=False)

# Sort by several columns at once. `ascending` takes one flag per column,
# matched by position: "Day" stays in descending order (as before) and rows
# that share a day are ordered by "Units Sold" ascending.
coffee.sort_values(["Day", "Units Sold"], ascending=[False, True])

## Filtering Data

In [None]:
# Print a summary of the frame before filtering.
bios.info()

# Convert height & weight to float if needed
# Row mask and column list combined in one .loc lookup
bios.loc[bios["height_cm"].gt(215), ["name", "height_cm"]]

# Several conditions: combine the boolean masks element-wise with &
bios[bios["height_cm"].gt(215) & bios["born_country"].eq("USA")]

# Case-sensitive substring match through the .str accessor
bios[bios["name"].str.contains("Keith", case=True)]

# The same style of filter written as a .query() expression string.
# NOTE(review): "biosnicity" looks like a find/replace artifact rather than
# a real column name - confirm against the columns listed by bios.info().
bios.query("born_country == 'USA' and biosnicity == 'White'")

## Adding & Removing Columns

In [None]:
# A scalar assignment is broadcast to every row of the new column.
coffee["price"] = 4.99

# Vectorised if/else: 3.99 where the type is Espresso, 5.99 everywhere else.
import numpy as np
coffee["new_price"] = np.where(coffee["coffee_type"].eq("Espresso"), 3.99, 5.99)

# drop() returns a new frame by default, so `coffee` itself is unchanged
# by the first two calls; only inplace=True modifies it.
coffee.drop(index=0)  # copy without the row labelled 0
coffee.drop("price", axis=1)  # copy without the price column
coffee.drop(columns="price", inplace=True)  # price column removed from coffee itself

In [None]:
# Selecting multiple columns.
# Every column used further down in this cell ("Units Sold", "new_price")
# must be kept here; selecting only "Day" and "coffee_type" would make the
# revenue calculation raise KeyError and turn the rename into a no-op.
# .copy() makes the selection an independent frame, so the assignment below
# cannot trigger SettingWithCopyWarning.
coffee = coffee[["Day", "coffee_type", "Units Sold", "new_price"]].copy()

# New column based on calculations (element-wise product, one value per row)
coffee["revenue"] = coffee["Units Sold"] * coffee["new_price"]
coffee.head()

# Renaming columns
coffee.rename(columns={"new_price": "Price"}, inplace=True)
coffee

## String Operations

In [None]:
# Work on an independent copy so the new columns are not added to `bios`.
bios_new = bios.copy()

# First name: split each name on single spaces and keep the first piece.
bios_new["first_name"] = bios_new["name"].str.split(" ").str.get(0)

# The derived column can be used in a query expression like any other.
bios_new.query("first_name == 'Keith'")

# NOTE(review): this previews `bios`, which does not carry the new
# first_name column - confirm whether bios_new.head() was intended.
bios.head()

## DateTime Operations

In [None]:
# Parse born_date into pandas datetimes, stored in a separate column.
bios_new["born_datetime"] = pd.to_datetime(arg=bios_new["born_date"])
# The .dt accessor exposes datetime components; keep just the year.
bios_new["born_year"] = bios_new["born_datetime"].dt.year

# Show only the name next to the derived year.
bios_new.loc[:, ["name", "born_year"]]

## Saving DataFrame

In [None]:
# Write bios_new to a CSV file; index=False leaves the row index out of it.
bios_new.to_csv(path_or_buf="data/bios-new.csv", index=False)

## Custom Columns using Lambda and Apply

In [None]:
# Bucket each height into a label:
#   below 165 -> "Short", 165 up to (not including) 185 -> "Average",
#   185 and above -> "Tall".
# NaN fails every comparison, so "Tall" is tested explicitly instead of being
# the catch-all branch; a missing height yields None rather than "Tall".
bios["height_category"] = bios["height_cm"].apply(
    lambda x: "Short" if x < 165
    else "Average" if x < 185
    else "Tall" if x >= 185
    else None
)

In [None]:
def categorize_athlete(row):
    """Return a weight-class label for one athlete row.

    Args:
        row: Mapping-like object (for example a DataFrame row) indexable by
            "height_cm" and "weight_kg".

    Returns:
        "Lightweight" when height is above 195 and weight is below 75;
        otherwise "Midweight" when height is below 185 or weight is at
        most 80; otherwise "Heavyweight". None when a missing (NaN)
        measurement leaves the category undetermined.
    """
    height = row["height_cm"]
    weight = row["weight_kg"]

    if height > 195 and weight < 75:
        return "Lightweight"
    if height < 185 or weight <= 80:
        return "Midweight"
    # Spelled out instead of a bare `else`: comparisons against NaN are
    # always False, so a row with a missing measurement would otherwise
    # fall through and be labelled "Heavyweight".
    if height >= 185 and weight > 80:
        return "Heavyweight"
    return None

# Call categorize_athlete once per row and store the result as a new column.
bios["category"] = bios.apply(categorize_athlete, axis="columns")
bios.head()

## Merging and Concatenating Data