# DataFrame: Manipulation

In [None]:
import datetime as dt

import pandas as pd

In [None]:
def generate_data():
    """Build a fresh four-row example DataFrame.

    Returns:
        A new DataFrame with an ``id`` column (letters "a" to "d") and a
        ``score`` column (integers). Every call constructs a new object.
    """
    ids = ["a", "b", "c", "d"]
    points = [4, 6, 9, 8]
    return pd.DataFrame({"id": ids, "score": points})

scores = generate_data()
scores

## Creating New Columns

In [None]:
# Assign in-place using brackets [...].
# Note: Changes the original DataFrame!
scores["passed"] = scores["score"] > 5.5
scores

In [None]:
# Reset the data
scores = generate_data()

In [None]:
def who_passed(df):
    """Return the IDs of the rows whose score is above 5.5.

    Note: this writes a boolean ``passed`` column into ``df`` itself, so
    the caller's DataFrame is modified as a side effect.
    """
    above_threshold = df["score"] > 5.5
    df["passed"] = above_threshold
    passed_ids = df.loc[above_threshold, "id"]
    return list(passed_ids)

# User just wants to know who passed...
passed = who_passed(scores)
# Interpolate the list into the f-string (the original had no placeholder).
print(f"Passed: {passed}")

# But now the data has changed unexpectedly!
# who_passed() added a "passed" column to the DataFrame it was given.
scores

In [None]:
# Reset the data
scores = generate_data()

# Using assign() returns a copy.
scores.assign(passed=scores["score"] > 5.5)

In [None]:
# Original DataFrame is left unchanged!
scores

In [None]:
# Performing multiple assignments in a single assign() call.
scores.assign(
    # Boolean column derived from the existing score column.
    passed=scores["score"] > 5.5,
    # Text label derived from the column created just above.
    # Note: passed column is *NOT* in the original DataFrame!
    # assign() evaluates its keyword arguments in order and hands each
    # callable the DataFrame including the columns assigned before it.
    label=lambda frame: frame["passed"].replace({True: "Passed", False: "Failed"}),
)

## Applying Functions

In [None]:
df = pd.DataFrame({
    "name": ["john", "JANE", "Jack"],
    "lastname": ["doe", "DOE", "DOE"],
    "age": [45, 26, None],
})
df

In [None]:
# Using functions on columns is identical to Series.
df["name"].map(str.capitalize)

In [None]:
# apply() can call a function once per row.
# Note: axis="columns" (the same as axis=1) passes each row to the function.
# The result is bound to _ because print() returns None for every row.
_ = df.apply(lambda row: print(list(row)), axis="columns")

In [None]:
def make_fullname(person, case):
    """Join a person's first and last name into one cased string.

    Args:
        person: Mapping-like object with "name" and "lastname" entries
            (for example a dict or a DataFrame row).
        case: "lower" for a lower-case result; any other value yields
            an upper-case result.

    Returns:
        The name and lastname separated by a single space, in the
        requested case.
    """
    fullname = person["name"] + " " + person["lastname"]
    return fullname.lower() if case == "lower" else fullname.upper()
    

In [None]:
make_fullname({"name": "JOHN", "lastname": "DOE"}, case="lower")

In [None]:
# Extra keyword arguments given to apply() are forwarded to the function.
# Here case="lower" ends up as the case argument of make_fullname.
df.apply(make_fullname, axis="columns", case="lower")

In [None]:
# Use map() to apply a function to all DataFrame cells.
# Note: DataFrame.map() is the current name of applymap(), which has been
# deprecated since pandas 2.1. This call requires pandas >= 2.1.
df[["name", "lastname"]].map(str.upper)

### Method Chaining

In [None]:
df

In [None]:
# Method chaining can produce cleaner code!
# Steps: keep rows with a known age, normalise the name columns,
# derive full_name from the normalised columns, then order by age.
(
    df[df["age"].notna()]
    .assign(
        name=df["name"].str.capitalize(),
        lastname=df["lastname"].str.capitalize(),
        # The callable receives the frame with the new name/lastname values.
        full_name=lambda people: people["name"] + " " + people["lastname"],
    )
    .sort_values(by="age")
)