In [1]:
# Four functions to level up
# Assign, Map, Query and Explode

## Assign

In [3]:
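# `assign` returns a new DataFrame with the given columns added (or replaced).
# Each value can be a scalar, an array-like, or a callable that receives the DataFrame.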

In [1]:
import pandas as pd
# Load the iris measurements from the seaborn example-data repository.
data = pd.read_csv(
    "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv"
)

# Aggregating with a list (`["mean"]`) produces two-level column labels such
# as ("sepal_width", "mean"), which is why the lambda below indexes by tuple.
species_means = data.groupby("species").agg(["mean"])

# `assign` accepts either a callable (evaluated against the frame being
# extended) or a plain value (broadcast to every row).
grouped = species_means.assign(
    # Each species' mean sepal width relative to the average of those means.
    fancy_column=lambda df: (
        df[("sepal_width", "mean")] / df[("sepal_width", "mean")].mean()
    ),
    useless_column="I am useless",
)
grouped

Unnamed: 0_level_0,sepal_length,sepal_width,petal_length,petal_width,fancy_column,useless_column
Unnamed: 0_level_1,mean,mean,mean,mean,Unnamed: 5_level_1,Unnamed: 6_level_1
species,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
setosa,5.006,3.428,1.462,0.246,1.121239,I am useless
versicolor,5.936,2.77,4.26,1.326,0.906018,I am useless
virginica,6.588,2.974,5.552,2.026,0.972743,I am useless


## Map

In [2]:
import pandas as pd
# Load the raw iris table first, then derive the two mapped columns from it.
iris_raw = pd.read_csv(
    "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv"
)

data = iris_raw.assign(
    # `map` with a dict: translate each boolean of the comparison into a label.
    to_big_to_small=lambda df: (df["sepal_width"] > 3).map(
        {True: "Too Big", False: "Perfect"}
    ),
    # `map` with a function: reverse every species name character by character.
    inverted_name=lambda df: df["species"].map(lambda name: name[::-1]),
)
data

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,to_big_to_small,inverted_name
0,5.1,3.5,1.4,0.2,setosa,Too Big,asotes
1,4.9,3.0,1.4,0.2,setosa,Perfect,asotes
2,4.7,3.2,1.3,0.2,setosa,Too Big,asotes
3,4.6,3.1,1.5,0.2,setosa,Too Big,asotes
4,5.0,3.6,1.4,0.2,setosa,Too Big,asotes
...,...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica,Perfect,acinigriv
146,6.3,2.5,5.0,1.9,virginica,Perfect,acinigriv
147,6.5,3.0,5.2,2.0,virginica,Perfect,acinigriv
148,6.2,3.4,5.4,2.3,virginica,Too Big,acinigriv


## Query

In [3]:
import pandas as pd
data = pd.read_csv(
    "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv"
)
length_th = 0.5

# A column name containing a space cannot be written as a keyword argument,
# so it is handed to `assign` through dict unpacking.
with_pw_squared = data.assign(**{"PW Squared": data["petal_width"] ** 2})

# Inside `query`, backticks quote column names that are not valid Python
# identifiers and `@name` refers to a Python variable from the calling scope.
filtered_data = with_pw_squared.query(
    "`PW Squared` > 0.4 and petal_length > @length_th and species != 'setosa'"
)
filtered_data

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,PW Squared
50,7.0,3.2,4.7,1.4,versicolor,1.96
51,6.4,3.2,4.5,1.5,versicolor,2.25
52,6.9,3.1,4.9,1.5,versicolor,2.25
53,5.5,2.3,4.0,1.3,versicolor,1.69
54,6.5,2.8,4.6,1.5,versicolor,2.25
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica,5.29
146,6.3,2.5,5.0,1.9,virginica,3.61
147,6.5,3.0,5.2,2.0,virginica,4.00
148,6.2,3.4,5.4,2.3,virginica,5.29


## Explode

In [5]:
import pandas as pd
n_rows = 3

# Row i carries the list [0, 1, ..., i**2] in "a" and the scalar i in "b".
nested = pd.DataFrame(
    {
        "a": [list(range(i * i + 1)) for i in range(n_rows)],
        "b": list(range(n_rows)),
    }
)

# `explode` emits one row per list element, repeating the index label and the
# values of the other columns; the exploded column has object dtype, so it is
# cast back to int afterwards.
result = nested.explode("a").astype({"a": int})
result

Unnamed: 0,a,b
0,0,0
1,0,1
1,1,1
2,0,2
2,1,2
2,2,2
2,3,2
2,4,2
