# [Lazy API](https://docs.pola.rs/user-guide/concepts/lazy-api/)

Polars supports two modes of operation:
- lazy: the query is only evaluated when it is collected, which can give a performance boost.
- eager: commands are executed immediately.

## eager way

In [1]:
import polars as pl

# Eager pipeline: each step runs as soon as it is called.
df = (
    pl.read_csv("data/iris/iris.data")
    # Drop the "Iris-" prefix from the class labels.
    .with_columns(pl.col("class").str.strip_prefix("Iris-"))
    # Switch to snake_case headers and call the label column "species".
    .rename(
        {
            "sepal length": "sepal_length",
            "sepal width": "sepal_width",
            "petal length": "petal_length",
            "petal width": "petal_width",
            "class": "species",
        }
    )
)
df

sepal_length,sepal_width,petal_length,petal_width,species
f64,f64,f64,f64,str
5.1,3.5,1.4,0.2,"""setosa"""
4.9,3.0,1.4,0.2,"""setosa"""
4.7,3.2,1.3,0.2,"""setosa"""
4.6,3.1,1.5,0.2,"""setosa"""
5.0,3.6,1.4,0.2,"""setosa"""
…,…,…,…,…
6.3,2.5,5.0,1.9,"""virginica"""
6.5,3.0,5.2,2.0,"""virginica"""
6.2,3.4,5.4,2.3,"""virginica"""
5.9,3.0,5.1,1.8,"""virginica"""


In [2]:
# Keep only the rows whose sepal length exceeds 5.
df_small = df.filter(pl.col("sepal_length") > 5)
# Average sepal width for each species.
df_agg = df_small.group_by("species").agg(pl.mean("sepal_width"))
df_agg

species,sepal_width
str,f64
"""versicolor""",2.804255
"""virginica""",2.983673
"""setosa""",3.713636


## lazy way

Uses less memory and CPU (only the data that is needed is loaded).

In [5]:
# Describe the whole pipeline as a lazy query; the work happens in collect().
q = (
    pl.scan_csv("data/iris/iris.data")
    # Drop the "Iris-" prefix from the class labels.
    .with_columns(pl.col("class").str.strip_prefix("Iris-"))
    # Switch to snake_case headers and call the label column "species".
    .rename(
        {
            "sepal length": "sepal_length",
            "sepal width": "sepal_width",
            "petal length": "petal_length",
            "petal width": "petal_width",
            "class": "species",
        }
    )
    # Keep only the rows whose sepal length exceeds 5.
    .filter(pl.col("sepal_length") > 5)
    # Average sepal width for each species.
    .group_by("species")
    .agg(pl.col("sepal_width").mean())
)
# Execute the query and materialize the result.
df = q.collect()
df

species,sepal_width
str,f64
"""versicolor""",2.804255
"""setosa""",3.713636
"""virginica""",2.983673


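In the query plan below you can see that unneeded operations (like reading and renaming the unused columns) are not executed: only 3 of the 5 columns are read (`PROJECT 3/5 COLUMNS`), and the filter is applied directly in the CSV scan (`SELECTION`).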

In [7]:
# Print the query plan for the lazy query `q` built above.
print(q.explain())

AGGREGATE
  [col("sepal_width").mean()] BY [col("species")]
  FROM
  RENAME
     WITH_COLUMNS:
     [col("class").str.strip_prefix(["Iris-"])] 
      simple π 3/3 ["sepal width", "class", ... 1 other column]
        Csv SCAN [data/iris/iris.data]
        PROJECT 3/5 COLUMNS
        SELECTION: [(col("sepal length")) > (5.0)]


In [9]:
# Column names and dtypes only: two integer columns and three Float64 columns.
schema = pl.Schema(
    {
        "int_1": pl.Int16,
        "int_2": pl.Int32,
        "float_1": pl.Float64,
        "float_2": pl.Float64,
        "float_3": pl.Float64,
    }
)
# Build a LazyFrame from the schema alone and print its plan.
# pl.col(pl.Float64) selects by dtype: in the printed plan it is expanded to
# float_1, float_2 and float_3, each aliased with the "*1.1" suffix.
print(
    pl.LazyFrame(schema=schema)
    .select((pl.col(pl.Float64) * 1.1).name.suffix("*1.1"))
    .explain()
)

SELECT [[(col("float_1")) * (1.1)].alias("float_1*1.1"), [(col("float_2")) * (1.1)].alias("float_2*1.1"), [(col("float_3")) * (1.1)].alias("float_3*1.1")]
FROM
  DF ["int_1", "int_2", "float_1", "float_2", ...]; PROJECT["float_1", "float_2", "float_3"] 3/5 COLUMNS
