In [1]:
import polars as pl
import numpy as np

In [2]:
nrows = 5000
rng = np.random.default_rng(42)

df = pl.DataFrame(
    {
        "sqm": rng.gamma(shape=1.5, scale=80, size=nrows),
        "year": rng.integers(low=1970, high=2022, size=nrows),
        "energy_rating": rng.choice(['A', 'B', 'C', 'D', 'F', 'G'], size=nrows),
        "good": rng.choice([True, False], size=nrows),

    }
)
print(df)

shape: (5_000, 4)
┌────────────┬──────┬───────────────┬───────┐
│ sqm        ┆ year ┆ energy_rating ┆ good  │
│ ---        ┆ ---  ┆ ---           ┆ ---   │
│ f64        ┆ i64  ┆ str           ┆ bool  │
╞════════════╪══════╪═══════════════╪═══════╡
│ 122.21758  ┆ 2020 ┆ D             ┆ false │
│ 174.357146 ┆ 2000 ┆ G             ┆ false │
│ 104.821556 ┆ 1970 ┆ F             ┆ true  │
│ 91.889062  ┆ 1980 ┆ A             ┆ false │
│ 191.809985 ┆ 1976 ┆ A             ┆ false │
│ …          ┆ …    ┆ …             ┆ …     │
│ 176.581779 ┆ 1982 ┆ B             ┆ true  │
│ 214.25315  ┆ 1970 ┆ B             ┆ false │
│ 119.995315 ┆ 2007 ┆ C             ┆ true  │
│ 193.937095 ┆ 1995 ┆ A             ┆ true  │
│ 44.025054  ┆ 2010 ┆ A             ┆ false │
└────────────┴──────┴───────────────┴───────┘


In [3]:
# count number of rows where A == True
df.select(pl.col("good").sum() / pl.len()).item()

0.4932

In [4]:
# you can access a single element of a Polars dataframe in at least two ways...
irow = 3
print(df[irow, 'energy_rating'])
print(df.item(irow, 'energy_rating')) 

A
A


In [5]:
# ...but only the first way allows you to also edit the element!
df[irow, 'energy_rating'] = "Z"
print(df[irow, 'energy_rating'])

Z


In [6]:
# Instead, the other one uses a function call, and you cannot assign to a function call. Try uncommenting next line and executing this cell!
# df.item(irow, 'energy_rating') = "Z"

In [7]:
# new syntax to replace a column
new = np.arange(nrows)
df = df.with_columns(pl.Series('sqm', new))
df

sqm,year,energy_rating,good
i32,i64,str,bool
0,2020,"""D""",false
1,2000,"""G""",false
2,1970,"""F""",true
3,1980,"""Z""",false
4,1976,"""A""",false
…,…,…,…
4995,1982,"""B""",true
4996,1970,"""B""",false
4997,2007,"""C""",true
4998,1995,"""A""",true


In [8]:
# The new syntax doesn't check for the column to exist already, so this can introduce subtle bugs
df = df.with_columns(pl.Series('sq', new))
print(df)

shape: (5_000, 5)
┌──────┬──────┬───────────────┬───────┬──────┐
│ sqm  ┆ year ┆ energy_rating ┆ good  ┆ sq   │
│ ---  ┆ ---  ┆ ---           ┆ ---   ┆ ---  │
│ i32  ┆ i64  ┆ str           ┆ bool  ┆ i32  │
╞══════╪══════╪═══════════════╪═══════╪══════╡
│ 0    ┆ 2020 ┆ D             ┆ false ┆ 0    │
│ 1    ┆ 2000 ┆ G             ┆ false ┆ 1    │
│ 2    ┆ 1970 ┆ F             ┆ true  ┆ 2    │
│ 3    ┆ 1980 ┆ Z             ┆ false ┆ 3    │
│ 4    ┆ 1976 ┆ A             ┆ false ┆ 4    │
│ …    ┆ …    ┆ …             ┆ …     ┆ …    │
│ 4995 ┆ 1982 ┆ B             ┆ true  ┆ 4995 │
│ 4996 ┆ 1970 ┆ B             ┆ false ┆ 4996 │
│ 4997 ┆ 2007 ┆ C             ┆ true  ┆ 4997 │
│ 4998 ┆ 1995 ┆ A             ┆ true  ┆ 4998 │
│ 4999 ┆ 2010 ┆ A             ┆ false ┆ 4999 │
└──────┴──────┴───────────────┴───────┴──────┘


In [9]:
df.drop('sq')
# replace_column can be combined with df.columns to prevent such bugs
df.replace_column(df.columns.index("name"), pl.Series('name', new))

ValueError: 'name' is not in list

In [10]:
# how to add a constant value to a column
df = df.with_columns(pl.col('sqm') + 500.0)
print(df)

shape: (5_000, 5)
┌────────┬──────┬───────────────┬───────┬──────┐
│ sqm    ┆ year ┆ energy_rating ┆ good  ┆ sq   │
│ ---    ┆ ---  ┆ ---           ┆ ---   ┆ ---  │
│ f64    ┆ i64  ┆ str           ┆ bool  ┆ i32  │
╞════════╪══════╪═══════════════╪═══════╪══════╡
│ 500.0  ┆ 2020 ┆ D             ┆ false ┆ 0    │
│ 501.0  ┆ 2000 ┆ G             ┆ false ┆ 1    │
│ 502.0  ┆ 1970 ┆ F             ┆ true  ┆ 2    │
│ 503.0  ┆ 1980 ┆ Z             ┆ false ┆ 3    │
│ 504.0  ┆ 1976 ┆ A             ┆ false ┆ 4    │
│ …      ┆ …    ┆ …             ┆ …     ┆ …    │
│ 5495.0 ┆ 1982 ┆ B             ┆ true  ┆ 4995 │
│ 5496.0 ┆ 1970 ┆ B             ┆ false ┆ 4996 │
│ 5497.0 ┆ 2007 ┆ C             ┆ true  ┆ 4997 │
│ 5498.0 ┆ 1995 ┆ A             ┆ true  ┆ 4998 │
│ 5499.0 ┆ 2010 ┆ A             ┆ false ┆ 4999 │
└────────┴──────┴───────────────┴───────┴──────┘
