# [Polars](https://www.pola.rs/) cheat sheet

[![Polars cheat sheet: download as PDF](https://img.shields.io/badge/Polars%20cheat%20sheet-Download%20as%20PDF-green)](https://franzdiebold.github.io/polars-cheat-sheet/Polars_cheat_sheet.pdf)

Most examples were taken from the official [Polars user guide](https://pola-rs.github.io/polars-book/user-guide/) and the structure is inspired by the [Pandas cheat sheet](https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf).

### Install

In [None]:
!pip install polars

### Import

In [1]:
import polars as pl

### Creating/reading DataFrames

In [2]:
# Create DataFrame
df = pl.DataFrame(
    {
        "nrs": [1, 2, 3, None, 5],
        "names": ["foo", "ham", "spam", "egg", None],
        "random": [0.3, 0.7, 0.1, 0.9, 0.6],
        "groups": ["A", "A", "B", "C", "B"],
    }
)

In [None]:
# Read CSV
df = pl.read_csv("https://j.mp/iriscsv", has_header=True)

In [None]:
# Read parquet
df = pl.read_parquet("path.parquet")

### Subset Observations - rows

In [3]:
# Filter: Extract rows that meet logical criteria.
df.filter(pl.col("random") > 0.5)
df.filter((pl.col("groups") == "B") & (pl.col("random") > 0.5))

nrs,names,random,groups
i64,str,f64,str
5,,0.6,"""B"""


In [4]:
# Sample
# Randomly select fraction of rows.
df.sample(frac=0.5)

# Randomly select n rows.
df.sample(n=2)

nrs,names,random,groups
i64,str,f64,str
2,"""ham""",0.7,"""A"""
1,"""foo""",0.3,"""A"""


In [5]:
# Select first n rows
df.head(n=2)

# Select last n rows.
df.tail(n=2)

nrs,names,random,groups
i64,str,f64,str
,"""egg""",0.9,"""C"""
5.0,,0.6,"""B"""


### Subset Variables - columns

In [6]:
# Select multiple columns with specific names.
df.select(["nrs", "names"])

nrs,names
i64,str
1.0,"""foo"""
2.0,"""ham"""
3.0,"""spam"""
,"""egg"""
5.0,


In [7]:
# Select columns whose name matches regular expression regex.
df.select(pl.col("^n.*$"))

nrs,names
i64,str
1.0,"""foo"""
2.0,"""ham"""
3.0,"""spam"""
,"""egg"""
5.0,


### Subsets - rows and columns

In [31]:
# Select rows 2-4.
df[2:4, :]

nrs,names,random,groups
i64,str,f64,str
3.0,"""spam""",0.1,"""B"""
,"""egg""",0.9,"""C"""


In [32]:
# Select columns in positions 1 and 3 (first column is 0).
df[:, [1, 3]]

names,groups
str,str
"""foo""","""A"""
"""ham""","""A"""
"""spam""","""B"""
"""egg""","""C"""
,"""B"""


In [33]:
# Select rows meeting logical condition, and only the specific columns.
df[df["random"] > 0.5, ["names", "groups"]]

names,groups
str,str
"""ham""","""A"""
"""egg""","""C"""
,"""B"""


### Reshaping Data – Change layout, sorting, renaming

In [18]:
df2 = pl.DataFrame(
    {
        "nrs": [6],
        "names": ["wow"],
        "random": [0.9],
        "groups": ["B"],
    }
)

df3 = pl.DataFrame(
    {
        "primes": [2, 3, 5, 7, 11],
    }
)

In [17]:
# Append rows of DataFrames.
pl.concat([df, df2])

nrs,names,random,groups
i64,str,f64,str
1.0,"""foo""",0.3,"""A"""
2.0,"""ham""",0.7,"""A"""
3.0,"""spam""",0.1,"""B"""
,"""egg""",0.9,"""C"""
5.0,,0.6,"""B"""
6.0,"""wow""",0.9,"""B"""


In [19]:
# Append columns of DataFrames
pl.concat([df, df3], how="horizontal")

nrs,names,random,groups,primes
i64,str,f64,str,i64
1.0,"""foo""",0.3,"""A""",2
2.0,"""ham""",0.7,"""A""",3
3.0,"""spam""",0.1,"""B""",5
,"""egg""",0.9,"""C""",7
5.0,,0.6,"""B""",11


In [16]:
# Gather columns into rows.
df.melt(id_vars=["nrs", "names"], value_vars=["random", "groups"])

PanicException: should not fail: SchemaMisMatch("cannot append Series; data types don't match")

In [25]:
# Spread rows into columns.
df.pivot(values="nrs", index="groups", columns="names")

groups,egg,foo,ham,null,spam
str,i64,i64,i64,i64,i64
"""A""",,1.0,2.0,,
"""B""",,,,5.0,3.0
"""C""",,,,,


In [26]:
# Order rows by values of a column (low to high).
df.sort("random")

nrs,names,random,groups
i64,str,f64,str
3.0,"""spam""",0.1,"""B"""
1.0,"""foo""",0.3,"""A"""
5.0,,0.6,"""B"""
2.0,"""ham""",0.7,"""A"""
,"""egg""",0.9,"""C"""


In [27]:
# Order rows by values of a column (high to low).
df.sort("random", reverse=True)

nrs,names,random,groups
i64,str,f64,str
,"""egg""",0.9,"""C"""
2.0,"""ham""",0.7,"""A"""
5.0,,0.6,"""B"""
1.0,"""foo""",0.3,"""A"""
3.0,"""spam""",0.1,"""B"""


In [30]:
# Rename the columns of a DataFrame.
df.rename({"nrs": "idx"})

idx,names,random,groups
i64,str,f64,str
1.0,"""foo""",0.3,"""A"""
2.0,"""ham""",0.7,"""A"""
3.0,"""spam""",0.1,"""B"""
,"""egg""",0.9,"""C"""
5.0,,0.6,"""B"""


In [28]:
# Drop columns from DataFrame.
df.drop(["names", "random"])

nrs,groups
i64,str
1.0,"""A"""
2.0,"""A"""
3.0,"""B"""
,"""C"""
5.0,"""B"""
