https://docs.pola.rs/user-guide/concepts/data-types/categoricals/

In [1]:
import polars as pl

In [2]:
# Declare the allowed categories up front as an Enum dtype and show its repr.
enum_dtype = pl.Enum(
    categories=["Polar", "Panda", "Brown"],
)
print(enum_dtype)

Enum(categories=['Polar', 'Panda', 'Brown'])


In [6]:
# Sample string values; later cells encode them as Enum and Categorical series.
data = [
    "Polar",
    "Panda",
    "Brown",
    "Brown",
    "Polar",
]

In [7]:
# Encode the sample strings using the Enum dtype declared in an earlier cell.
enum_series = pl.Series(
    values=data,
    dtype=enum_dtype,
)
print(enum_series)

shape: (5,)
Series: '' [enum]
[
	"Polar"
	"Panda"
	"Brown"
	"Brown"
	"Polar"
]


In [8]:
# Same sample strings, this time stored with the Categorical dtype
# (no category list is supplied by the caller).
cat_series = pl.Series(values=data, dtype=pl.Categorical)
print(cat_series)

shape: (5,)
Series: '' [cat]
[
	"Polar"
	"Panda"
	"Brown"
	"Brown"
	"Polar"
]


In [10]:
# Two Categorical series created independently, outside any StringCache context.
cat_series = pl.Series(
    values=["Polar", "Panda", "Brown", "Brown", "Polar"],
    dtype=pl.Categorical,
)
cat2_series = pl.Series(
    values=["Panda", "Brown", "Brown", "Polar", "Polar"],
    dtype=pl.Categorical,
)
# The append below triggers a CategoricalRemappingWarning: the local
# categoricals have different encodings, so an expensive re-encoding is done.
print(cat_series.append(cat2_series))

shape: (10,)
Series: '' [cat]
[
	"Polar"
	"Panda"
	"Brown"
	"Brown"
	"Polar"
	"Panda"
	"Brown"
	"Brown"
	"Polar"
	"Polar"
]


  print(cat_series.append(cat2_series))


In [11]:
# Create both Categorical series inside a single StringCache context and
# append them there; the recorded output for this cell shows no remapping warning.
with pl.StringCache():
    cat_series = pl.Series(
        values=["Polar", "Panda", "Brown", "Brown", "Polar"],
        dtype=pl.Categorical,
    )
    cat2_series = pl.Series(
        values=["Panda", "Brown", "Brown", "Polar", "Polar"],
        dtype=pl.Categorical,
    )
    print(cat_series.append(cat2_series))

shape: (10,)
Series: '' [cat]
[
	"Polar"
	"Panda"
	"Brown"
	"Brown"
	"Polar"
	"Panda"
	"Brown"
	"Brown"
	"Polar"
	"Polar"
]


In [12]:
# Both series share one Enum dtype, so they can be appended directly.
dtype = pl.Enum(categories=["Polar", "Panda", "Brown"])
cat_series = pl.Series(
    values=["Polar", "Panda", "Brown", "Brown", "Polar"],
    dtype=dtype,
)
cat2_series = pl.Series(
    values=["Panda", "Brown", "Brown", "Polar", "Polar"],
    dtype=dtype,
)
print(cat_series.append(cat2_series))

shape: (10,)
Series: '' [enum]
[
	"Polar"
	"Panda"
	"Brown"
	"Brown"
	"Polar"
	"Panda"
	"Brown"
	"Brown"
	"Polar"
	"Polar"
]


In [13]:
# Demonstrate the error raised when a value is not one of the Enum categories.
dtype = pl.Enum(categories=["Polar", "Panda", "Brown"])
try:
    # "Black" is absent from the category list declared above.
    cat_series = pl.Series(
        values=["Polar", "Panda", "Brown", "Black"],
        dtype=dtype,
    )
except Exception as err:
    # The error message is the expected output of this cell.
    print(err)

value 'Black' is not present in Enum: LargeUtf8Array[Polar, Panda, Brown]


In [15]:
# Element-wise equality between two Categorical series built under the same
# StringCache context; only the last pair ("Polar" vs "Black") differs.
with pl.StringCache():
    cat_series = pl.Series(
        values=["Polar", "Panda", "Polar"],
        dtype=pl.Categorical,
    )
    cat_series2 = pl.Series(
        values=["Polar", "Panda", "Black"],
        dtype=pl.Categorical,
    )
    print(cat_series == cat_series2)

shape: (3,)
Series: '' [bool]
[
	true
	true
	false
]


In [16]:
# Compare a Categorical series against a plain string literal, element by element.
cat_series = pl.Series(
    values=["Brown", "Panda", "Polar"],
    dtype=pl.Categorical,
)
print(cat_series <= "Cat")

shape: (3,)
Series: '' [bool]
[
	true
	false
	false
]


In [17]:
# Compare a Categorical series against a series created without an explicit
# dtype, element by element.
cat_series = pl.Series(
    values=["Brown", "Panda", "Polar"],
    dtype=pl.Categorical,
)
cat_series_utf = pl.Series(values=["Panda", "Panda", "Polar"])
print(cat_series <= cat_series_utf)

shape: (3,)
Series: '' [bool]
[
	true
	true
	true
]


In [18]:
# Element-wise equality between two series that share the same Enum dtype;
# only the middle pair ("Panda" vs "Panda") matches.
dtype = pl.Enum(categories=["Polar", "Panda", "Brown"])
cat_series = pl.Series(
    values=["Brown", "Panda", "Polar"],
    dtype=dtype,
)
cat_series2 = pl.Series(
    values=["Polar", "Panda", "Brown"],
    dtype=dtype,
)
print(cat_series == cat_series2)

shape: (3,)
Series: '' [bool]
[
	false
	true
	false
]


In [19]:
# Build the Enum series first. Every value is one of the declared categories,
# so this step is expected to succeed and is kept outside the `try`: an
# unexpected construction failure should surface as a normal traceback instead
# of being printed as if it were the comparison error demonstrated below.
cat_series = pl.Series(
    ["Low", "Medium", "High"], dtype=pl.Enum(["Low", "Medium", "High"])
)
try:
    # "Excellent" is not one of the Enum categories, so the comparison raises.
    cat_series <= "Excellent"
except Exception as e:
    # The raised error is the point of this cell: display its message.
    print(e)

value 'Excellent' is not present in Enum: LargeUtf8Array[Low, Medium, High]


In [20]:
# Compare an Enum series against a literal that is one of its categories.
# The recorded result (true, true, false) matches the order in which the
# categories are declared rather than alphabetical order.
dtype = pl.Enum(categories=["Low", "Medium", "High"])
cat_series = pl.Series(
    values=["Low", "Medium", "High"],
    dtype=dtype,
)
print(cat_series <= "Medium")

shape: (3,)
Series: '' [bool]
[
	true
	true
	false
]


In [21]:
# Element-wise `<=` between two series that share the same Enum dtype.
dtype = pl.Enum(categories=["Low", "Medium", "High"])
cat_series = pl.Series(
    values=["Low", "Medium", "High"],
    dtype=dtype,
)
cat_series2 = pl.Series(
    values=["High", "High", "Low"],
    dtype=dtype,
)
print(cat_series <= cat_series2)

shape: (3,)
Series: '' [bool]
[
	true
	true
	false
]
