# Polars

In [1]:
import numpy as np
import pandas as pd
import polars as pl
import time

In [2]:
# Generate synthetic data: one array of standard-normal floats per column.
# The generator is seeded so that Restart & Run All benchmarks the same values
# on every run instead of a fresh random sample each time.
SEED = 42
rng = np.random.default_rng(SEED)

n = 100_000_000  # 100 million rows; four float64 columns of this length is ~3.2 GB
columns = ["A", "B", "C", "D"]
data = {col: rng.standard_normal(n) for col in columns}

In [3]:
# Build a pandas DataFrame from the dict of NumPy arrays and time it.
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring elapsed time; time.time() is wall-clock time and can jump if the
# system clock is adjusted while the cell is running.
start = time.perf_counter()
pdf = pd.DataFrame(data)
pandas_creation_time = time.perf_counter() - start
print(f"Pandas DataFrame creation time: {pandas_creation_time:.2f} seconds")

# Build a Polars DataFrame from the same dict and time it the same way.
start = time.perf_counter()
pldf = pl.DataFrame(data)
polars_creation_time = time.perf_counter() - start
print(f"Polars DataFrame creation time: {polars_creation_time:.2f} seconds")

Pandas DataFrame creation time: 1.56 seconds
Polars DataFrame creation time: 0.02 seconds


In [5]:
# Row filtering: keep rows where A is positive and B is negative.
# Elapsed time is taken with time.perf_counter(), the monotonic clock meant
# for benchmarking, rather than the adjustable wall clock time.time().
start = time.perf_counter()
pdf_filtered = pdf[(pdf["A"] > 0) & (pdf["B"] < 0)]
pandas_filter_time = time.perf_counter() - start
print(f"Pandas filter time: {pandas_filter_time:.2f} seconds")

# Same predicate expressed with Polars column expressions.
start = time.perf_counter()
pldf_filtered = pldf.filter((pl.col("A") > 0) & (pl.col("B") < 0))
polars_filter_time = time.perf_counter() - start
print(f"Polars filter time: {polars_filter_time:.2f} seconds")

Pandas filter time: 2.49 seconds
Polars filter time: 1.68 seconds


In [6]:
# Grouping and aggregation: group by A, then mean of B, sum of C, count of D.
# Elapsed time is taken with time.perf_counter(), the monotonic clock meant
# for benchmarking, rather than the adjustable wall clock time.time().
# NOTE(review): "A" holds continuous random floats, so presumably almost every
# row forms its own group -- confirm this high-cardinality key is the intended
# benchmark rather than a low-cardinality categorical key.
start = time.perf_counter()
pdf_grouped = pdf.groupby("A").agg({"B": "mean", "C": "sum", "D": "count"})
pandas_group_time = time.perf_counter() - start
print(f"Pandas group time: {pandas_group_time:.2f} seconds")

# Same aggregation expressed with Polars expressions.
start = time.perf_counter()
pldf_grouped = pldf.group_by("A").agg(
    [pl.col("B").mean(), pl.col("C").sum(), pl.col("D").count()]
)
polars_group_time = time.perf_counter() - start
print(f"Polars group time: {polars_group_time:.2f} seconds")

Pandas group time: 255.29 seconds
Polars group time: 16.89 seconds
