In [14]:
from dpyr import DataFrame, filter, select, mutate, c, read_csv, head, arrange, preview
import polars as pl
import pandas as pd

In [15]:
# Pandas DataFrame syntax
# Keep the freshly loaded data under its own name so the frame displayed as
# "original" is never overwritten by the transformation steps below.
iris_raw = pd.read_csv("iris.csv")
print("Original DataFrame")
display(iris_raw)

# Build the shortened frame as one method chain. Each step returns a new
# object, so the ratio column is never assigned onto a frame derived by
# indexing another one (the pattern behind pandas' SettingWithCopyWarning).
df = (
    iris_raw
    # Keep only the Setosa rows and the two sepal measurement columns.
    .loc[lambda d: d["variety"] == "Setosa", ["sepal.length", "sepal.width"]]
    # The new column name contains a dot, so it is passed through dict
    # unpacking rather than as a keyword argument.
    .assign(**{"sepal.ratio": lambda d: d["sepal.length"] / d["sepal.width"]})
    # Largest ratio first, then keep the top rows (head() defaults to 5).
    .sort_values("sepal.ratio", ascending=False)
    .head()
)

print("Shortened DataFrame")
display(df)

Original DataFrame


Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Virginica
146,6.3,2.5,5.0,1.9,Virginica
147,6.5,3.0,5.2,2.0,Virginica
148,6.2,3.4,5.4,2.3,Virginica


Shortened DataFrame


Unnamed: 0,sepal.length,sepal.width,sepal.ratio
41,4.5,2.3,1.956522
25,5.0,3.0,1.666667
1,4.9,3.0,1.633333
45,4.8,3.0,1.6
12,4.8,3.0,1.6


In [16]:
# Polars DataFrame syntax
orig_df = pl.read_csv("iris.csv")

print("Original DataFrame")
display(orig_df)

# Derive the shortened frame from the frame loaded above instead of parsing
# the CSV a second time. The chained Polars methods return new DataFrames, so
# orig_df itself is left untouched.
shortened_df = (
    orig_df
    # Keep only the Setosa rows.
    .filter(pl.col("variety") == "Setosa")
    # Keep only the two sepal measurement columns.
    .select("sepal.length", "sepal.width")
    # Add the length / width ratio as a new column.
    .with_columns(
        (pl.col("sepal.length") / pl.col("sepal.width")).alias("sepal.ratio")
    )
    # Largest ratio first, then keep the top rows (head() defaults to 5).
    .sort("sepal.ratio", descending=True)
    .head()
)

print("Shortened DataFrame")
display(shortened_df)

Original DataFrame


sepal.length,sepal.width,petal.length,petal.width,variety
f64,f64,f64,f64,str
5.1,3.5,1.4,0.2,"""Setosa"""
4.9,3.0,1.4,0.2,"""Setosa"""
4.7,3.2,1.3,0.2,"""Setosa"""
4.6,3.1,1.5,0.2,"""Setosa"""
5.0,3.6,1.4,0.2,"""Setosa"""
…,…,…,…,…
6.7,3.0,5.2,2.3,"""Virginica"""
6.3,2.5,5.0,1.9,"""Virginica"""
6.5,3.0,5.2,2.0,"""Virginica"""
6.2,3.4,5.4,2.3,"""Virginica"""


Shortened DataFrame


sepal.length,sepal.width,sepal.ratio
f64,f64,f64
4.5,2.3,1.956522
5.0,3.0,1.666667
4.9,3.0,1.633333
4.8,3.0,1.6
4.8,3.0,1.6


In [17]:
# dpyr pipeline syntax: every verb is piped into the next one with `|`.
# `filter` here is the name imported from dpyr, not Python's builtin.
# NOTE(review): unlike the pandas and Polars cells, this select() also keeps
# `variety`, so the result carries one extra column — confirm that is intended.
short_df = (
    read_csv("iris.csv")
    # Judging by the recorded output, preview() shows the frame under the
    # given title and passes it along — confirm against dpyr's documentation.
    | preview("Original Dataframe")
    | filter(c.variety == "Setosa")
    | select(c.sepal_length, c.sepal_width, c.variety)
    | mutate(sepal_ratio=c.sepal_length / c.sepal_width)
    # Negating the column expression sorts by ratio from largest to smallest,
    # as seen in the recorded output.
    | arrange(-c.sepal_ratio)
    | head()
    | preview("Shortened Dataframe")
)

Original Dataframe


sepal_length,sepal_width,petal_length,petal_width,variety
f64,f64,f64,f64,str
5.1,3.5,1.4,0.2,"""Setosa"""
4.9,3.0,1.4,0.2,"""Setosa"""
4.7,3.2,1.3,0.2,"""Setosa"""
4.6,3.1,1.5,0.2,"""Setosa"""
5.0,3.6,1.4,0.2,"""Setosa"""


Shortened Dataframe


sepal_length,sepal_width,variety,sepal_ratio
f64,f64,str,f64
4.5,2.3,"""Setosa""",1.956522
5.0,3.0,"""Setosa""",1.666667
4.9,3.0,"""Setosa""",1.633333
4.8,3.0,"""Setosa""",1.6
4.8,3.0,"""Setosa""",1.6
