In [1]:
import polars as pl

df=pl.read_csv('size.csv')


In [2]:
df.shape

(25000, 3)

In [3]:
df.head()

index,height,weight
i64,f64,f64
1,65.78331,112.9925
2,71.51521,136.4873
3,69.39874,153.0269
4,68.2166,142.3354
5,67.78781,144.2971


In [7]:
df.filter(pl.col('height')>73).head()

index,height,weight
i64,f64,f64
139,73.90107,151.3913
175,73.83364,139.2983
270,73.26872,130.2636
1163,74.24899,150.2167
1384,74.19488,129.0597


In [8]:
df[0,:]

index,height,weight
i64,f64,f64
1,65.78331,112.9925


In [9]:
df[:,['height', "weight"]].head()

height,weight
f64,f64
65.78331,112.9925
71.51521,136.4873
69.39874,153.0269
68.2166,142.3354
67.78781,144.2971


In [10]:
df.select(['height']).head()

height
f64
65.78331
71.51521
69.39874
68.2166
67.78781


In [11]:
df.with_columns([
    (pl.col("height") / 100).alias("price_x_100")
]).head()

index,height,weight,price_x_100
i64,f64,f64,f64
1,65.78331,112.9925,0.6578331
2,71.51521,136.4873,0.7151521
3,69.39874,153.0269,0.6939874
4,68.2166,142.3354,0.682166
5,67.78781,144.2971,0.677878


In [12]:
df.filter(
    pl.col('height').is_in([70.34,71.34])
).head()

index,height,weight
i64,f64,f64


In [13]:
df.select([
    pl.col("height").min().alias("min_height"),
    pl.col("weight").mean().alias("mean_height"),
])

min_height,mean_height
f64,f64
60.27836,127.079421


In [14]:
df.select([
    pl.col("height")
]).describe()

describe,height
str,f64
"""count""",25000.0
"""null_count""",0.0
"""mean""",67.993114
"""std""",1.901679
"""min""",60.27836
"""25%""",66.70442
"""50%""",67.99592
"""75%""",69.27294
"""max""",75.1528


In [19]:
df.select([
    pl.col("index")
]).describe()

describe,index
str,f64
"""count""",25000.0
"""null_count""",0.0
"""mean""",12500.5
"""std""",7217.022701
"""min""",1.0
"""25%""",6251.0
"""50%""",12501.0
"""75%""",18750.0
"""max""",25000.0


In [31]:
df_height_weight_ratio=df.with_columns(
    (pl.col("height")/pl.col("weight")).alias("height_weight_ratio")
)
print(df_height_weight_ratio.head())

shape: (5, 4)
┌───────┬──────────┬──────────┬─────────────────────┐
│ index ┆ height   ┆ weight   ┆ height_weight_ratio │
│ ---   ┆ ---      ┆ ---      ┆ ---                 │
│ i64   ┆ f64      ┆ f64      ┆ f64                 │
╞═══════╪══════════╪══════════╪═════════════════════╡
│ 1     ┆ 65.78331 ┆ 112.9925 ┆ 0.582192            │
│ 2     ┆ 71.51521 ┆ 136.4873 ┆ 0.52397             │
│ 3     ┆ 69.39874 ┆ 153.0269 ┆ 0.453507            │
│ 4     ┆ 68.2166  ┆ 142.3354 ┆ 0.479267            │
│ 5     ┆ 67.78781 ┆ 144.2971 ┆ 0.469779            │
└───────┴──────────┴──────────┴─────────────────────┘


In [30]:
df_height_weight_ratio.filter(pl.col("height_weight_ratio")>0.6).head()

index,height,weight,height_weight_ratio
i64,f64,f64,f64
9,67.90265,112.3723,0.604265
23,63.48115,97.90191,0.648416
29,66.53401,108.3324,0.614165
31,69.5233,103.3016,0.673013
40,64.04535,106.7115,0.600173


In [33]:
df_weight_height_ratio=df_height_weight_ratio.with_columns(
   (pl.col("weight")/pl.col("height")).alias("weight_height_ratio")
)
print(df_weight_height_ratio)

shape: (25_000, 5)
┌───────┬──────────┬──────────┬─────────────────────┬─────────────────────┐
│ index ┆ height   ┆ weight   ┆ height_weight_ratio ┆ weight_height_ratio │
│ ---   ┆ ---      ┆ ---      ┆ ---                 ┆ ---                 │
│ i64   ┆ f64      ┆ f64      ┆ f64                 ┆ f64                 │
╞═══════╪══════════╪══════════╪═════════════════════╪═════════════════════╡
│ 1     ┆ 65.78331 ┆ 112.9925 ┆ 0.582192            ┆ 1.717647            │
│ 2     ┆ 71.51521 ┆ 136.4873 ┆ 0.52397             ┆ 1.908507            │
│ 3     ┆ 69.39874 ┆ 153.0269 ┆ 0.453507            ┆ 2.205039            │
│ 4     ┆ 68.2166  ┆ 142.3354 ┆ 0.479267            ┆ 2.086521            │
│ …     ┆ …        ┆ …        ┆ …                   ┆ …                   │
│ 24997 ┆ 64.54826 ┆ 120.1932 ┆ 0.537038            ┆ 1.862067            │
│ 24998 ┆ 64.69855 ┆ 118.2655 ┆ 0.547062            ┆ 1.827947            │
│ 24999 ┆ 67.52918 ┆ 132.2682 ┆ 0.510547            ┆ 1.958682       

In [38]:
df_weight_height_ratio.filter(pl.col("weight_height_ratio")>2.2).head()

index,height,weight,height_weight_ratio,weight_height_ratio
i64,f64,f64,f64,f64
3,69.39874,153.0269,0.453507,2.205039
57,70.40617,155.8987,0.451615,2.214276
83,70.05147,155.3767,0.450849,2.218036
157,70.98112,158.9562,0.446545,2.239415
288,69.93715,155.3046,0.450322,2.220631


In [39]:
df=pl.read_csv("size.csv").lazy()


In [43]:
cts=pl.SQLContext(mytable1=df)
table=cts.execute("SELECT height FROM mytable1;",eager=True)
print(table)

shape: (25_000, 1)
┌──────────┐
│ height   │
│ ---      │
│ f64      │
╞══════════╡
│ 65.78331 │
│ 71.51521 │
│ 69.39874 │
│ 68.2166  │
│ …        │
│ 64.54826 │
│ 64.69855 │
│ 67.52918 │
│ 68.87761 │
└──────────┘


In [48]:
table=cts.execute("SELECT index FROM mytable1;",eager=True)
print(table)

shape: (25_000, 1)
┌───────┐
│ index │
│ ---   │
│ i64   │
╞═══════╡
│ 1     │
│ 2     │
│ 3     │
│ 4     │
│ …     │
│ 24997 │
│ 24998 │
│ 24999 │
│ 25000 │
└───────┘


In [49]:
print(table)

shape: (25_000, 1)
┌───────┐
│ index │
│ ---   │
│ i64   │
╞═══════╡
│ 1     │
│ 2     │
│ 3     │
│ 4     │
│ …     │
│ 24997 │
│ 24998 │
│ 24999 │
│ 25000 │
└───────┘
