# Import Polars

In [1]:
import polars as pl 
print(pl.__version__)

1.5.0


# Creating a LazyFrame 

In [7]:
# Creating a LazyFrame directly from a CSV file with scan_csv
# (no eager read_csv call is involved in this cell).
# NOTE: the following cell rebinds `lazy_df` to a LazyFrame built from an eager DataFrame.

lazy_df = pl.scan_csv("Insurance.csv")
type(lazy_df)

polars.lazyframe.frame.LazyFrame

In [8]:
# Creating a LazyFrame from an existing eager Polars DataFrame via .lazy()

insurance_df = pl.read_csv("Insurance.csv")  # eager read: the CSV is loaded into a DataFrame
lazy_df = insurance_df.lazy()  # rebinds `lazy_df` (previously the scan_csv-based frame)
type(lazy_df)

polars.lazyframe.frame.LazyFrame

In [45]:
# Create a LazyFrame directly from an in-memory dict of columns
# (despite the variable name `df`, type(df) below reports LazyFrame, not DataFrame)
df = pl.LazyFrame({
    "name": ["Alice", "Bob", "Charlie"],
    "age": [25, 30, 35],
    "salary": [50000, 60000, 70000]
})
type(df)

polars.lazyframe.frame.LazyFrame

# Common LazyFrame Methods 

In [9]:
# head() on its own is not collected here; the next cell calls .collect() on it to get rows.
lazy_df.head()

In [10]:
# Collect Method
# collect() executes the lazy query and returns the result; with head() that is the first 5 rows.
lazy_df.head().collect()

id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
i64,str,i64,i64,f64,i64,str,str,f64,f64,i64,i64
0,"""Male""",21,1,35.0,0,"""1-2 Year""","""Yes""",65101.0,124.0,187,0
1,"""Male""",43,1,28.0,0,"""> 2 Years""","""Yes""",58911.0,26.0,288,1
2,"""Female""",25,1,14.0,1,"""< 1 Year""","""No""",38043.0,152.0,254,0
3,"""Female""",35,1,1.0,0,"""1-2 Year""","""Yes""",2630.0,156.0,76,0
4,"""Female""",36,1,15.0,1,"""1-2 Year""","""No""",31951.0,152.0,294,0


In [44]:
# collect method with streaming=True
# NOTE: there is no head()/limit here, so the entire frame is materialized
# (the output below runs to id 11504796; only the display is truncated).
lazy_df.collect(streaming=True)

id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
i64,str,i64,i64,f64,i64,str,str,f64,f64,i64,i64
0,"""Male""",21,1,35.0,0,"""1-2 Year""","""Yes""",65101.0,124.0,187,0
1,"""Male""",43,1,28.0,0,"""> 2 Years""","""Yes""",58911.0,26.0,288,1
2,"""Female""",25,1,14.0,1,"""< 1 Year""","""No""",38043.0,152.0,254,0
3,"""Female""",35,1,1.0,0,"""1-2 Year""","""Yes""",2630.0,156.0,76,0
4,"""Female""",36,1,15.0,1,"""1-2 Year""","""No""",31951.0,152.0,294,0
…,…,…,…,…,…,…,…,…,…,…,…
11504793,"""Male""",48,1,6.0,0,"""1-2 Year""","""Yes""",27412.0,26.0,218,0
11504794,"""Female""",26,1,36.0,0,"""< 1 Year""","""Yes""",29509.0,152.0,115,1
11504795,"""Female""",29,1,32.0,1,"""< 1 Year""","""No""",2630.0,152.0,189,0
11504796,"""Female""",51,1,28.0,0,"""1-2 Year""","""Yes""",48443.0,26.0,274,1


In [12]:
# First n rows of the query result.
# `LazyFrame.fetch` is deprecated and emits a DeprecationWarning; the supported
# way to preview the first rows is `head(n)` followed by `collect()`.

lazy_df.head(5).collect()

  lazy_df.fetch(5)


id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
i64,str,i64,i64,f64,i64,str,str,f64,f64,i64,i64
0,"""Male""",21,1,35.0,0,"""1-2 Year""","""Yes""",65101.0,124.0,187,0
1,"""Male""",43,1,28.0,0,"""> 2 Years""","""Yes""",58911.0,26.0,288,1
2,"""Female""",25,1,14.0,1,"""< 1 Year""","""No""",38043.0,152.0,254,0
3,"""Female""",35,1,1.0,0,"""1-2 Year""","""Yes""",2630.0,156.0,76,0
4,"""Female""",36,1,15.0,1,"""1-2 Year""","""No""",31951.0,152.0,294,0


In [25]:
# collect_schema: resolve the query's schema as a Schema of (column name, dtype) pairs

lazy_df.collect_schema()

Schema([('id', Int64),
        ('Gender', String),
        ('Age', Int64),
        ('Driving_License', Int64),
        ('Region_Code', Float64),
        ('Previously_Insured', Int64),
        ('Vehicle_Age', String),
        ('Vehicle_Damage', String),
        ('Annual_Premium', Float64),
        ('Policy_Sales_Channel', Float64),
        ('Vintage', Int64),
        ('Response', Int64)])

In [28]:
# with_columns
# NOTE: the comparison is not aliased, so the result keeps the name "Annual_Premium" and
# replaces the original f64 column with a bool column (see the dtype row of the output).
# Add .alias("<new_name>") to the expression if the original values should be kept.
lazy_df.with_columns(pl.col("Annual_Premium")==2630.0).collect()

id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
i64,str,i64,i64,f64,i64,str,str,bool,f64,i64,i64
0,"""Male""",21,1,35.0,0,"""1-2 Year""","""Yes""",false,124.0,187,0
1,"""Male""",43,1,28.0,0,"""> 2 Years""","""Yes""",false,26.0,288,1
2,"""Female""",25,1,14.0,1,"""< 1 Year""","""No""",false,152.0,254,0
3,"""Female""",35,1,1.0,0,"""1-2 Year""","""Yes""",true,156.0,76,0
4,"""Female""",36,1,15.0,1,"""1-2 Year""","""No""",false,152.0,294,0
…,…,…,…,…,…,…,…,…,…,…,…
11504793,"""Male""",48,1,6.0,0,"""1-2 Year""","""Yes""",false,26.0,218,0
11504794,"""Female""",26,1,36.0,0,"""< 1 Year""","""Yes""",false,152.0,115,1
11504795,"""Female""",29,1,32.0,1,"""< 1 Year""","""No""",true,152.0,189,0
11504796,"""Female""",51,1,28.0,0,"""1-2 Year""","""Yes""",false,26.0,274,1


In [33]:
# Dealing with operations that are not available on a LazyFrame (here: pivot).
# Pattern: lazy steps -> collect() to an eager DataFrame -> eager-only operation
# -> .lazy() to continue lazily -> final collect().
lazy_df.with_columns(
    (2 * pl.col("Annual_Premium")).alias("double_premium")
).collect().pivot(
    index="id", 
    on="Gender", 
    values="double_premium", 
    aggregate_function="first"
).lazy().filter(
    pl.col("Male").is_null()  # keeps ids with no "Male" value, i.e. the Female rows in the output
).collect()


id,Male,Female
i64,f64,f64
2,,76086.0
3,,5260.0
4,,63902.0
5,,56300.0
7,,81318.0
…,…,…
11504790,,76194.0
11504791,,5260.0
11504794,,59018.0
11504795,,5260.0


In [39]:
# Build a query and show its plan without optimizations:
# the FILTER node sits on top of WITH_COLUMNS and the scan has SELECTION: None.
# NOTE(review): "Annual_Premium" is Float64 in the schema shown earlier, so the `.str`
# operation would presumably fail if q1 were collected — it is only explained in this
# notebook, never executed. Consider a String column (e.g. "Vehicle_Damage") instead.
q1 = (lazy_df.with_columns(pl.col("Annual_Premium").str.to_uppercase()).filter(pl.col("Gender")=="Male"))
q1.explain(optimized=False)

'FILTER [(col("Gender")) == (String(Male))] FROM\n   WITH_COLUMNS:\n   [col("Annual_Premium").str.uppercase()] \n    DF ["id", "Gender", "Age", "Driving_License"]; PROJECT */12 COLUMNS; SELECTION: None'

In [40]:
# Default (optimized) plan: the Gender filter no longer appears as a separate FILTER node
# but as the SELECTION of the underlying scan.
q1.explain()

' WITH_COLUMNS:\n [col("Annual_Premium").str.uppercase()] \n  DF ["id", "Gender", "Age", "Driving_License"]; PROJECT */12 COLUMNS; SELECTION: [(col("Gender")) == (String(Male))]'

# Attribute Methods

In [46]:
# Column names.
# Reading `lazy_df.columns` on a LazyFrame emits a warning because the schema has to be
# resolved; `collect_schema().names()` is the explicit, warning-free equivalent.
lazy_df.collect_schema().names()

  lazy_df.columns


['id',
 'Gender',
 'Age',
 'Driving_License',
 'Region_Code',
 'Previously_Insured',
 'Vehicle_Age',
 'Vehicle_Damage',
 'Annual_Premium',
 'Policy_Sales_Channel',
 'Vintage',
 'Response']

In [48]:
# Column dtypes, in column order.
# `lazy_df.dtypes` emits a warning on a LazyFrame; resolve the schema explicitly instead.
lazy_df.collect_schema().dtypes()

  lazy_df.dtypes


[Int64,
 String,
 Int64,
 Int64,
 Float64,
 Int64,
 String,
 String,
 Float64,
 Float64,
 Int64,
 Int64]

In [49]:
# Full schema (column name -> dtype).
# `lazy_df.schema` emits a warning on a LazyFrame; `collect_schema()` returns the same
# Schema object and makes the (potentially expensive) resolution explicit.
lazy_df.collect_schema()

  lazy_df.schema


Schema([('id', Int64),
        ('Gender', String),
        ('Age', Int64),
        ('Driving_License', Int64),
        ('Region_Code', Float64),
        ('Previously_Insured', Int64),
        ('Vehicle_Age', String),
        ('Vehicle_Damage', String),
        ('Annual_Premium', Float64),
        ('Policy_Sales_Channel', Float64),
        ('Vintage', Int64),
        ('Response', Int64)])

In [50]:
# Number of columns.
# `lazy_df.width` emits a warning on a LazyFrame; the length of the resolved schema
# is the warning-free equivalent.
lazy_df.collect_schema().len()

  lazy_df.width


12

# Descriptive Methods 

In [51]:
# polars.LazyFrame.describe
# Summary statistics per column; note that no .collect() is needed here —
# describe() returns the materialized table shown below.
lazy_df.describe()

statistic,id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
str,f64,str,f64,f64,f64,f64,str,str,f64,f64,f64,f64
"""count""",11504798.0,"""11504798""",11504798.0,11504798.0,11504798.0,11504798.0,"""11504798""","""11504798""",11504798.0,11504798.0,11504798.0,11504798.0
"""null_count""",0.0,"""0""",0.0,0.0,0.0,0.0,"""0""","""0""",0.0,0.0,0.0,0.0
"""mean""",5752398.5,,38.383563,0.998022,26.41869,0.462997,,,30461.370411,112.425442,163.897744,0.122997
"""std""",3321100.0,,14.993459,0.044431,12.99159,0.498629,,,16454.745205,54.035708,79.979531,0.328434
"""min""",0.0,"""Female""",20.0,0.0,0.0,0.0,"""1-2 Year""","""No""",2630.0,1.0,10.0,0.0
"""25%""",2876199.0,,24.0,1.0,15.0,0.0,,,25277.0,29.0,99.0,0.0
"""50%""",5752399.0,,36.0,1.0,28.0,0.0,,,31824.0,151.0,166.0,0.0
"""75%""",8628598.0,,49.0,1.0,35.0,1.0,,,39451.0,152.0,232.0,0.0
"""max""",11504797.0,"""Male""",85.0,1.0,52.0,1.0,"""> 2 Years""","""Yes""",540165.0,163.0,299.0,1.0


# Aggregate Methods

In [54]:
# polars.LazyFrame.count
# Per-column counts, returned as a single row of u32 values.

lazy_df.count().collect()

id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
11504798,11504798,11504798,11504798,11504798,11504798,11504798,11504798,11504798,11504798,11504798,11504798


In [55]:
# polars.LazyFrame.max
# Per-column maximum as a single row; string columns also get a value (e.g. "Male").

lazy_df.max().collect()

id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
i64,str,i64,i64,f64,i64,str,str,f64,f64,i64,i64
11504797,"""Male""",85,1,52.0,1,"""> 2 Years""","""Yes""",540165.0,163.0,299,1


In [56]:
# polars.LazyFrame.median
# Per-column median as a single row of f64; string columns come back as null.
lazy_df.median().collect()

id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
f64,str,f64,f64,f64,f64,str,str,f64,f64,f64,f64
5752398.5,,36.0,1.0,28.0,0.0,,,31824.0,151.0,166.0,0.0


In [57]:
# polars.LazyFrame.null_count
# Number of nulls per column (all zero for this dataset).

lazy_df.null_count().collect()

id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
0,0,0,0,0,0,0,0,0,0,0,0


In [58]:
# polars.LazyFrame.sum
# Per-column sum as a single row; string columns come back as null.
lazy_df.sum().collect()

id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
i64,str,i64,i64,f64,i64,str,str,f64,f64,i64,i64
66180182758003,,441595143,11482041,303940000.0,5326682,,,350450000000.0,1293400000.0,1885610436,1415059


In [59]:
# polars.LazyFrame.var
# Per-column variance as a single row of f64; string columns come back as null.

lazy_df.var().collect()

id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
f64,str,f64,f64,f64,f64,str,str,f64,f64,f64,f64
11030000000000.0,,224.803798,0.001974,168.781416,0.248631,,,270760000.0,2919.857715,6396.725396,0.107869


# GroupBy Methods

In [61]:
# agg: one row per Gender with the sum of Policy_Sales_Channel in that group
lazy_df.group_by("Gender").agg(
    pl.col("Policy_Sales_Channel").sum()
).collect()

Gender,Policy_Sales_Channel
str,f64
"""Male""",664249355.0
"""Female""",629182644.0


In [62]:
# all: gather every non-key column into a list per group (list[...] dtypes in the output)
# NOTE: on the full dataset each list holds millions of values.

lazy_df.group_by("Gender", maintain_order=True).all().collect()

Gender,id,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
str,list[i64],list[i64],list[i64],list[f64],list[i64],list[str],list[str],list[f64],list[f64],list[i64],list[i64]
"""Male""","[0, 1, … 11504797]","[21, 43, … 25]","[1, 1, … 1]","[35.0, 28.0, … 28.0]","[0, 0, … 1]","[""1-2 Year"", ""> 2 Years"", … ""< 1 Year""]","[""Yes"", ""Yes"", … ""No""]","[65101.0, 58911.0, … 32855.0]","[124.0, 26.0, … 152.0]","[187, 288, … 189]","[0, 1, … 0]"
"""Female""","[2, 3, … 11504796]","[25, 35, … 51]","[1, 1, … 1]","[14.0, 1.0, … 28.0]","[1, 0, … 0]","[""< 1 Year"", ""1-2 Year"", … ""1-2 Year""]","[""No"", ""Yes"", … ""Yes""]","[38043.0, 2630.0, … 48443.0]","[152.0, 156.0, … 26.0]","[254, 76, … 274]","[0, 0, … 1]"


In [63]:
# len method: number of rows in each group.
# `group_by(...).count()` is deprecated in favour of `len()`; passing name="count"
# keeps the output column name that the deprecated call produced.

lazy_df.group_by("Gender").len(name="count").collect()

  lazy_df.group_by("Gender").count().collect()


Gender,count
str,u32
"""Male""",6228134
"""Female""",5276664


In [64]:
# first: the first row of each group (ids 0 and 2 in the output below)

lazy_df.group_by("Gender", maintain_order=True).first().collect()

Gender,id,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
str,i64,i64,i64,f64,i64,str,str,f64,f64,i64,i64
"""Male""",0,21,1,35.0,0,"""1-2 Year""","""Yes""",65101.0,124.0,187,0
"""Female""",2,25,1,14.0,1,"""< 1 Year""","""No""",38043.0,152.0,254,0


In [65]:
# n_unique: number of distinct values per column within each group (u32)

lazy_df.group_by("Gender", maintain_order=True).n_unique().collect()


Gender,id,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
str,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
"""Male""",6228134,65,2,53,2,3,2,49023,151,290,2
"""Female""",5276664,66,2,54,2,3,2,48170,151,290,2


# Manipulation/Selection Methods

In [66]:
# Select: keep a single column by name

lazy_df.select("Annual_Premium").collect()

Annual_Premium
f64
65101.0
58911.0
38043.0
2630.0
31951.0
…
27412.0
29509.0
2630.0
48443.0


In [67]:
# Select several columns by passing a list of names; output keeps the listed order.
lazy_df.select(["Annual_Premium","Policy_Sales_Channel"]).collect()

Annual_Premium,Policy_Sales_Channel
f64,f64
65101.0,124.0
58911.0,26.0
38043.0,152.0
2630.0,156.0
31951.0,152.0
…,…
27412.0,26.0
29509.0,152.0
2630.0,152.0
48443.0,26.0


In [69]:
# Join
# Two small LazyFrames sharing the key column "Countries".
# Only "INDIA" and "AUSTRALIA" appear in both; "PAKISTAN"/"ENGLAND" exist only in df1
# and "NL"/"SA" only in df2, so each join type below gives a different result.

df1 = pl.LazyFrame(
    {
    "Countries" : ["INDIA","PAKISTAN","AUSTRALIA","ENGLAND"],
    "Matches" : [80,42,55,32],
    "Ranking" : [1,2,7,6]
}
)

df2 = pl.LazyFrame(
    {
        "win" :[20,31,12,9],
        "Countries" : ["INDIA","AUSTRALIA","NL","SA"]
    }
)


In [70]:
# Full join: rows from both frames are kept, with nulls on the side that has no match;
# the right-hand key is returned separately as "Countries_right".
df1.join(df2, on="Countries", how="full").collect()

Countries,Matches,Ranking,win,Countries_right
str,i64,i64,i64,str
"""INDIA""",80.0,1.0,20.0,"""INDIA"""
"""AUSTRALIA""",55.0,7.0,31.0,"""AUSTRALIA"""
,,,12.0,"""NL"""
,,,9.0,"""SA"""
"""ENGLAND""",32.0,6.0,,
"""PAKISTAN""",42.0,2.0,,


In [71]:
# Left join: every df1 row is kept; "win" is null where df2 has no matching country.
df1.join(df2, on="Countries", how="left").collect()

Countries,Matches,Ranking,win
str,i64,i64,i64
"""INDIA""",80,1,20.0
"""PAKISTAN""",42,2,
"""AUSTRALIA""",55,7,31.0
"""ENGLAND""",32,6,


In [72]:
# Semi join: df1 rows whose country also appears in df2; no df2 columns are added.
df1.join(df2, on="Countries", how="semi").collect()

Countries,Matches,Ranking
str,i64,i64
"""INDIA""",80,1
"""AUSTRALIA""",55,7


In [73]:
# Anti join: df1 rows whose country does NOT appear in df2.
df1.join(df2, on="Countries", how="anti").collect()

Countries,Matches,Ranking
str,i64,i64
"""PAKISTAN""",42,2
"""ENGLAND""",32,6


In [76]:
# Slice: slice(offset, length) -> 3 rows starting at row offset 6 (ids 6, 7, 8 below)

lazy_df.slice(6, 3).collect()

id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
i64,str,i64,i64,f64,i64,str,str,f64,f64,i64,i64
6,"""Male""",23,1,45.0,1,"""< 1 Year""","""No""",27128.0,152.0,190,0
7,"""Female""",47,1,8.0,0,"""1-2 Year""","""Yes""",40659.0,26.0,262,1
8,"""Female""",26,1,28.0,1,"""< 1 Year""","""No""",31639.0,152.0,36,0


In [77]:
# rename: mapping of {old_name: new_name}; the result is not assigned,
# so `lazy_df` itself keeps the "Region_Code" column name.

lazy_df.rename({"Region_Code": "Location_Code"}).collect()

id,Gender,Age,Driving_License,Location_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
i64,str,i64,i64,f64,i64,str,str,f64,f64,i64,i64
0,"""Male""",21,1,35.0,0,"""1-2 Year""","""Yes""",65101.0,124.0,187,0
1,"""Male""",43,1,28.0,0,"""> 2 Years""","""Yes""",58911.0,26.0,288,1
2,"""Female""",25,1,14.0,1,"""< 1 Year""","""No""",38043.0,152.0,254,0
3,"""Female""",35,1,1.0,0,"""1-2 Year""","""Yes""",2630.0,156.0,76,0
4,"""Female""",36,1,15.0,1,"""1-2 Year""","""No""",31951.0,152.0,294,0
…,…,…,…,…,…,…,…,…,…,…,…
11504793,"""Male""",48,1,6.0,0,"""1-2 Year""","""Yes""",27412.0,26.0,218,0
11504794,"""Female""",26,1,36.0,0,"""< 1 Year""","""Yes""",29509.0,152.0,115,1
11504795,"""Female""",29,1,32.0,1,"""< 1 Year""","""No""",2630.0,152.0,189,0
11504796,"""Female""",51,1,28.0,0,"""1-2 Year""","""Yes""",48443.0,26.0,274,1


In [78]:
# interpolate
# NOTE: this dataset has no nulls (see null_count above), so no values are filled in here;
# the visible effect is that the integer columns come back as f64.

lazy_df.interpolate().collect()

id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
f64,str,f64,f64,f64,f64,str,str,f64,f64,f64,f64
0.0,"""Male""",21.0,1.0,35.0,0.0,"""1-2 Year""","""Yes""",65101.0,124.0,187.0,0.0
1.0,"""Male""",43.0,1.0,28.0,0.0,"""> 2 Years""","""Yes""",58911.0,26.0,288.0,1.0
2.0,"""Female""",25.0,1.0,14.0,1.0,"""< 1 Year""","""No""",38043.0,152.0,254.0,0.0
3.0,"""Female""",35.0,1.0,1.0,0.0,"""1-2 Year""","""Yes""",2630.0,156.0,76.0,0.0
4.0,"""Female""",36.0,1.0,15.0,1.0,"""1-2 Year""","""No""",31951.0,152.0,294.0,0.0
…,…,…,…,…,…,…,…,…,…,…,…
1.1504793e7,"""Male""",48.0,1.0,6.0,0.0,"""1-2 Year""","""Yes""",27412.0,26.0,218.0,0.0
1.1504794e7,"""Female""",26.0,1.0,36.0,0.0,"""< 1 Year""","""Yes""",29509.0,152.0,115.0,1.0
1.1504795e7,"""Female""",29.0,1.0,32.0,1.0,"""< 1 Year""","""No""",2630.0,152.0,189.0,0.0
1.1504796e7,"""Female""",51.0,1.0,28.0,0.0,"""1-2 Year""","""Yes""",48443.0,26.0,274.0,1.0


In [80]:
# Filter: combine predicates with `&`; each comparison needs its own parentheses.
# NOTE: Driving_License is an i64 column compared against the float literal 1.0.

lazy_df.filter((pl.col("Gender") == "Female") & (pl.col("Driving_License") == 1.0)).collect()

id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
i64,str,i64,i64,f64,i64,str,str,f64,f64,i64,i64
2,"""Female""",25,1,14.0,1,"""< 1 Year""","""No""",38043.0,152.0,254,0
3,"""Female""",35,1,1.0,0,"""1-2 Year""","""Yes""",2630.0,156.0,76,0
4,"""Female""",36,1,15.0,1,"""1-2 Year""","""No""",31951.0,152.0,294,0
5,"""Female""",31,1,47.0,1,"""< 1 Year""","""No""",28150.0,152.0,197,0
7,"""Female""",47,1,8.0,0,"""1-2 Year""","""Yes""",40659.0,26.0,262,1
…,…,…,…,…,…,…,…,…,…,…,…
11504790,"""Female""",21,1,36.0,1,"""< 1 Year""","""No""",38097.0,152.0,275,0
11504791,"""Female""",31,1,32.0,1,"""< 1 Year""","""No""",2630.0,152.0,204,0
11504794,"""Female""",26,1,36.0,0,"""< 1 Year""","""Yes""",29509.0,152.0,115,1
11504795,"""Female""",29,1,32.0,1,"""< 1 Year""","""No""",2630.0,152.0,189,0


In [81]:
# drop a column or multiple columns (single column here)

lazy_df.drop("id").collect()

Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
str,i64,i64,f64,i64,str,str,f64,f64,i64,i64
"""Male""",21,1,35.0,0,"""1-2 Year""","""Yes""",65101.0,124.0,187,0
"""Male""",43,1,28.0,0,"""> 2 Years""","""Yes""",58911.0,26.0,288,1
"""Female""",25,1,14.0,1,"""< 1 Year""","""No""",38043.0,152.0,254,0
"""Female""",35,1,1.0,0,"""1-2 Year""","""Yes""",2630.0,156.0,76,0
"""Female""",36,1,15.0,1,"""1-2 Year""","""No""",31951.0,152.0,294,0
…,…,…,…,…,…,…,…,…,…,…
"""Male""",48,1,6.0,0,"""1-2 Year""","""Yes""",27412.0,26.0,218,0
"""Female""",26,1,36.0,0,"""< 1 Year""","""Yes""",29509.0,152.0,115,1
"""Female""",29,1,32.0,1,"""< 1 Year""","""No""",2630.0,152.0,189,0
"""Female""",51,1,28.0,0,"""1-2 Year""","""Yes""",48443.0,26.0,274,1


In [82]:
# Drop several columns at once by passing each name as a separate argument.
lazy_df.drop("Age","Driving_License").collect()

id,Gender,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
i64,str,f64,i64,str,str,f64,f64,i64,i64
0,"""Male""",35.0,0,"""1-2 Year""","""Yes""",65101.0,124.0,187,0
1,"""Male""",28.0,0,"""> 2 Years""","""Yes""",58911.0,26.0,288,1
2,"""Female""",14.0,1,"""< 1 Year""","""No""",38043.0,152.0,254,0
3,"""Female""",1.0,0,"""1-2 Year""","""Yes""",2630.0,156.0,76,0
4,"""Female""",15.0,1,"""1-2 Year""","""No""",31951.0,152.0,294,0
…,…,…,…,…,…,…,…,…,…
11504793,"""Male""",6.0,0,"""1-2 Year""","""Yes""",27412.0,26.0,218,0
11504794,"""Female""",36.0,0,"""< 1 Year""","""Yes""",29509.0,152.0,115,1
11504795,"""Female""",32.0,1,"""< 1 Year""","""No""",2630.0,152.0,189,0
11504796,"""Female""",28.0,0,"""1-2 Year""","""Yes""",48443.0,26.0,274,1


# Miscellaneous Methods 

In [84]:
# profile
# profile() runs the query itself (no collect() call) and returns a tuple whose first
# element is the result frame; the output below is truncated, so the remaining element
# is not visible here — presumably the per-node timing table, TODO confirm.

lazy_df.group_by("Gender", maintain_order=True).agg(pl.all().sum()).sort(
    "Gender"
).profile()

(shape: (2, 12)
 ┌────────┬────────────┬───────────┬────────────┬───┬────────────┬───────────┬───────────┬──────────┐
 │ Gender ┆ id         ┆ Age       ┆ Driving_Li ┆ … ┆ Annual_Pre ┆ Policy_Sa ┆ Vintage   ┆ Response │
 │ ---    ┆ ---        ┆ ---       ┆ cense      ┆   ┆ mium       ┆ les_Chann ┆ ---       ┆ ---      │
 │ str    ┆ i64        ┆ i64       ┆ ---        ┆   ┆ ---        ┆ el        ┆ i64       ┆ i64      │
 │        ┆            ┆           ┆ i64        ┆   ┆ f64        ┆ ---       ┆           ┆          │
 │        ┆            ┆           ┆            ┆   ┆            ┆ f64       ┆           ┆          │
 ╞════════╪════════════╪═══════════╪════════════╪═══╪════════════╪═══════════╪═══════════╪══════════╡
 │ Female ┆ 3035405904 ┆ 188985590 ┆ 5270990    ┆ … ┆ 1.5973e11  ┆ 6.2918264 ┆ 869205331 ┆ 545061   │
 │        ┆ 3684       ┆           ┆            ┆   ┆            ┆ 4e8       ┆           ┆          │
 │ Male   ┆ 3582612371 ┆ 252609553 ┆ 6211051    ┆ … ┆ 1.9072e11  ┆

In [85]:
# pipe method

# Small in-memory LazyFrame used as input for the pipe() demo.
# NOTE: this rebinds `df`, a name an earlier cell used for a different LazyFrame.
df = pl.LazyFrame({
    "a": [1, 2, 3, 4],
    "b": [5, 6, 7, 8]
})

# Function to add a new column
def add_new_column(df, col1, col2):
    """Return `df` extended with a column "c" holding the sum of `col1` and `col2`.

    `col1` and `col2` are column names. The frame is not modified in place;
    the result of `with_columns` is returned.
    """
    summed = pl.col(col1) + pl.col(col2)
    return df.with_columns(summed.alias("c"))

# Function to filter rows where the new column is greater than a threshold
def filter_rows(df, threshold):
    """Return only the rows of `df` whose "c" value is strictly greater than `threshold`.

    Expects `df` to already contain a column named "c".
    """
    exceeds_threshold = pl.col("c") > threshold
    return df.filter(exceeds_threshold)

# Chaining multiple operations
# Each pipe() call passes the current frame as the first argument of the helper, followed
# by the extra positional/keyword arguments; the single collect() at the end runs the chain.
df_final = (
    df.pipe(add_new_column, "a", "b")
       .pipe(filter_rows, threshold=10)
).collect()

# Only the row with a=4, b=8 has c > 10.
print(df_final)

shape: (1, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ 4   ┆ 8   ┆ 12  │
└─────┴─────┴─────┘
