# Import Polars 

In [1]:
import polars as pl 
print(pl.__version__)

1.5.0


In [2]:
insurance_df = pl.read_csv("Insurance.csv")

In [3]:
type(insurance_df)

polars.dataframe.frame.DataFrame

In [4]:
insurance_df.columns

['id',
 'Gender',
 'Age',
 'Driving_License',
 'Region_Code',
 'Previously_Insured',
 'Vehicle_Age',
 'Vehicle_Damage',
 'Annual_Premium',
 'Policy_Sales_Channel',
 'Vintage',
 'Response']

# Aggregate Functions

In [6]:
# polars.Expr.agg_groups
insurance_df.group_by("Gender",maintain_order=True).agg(pl.col("Annual_Premium").agg_groups())

Gender,Annual_Premium
str,list[u32]
"""Male""","[0, 1, … 11504797]"
"""Female""","[2, 3, … 11504796]"


In [7]:
# polars.Expr.arg_max
insurance_df.select(pl.col("Annual_Premium").arg_max())

Annual_Premium
u32
586353


In [8]:
# polars.Expr.arg_min
insurance_df.select(pl.col("Annual_Premium").arg_min())

Annual_Premium
u32
3


In [9]:
# polars.Expr.count
insurance_df.select(pl.all().count())

id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
11504798,11504798,11504798,11504798,11504798,11504798,11504798,11504798,11504798,11504798,11504798,11504798


In [11]:
# polars.Expr.implode
df = pl.DataFrame({
    "Countries":["India","Australia","England"],
    "GDP":[26,45,60]
})
df.select(pl.all().implode())

Countries,GDP
list[str],list[i64]
"[""India"", ""Australia"", ""England""]","[26, 45, 60]"


In [12]:
# polars.Expr.max

insurance_df.select(pl.col("Policy_Sales_Channel").max())

Policy_Sales_Channel
f64
163.0


In [13]:
# polars.Expr.mean
insurance_df.select(pl.col("Policy_Sales_Channel").mean())

Policy_Sales_Channel
f64
112.425442


In [15]:
# polars.Expr.quantile

insurance_df.select(pl.col("Policy_Sales_Channel").quantile(0.5))


Policy_Sales_Channel
f64
151.0


# Array Methods 

In [28]:
df = pl.DataFrame( data = {
    "GDP":[[26,45],[60,35]]
},schema={"GDP": pl.Array(pl.Int64, 2)},)
df.select(pl.col("GDP").arr.max())

GDP
i64
45
60


In [17]:
# polars.Expr.arr.sum

df.select(pl.col("GDP").arr.sum())


GDP
i64
71
95


In [18]:
# polars.Expr.arr.to_list

df.select(pl.col("GDP").arr.to_list())


GDP
list[i64]
"[26, 45]"
"[60, 35]"


In [19]:
# polars.Expr.arr.reverse

df.with_columns(reverse=pl.col("GDP").arr.reverse())

GDP,reverse
"array[i64, 2]","array[i64, 2]"
"[26, 45]","[45, 26]"
"[60, 35]","[35, 60]"


In [20]:
# polars.Expr.arr.contains
df.with_columns(contains=pl.col("GDP").arr.contains(60))

GDP,contains
"array[i64, 2]",bool
"[26, 45]",False
"[60, 35]",True


In [21]:
# polars.Expr.arr.arg_max

df.with_columns(arg_max=pl.col("GDP").arr.arg_max())


GDP,arg_max
"array[i64, 2]",u32
"[26, 45]",1
"[60, 35]",0


In [30]:
# polars.Expr.arr.sort

df.with_columns(sort=pl.col("GDP").arr.sort())

GDP,sort
"array[i64, 2]","array[i64, 2]"
"[26, 45]","[26, 45]"
"[60, 35]","[35, 60]"


In [31]:
# polars.Expr.arr.unique
df.select(pl.col("GDP").arr.unique())


GDP
list[i64]
"[26, 45]"
"[35, 60]"


In [32]:
# polars.Expr.arr.n_unique
df.with_columns(n_unique=pl.col("GDP").arr.n_unique())


GDP,n_unique
"array[i64, 2]",u32
"[26, 45]",2
"[60, 35]",2


# Categories Method 

In [35]:
# polars.Expr.cat.get_categories
insurance_df.with_columns(
    pl.col("Gender").cast(pl.Categorical)
).select(
    pl.col("Gender").cat.get_categories()
)

  insurance_df.with_columns(


Gender
str
"""Male"""
"""Female"""


# Column/Names Method

In [36]:
# polars.Expr.alias

df = df = pl.DataFrame({
    "A": [1, 2, 3],
    "B": [4, 5, 6]
})

df.select(
    pl.col("A").alias("X"),
    pl.col("B").alias("Y")
)

X,Y
i64,i64
1,4
2,5
3,6


In [37]:
df.select(
    pl.col("A"),
    pl.col("B"),
    (pl.col("A") + pl.col("B")).alias("C")  # Computed column with alias
)


A,B,C
i64,i64,i64
1,4,5
2,5,7
3,6,9


In [38]:
df = pl.DataFrame({
    "A": [1, 2, 3],
    "B": [4, 5, 6],
    "C": [7, 8, 9]
})
df_result = df.select(
    pl.exclude("B") * 2  # Multiplies all columns except 'B' by 2
)
print(df_result)

shape: (3, 2)
┌─────┬─────┐
│ A   ┆ C   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 2   ┆ 14  │
│ 4   ┆ 16  │
│ 6   ┆ 18  │
└─────┴─────┘


# Computation Methods 

In [39]:
# polars.Expr.abs
insurance_df.select(pl.col("Policy_Sales_Channel").abs())

Policy_Sales_Channel
f64
124.0
26.0
152.0
156.0
152.0
…
26.0
152.0
152.0
26.0


In [40]:
# polars.Expr.cos
insurance_df.select(pl.col("Policy_Sales_Channel").cos())

Policy_Sales_Channel
f64
-0.092776
0.646919
0.359044
0.471652
0.359044
…
0.646919
0.359044
0.359044
0.646919


In [41]:
# polars.Expr.cum_max

insurance_df.select(pl.col("Policy_Sales_Channel").cum_max())

Policy_Sales_Channel
f64
124.0
124.0
152.0
156.0
156.0
…
163.0
163.0
163.0
163.0


In [42]:
# polars.Expr.kurtosis
insurance_df.select(pl.col("Policy_Sales_Channel").kurtosis())

Policy_Sales_Channel
f64
-0.945331


In [43]:
# polars.Expr.log

insurance_df.select(pl.col("Policy_Sales_Channel").log())

Policy_Sales_Channel
f64
4.820282
3.258097
5.023881
5.049856
5.023881
…
3.258097
5.023881
5.023881
3.258097


In [44]:
# polars.Expr.skew
insurance_df.select(pl.col("Policy_Sales_Channel").skew())

Policy_Sales_Channel
f64
-0.915082


In [45]:
# polars.Expr.unique
insurance_df.select(pl.col("Policy_Sales_Channel").unique())

Policy_Sales_Channel
f64
1.0
2.0
3.0
4.0
5.0
…
157.0
158.0
159.0
160.0


In [46]:
# polars.Expr.value_counts
insurance_df.select(pl.col("Policy_Sales_Channel").value_counts())

Policy_Sales_Channel
struct[2]
"{33.0,5}"
"{44.0,1828}"
"{150.0,8259}"
"{160.0,640380}"
"{129.0,709}"
…
"{126.0,84}"
"{121.0,1113}"
"{94.0,632}"
"{136.0,4775}"


# Manipulation/Selection

In [48]:
# polars.Expr.drop_nans

insurance_df.select(pl.col("Gender").drop_nans())


Gender
str
"""Male"""
"""Male"""
"""Female"""
"""Female"""
"""Female"""
…
"""Male"""
"""Female"""
"""Female"""
"""Female"""


In [49]:
# polars.Expr.drop_nulls

insurance_df.select(pl.col("Policy_Sales_Channel").drop_nulls())


Policy_Sales_Channel
f64
124.0
26.0
152.0
156.0
152.0
…
26.0
152.0
152.0
26.0


In [52]:
# polars.Expr.filter
insurance_df.group_by("Gender").agg(
    lt=pl.col("Annual_Premium").filter(pl.col("Annual_Premium") < 2000).sum(),
    gte=pl.col("Policy_Sales_Channel").filter(pl.col("Policy_Sales_Channel") >= 500).sum(),
).sort("Gender")

Gender,lt,gte
str,f64,f64
"""Female""",0.0,0.0
"""Male""",0.0,0.0


In [53]:
# polars.Expr.pipe

df = pl.DataFrame({
    "A": [11, 22, 43, 54],
    "B": [56, 86, 70, 81]
})

def add_two_and_multiply(expr, multiplier):
    return (expr + 2) * multiplier

df_transformed = df.select(
    pl.col("A").pipe(add_two_and_multiply, multiplier=3).alias("A_transformed")
)

print(df_transformed)


shape: (4, 1)
┌───────────────┐
│ A_transformed │
│ ---           │
│ i64           │
╞═══════════════╡
│ 39            │
│ 72            │
│ 135           │
│ 168           │
└───────────────┘


In [54]:
# polars.Expr.slice

insurance_df.select(pl.all().slice(1, 2))


id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
i64,str,i64,i64,f64,i64,str,str,f64,f64,i64,i64
1,"""Male""",43,1,28.0,0,"""> 2 Years""","""Yes""",58911.0,26.0,288,1
2,"""Female""",25,1,14.0,1,"""< 1 Year""","""No""",38043.0,152.0,254,0


In [56]:
# polars.Expr.sort

insurance_df.select(pl.col("Annual_Premium").sort())

Annual_Premium
f64
2630.0
2630.0
2630.0
2630.0
2630.0
…
540165.0
540165.0
540165.0
540165.0


In [58]:
# polars.Expr.sort_by

insurance_df.select(pl.col("Policy_Sales_Channel").sort_by("Annual_Premium"))

Policy_Sales_Channel
f64
156.0
26.0
152.0
8.0
157.0
…
124.0
124.0
122.0
26.0


In [59]:
# polars.Expr.limit

df = pl.DataFrame({"Ranking": [1, 2, 3, 4, 5, 6, 7, 8, 9]})
df.select(pl.col("Ranking").reshape((3, 3)))


Ranking
"array[i64, 3]"
"[1, 2, 3]"
"[4, 5, 6]"
"[7, 8, 9]"


# Name Methods 

In [61]:
# polars.Expr.name.to_lowercase

insurance_df.with_columns(pl.all().name.to_lowercase())

id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response,gender,age,driving_license,region_code,previously_insured,vehicle_age,vehicle_damage,annual_premium,policy_sales_channel,vintage,response
i64,str,i64,i64,f64,i64,str,str,f64,f64,i64,i64,str,i64,i64,f64,i64,str,str,f64,f64,i64,i64
0,"""Male""",21,1,35.0,0,"""1-2 Year""","""Yes""",65101.0,124.0,187,0,"""Male""",21,1,35.0,0,"""1-2 Year""","""Yes""",65101.0,124.0,187,0
1,"""Male""",43,1,28.0,0,"""> 2 Years""","""Yes""",58911.0,26.0,288,1,"""Male""",43,1,28.0,0,"""> 2 Years""","""Yes""",58911.0,26.0,288,1
2,"""Female""",25,1,14.0,1,"""< 1 Year""","""No""",38043.0,152.0,254,0,"""Female""",25,1,14.0,1,"""< 1 Year""","""No""",38043.0,152.0,254,0
3,"""Female""",35,1,1.0,0,"""1-2 Year""","""Yes""",2630.0,156.0,76,0,"""Female""",35,1,1.0,0,"""1-2 Year""","""Yes""",2630.0,156.0,76,0
4,"""Female""",36,1,15.0,1,"""1-2 Year""","""No""",31951.0,152.0,294,0,"""Female""",36,1,15.0,1,"""1-2 Year""","""No""",31951.0,152.0,294,0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
11504793,"""Male""",48,1,6.0,0,"""1-2 Year""","""Yes""",27412.0,26.0,218,0,"""Male""",48,1,6.0,0,"""1-2 Year""","""Yes""",27412.0,26.0,218,0
11504794,"""Female""",26,1,36.0,0,"""< 1 Year""","""Yes""",29509.0,152.0,115,1,"""Female""",26,1,36.0,0,"""< 1 Year""","""Yes""",29509.0,152.0,115,1
11504795,"""Female""",29,1,32.0,1,"""< 1 Year""","""No""",2630.0,152.0,189,0,"""Female""",29,1,32.0,1,"""< 1 Year""","""No""",2630.0,152.0,189,0
11504796,"""Female""",51,1,28.0,0,"""1-2 Year""","""Yes""",48443.0,26.0,274,1,"""Female""",51,1,28.0,0,"""1-2 Year""","""Yes""",48443.0,26.0,274,1


In [62]:
# polars.Expr.name.to_uppercase
insurance_df.with_columns(pl.all().name.to_uppercase())

id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response,ID,GENDER,AGE,DRIVING_LICENSE,REGION_CODE,PREVIOUSLY_INSURED,VEHICLE_AGE,VEHICLE_DAMAGE,ANNUAL_PREMIUM,POLICY_SALES_CHANNEL,VINTAGE,RESPONSE
i64,str,i64,i64,f64,i64,str,str,f64,f64,i64,i64,i64,str,i64,i64,f64,i64,str,str,f64,f64,i64,i64
0,"""Male""",21,1,35.0,0,"""1-2 Year""","""Yes""",65101.0,124.0,187,0,0,"""Male""",21,1,35.0,0,"""1-2 Year""","""Yes""",65101.0,124.0,187,0
1,"""Male""",43,1,28.0,0,"""> 2 Years""","""Yes""",58911.0,26.0,288,1,1,"""Male""",43,1,28.0,0,"""> 2 Years""","""Yes""",58911.0,26.0,288,1
2,"""Female""",25,1,14.0,1,"""< 1 Year""","""No""",38043.0,152.0,254,0,2,"""Female""",25,1,14.0,1,"""< 1 Year""","""No""",38043.0,152.0,254,0
3,"""Female""",35,1,1.0,0,"""1-2 Year""","""Yes""",2630.0,156.0,76,0,3,"""Female""",35,1,1.0,0,"""1-2 Year""","""Yes""",2630.0,156.0,76,0
4,"""Female""",36,1,15.0,1,"""1-2 Year""","""No""",31951.0,152.0,294,0,4,"""Female""",36,1,15.0,1,"""1-2 Year""","""No""",31951.0,152.0,294,0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
11504793,"""Male""",48,1,6.0,0,"""1-2 Year""","""Yes""",27412.0,26.0,218,0,11504793,"""Male""",48,1,6.0,0,"""1-2 Year""","""Yes""",27412.0,26.0,218,0
11504794,"""Female""",26,1,36.0,0,"""< 1 Year""","""Yes""",29509.0,152.0,115,1,11504794,"""Female""",26,1,36.0,0,"""< 1 Year""","""Yes""",29509.0,152.0,115,1
11504795,"""Female""",29,1,32.0,1,"""< 1 Year""","""No""",2630.0,152.0,189,0,11504795,"""Female""",29,1,32.0,1,"""< 1 Year""","""No""",2630.0,152.0,189,0
11504796,"""Female""",51,1,28.0,0,"""1-2 Year""","""Yes""",48443.0,26.0,274,1,11504796,"""Female""",51,1,28.0,0,"""1-2 Year""","""Yes""",48443.0,26.0,274,1


In [None]:
# polars.Expr.name.map

df.with_columns(pl.all().reverse().name.map(lambda c: c.rstrip("_reverse").lower()))

In [63]:
#  polars.Expr.name.keep
insurance_df.with_columns((pl.col("Annual_Premium") * 9).alias("c").name.keep())

id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
i64,str,i64,i64,f64,i64,str,str,f64,f64,i64,i64
0,"""Male""",21,1,35.0,0,"""1-2 Year""","""Yes""",585909.0,124.0,187,0
1,"""Male""",43,1,28.0,0,"""> 2 Years""","""Yes""",530199.0,26.0,288,1
2,"""Female""",25,1,14.0,1,"""< 1 Year""","""No""",342387.0,152.0,254,0
3,"""Female""",35,1,1.0,0,"""1-2 Year""","""Yes""",23670.0,156.0,76,0
4,"""Female""",36,1,15.0,1,"""1-2 Year""","""No""",287559.0,152.0,294,0
…,…,…,…,…,…,…,…,…,…,…,…
11504793,"""Male""",48,1,6.0,0,"""1-2 Year""","""Yes""",246708.0,26.0,218,0
11504794,"""Female""",26,1,36.0,0,"""< 1 Year""","""Yes""",265581.0,152.0,115,1
11504795,"""Female""",29,1,32.0,1,"""< 1 Year""","""No""",23670.0,152.0,189,0
11504796,"""Female""",51,1,28.0,0,"""1-2 Year""","""Yes""",435987.0,26.0,274,1


# Operators 

In [64]:
# polars.Expr.and_

df = pl.DataFrame({"Ranking":[2,4,5,8,6],"GDP":[22,87,54,65,13]})
df.filter(
    (pl.col("Ranking") > 2).and_(pl.col("GDP") < 54)
)

Ranking,GDP
i64,i64
6,13


In [65]:
# polars.Expr.or_
df.filter(
    (pl.col("Ranking") > 2).or_(pl.col("GDP") < 54)
)

Ranking,GDP
i64,i64
2,22
4,87
5,54
8,65
6,13


In [66]:
# polars.Expr.ge


df.filter(pl.col("Ranking").ge(3))

Ranking,GDP
i64,i64
4,87
5,54
8,65
6,13


In [67]:
# polars.Expr.gt
df.filter(pl.col("Ranking").gt(3))

Ranking,GDP
i64,i64
4,87
5,54
8,65
6,13


In [68]:
#polars.Expr.le
df.filter(pl.col("Ranking").gt(4))

Ranking,GDP
i64,i64
5,54
8,65
6,13


In [69]:
# polars.Expr.lt
df.filter(pl.col("Ranking").lt(4))

Ranking,GDP
i64,i64
2,22


In [71]:
# polars.Expr.ne

df.filter(pl.col("Ranking").ne(2))

Ranking,GDP
i64,i64
4,87
5,54
8,65
6,13


In [72]:
# polars.Expr.add

df.with_columns(
    (pl.col("Ranking") + pl.col("GDP")).alias("Total")
)

Ranking,GDP,Total
i64,i64,i64
2,22,24
4,87,91
5,54,59
8,65,73
6,13,19


In [75]:
# polars.Expr.floordiv
df.with_columns(
    (pl.col("Ranking").floordiv(3).alias("New_Ranking"))
)


Ranking,GDP,New_Ranking
i64,i64,i64
2,22,0
4,87,1
5,54,1
8,65,2
6,13,2


# Windows Function 

In [78]:
# polars.Expr.over

df = pl.DataFrame({
    "Department": ["Aerial", "Aerial", "Aerial", "Biscuits", "Biscuits", "Biscuits"],
    "Sales": [200, 150, 300, 400, 250, 500]
})

df.with_columns(
    pl.col("Sales").cum_sum().over("Department").alias("CumulativeSales")
)

Department,Sales,CumulativeSales
str,i64,i64
"""Aerial""",200,200
"""Aerial""",150,350
"""Aerial""",300,650
"""Biscuits""",400,400
"""Biscuits""",250,650
"""Biscuits""",500,1150


In [81]:
# polars.Expr.rolling

df = pl.DataFrame({
    "Date": ["2023-08-01", "2023-08-02", "2023-08-03", "2023-08-04", "2023-08-05"],
    "Sales": [100, 150, 200, 250, 300]
})

df.with_columns(
    pl.col("Sales").rolling_sum(window_size=3).alias("RollingMean")
)

Date,Sales,RollingMean
str,i64,i64
"""2023-08-01""",100,
"""2023-08-02""",150,
"""2023-08-03""",200,450.0
"""2023-08-04""",250,600.0
"""2023-08-05""",300,750.0


In [82]:
df.with_columns(
    pl.col("Sales").rolling_mean(window_size=3).alias("RollingMean")
)

Date,Sales,RollingMean
str,i64,f64
"""2023-08-01""",100,
"""2023-08-02""",150,
"""2023-08-03""",200,150.0
"""2023-08-04""",250,200.0
"""2023-08-05""",300,250.0


# Temporal Functions

In [84]:
df = pl.DataFrame({
    "Timestamp": [
        "2023-08-01 12:34:56",
        "2023-08-02 14:22:11",
        "2023-08-03 08:19:03"
    ]
})
df = df.with_columns(
    pl.col("Timestamp").str.strptime(pl.Datetime, "%Y-%m-%d %H:%M:%S")
)

In [85]:
# polars.Expr.dt.date

df.with_columns(
    pl.col("Timestamp").dt.date().alias("Date")
)

Timestamp,Date
datetime[μs],date
2023-08-01 12:34:56,2023-08-01
2023-08-02 14:22:11,2023-08-02
2023-08-03 08:19:03,2023-08-03


In [86]:
# polars.Expr.dt.day

df.with_columns(
    pl.col("Timestamp").dt.day().alias("Day")
)

Timestamp,Day
datetime[μs],i8
2023-08-01 12:34:56,1
2023-08-02 14:22:11,2
2023-08-03 08:19:03,3


In [87]:
# polars.Expr.dt.hour
df.with_columns(
    pl.col("Timestamp").dt.hour().alias("Hour")
)


Timestamp,Hour
datetime[μs],i8
2023-08-01 12:34:56,12
2023-08-02 14:22:11,14
2023-08-03 08:19:03,8


In [88]:
# polars.Expr.dt.minute
df.with_columns(
    pl.col("Timestamp").dt.minute().alias("Minute")
)


Timestamp,Minute
datetime[μs],i8
2023-08-01 12:34:56,34
2023-08-02 14:22:11,22
2023-08-03 08:19:03,19


In [89]:
# polars.Expr.dt.month
df.with_columns(
    pl.col("Timestamp").dt.month().alias("Month")
)


Timestamp,Month
datetime[μs],i8
2023-08-01 12:34:56,8
2023-08-02 14:22:11,8
2023-08-03 08:19:03,8


In [90]:
# polars.Expr.dt.time

df.with_columns(
    pl.col("Timestamp").dt.time().alias("Time")
)


Timestamp,Time
datetime[μs],time
2023-08-01 12:34:56,12:34:56
2023-08-02 14:22:11,14:22:11
2023-08-03 08:19:03,08:19:03


# There are some other methods for list , string and List expressions which can be practiced on own. 