Why Polars over Pandas?

Polars was built from the ground up to be blazingly fast and can do common operations around 5–10 times faster than pandas. In addition, the memory requirement for Polars operations is significantly smaller than for pandas: pandas requires around 5 to 10 times as much RAM as the size of the dataset to carry out operations, compared to the 2 to 4 times needed for Polars.

- Written in Rust, which is comparable to C and C++ in terms of execution speed.
- Pandas is built on top of the Python package NumPy, which is implemented in C, and it struggles with string handling.
- Better concurrency (all cores can be utilized)
- Based on Apache Arrow, which gives it interoperability.
    - It uses a standardized data format that can be shared across popular libraries and tools such as pandas, Spark, Kudu, Parquet, etc.
    - This avoids serialization and deserialization.

In [23]:
#!pip install polars 

import polars as pl

In [24]:
"""Create Polars Dataframe"""

# Build a small two-column frame: each dict key becomes a column name and
# its list supplies that column's values.
pl.DataFrame({"name": ["A", "B", "C"], "age": [1, 2, 3]})

name,age
str,i64
"""A""",1
"""B""",2
"""C""",3


In [25]:
"""Read a csv using polars.read_csv"""

# Load the CSV into a DataFrame; the path is relative to the current working directory.
data = pl.read_csv('./titanic.csv')
# Preview the first rows of the frame (five rows appear in the output below).
data.head()

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
3,1,3,"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,"""STON/O2. 3101282""",7.925,,"""S"""
4,1,1,"""Futrelle, Mrs. Jacques Heath (…","""female""",35.0,1,0,"""113803""",53.1,"""C123""","""S"""
5,0,3,"""Allen, Mr. William Henry""","""male""",35.0,0,0,"""373450""",8.05,,"""S"""


In [26]:
"""data type of columns"""

# List of the column dtypes, in the same order as the columns.
data.dtypes

[Int64,
 Int64,
 Int64,
 String,
 String,
 Float64,
 Int64,
 Int64,
 String,
 Float64,
 String,
 String]

In [27]:
"""schema or columns of your data"""

# Column name -> dtype pairs for every column of the frame.
data.schema

Schema([('PassengerId', Int64),
        ('Survived', Int64),
        ('Pclass', Int64),
        ('Name', String),
        ('Sex', String),
        ('Age', Float64),
        ('SibSp', Int64),
        ('Parch', Int64),
        ('Ticket', String),
        ('Fare', Float64),
        ('Cabin', String),
        ('Embarked', String)])

In [28]:
# Read the same file with schema inference disabled: every column then comes
# back as a string (see the all-`str` dtype row in the output below).
# The result is only displayed; `data` is not reassigned here.
pl.read_csv('./titanic.csv', infer_schema=False).head()

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
str,str,str,str,str,str,str,str,str,str,str,str
"""1""","""0""","""3""","""Braund, Mr. Owen Harris""","""male""","""22""","""1""","""0""","""A/5 21171""","""7.25""",,"""S"""
"""2""","""1""","""1""","""Cumings, Mrs. John Bradley (Fl…","""female""","""38""","""1""","""0""","""PC 17599""","""71.2833""","""C85""","""C"""
"""3""","""1""","""3""","""Heikkinen, Miss. Laina""","""female""","""26""","""0""","""0""","""STON/O2. 3101282""","""7.925""",,"""S"""
"""4""","""1""","""1""","""Futrelle, Mrs. Jacques Heath (…","""female""","""35""","""1""","""0""","""113803""","""53.1""","""C123""","""S"""
"""5""","""0""","""3""","""Allen, Mr. William Henry""","""male""","""35""","""0""","""0""","""373450""","""8.05""",,"""S"""


In [29]:
"""Only columns of the data"""

# Column names as a plain Python list of strings.
data.columns

['PassengerId',
 'Survived',
 'Pclass',
 'Name',
 'Sex',
 'Age',
 'SibSp',
 'Parch',
 'Ticket',
 'Fare',
 'Cabin',
 'Embarked']

In [30]:
"""Rows and Columns"""

# Tuple of (number of rows, number of columns).
data.shape

(891, 12)

In [31]:
"""Basic Stats on data"""

# Summary table with one row per statistic: count, null_count, mean, std,
# min, the 25%/50%/75% quantiles and max. Statistics that do not apply to a
# column (e.g. the mean of a string column) are shown as null.
data.describe()

statistic,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
str,f64,f64,f64,str,str,f64,f64,f64,str,f64,str,str
"""count""",891.0,891.0,891.0,"""891""","""891""",714.0,891.0,891.0,"""891""",891.0,"""204""","""889"""
"""null_count""",0.0,0.0,0.0,"""0""","""0""",177.0,0.0,0.0,"""0""",0.0,"""687""","""2"""
"""mean""",446.0,0.383838,2.308642,,,29.699118,0.523008,0.381594,,32.204208,,
"""std""",257.353842,0.486592,0.836071,,,14.526497,1.102743,0.806057,,49.693429,,
"""min""",1.0,0.0,1.0,"""Abbing, Mr. Anthony""","""female""",0.42,0.0,0.0,"""110152""",0.0,"""A10""","""C"""
"""25%""",224.0,0.0,2.0,,,20.0,0.0,0.0,,7.925,,
"""50%""",446.0,0.0,3.0,,,28.0,0.0,0.0,,14.4542,,
"""75%""",669.0,1.0,3.0,,,38.0,1.0,0.0,,31.0,,
"""max""",891.0,1.0,3.0,"""van Melkebeke, Mr. Philemon""","""male""",80.0,8.0,6.0,"""WE/P 5735""",512.3292,"""T""","""S"""


In [35]:
"""Pick Random Samples without replacement"""

# Draw 10 random rows. Sampling is WITHOUT replacement (the Polars default,
# spelled out here so the intent is unambiguous), so no row can appear twice;
# pass with_replacement=True if repeated rows are wanted.
# A fixed seed makes the draw reproducible.
data.sample(n=10, with_replacement=False, seed=0)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
395,1,3,"""Sandstrom, Mrs. Hjalmar (Agnes…","""female""",24.0,0,2,"""PP 9549""",16.7,"""G6""","""S"""
388,1,2,"""Buss, Miss. Kate""","""female""",36.0,0,0,"""27849""",13.0,,"""S"""
867,1,2,"""Duran y More, Miss. Asuncion""","""female""",27.0,1,0,"""SC/PARIS 2149""",13.8583,,"""C"""
410,0,3,"""Lefebre, Miss. Ida""","""female""",,3,1,"""4133""",25.4667,,"""S"""
795,0,3,"""Dantcheff, Mr. Ristiu""","""male""",25.0,0,0,"""349203""",7.8958,,"""S"""
837,0,3,"""Pasic, Mr. Jakob""","""male""",21.0,0,0,"""315097""",8.6625,,"""S"""
523,0,3,"""Lahoud, Mr. Sarkis""","""male""",,0,0,"""2624""",7.225,,"""C"""
406,0,2,"""Gale, Mr. Shadrach""","""male""",34.0,1,0,"""28664""",21.0,,"""S"""
352,0,1,"""Williams-Lambert, Mr. Fletcher…","""male""",,0,0,"""113510""",35.0,"""C128""","""S"""
730,0,3,"""Ilmakangas, Miss. Pieta Sofia""","""female""",25.0,1,0,"""STON/O2. 3101271""",7.925,,"""S"""


In [39]:
"""Handle sensitive information using hash_rows"""

# Combine the values of each row into a single integer hash (one value per row).
# NOTE(review): the values shown fit in an unsigned 64-bit integer; a plain
# 64-bit hash is presumably not a cryptographic one and should not be relied
# on by itself to protect or anonymize sensitive data — confirm against the
# Polars documentation before using it for that purpose.
data.hash_rows()

12390185154443768781
4272254447490207055
11748959180842246540
6284992647284586780
10595624317311043842
…
13511004476170039329
6740564057463475827
14819379494895030569
12041094271256195589
16566577748700670310


In [42]:
"""Get size of the dataframe"""

# Estimated size of the frame, reported in the requested unit ('mb' here).
data.estimated_size('mb')

0.08188438415527344

In [43]:
"""Getting count across columns"""

# Number of non-null values in each column (e.g. Age has 714 of 891 rows filled).
data.count()

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
891,891,891,891,891,714,891,891,891,891,204,889


In [44]:
"""Find nulls across columns"""

# Number of null values in each column, returned as a one-row frame.
data.null_count()

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
0,0,0,0,0,177,0,0,0,0,687,2


In [45]:
"""Remove rows containing a null in any column"""

# Keep only the rows that have no null in any column.
# The result is displayed but not assigned.
# NOTE(review): `data` itself is presumably left unchanged — confirm.
data.drop_nulls()

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
4,1,1,"""Futrelle, Mrs. Jacques Heath (…","""female""",35.0,1,0,"""113803""",53.1,"""C123""","""S"""
7,0,1,"""McCarthy, Mr. Timothy J""","""male""",54.0,0,0,"""17463""",51.8625,"""E46""","""S"""
11,1,3,"""Sandstrom, Miss. Marguerite Ru…","""female""",4.0,1,1,"""PP 9549""",16.7,"""G6""","""S"""
12,1,1,"""Bonnell, Miss. Elizabeth""","""female""",58.0,0,0,"""113783""",26.55,"""C103""","""S"""
…,…,…,…,…,…,…,…,…,…,…,…
872,1,1,"""Beckwith, Mrs. Richard Leonard…","""female""",47.0,1,1,"""11751""",52.5542,"""D35""","""S"""
873,0,1,"""Carlsson, Mr. Frans Olof""","""male""",33.0,0,0,"""695""",5.0,"""B51 B53 B55""","""S"""
880,1,1,"""Potter, Mrs. Thomas Jr (Lily A…","""female""",56.0,0,1,"""11767""",83.1583,"""C50""","""C"""
888,1,1,"""Graham, Miss. Margaret Edith""","""female""",19.0,0,0,"""112053""",30.0,"""B42""","""S"""
