### Polars Series - Lesson III - GroupBy and Simple Data Analysis

In [103]:
import polars as pl

In [104]:
# Read the Titanic passenger table from the CSV that sits next to this
# notebook, then preview the first five rows to check the inferred dtypes.
data = pl.read_csv(source='./titanic.csv')
data.head(n=5)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
3,1,3,"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,"""STON/O2. 3101282""",7.925,,"""S"""
4,1,1,"""Futrelle, Mrs. Jacques Heath (…","""female""",35.0,1,0,"""113803""",53.1,"""C123""","""S"""
5,0,3,"""Allen, Mr. William Henry""","""male""",35.0,0,0,"""373450""",8.05,,"""S"""


In [105]:
"""Simple GroupBy operation: How many men and women?"""

# One row per distinct 'Sex' value, with the group's row count in a 'len' column.
# group_by makes no promise about the order of the resulting rows.
data.group_by('Sex').agg(pl.len())

Sex,len
str,u32
"""male""",577
"""female""",314


In [106]:
"""Creating percentage for the above result."""

# Per-sex row counts plus each group's share of the overall total.
# NOTE: 'Percent' is a fraction in [0, 1] (len / sum of len), not a value out of 100.
(
    data
    .group_by('Sex')
    .agg(pl.len())
    .with_columns(Percent=pl.col('len') / pl.col('len').sum())
)

Sex,len,Percent
str,u32,f64
"""female""",314,0.352413
"""male""",577,0.647587


In [108]:
"""GroupBy operation with multiple columns: Gender and Survival in percentage terms"""

# Row count for every (Sex, Survived) combination plus its share of the total.
# The denominator is the sum over ALL groups, so the 'Percent' fractions add
# up to 1 across the whole table, not within each sex.
(
    data
    .group_by('Sex', 'Survived')
    .agg(pl.len())
    .with_columns(Percent=pl.col('len') / pl.col('len').sum())
)

Sex,Survived,len,Percent
str,i64,u32,f64
"""female""",0,81,0.090909
"""male""",0,468,0.525253
"""male""",1,109,0.122334
"""female""",1,233,0.261504


In [109]:
"""Simple GroupBy with aggregations: Gender and mean Fare"""

# Average ticket fare within each sex; the aggregated column keeps the name 'Fare'.
data.group_by('Sex').agg(pl.col('Fare').mean())

Sex,Fare
str,f64
"""female""",44.479818
"""male""",25.523893


In [111]:
"""Complex GroupBy with Aggregation - I: Which Pclass with Sex has survived?"""

# For every (Pclass, Sex) pair, tally how often each 'Survived' value occurs.
# value_counts inside agg produces one list of {value, count} structs per group.
(
    data
    .group_by('Pclass', 'Sex')
    .agg(pl.col('Survived').value_counts())
)

Pclass,Sex,Survived
i64,str,list[struct[2]]
3,"""female""","[{0,72}, {1,72}]"
1,"""female""","[{1,91}, {0,3}]"
2,"""female""","[{0,6}, {1,70}]"
3,"""male""","[{1,47}, {0,300}]"
1,"""male""","[{1,45}, {0,77}]"
2,"""male""","[{1,17}, {0,91}]"


In [115]:
"""Complex Groupby with aggregation - II: Percentage of male survived?"""

# Count the male passengers inside each 'Survived' group by summing the boolean
# mask (each True counts as 1) instead of materialising the matching row
# indices just to take their length. The count column keeps the name 'Sex'
# because that is the column the mask was built from.
# 'Percent' divides each count by the total number of men, so it is the
# fraction (0-1) of all male passengers with that outcome; the Survived == 1
# row is the male survival rate.
(
    data
    .group_by('Survived')
    .agg((pl.col('Sex') == 'male').sum())
    .with_columns((pl.col('Sex') / pl.sum('Sex')).alias('Percent'))
)

Survived,Sex,Percent
i64,u32,f64
0,468,0.811092
1,109,0.188908


In [116]:
"""Percentage of female survived?"""

# Count the female passengers inside each 'Survived' group by summing the
# boolean mask (each True counts as 1) instead of materialising the matching
# row indices just to take their length. The count column keeps the name 'Sex'
# because that is the column the mask was built from.
# 'Percent' divides each count by the total number of women, so it is the
# fraction (0-1) of all female passengers with that outcome; the Survived == 1
# row is the female survival rate.
(
    data
    .group_by('Survived')
    .agg((pl.col('Sex') == 'female').sum())
    .with_columns((pl.col('Sex') / pl.sum('Sex')).alias('Percent'))
)

Survived,Sex,Percent
i64,u32,f64
0,81,0.257962
1,233,0.742038


**Simple Data Analysis**

In [117]:
"""How money matter for survival?"""

# Average fare paid by passengers in each 'Survived' group (0 and 1 in this data).
data.group_by('Survived').agg(pl.col('Fare').mean())

Survived,Fare
i64,f64
0,22.117887
1,48.395408


In [118]:
"""Does Age matters for survival?"""

# Average age within each 'Survived' group.
# NOTE(review): rows with a null Age, if any, are left out of the mean - confirm
# how many are missing before reading much into the small difference.
data.group_by('Survived').agg(pl.col('Age').mean())

Survived,Age
i64,f64
0,30.626179
1,28.34369


In [120]:
"""Does Gender matters for survival?"""

# Keep only the survivors, then count them by sex.
# These are raw counts: the first group_by cell shows 577 men and 314 women
# aboard, so judge survival odds against those totals, not by comparing the
# two counts with each other.
data.filter(pl.col('Survived') == 1).get_column('Sex').value_counts()

Sex,count
str,u32
"""female""",233
"""male""",109
