# 行のフィルタリングと並び替え

In [1]:
import os

import polars as pl

## 定数定義

In [2]:
DATA_PAR_PATH = os.path.join('..','..','data')
INPUT_CSV_PATH_TOOLS = os.path.join(DATA_PAR_PATH,'tools.csv')

## データの確認

In [3]:
tools = pl.read_csv(INPUT_CSV_PATH_TOOLS)
tools

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Rotary Hammer""","""HR2230""","""Makita""",False,199,1050.0
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500.0
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300.0
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000.0
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100.0
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500.0
"""Nail Gun""","""DPSB2IN1-XJ""","""DeWalt""",True,129,
"""Router""","""POF 1400 ACE""","""Bosch""",False,185,28000.0
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000.0
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800.0


## フィルタリング

### 式を利用したフィルタリング

In [4]:
tools.filter(
    pl.col('cordless') &
    (pl.col('brand') == 'Makita')
)

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000


In [5]:
tools.filter(
    pl.col('cordless'),  # bool値のデータなので`==True`は自明
    pl.col('brand') == 'Makita'
)

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000


### 列名によるフィルタリング

In [6]:
tools.filter('cordless')

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300.0
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000.0
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500.0
"""Nail Gun""","""DPSB2IN1-XJ""","""DeWalt""",True,129,
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000.0


pythonであれば`True`以外でも、`1`や`[1]`といったint型やlist型（要素数1以上）の値に対してもTrueの判定を処理できるが、polarsはboolean型のTrueのみを受け付ける。  
Rustで組まれているという背景があり、pythonの言語仕様が完全に使えるわけではない点に注意

### 制約によるフィルタリング

In [7]:
tools.filter(cordless=True, brand='Makita')

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000


このときは、パラメータ名で使用する列名はクォテーションで括る必要ないのか。  
その特徴もあってか、pythonの予約語は使えない模様

## 行の並び替え

### 列による並び替え

In [8]:
tools.sort('price')

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100.0
"""Nail Gun""","""DPSB2IN1-XJ""","""DeWalt""",True,129,
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000.0
"""Router""","""POF 1400 ACE""","""Bosch""",False,185,28000.0
"""Rotary Hammer""","""HR2230""","""Makita""",False,199,1050.0
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000.0
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500.0
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500.0
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300.0
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800.0


In [9]:
tools.sort('price', descending=True)

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800.0
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300.0
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500.0
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500.0
"""Rotary Hammer""","""HR2230""","""Makita""",False,199,1050.0
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000.0
"""Router""","""POF 1400 ACE""","""Bosch""",False,185,28000.0
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000.0
"""Nail Gun""","""DPSB2IN1-XJ""","""DeWalt""",True,129,
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100.0


In [10]:
tools.sort('brand', 'price')

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100.0
"""Router""","""POF 1400 ACE""","""Bosch""",False,185,28000.0
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500.0
"""Nail Gun""","""DPSB2IN1-XJ""","""DeWalt""",True,129,
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800.0
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000.0
"""Rotary Hammer""","""HR2230""","""Makita""",False,199,1050.0
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000.0
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500.0
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300.0


In [11]:
tools.sort('brand', 'price', descending=[False, True])

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500.0
"""Router""","""POF 1400 ACE""","""Bosch""",False,185,28000.0
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100.0
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800.0
"""Nail Gun""","""DPSB2IN1-XJ""","""DeWalt""",True,129,
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300.0
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500.0
"""Rotary Hammer""","""HR2230""","""Makita""",False,199,1050.0
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000.0
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000.0


複数のカラムに対してソートを実施するときに逆順を指定する場合は、`descending`はカラムの数だけ指定する必要がある

### 式による並び替え

In [12]:
tools.sort(pl.col('rpm') / pl.col('price'))

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Nail Gun""","""DPSB2IN1-XJ""","""DeWalt""",True,129,
"""Rotary Hammer""","""HR2230""","""Makita""",False,199,1050.0
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800.0
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300.0
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500.0
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000.0
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500.0
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100.0
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000.0
"""Router""","""POF 1400 ACE""","""Bosch""",False,185,28000.0


### ネストされたデータの並び替え

In [13]:
tools_collection = tools.group_by('brand').agg(collection=pl.struct(pl.all()))
tools_collection

brand,collection
str,list[struct[6]]
"""Makita""","[{""Rotary Hammer"",""HR2230"",""Makita"",false,199,1050}, {""Plunge Cut Saw"",""DSP600ZJ"",""Makita"",true,459,6300}, … {""Random Orbital Sander"",""DBO180ZJ"",""Makita"",true,199,11000}]"
"""DeWalt""","[{""Nail Gun"",""DPSB2IN1-XJ"",""DeWalt"",true,129,null}, {""Table Saw"",""DWE7485"",""DeWalt"",false,516,5800}]"
"""Bosch""","[{""Miter Saw"",""GCM 8 SJL"",""Bosch"",false,391,5500}, {""Jigsaw"",""PST 900 PEL"",""Bosch"",false,79,3100}, {""Router"",""POF 1400 ACE"",""Bosch"",false,185,28000}]"


In [14]:
tools_collection.sort(pl.col('collection').list.len(), descending=True)

brand,collection
str,list[struct[6]]
"""Makita""","[{""Rotary Hammer"",""HR2230"",""Makita"",false,199,1050}, {""Plunge Cut Saw"",""DSP600ZJ"",""Makita"",true,459,6300}, … {""Random Orbital Sander"",""DBO180ZJ"",""Makita"",true,199,11000}]"
"""Bosch""","[{""Miter Saw"",""GCM 8 SJL"",""Bosch"",false,391,5500}, {""Jigsaw"",""PST 900 PEL"",""Bosch"",false,79,3100}, {""Router"",""POF 1400 ACE"",""Bosch"",false,185,28000}]"
"""DeWalt""","[{""Nail Gun"",""DPSB2IN1-XJ"",""DeWalt"",true,129,null}, {""Table Saw"",""DWE7485"",""DeWalt"",false,516,5800}]"


In [15]:
tools.columns

['tool', 'product', 'brand', 'cordless', 'price', 'rpm']

## 行の操作

In [16]:
tools.drop_nulls('rpm').height

9

In [17]:
(
    tools.with_row_index()
    .gather_every(2).head(3)
)

index,tool,product,brand,cordless,price,rpm
u32,str,str,str,bool,i64,i64
0,"""Rotary Hammer""","""HR2230""","""Makita""",False,199,1050
2,"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300
4,"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100


In [18]:
tools.top_k(3, by='price')

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500


In [19]:
tools.sample(fraction=0.2)

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800


`fraction`で指定した割合に応じて、ランダムサンプリングする操作。  
パラメータに`seed`の設定も当然可能

In [20]:
saws = pl.DataFrame({'tool': ['Table Saw', 'Plunge Cut Saw', 'Miter Saw',
                              'Jigsaw', 'Bandsaw', 'Chainsow', 'Seesaw']})

tools.join(saws, how='semi', on='tool')

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800
