# Polarsテクニック

In [2]:
import polars as pl

## - 正規表現による不要な列の選定
次のようなDataframeがあり、  
全行の値が一つ以上の"0"または"00:00:00"となっているカラムを不要なカラムとして考える。  
この不要なカラムを正規表現（下記）を用いて選定する。  
```
df[col].str.contains(regex_pattern).all()
```

In [3]:
# Sample frame with six rows: one integer column (col1) and six string columns.
# col2, col4 and col7 hold only "0" characters and col5 holds "00:00:00" in
# every row; col3 and col6 contain a non-zero digit.
df = pl.DataFrame(
    {
        "col1": list(range(1, 7)),
        "col2": ["000000"] * 6,
        "col3": ["000001"] * 6,
        "col4": ["0000000000"] * 6,
        "col5": ["00:00:00"] * 6,
        # Zero-padded to width 6: "000001" ... "000006".
        "col6": [f"{i:06d}" for i in range(1, 7)],
        "col7": ["00"] * 6,
    }
)

# Last expression of the cell, so the frame is rendered as the cell output.
df

col1,col2,col3,col4,col5,col6,col7
i64,str,str,str,str,str,str
1,"""000000""","""000001""","""0000000000""","""00:00:00""","""000001""","""00"""
2,"""000000""","""000001""","""0000000000""","""00:00:00""","""000002""","""00"""
3,"""000000""","""000001""","""0000000000""","""00:00:00""","""000003""","""00"""
4,"""000000""","""000001""","""0000000000""","""00:00:00""","""000004""","""00"""
5,"""000000""","""000001""","""0000000000""","""00:00:00""","""000005""","""00"""
6,"""000000""","""000001""","""0000000000""","""00:00:00""","""000006""","""00"""


In [4]:
# Regex for a "useless" value: the whole string, from start (^) to end ($),
# consists of one or more "0" or ":" characters. For "n or more" repetitions,
# use {n,} instead of +.
# The leading ^ is required: str.contains searches for a match anywhere in the
# string, so without it any value that merely ends in "0" or ":" (for example
# "100" or "12:30:00") would also be treated as a match.
pattern = "^[0:]+$"

In [9]:
useless_cols = []

# Collect the name of every column whose values all match `pattern`.
for col in df.columns:
    # df also holds a non-string column (col1 is i64) while the .str namespace
    # targets string data, so cast to string explicitly instead of relying on
    # how .str happens to treat non-string input.
    # .all() reduces the per-row boolean results to a single bool that is True
    # only when every value in the column matched.
    if df[col].cast(pl.Utf8).str.contains(pattern).all():
        useless_cols.append(col)


In [10]:
# Side note: the same selection reads more compactly as a list comprehension.
# Each column is cast to string first because df also contains a non-string
# column (col1 is i64) and the .str namespace targets string data.
useless_cols = [
    col
    for col in df.columns
    if df[col].cast(pl.Utf8).str.contains(pattern).all()
]

In [11]:
# Show the names of the columns that were selected as useless.
useless_cols

['col2', 'col4', 'col5', 'col7']

想定通りのカラムが選定できた。