# Pandasのstylerを使ったDataFrameの装飾

In [30]:
import pandas as pd
import numpy as np
from IPython.display import display, HTML

In [31]:
# Build the sample data: a 5x5 frame with one missing value in column B
# and one in column D.
df = pd.DataFrame(
    data=[
        [11, None, 15, 4, 5],
        [2, 12, 10, 8, 16],
        [17, 24, 14, 13, 23],
        [19, 6, 18, None, 7],
        [3, 9, 20, 21, 1],
    ],
    index=list("abcde"),
    columns=list("ABCDE"),
)
# A bare trailing expression makes the notebook render the frame.
df

Unnamed: 0,A,B,C,D,E
a,11,,15,4.0,5
b,2,12.0,10,8.0,16
c,17,24.0,14,13.0,23
d,19,6.0,18,,7
e,3,9.0,20,21.0,1


In [44]:
display(df)

# Accessing `.style` yields a Styler object instead of a DataFrame.
# In a notebook it can be displayed the same way as the DataFrame itself.
print(type(df), type(df.style), sep="\n")

display(df.style)

Unnamed: 0,A,B,C,D,E
a,11,,15,4.0,5
b,2,12.0,10,8.0,16
c,17,24.0,14,13.0,23
d,19,6.0,18,,7
e,3,9.0,20,21.0,1


<class 'pandas.core.frame.DataFrame'>
<class 'pandas.io.formats.style.Styler'>


Unnamed: 0,A,B,C,D,E
a,11,,15,4.0,5
b,2,12.0,10,8.0,16
c,17,24.0,14,13.0,23
d,19,6.0,18,,7
e,3,9.0,20,21.0,1


In [46]:
# Styler appears to be purely for presentation; it does not seem possible to do
# data handling on the Styler itself.
# Indexing it like a DataFrame raises TypeError (see the output below).
df.style[['a']]

TypeError: 'Styler' object is not subscriptable

## 強調表現

### 欠損値を強調(`Styler.highlight_null()`)

In [133]:
# Build the sample data: a 5x5 frame with one missing value in column B
# and one in column D.
df = pd.DataFrame(
    [
        [11, None, 15, 4, 5],
        [2, 12, 10, 8, 16],
        [17, 24, 14, 13, 23],
        [19, 6, 18, None, 7],
        [3, 9, 20, 21, 1],
    ],
    index=["a", "b", "c", "d", "e"],
    columns=["A", "B", "C", "D", "E"],
)

# Highlight missing values in sky blue.
# The keyword was renamed from `null_color` to `color` in pandas 1.5 and the
# old name was removed in pandas 2.0, so use `color` here.
style = df.style.highlight_null(color="skyblue")
display(style)

Unnamed: 0,A,B,C,D,E
a,11,,15,4.0,5
b,2,12.0,10,8.0,16
c,17,24.0,14,13.0,23
d,19,6.0,18,,7
e,3,9.0,20,21.0,1


### 最小値/最大値を強調(`Styler.highlight_min()`/`Styler.highlight_max()`)

In [77]:
# サンプルデータ読み込み
df = pd.DataFrame(
    [
        [11, None, 15, 4, 5],
        [2, 12, 10, 8, 16],
        [17, 24, 14, 13, 23],
        [19, 6, 18, None, 7],
        [3, 9, 20, 21, 1],
    ],
    index=["a", "b", "c", "d", "e"],
    columns=["A", "B", "C", "D", "E"],
)

df.style.highlight_min(color="green", axis=1, subset=['A', 'C'])

Unnamed: 0,A,B,C,D,E
a,11,,15,4.0,5
b,2,12.0,10,8.0,16
c,17,24.0,14,13.0,23
d,19,6.0,18,,7
e,3,9.0,20,21.0,1


### 値の大きさに応じて背景のバーを表示 (`Styler.bar()`)

In [61]:
# Build the sample data: three integer columns mixing positive, negative and zero values.
df = pd.DataFrame(
    data={
        "A": [132, -107, -162, 96, 145, -133, 12, 35, 168, -12],
        "B": [0, -143, 21, 10, 105, 56, 120, 103, -132, 63],
        "C": [-31, 56, 67, 0, 16, 139, 0, -38, 142, -58],
    },
)

In [75]:
# Draw an in-cell bar for each value, scaled per column (axis=0) and aligned at "mid".
# Per the pandas docs, a two-element color list is [negative, positive].
df.style.bar(
    axis=0,
    align="mid",
    color=["red", "green"],
)

Unnamed: 0,A,B,C
0,132,0,-31
1,-107,-143,56
2,-162,21,67
3,96,10,0
4,145,105,16
5,-133,56,139
6,12,120,0
7,35,103,-38
8,168,-132,142
9,-12,63,-58


### 値の大きさに応じて背景をグラデーション(`Styler.background_gradient()`)

In [89]:
# Sample data: the integers 0..24 laid out row by row in a 5x5 frame.
df = pd.DataFrame(
    data=np.arange(0, 25).reshape((5, 5)),
    index=list("abcde"),
    columns=list("ABCDE"),
)

display(df)

Unnamed: 0,A,B,C,D,E
a,0,1,2,3,4
b,5,6,7,8,9
c,10,11,12,13,14
d,15,16,17,18,19
e,20,21,22,23,24


In [128]:
# Plain application: one gradient over the whole table (axis=None), i.e. a heat map.
display(
    df.style.background_gradient(axis=None, cmap="Reds")
)

Unnamed: 0,A,B,C,D,E
a,0,1,2,3,4
b,5,6,7,8,9
c,10,11,12,13,14
d,15,16,17,18,19
e,20,21,22,23,24


In [130]:
# Gradient computed per column (axis=0), limited to columns A and C.
# text_color_threshold: threshold that decides whether the cell text is drawn
# dark or light.
display(
    df.style.background_gradient(
        subset=["A", "C"],
        cmap="Reds",
        axis=0,
        text_color_threshold=0,
    )
)

Unnamed: 0,A,B,C,D,E
a,0,1,2,3,4
b,5,6,7,8,9
c,10,11,12,13,14
d,15,16,17,18,19
e,20,21,22,23,24


In [92]:
# Gradient computed per row (axis=1).
row_axis = 1
display(df.style.background_gradient(axis=row_axis))

Unnamed: 0,A,B,C,D,E
a,0,1,2,3,4
b,5,6,7,8,9
c,10,11,12,13,14
d,15,16,17,18,19
e,20,21,22,23,24


## 書式設定

### 小数点表示桁数設定(`Styler.format(precision=...)` / 旧 `Styler.set_precision()`、pandas 2.0で削除)

In [79]:
# Build the sample data: a 5x5 frame with one missing value in column B
# and one in column D.
df = pd.DataFrame(
    data=[
        [11, None, 15, 4, 5],
        [2, 12, 10, 8, 16],
        [17, 24, 14, 13, 23],
        [19, 6, 18, None, 7],
        [3, 9, 20, 21, 1],
    ],
    index=list("abcde"),
    columns=list("ABCDE"),
)

display(df)

Unnamed: 0,A,B,C,D,E
a,11,,15,4.0,5
b,2,12.0,10,8.0,16
c,17,24.0,14,13.0,23
d,19,6.0,18,,7
e,3,9.0,20,21.0,1


In [80]:
# Show floating-point values with two decimal places.
# Styler.set_precision() was deprecated in pandas 1.3 and removed in 2.0;
# Styler.format(precision=...) is its replacement (requires pandas >= 1.3).
display(df.style.format(precision=2))

Unnamed: 0,A,B,C,D,E
a,11,,15,4.0,5
b,2,12.0,10,8.0,16
c,17,24.0,14,13.0,23
d,19,6.0,18,,7
e,3,9.0,20,21.0,1


### 欠損値の表示方法を変更(`Styler.format(na_rep=...)` / 旧 `Styler.set_na_rep()`、pandas 2.0で削除)

In [86]:
# Build the sample data: a 5x5 frame with one missing value in column B
# and one in column D.
df = pd.DataFrame(
    data=[
        [11, None, 15, 4, 5],
        [2, 12, 10, 8, 16],
        [17, 24, 14, 13, 23],
        [19, 6, 18, None, 7],
        [3, 9, 20, 21, 1],
    ],
    index=list("abcde"),
    columns=list("ABCDE"),
)

display(df)

Unnamed: 0,A,B,C,D,E
a,11,,15,4.0,5
b,2,12.0,10,8.0,16
c,17,24.0,14,13.0,23
d,19,6.0,18,,7
e,3,9.0,20,21.0,1


In [87]:
# Render missing values as "-".
# Styler.set_na_rep() was deprecated in pandas 1.3 and removed in 2.0;
# Styler.format(na_rep=...) is its replacement (requires pandas >= 1.3).
df.style.format(na_rep="-")

Unnamed: 0,A,B,C,D,E
a,11,-,15,4.000000,5
b,2,12.000000,10,8.000000,16
c,17,24.000000,14,13.000000,23
d,19,6.000000,18,-,7
e,3,9.000000,20,21.000000,1


### 書式の設定(`Styler.format()`)

In [82]:
# Build the sample data: a 5x5 frame with one missing value in column B
# and one in column D.
df = pd.DataFrame(
    data=[
        [11, None, 15, 4, 5],
        [2, 12, 10, 8, 16],
        [17, 24, 14, 13, 23],
        [19, 6, 18, None, 7],
        [3, 9, 20, 21, 1],
    ],
    index=list("abcde"),
    columns=list("ABCDE"),
)

# Divide every value by the total of its column, giving each value's share of the column.
df2 = df.div(df.sum(axis=0), axis="columns")

display(df2)

Unnamed: 0,A,B,C,D,E
a,0.211538,,0.194805,0.086957,0.096154
b,0.038462,0.235294,0.12987,0.173913,0.307692
c,0.326923,0.470588,0.181818,0.282609,0.442308
d,0.365385,0.117647,0.233766,,0.134615
e,0.057692,0.176471,0.25974,0.456522,0.019231


In [84]:
# Render every value as a whole-number percentage and show missing values as "-".
display(
    df2.style.format(formatter="{:.0%}", na_rep="-")
)

Unnamed: 0,A,B,C,D,E
a,21%,-,19%,9%,10%
b,4%,24%,13%,17%,31%
c,33%,47%,18%,28%,44%
d,37%,12%,23%,-,13%
e,6%,18%,26%,46%,2%


In [85]:
# Passing a dict lets each column use its own format.
display(
    df2.style.format(
        formatter={"A": "{:.0%}", "B": "{:.2%}"},
        na_rep="-",
    )
)

Unnamed: 0,A,B,C,D,E
a,21%,-,0.194805,0.086957,0.096154
b,4%,23.53%,0.12987,0.173913,0.307692
c,33%,47.06%,0.181818,0.282609,0.442308
d,37%,11.76%,0.233766,,0.134615
e,6%,17.65%,0.25974,0.456522,0.019231


## CSS設定

### CSSプロパティの設定(`Styler.set_table_styles()`)
各スタイルは下記構造で指定

```
[
    {
        'selector': <CSSセレクタ>, 
        'props': [(<プロパティ名>, <値>), ...]
    }, 
    ...
]
```

In [105]:
# Sample data: the integers 0..24 laid out row by row in a 5x5 frame.
df = pd.DataFrame(
    data=np.arange(0, 25).reshape((5, 5)),
    index=list("abcde"),
    columns=list("ABCDE"),
)

display(df)

Unnamed: 0,A,B,C,D,E
a,0,1,2,3,4
b,5,6,7,8,9
c,10,11,12,13,14
d,15,16,17,18,19
e,20,21,22,23,24


In [108]:
# CSS setup: hovering the mouse over the table highlights that row in yellow.
style = df.style.set_table_styles(
    table_styles=[
        {
            "selector": "tr:hover",
            "props": [("background-color", "yellow")],
        },
    ],
)

display(style)

Unnamed: 0,A,B,C,D,E
a,0,1,2,3,4
b,5,6,7,8,9
c,10,11,12,13,14
d,15,16,17,18,19
e,20,21,22,23,24


### 行/列ごとにCSSを適用 (`Styler.apply()`)

In [109]:
# Sample data: the integers 0..24 laid out row by row in a 5x5 frame.
df = pd.DataFrame(
    data=np.arange(0, 25).reshape((5, 5)),
    index=list("abcde"),
    columns=list("ABCDE"),
)

display(df)

Unnamed: 0,A,B,C,D,E
a,0,1,2,3,4
b,5,6,7,8,9
c,10,11,12,13,14
d,15,16,17,18,19
e,20,21,22,23,24


In [111]:
# Function for Styler.apply: it receives one row or column as a Series and
# returns the CSS to apply to each of its elements.
def highlight_max(s):
    """Return one CSS declaration per element of ``s``, marking its maximum.

    Args:
        s: Row or column values; must support ``len()``, iteration and
            ``.max()`` (e.g. a ``pandas.Series``).

    Returns:
        A list with ``"background-color: green"`` at every position whose
        value equals the maximum of ``s`` and ``""`` everywhere else.
    """
    # Nothing to style; this also avoids calling .max() on an empty input.
    if len(s) == 0:
        return []
    # Compute the maximum once instead of once per element.
    peak = s.max()
    return ["background-color: green" if x == peak else "" for x in s]

In [121]:
# Apply the CSS along rows or columns; axis=0 is used here.
display(
    df.style.apply(func=highlight_max, axis=0)
)


Unnamed: 0,A,B,C,D,E
a,0,1,2,3,4
b,5,6,7,8,9
c,10,11,12,13,14
d,15,16,17,18,19
e,20,21,22,23,24


### セルごとにCSSを適用 (`Styler.map()` / 旧 `Styler.applymap()`、pandas 2.1で非推奨)

In [122]:
# Sample data: the integers 0..24 laid out row by row in a 5x5 frame.
df = pd.DataFrame(
    data=np.arange(0, 25).reshape((5, 5)),
    index=list("abcde"),
    columns=list("ABCDE"),
)

display(df)

Unnamed: 0,A,B,C,D,E
a,0,1,2,3,4
b,5,6,7,8,9
c,10,11,12,13,14
d,15,16,17,18,19
e,20,21,22,23,24


In [123]:
# Function that produces the CSS for a single cell.
def highlight_even(x):
    """Return a green-background CSS declaration for even values.

    Args:
        x: A single cell value supporting the ``%`` operator.

    Returns:
        ``"background-color: green"`` when ``x % 2 == 0``, otherwise ``""``.
    """
    is_even = x % 2 == 0
    if is_even:
        return "background-color: green"
    return ""

In [124]:
# Apply the CSS cell by cell.
# Styler.applymap() was renamed to Styler.map() in pandas 2.1 and the old name
# is deprecated, so use map() when it exists and fall back to applymap() on
# older pandas versions.
styler = df.style
apply_per_cell = styler.map if hasattr(styler, "map") else styler.applymap
display(apply_per_cell(highlight_even))

Unnamed: 0,A,B,C,D,E
a,0,1,2,3,4
b,5,6,7,8,9
c,10,11,12,13,14
d,15,16,17,18,19
e,20,21,22,23,24


## Tips


### 複数メソッドの同時適用

In [104]:
# To apply several styles at once, chain the Styler methods one after another.
df = pd.DataFrame(
    data=[
        [11, None, 15, 4, 5],
        [2, 12, 10, 8, 16],
        [17, 24, 14, 13, 23],
        [19, 6, 18, None, 7],
        [3, 9, 20, 21, 1],
    ],
    index=list("abcde"),
    columns=list("ABCDE"),
)

# Method chain: missing values, then minimum (green), then maximum (blue).
style = (
    df.style
    .highlight_null()
    .highlight_min(color="green")
    .highlight_max(color="blue")
)

display(style)

Unnamed: 0,A,B,C,D,E
a,11,,15,4.0,5
b,2,12.0,10,8.0,16
c,17,24.0,14,13.0,23
d,19,6.0,18,,7
e,3,9.0,20,21.0,1


### 色の定義
基本的にmatplotlibのcolor palette(カラーマップ)を使う。以下を参照   
https://matplotlib.org/stable/tutorials/colors/colormaps.html

## Reference
- [Styler Doc (Pandas Doc.)](https://pandas.pydata.org/docs/reference/style.html)
- [Table Visualization (Pandas Doc.)](https://pandas.pydata.org/docs/user_guide/style.html)
- [Matplotlib Color Palette](https://matplotlib.org/stable/tutorials/colors/colormaps.html)