# `pd.cut()` & `pd.qcut()`
`pd.cut`: 按照數據的`值`進行分割 (每個bins的數據量`不一樣`，但bins數值區間距離`差不多`) <br>
`pd.qcut`: 按照數據本身的`數量`進行分割 (每個bins的數據量`差不多`，但bins數值區間距離`不一樣`) 

---

In [1]:
import pandas as pd 
import numpy as np 

In [2]:
# Sample data: ten random integers drawn from [1, 100) in one 'number' column.
# The generator is not seeded, so the values differ on every run.
df = pd.DataFrame(
    {'number': np.random.randint(low=1, high=100, size=10)}
)
df

Unnamed: 0,number
0,35
1,27
2,3
3,30
4,30
5,81
6,91
7,25
8,36
9,2


---

## `pd.cut()`

***Example 1.***<br>
- Customized bins

In [3]:
# Assign every number to one of five hand-picked, right-closed intervals.
# NOTE: with pd.cut's defaults (right=True, include_lowest=False) a value of
# exactly 1 lies outside (1, 20] and would be returned as NaN.
df['cut_group'] = pd.cut(
    x=df['number'],
    bins=[1, 20, 40, 60, 80, 100],
)
df

Unnamed: 0,number,cut_group
0,35,"(20, 40]"
1,27,"(20, 40]"
2,3,"(1, 20]"
3,30,"(20, 40]"
4,30,"(20, 40]"
5,81,"(80, 100]"
6,91,"(80, 100]"
7,25,"(20, 40]"
8,36,"(20, 40]"
9,2,"(1, 20]"


In [4]:
# Intervals that actually occur in the column (bins with no rows are absent).
df.loc[:, 'cut_group'].unique()

[(20, 40], (1, 20], (80, 100]]
Categories (3, interval[int64]): [(1, 20] < (20, 40] < (80, 100]]

In [5]:
# Rows per interval, most frequent first; unlike unique(), bins that received
# no rows are still reported with a count of 0.
df['cut_group'].value_counts(sort=True, ascending=False)

(20, 40]     6
(80, 100]    2
(1, 20]      2
(60, 80]     0
(40, 60]     0
Name: cut_group, dtype: int64

***Example 2.***<br>
- Add the label

In [6]:
# Same custom edges as Example 1, but each bin is given a readable label
# (there must be exactly one label per bin, i.e. len(bins) - 1 labels).
# include_lowest=True closes the first interval on the left so that a value
# of exactly 1 -- which np.random.randint(1, 100, ...) can produce -- is
# labelled '1 to 20' instead of silently becoming NaN.
df['cut_group_with_label'] = pd.cut(
    x=df['number'],
    bins=[1, 20, 40, 60, 80, 100],
    labels=['1 to 20', '21 to 40', '41 to 60', '61 to 80', '81 to 100'],
    include_lowest=True,
)
df

Unnamed: 0,number,cut_group,cut_group_with_label
0,35,"(20, 40]",21 to 40
1,27,"(20, 40]",21 to 40
2,3,"(1, 20]",1 to 20
3,30,"(20, 40]",21 to 40
4,30,"(20, 40]",21 to 40
5,81,"(80, 100]",81 to 100
6,91,"(80, 100]",81 to 100
7,25,"(20, 40]",21 to 40
8,36,"(20, 40]",21 to 40
9,2,"(1, 20]",1 to 20


***Example 3.***<br>
- Automated bins (each bin's range is basically the `same`).

In [7]:
# Passing an integer asks pd.cut for that many equal-width bins spanning the
# column's min..max; the lowest edge is nudged slightly below the minimum so
# the smallest value is still included.
df['cut_group_auto_bin'] = pd.cut(x=df['number'], bins=4)
df

Unnamed: 0,number,cut_group,cut_group_with_label,cut_group_auto_bin
0,35,"(20, 40]",21 to 40,"(24.25, 46.5]"
1,27,"(20, 40]",21 to 40,"(24.25, 46.5]"
2,3,"(1, 20]",1 to 20,"(1.911, 24.25]"
3,30,"(20, 40]",21 to 40,"(24.25, 46.5]"
4,30,"(20, 40]",21 to 40,"(24.25, 46.5]"
5,81,"(80, 100]",81 to 100,"(68.75, 91.0]"
6,91,"(80, 100]",81 to 100,"(68.75, 91.0]"
7,25,"(20, 40]",21 to 40,"(24.25, 46.5]"
8,36,"(20, 40]",21 to 40,"(24.25, 46.5]"
9,2,"(1, 20]",1 to 20,"(1.911, 24.25]"


In [8]:
# Equal-width bins can hold very different numbers of rows (including none).
df['cut_group_auto_bin'].value_counts(sort=True, ascending=False)

(24.25, 46.5]     6
(68.75, 91.0]     2
(1.911, 24.25]    2
(46.5, 68.75]     0
Name: cut_group_auto_bin, dtype: int64

---

## `pd.qcut()`

In [9]:
# q=4 splits the column at its quartiles: bin edges follow the data's
# quantiles, so bin widths vary while row counts stay roughly equal.
df['qcut_group'] = pd.qcut(x=df['number'], q=4)
df

Unnamed: 0,number,cut_group,cut_group_with_label,cut_group_auto_bin,qcut_group
0,35,"(20, 40]",21 to 40,"(24.25, 46.5]","(30.0, 35.75]"
1,27,"(20, 40]",21 to 40,"(24.25, 46.5]","(25.5, 30.0]"
2,3,"(1, 20]",1 to 20,"(1.911, 24.25]","(1.999, 25.5]"
3,30,"(20, 40]",21 to 40,"(24.25, 46.5]","(25.5, 30.0]"
4,30,"(20, 40]",21 to 40,"(24.25, 46.5]","(25.5, 30.0]"
5,81,"(80, 100]",81 to 100,"(68.75, 91.0]","(35.75, 91.0]"
6,91,"(80, 100]",81 to 100,"(68.75, 91.0]","(35.75, 91.0]"
7,25,"(20, 40]",21 to 40,"(24.25, 46.5]","(1.999, 25.5]"
8,36,"(20, 40]",21 to 40,"(24.25, 46.5]","(35.75, 91.0]"
9,2,"(1, 20]",1 to 20,"(1.911, 24.25]","(1.999, 25.5]"


In [10]:
# Row counts per quantile bin. They are only approximately equal: duplicated
# values that sit on a quantile edge all land in the same bin.
df['qcut_group'].value_counts(sort=True, ascending=False)

(35.75, 91.0]    3
(25.5, 30.0]     3
(1.999, 25.5]    3
(30.0, 35.75]    1
Name: qcut_group, dtype: int64