## 等寬

In [1]:
from sklearn.preprocessing import KBinsDiscretizer
import numpy as np

# 原始數據
data = np.array([[1], [2], [3], [4], [5]])

# 使用等寬分箱將數據分成 3 個區間，並用編碼表示
discretizer = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')
binned_data = discretizer.fit_transform(data)

print(binned_data)


[[0.]
 [0.]
 [1.]
 [2.]
 [2.]]


## 等頻

In [3]:
# 使用等頻分箱將數據分成 3 個區間
discretizer = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='quantile')
binned_data = discretizer.fit_transform(data)

print(binned_data)


[[0.]
 [0.]
 [1.]
 [2.]
 [2.]]


## K-means

In [4]:
# 使用 KMeans 分箱將數據分成 3 個區間
discretizer = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='kmeans')
binned_data = discretizer.fit_transform(data)

print(binned_data)


[[0.]
 [0.]
 [1.]
 [2.]
 [2.]]


## Encode

In [5]:
# 使用等寬分箱和 One-Hot 編碼
discretizer = KBinsDiscretizer(n_bins=3, encode='onehot-dense', strategy='uniform')
binned_data = discretizer.fit_transform(data)

print(binned_data)


[[1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]]


In [7]:
# 使用等寬分箱和 One-Hot 編碼
discretizer = KBinsDiscretizer(n_bins=3, encode='onehot', strategy='uniform')
binned_data = discretizer.fit_transform(data)

print(binned_data)


<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 10 stored elements and shape (5, 6)>
  Coords	Values
  (0, 0)	1.0
  (0, 3)	1.0
  (1, 0)	1.0
  (1, 3)	1.0
  (2, 1)	1.0
  (2, 4)	1.0
  (3, 2)	1.0
  (3, 5)	1.0
  (4, 2)	1.0
  (4, 5)	1.0


In [6]:
# 多個特徵的數據
data = np.array([[1, 10], [2, 15], [3, 25], [4, 35], [5, 45]])

# 使用等寬分箱將兩個特徵分別分成 3 個區間
discretizer = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')
binned_data = discretizer.fit_transform(data)

print(binned_data)


[[0. 0.]
 [0. 0.]
 [1. 1.]
 [2. 2.]
 [2. 2.]]
