In [1]:
from sklearn.preprocessing import OneHotEncoder

# Raw categorical data: one feature (a colour name) per sample.
data = [[colour] for colour in ('Red', 'Blue', 'Green', 'Blue', 'Red')]

# Create a OneHotEncoder, then fit it to the data and encode the data in a
# single fit_transform call.
encoder = OneHotEncoder()
encoded_data = encoder.fit_transform(data)

# Convert the encoder's result to a dense array so the full 0/1 grid prints.
encoded_data = encoded_data.toarray()
print(encoded_data)


[[0. 0. 1.]
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]]


In [2]:
# Data with more than one categorical feature per sample (colour, size).
data = [
    ['Red', 'Small'],
    ['Blue', 'Large'],
    ['Green', 'Medium'],
    ['Blue', 'Small'],
    ['Red', 'Medium'],
]

# Encode every categorical feature of the data; the printed result has one
# group of indicator columns per input feature.
encoder = OneHotEncoder()
encoded_data = encoder.fit_transform(data)

# Densify before printing so the full 0/1 grid is shown.
encoded_data = encoded_data.toarray()
print(encoded_data)


[[0. 0. 1. 0. 0. 1.]
 [1. 0. 0. 1. 0. 0.]
 [0. 1. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 1. 0.]]


## Sparse

In [7]:
# Ask the encoder for dense (non-sparse) output, so no .toarray() call is
# needed before printing.
# Relies on `data` as defined in the multi-feature cell earlier in the notebook.
dense_options = {'sparse_output': False}
encoder = OneHotEncoder(**dense_options)
encoded_data = encoder.fit_transform(data)
print(encoded_data)


[[0. 0. 1. 0. 0. 1.]
 [1. 0. 0. 1. 0. 0.]
 [0. 1. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 1. 0.]]


In [8]:
# Use sparse output: with sparse_output=True the encoder returns a sparse
# matrix (Compressed Sparse Row) rather than a dense array. Printing it lists
# only the coordinates and values of the stored entries, not the full grid.
# Relies on `data` as defined in the multi-feature cell earlier in the notebook.
encoder = OneHotEncoder(sparse_output=True)
encoded_data = encoder.fit_transform(data)

print(encoded_data)


<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 10 stored elements and shape (5, 6)>
  Coords	Values
  (0, 2)	1.0
  (0, 5)	1.0
  (1, 0)	1.0
  (1, 3)	1.0
  (2, 1)	1.0
  (2, 4)	1.0
  (3, 0)	1.0
  (3, 5)	1.0
  (4, 2)	1.0
  (4, 4)	1.0


## Handle Unknown

In [4]:
# Raw data: the training set holds the categories the encoder is fitted on;
# the test set contains 'Yellow', which does not appear in the training set.
train_data = [[colour] for colour in ('Red', 'Blue', 'Green')]
test_data = [[colour] for colour in ('Red', 'Yellow')]

# handle_unknown='ignore' lets transform() accept categories that were not
# present during fit; the printed result shows such a row encoded as all zeros.
# fit() returns the fitted encoder, so it can be chained onto the constructor.
encoder = OneHotEncoder(handle_unknown='ignore').fit(train_data)

# Encode the test data and densify it for printing.
encoded_test_data = encoder.transform(test_data)
encoded_test_data = encoded_test_data.toarray()
print(encoded_test_data)


[[0. 0. 1.]
 [0. 0. 0.]]


## Drop


In [6]:
# drop='first' removes the first category of each feature from the encoding,
# which avoids multicollinearity among the indicator columns.
# Relies on `data` as defined in the multi-feature cell earlier in the notebook.
encoder = OneHotEncoder(sparse_output=False, drop='first')
encoded_data = encoder.fit_transform(data)
print(encoded_data)


[[0. 1. 0. 1.]
 [0. 0. 0. 0.]
 [1. 0. 1. 0.]
 [0. 0. 0. 1.]
 [0. 1. 1. 0.]]
