In [14]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Toy frame with a single categorical column to encode.
df = pd.DataFrame({'Color': ['Red', 'Blue', 'Green', 'Red', 'Green']})

# --- Sparse path: sparse_output=True (the default), spelled out explicitly ---
# drop='first' omits the column of the first category, so rows belonging to
# that category are encoded as all zeros.
ohe_sparse = OneHotEncoder(
    drop='first',
    sparse_output=True,
    dtype=np.int32,
)
encoded_sparse = ohe_sparse.fit_transform(df[['Color']])

# Printing the sparse result lists only the stored (row, col) -> value entries.
print("Sparse Matrix Output:", encoded_sparse, sep="\n")
# The bare print() plus the leading "\n" below leaves two blank lines
# before the type line.
print()
print("\nType:", type(encoded_sparse))

# Expand the sparse result into a regular array so every cell is visible.
encoded_dense = encoded_sparse.toarray()
print("\nDense Array Equivalent:", encoded_dense, sep="\n")

# --- Dense path: sparse_output=False returns the dense array directly ---
ohe_dense = OneHotEncoder(
    drop='first',
    sparse_output=False,
    dtype=np.int32,
)
encoded_dense_direct = ohe_dense.fit_transform(df[['Color']])

print("\nDirect Dense Output (sparse_output=False):", encoded_dense_direct, sep="\n")
print("\nType:", type(encoded_dense_direct))


Sparse Matrix Output:
<Compressed Sparse Row sparse matrix of dtype 'int32'
	with 4 stored elements and shape (5, 2)>
  Coords	Values
  (0, 1)	1
  (2, 0)	1
  (3, 1)	1
  (4, 0)	1


Type: <class 'scipy.sparse._csr.csr_matrix'>

Dense Array Equivalent:
[[0 1]
 [0 0]
 [1 0]
 [0 1]
 [1 0]]

Direct Dense Output (sparse_output=False):
[[0 1]
 [0 0]
 [1 0]
 [0 1]
 [1 0]]

Type: <class 'numpy.ndarray'>
