handle_unknown: "error", "ignore"

In [1]:
from typing import Any
import numpy as np

In [2]:
class OneHotEncoder():
    def __init__(self) -> None:
        self._value_to_idx: dict[Any, int] = {}
        self._idx_to_value: dict[int, Any] = {}
        self._dimension: int = 0

    def fit(self, X: np.ndarray) -> None:
        if X.ndim == 1:
            X = X.reshape(-1, 1)

        unique_values = np.unique(X)
        self._value_to_idx = {value: idx for idx, value in enumerate(unique_values)}
        self._idx_to_value = {idx: value for value, idx in self._value_to_idx.items()}
        self._dimension = len(self._value_to_idx)



    def _value_to_one_hot(self, values: list[Any]) -> np.ndarray:
        one_hot = np.zeros(self._dimension)
        ids = [self._value_to_idx[value] for value in values]
        one_hot[ids] = 1
        return one_hot

    def transform(self, X: np.ndarray) -> np.ndarray:
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        return np.array([self._value_to_one_hot(row) for row in X])

    def fit_transform(self, X: np.ndarray) -> np.ndarray:
        self.fit(X)
        return self.transform(X)

    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
        if X.ndim == 1:
            ids = np.where(X == 1)[0]
            return np.array([self._idx_to_value[idx] for idx in ids])

        print(f"{X=}")
        
        inv = []
        for row in X:
            ids = np.where(row == 1)[0]
            values = [self._idx_to_value[idx] for idx in ids]
            inv.append(values)

        print(f"{inv}")

        return np.array(inv)

In [3]:
X = np.array([0, 1, 2, 1, 0])

encoder = OneHotEncoder()

print(encoder.fit_transform(X))

encoder.inverse_transform(np.array([0, 1, 0]))


[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [1. 0. 0.]]


array([1])

In [4]:
encoder.inverse_transform(np.array([[1, 0, 0], [0, 1, 0]]))

X=array([[1, 0, 0],
       [0, 1, 0]])
[[0], [1]]


array([[0],
       [1]])

In [5]:
X = np.array(["bird", "cat", "dog", "cat", "bird"])

encoder = OneHotEncoder()

encoder.fit(X)
print(encoder.transform(X))

encoder.inverse_transform(np.array([0, 1, 0]))

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [1. 0. 0.]]


array(['cat'], dtype='<U3')

In [6]:
encoder.inverse_transform(np.array([[1, 0, 0], [0, 1, 0]]))

X=array([[1, 0, 0],
       [0, 1, 0]])
[['bird'], ['cat']]


array([['bird'],
       ['cat']], dtype='<U4')

In [7]:
X = np.array([['Male', 1], ['Female', 3], ['Female', 2]])

encoder = OneHotEncoder()

encoder.fit(X)

print(encoder.transform(X))

encoder.inverse_transform(np.array([0, 1, 0, 1, 0]))

[[1. 0. 0. 0. 1.]
 [0. 0. 1. 1. 0.]
 [0. 1. 0. 1. 0.]]


array(['2', 'Female'], dtype='<U6')

In [8]:
encoder.inverse_transform(np.array([[0, 1, 1, 0, 0], [0, 0, 0, 1, 0]]))

X=array([[0, 1, 1, 0, 0],
       [0, 0, 0, 1, 0]])
[['2', '3'], ['Female']]


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.