In [1]:
import numpy as np


class OneHotEncoder:
    def __init__(self):
        """
        Convert between category labels and their one-hot vector
        representations.
        Parameters
        ----------
        categories : list of length `C`
            List of the unique category labels for the items to encode.
        """
        self._is_fit = False
        self.hyperparameters = {}
        self.parameters = {"categories": None}

    def __call__(self, labels):
        return self.transform(labels)

    def fit(self, categories):
        """
        Create mappings between columns and category labels.
        Parameters
        ----------
        categories : list of length `C`
            List of the unique category labels for the items to encode.
        """
        self.parameters["categories"] = categories
        self.cat2idx = {c: i for i, c in enumerate(categories)}
        self.idx2cat = {i: c for i, c in enumerate(categories)}
        self._is_fit = True

    def transform(self, labels, categories=None):
        """
        Convert a list of labels into a one-hot encoding.
        Parameters
        ----------
        labels : list of length `N`
            A list of category labels.
        categories : list of length `C`
            List of the unique category labels for the items to encode. Default
            is None.
        Returns
        -------
        Y : :py:class:`ndarray <numpy.ndarray>` of shape `(N, C)`
            The one-hot encoded labels. Each row corresponds to an example,
            with a single 1 in the column corresponding to the respective
            label.
        """
        if not self._is_fit:
            categories = set(labels) if categories is None else categories
            self.fit(categories)

        unknown = list(set(labels) - set(self.cat2idx.keys()))
        assert len(unknown) == 0, "Unrecognized label(s): {}".format(unknown)

        N, C = len(labels), len(self.cat2idx)
        cols = np.array([self.cat2idx[c] for c in labels])

        Y = np.zeros((N, C))
        Y[np.arange(N), cols] = 1
        return Y

    def inverse_transform(self, Y):
        """
        Convert a one-hot encoding back into the corresponding labels
        Parameters
        ----------
        Y : :py:class:`ndarray <numpy.ndarray>` of shape `(N, C)`
            One-hot encoded labels. Each row corresponds to an example, with a
            single 1 in the column associated with the label for that example
        Returns
        -------
        labels : list of length `N`
            The list of category labels corresponding to the nonzero columns in
            `Y`
        """
        C = len(self.cat2idx)
        assert Y.ndim == 2, "Y must be 2D, but has shape {}".format(Y.shape)
        assert Y.shape[1] == C, "Y must have {} columns, got {}".format(C, Y.shape[1])
        return [self.idx2cat[ix] for ix in Y.nonzero()[1]]
    
test=OneHotEncoder()

In [6]:
test.fit(["DC1","DC2","DC20"])

In [11]:
yu=test.transform(["DC20","DC1"])
yu

array([[0., 0., 1.],
       [1., 0., 0.]])

In [12]:
test.inverse_transform(yu)

['DC20', 'DC1']

In [14]:
test2=Standardizer()
test2.fit(yu)

In [16]:
test2.transform(yu)

  return (X - self._mean) / self._std


array([[-1., nan,  1.],
       [ 1., nan, -1.]])

In [13]:
class Standardizer:
    def __init__(self, with_mean=True, with_std=True):
        """
        Feature-wise standardization for vector inputs.
        Notes
        -----
        Due to the sensitivity of empirical mean and standard deviation
        calculations to extreme values, `Standardizer` cannot guarantee
        balanced feature scales in the presence of outliers. In particular,
        note that because outliers for each feature can have different
        magnitudes, the spread of the transformed data on each feature can be
        very different.
        Similar to sklearn, `Standardizer` uses a biased estimator for the
        standard deviation: ``numpy.std(x, ddof=0)``.
        Parameters
        ----------
        with_mean : bool
            Whether to scale samples to have 0 mean during transformation.
            Default is True.
        with_std : bool
            Whether to scale samples to have unit variance during
            transformation. Default is True.
        """
        self.with_mean = with_mean
        self.with_std = with_std
        self._is_fit = False

    @property
    def hyperparameters(self):
        H = {"with_mean": self.with_mean, "with_std": self.with_std}
        return H

    @property
    def parameters(self):
        params = {
            "mean": self._mean if hasattr(self, "mean") else None,
            "std": self._std if hasattr(self, "std") else None,
        }
        return params

    def __call__(self, X):
        return self.transform(X)

    def fit(self, X):
        """
        Store the feature-wise mean and standard deviation across the samples
        in `X` for future scaling.
        Parameters
        ----------
        X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, C)`
            An array of N samples, each with dimensionality `C`
        """
        if not isinstance(X, np.ndarray):
            X = np.array(X)

        if X.shape[0] < 2:
            raise ValueError("`X` must contain at least 2 samples")

        std = np.ones(X.shape[1])
        mean = np.zeros(X.shape[1])

        if self.with_mean:
            mean = np.mean(X, axis=0)

        if self.with_std:
            std = np.std(X, axis=0, ddof=0)

        self._mean = mean
        self._std = std
        self._is_fit = True

    def transform(self, X):
        """
        Standardize features by removing the mean and scaling to unit variance.
        For a sample `x`, the standardized score is calculated as:
        .. math::
            z = (x - u) / s
        where `u` is the mean of the training samples or zero if `with_mean` is
        False, and `s` is the standard deviation of the training samples or 1
        if `with_std` is False.
        Parameters
        ----------
        X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, C)`
            An array of N samples, each with dimensionality `C`.
        Returns
        -------
        Z : :py:class:`ndarray <numpy.ndarray>` of shape `(N, C)`
            The feature-wise standardized version of `X`.
        """
        if not self._is_fit:
            raise Exception("Must call `fit` before using the `transform` method")
        return (X - self._mean) / self._std

    def inverse_transform(self, Z):
        """
        Convert a collection of standardized features back into the original
        feature space.
        For a standardized sample `z`, the unstandardized score is calculated as:
        .. math::
            x = z s + u
        where `u` is the mean of the training samples or zero if `with_mean` is
        False, and `s` is the standard deviation of the training samples or 1
        if `with_std` is False.
        Parameters
        ----------
        Z : :py:class:`ndarray <numpy.ndarray>` of shape `(N, C)`
            An array of `N` standardized samples, each with dimensionality `C`.
        Returns
        -------
        X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, C)`
            The unstandardixed samples from `Z`.
        """
        assert self._is_fit, "Must fit `Standardizer` before calling inverse_transform"
        P = self.parameters
        mean, std = P["mean"], P["std"]
        return Z * std + mean


In [18]:
import numpy as np
import scipy.signal

def discount_cumsum(x, discount):
    """Discounted cumulative sum.
    See https://docs.scipy.org/doc/scipy/reference/tutorial/signal.html#difference-equation-filtering  # noqa: E501
    Here, we have y[t] - discount*y[t+1] = x[t]
    or rev(y)[t] - discount*rev(y)[t-1] = rev(x)[t]
    Args:
        x (np.ndarrary): Input.
        discount (float): Discount factor.
    Returns:
        np.ndarrary: Discounted cumulative sum.
    """
    return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1],
                                axis=-1)[::-1]



In [19]:
discount_cumsum([1,2,3],0.96)

array([5.6848, 4.88  , 3.    ])

In [24]:
class dis:
    def calculate_discounted_returns(self, rewards,gamma=0.96):
        """
        Calculate discounted reward and then normalize it
        (see Sutton book for definition)
        Params:
            rewards: list of rewards for every episode
        """
        returns = np.zeros(len(rewards))
    
        next_return = 0 # 0 because we start at the last timestep
        for t in reversed(range(0, len(rewards))):
            next_return = rewards[t] + gamma * next_return
            returns[t] = next_return
        # normalize for better statistical properties
        returns = (returns - returns.mean()) / (returns.std() + np.finfo(np.float32).eps)
        return returns
    def _discount(self, x, gamma):
        return scipy.signal.lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1]
    
dis().calculate_discounted_returns([1,2,3])

array([5.6848, 4.88  , 3.    ])

In [23]:
dis()._discount([1,2,3],0.96)

array([5.6848, 4.88  , 3.    ])

In [28]:
gamma = 0.96

def discount_rewards(r, gamma):
    if isinstance(r,list):
        r=np.vstack(r)
    discounted_r = np.zeros_like(r)
    running_add = 0
    # From the last reward to the first...
    for t in reversed(range(0, r.size)):
        # ...reset the reward sum
        if r[t] != 0: running_add = 0
        # ...compute the discounted reward
        running_add = running_add * gamma + r[t]
        discounted_r[t] = running_add
    return discounted_r
r=[1,2,3]
discount_rewards(r, gamma)

array([[1],
       [2],
       [3]])