### Organise Data

In [1]:
from dataclasses import dataclass
from pandas import DataFrame

@dataclass
class Data:
    """Bundle of the train, validation and test splits."""

    train: DataFrame
    validation: DataFrame
    test: DataFrame

    def map(self, f):
        """Return a new ``Data`` holding ``f`` applied to every split.

        ``f`` is called on train, validation and test, in that order.
        """
        splits = (self.train, self.validation, self.test)
        return Data(*[f(split) for split in splits])

### Preprocessing Scaling

In [2]:
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import MinMaxScaler
from pandas import DataFrame

class IdentityScaler:
    """Pass-through scaler offering the fit/transform/inverse_transform trio.

    It leaves the data unchanged; pandas objects are unwrapped to their
    underlying ``.values`` array, anything else is returned as-is.
    """

    def __init__(self):
        pass

    def fit(self, x=None, y=None):
        """Accept (and ignore) training data and return ``self``.

        The optional arguments let this scaler be fitted with the same call
        shape as the other scalers (``scaler.fit(df[[col]])``); calling
        ``fit()`` without arguments keeps working.
        """
        return self

    def transform(self, x):
        """Return the raw values of ``x`` without modifying them."""
        return self._raw_values(x)

    def inverse_transform(self, x):
        """Return the raw values of ``x`` without modifying them.

        Unlike the original implementation this also accepts inputs that have
        no ``.values`` attribute (e.g. plain arrays), which are returned as-is.
        """
        return self._raw_values(x)

    @staticmethod
    def _raw_values(x):
        """Unwrap ``x.values`` when it is a data attribute, else return ``x``."""
        values = getattr(x, "values", None)
        # A callable ``values`` (e.g. ``dict.values``) is a method, not data.
        if values is None or callable(values):
            return x
        return values
    


class Scalers:
    """Collection of per-column scalers selected by a ``{column: type}`` mapping.

    Supported types are ``"categorical"`` (``OrdinalEncoder``),
    ``"continuous"`` (``MinMaxScaler``) and ``"identity"``
    (``IdentityScaler``). ``scalers`` holds one fitted scaler per column of
    the fitted frame and ``indices`` maps a column name to its position in
    that list.
    """

    def __init__(self, types):
        self.types = types
        self.scalers = []
        self.indices = {}

    def fit(self, df: DataFrame):
        """Fit one scaler per column of ``df``.

        Raises:
            LookupError: if a column has no entry in ``types`` or its type is
                not one of the supported names.
        """
        # Start from a clean slate: appending to scalers left over from an
        # earlier fit would misalign them with the freshly assigned indices.
        self.scalers = []
        self.indices = {}
        self.assign_indices(df)
        for col in df.columns:
            # ``get`` so that an unlisted column reaches the descriptive
            # LookupError below instead of a bare KeyError.
            col_type = self.types.get(col)
            if col_type == "categorical":
                scaler = OrdinalEncoder()
                scaler.fit(df[[col]])
            elif col_type == "continuous":
                scaler = MinMaxScaler()
                scaler.fit(df[[col]])
            elif col_type == "identity":
                scaler = IdentityScaler()
            else:
                raise LookupError(f"could not find type for column '{col}' given columns {df.columns}")
            self.scalers.append(scaler)

    def assign_indices(self, df):
        """Record the position of every column of ``df`` in ``indices``."""
        for i, col in enumerate(df.columns):
            self.indices[col] = i

    def transform(self, df: DataFrame):
        """Transform each column of ``df`` with its fitted scaler.

        Returns a new DataFrame with the same index and columns as ``df``.
        """
        transformed = {}
        for col in df.columns:
            scaler = self.scalers[self.indices[col]]
            # Scalers return a 2-D (n, 1) array; flatten it back to a column.
            transformed[col] = scaler.transform(df[[col]]).flatten()
        # ``DataFrame`` is the name this file imports; ``pd`` is never bound.
        return DataFrame(transformed, index=df.index)

    def inverse_transform(self, xs, col: str):
        """Undo the scaling of column ``col`` by handing ``xs`` to its scaler."""
        scaler = self.scalers[self.indices[col]]
        return scaler.inverse_transform(xs)
    
    

class GetNumberOfCategories:
    """Callable reporting how many distinct values a column of ``df`` holds."""

    def __init__(self, df: DataFrame):
        self.df = df

    def __call__(self, column: str):
        """Return ``nunique()`` of the column named ``column``."""
        selected = self.df[column]
        return selected.nunique()

### Generate Dataset

In [4]:
from pandas import DataFrame
import tensorflow as tf
from tensorflow.data import Dataset


class Offsets:
    """Geometry of a rolling window: input width, label width and shift.

    ``sequence_width`` is the total window length, ``input_width + shift``.
    """

    def __init__(self,
                 input_width: int,
                 label_width: int,
                 shift: int=None):
        self.input_width = input_width
        self.label_width = label_width
        # Any falsy shift (None or 0) falls back to the label width.
        self.shift = shift or label_width
        self.sequence_width = input_width + self.shift
        

class SplitInTime:
    """Callable cutting one window into a ``(features, labels)`` pair."""

    def __init__(self, offsets: Offsets):
        self.offsets = offsets

    def __call__(self, t: tf.Tensor):
        """Slice ``t`` along its first axis.

        Features are everything except the last ``shift`` entries; labels are
        the last ``label_width`` entries.
        """
        shift = self.offsets.shift
        label_width = self.offsets.label_width
        return (t[:-shift], t[-label_width:])
    
    
    
def get_label_indices(df: DataFrame, labels: list):
    """Return the positions of the columns of ``df`` whose name is in ``labels``.

    Positions are reported in column order, regardless of the order of
    ``labels``.
    """
    return [
        position
        for position, column in enumerate(df.columns)
        if column in labels
    ]

    
class PickLabels:
    """Callable keeping only selected columns of the labels tensor."""

    def __init__(self, label_indices: list):
        self.label_indices = label_indices

    def __call__(self, features, labels):
        """Return ``features`` untouched plus the selected label columns.

        Each index in ``label_indices`` selects ``labels[:, index]``; the
        selections are stacked along axis 1.
        """
        selected = tf.stack(
            [labels[:, column] for column in self.label_indices],
            axis=1,
        )
        return (features, selected)
    
        
class BatchWindow:
    """Callable that batches a window dataset by ``sequence_width`` elements."""

    def __init__(self, sequence_width: int):
        self.sequence_width = sequence_width

    def __call__(self, window: Dataset):
        """Return ``window`` batched with a batch size of ``sequence_width``."""
        width = self.sequence_width
        return window.batch(width)
        
        
        
class MakeDatasetFromDataFrame:
    """Callable turning a DataFrame into a batched dataset of rolling windows.

    Args:
        offsets: window geometry (``sequence_width``, ``shift``, ``label_width``).
        batch_size: number of windows per batch.
        labels: names of the columns to keep in the label tensor.
    """

    def __init__(self, offsets: Offsets, batch_size: int, labels: list):
        self.offsets = offsets
        self.batch_size = batch_size
        self.labels = labels

    def __call__(self, df: DataFrame):
        """Build the windowed ``(features, labels)`` dataset for ``df``.

        Rows are sliced into windows of ``sequence_width`` with stride 1
        (incomplete trailing windows are dropped), each window is split into
        features and labels, the label columns are picked, and the result is
        batched and prefetched.
        """
        sequence_width = self.offsets.sequence_width
        label_indices = get_label_indices(df, self.labels)
        return (Dataset.from_tensor_slices(df)
                .window(sequence_width, 1, drop_remainder=True)
                # window() yields nested datasets; batch each one back into a tensor.
                .flat_map(BatchWindow(sequence_width))
                # Use this instance's offsets; the original read a module-level
                # ``offsets`` global and only worked when one happened to exist.
                .map(SplitInTime(self.offsets))
                .map(PickLabels(label_indices))
                .batch(self.batch_size)
                .prefetch(tf.data.AUTOTUNE))

### Embedding

In [5]:
class Embeddings(tf.keras.layers.Layer):
    """Embed every feature with a sub-layer chosen by its declared type.

    ``types`` maps a feature name to ``"categorical"``, ``"continuous"`` or
    ``"identity"``. Its iteration order decides which slice of the last input
    axis each sub-layer receives. Building the sub-layers looks up
    ``get_number_of_categories`` and ``Identity`` at module scope, so both
    must be defined before this layer is instantiated.
    """

    def __init__(self, types, hidden_dim, time_steps, name="Embeddings", **kwargs):
        super().__init__(name=name, **kwargs)
        self.types = types
        self.hidden_dim = hidden_dim
        self.time_steps = time_steps
        self.embedding_layers = self.setup_layers()

    def setup_layers(self):
        """Create one sub-layer per entry of ``types``, in iteration order."""
        return [
            self._build_embedding(feature, data_type)
            for feature, data_type in self.types.items()
        ]

    def _build_embedding(self, feature, data_type):
        """Return the sub-layer for one feature; raise LookupError on an unknown type."""
        if data_type == "categorical":
            return tf.keras.layers.Embedding(
                input_dim=get_number_of_categories(feature),
                output_dim=self.hidden_dim,
                input_length=self.time_steps,
                name=feature,
            )
        if data_type == "continuous":
            dense = tf.keras.layers.Dense(units=self.hidden_dim)
            return tf.keras.layers.TimeDistributed(dense, name=feature)
        if data_type == "identity":
            return Identity(name=feature)
        raise LookupError(f"could not find data type {data_type}")

    def call(self, inputs):
        """Apply each sub-layer to its feature slice and return the list of outputs."""
        # todo: checks
        embedded = []
        for feature_idx, data_type in enumerate(self.types.values()):
            if data_type == "categorical":
                # Categorical features are passed without a trailing axis.
                xs = inputs[..., feature_idx]
            else:
                # keep dims for non_categorical variables.
                xs = inputs[..., feature_idx, None]
            layer = self.embedding_layers[feature_idx]
            embedded.append(layer(xs))
        return embedded
    
    
class Concatenation(tf.keras.layers.Layer):
    """Concatenation layer: joins a list of tensors into a single tensor."""

    def __init__(self, name="Concatenation", **kwargs):
        super().__init__(name=name, **kwargs)

    def call(self, inputs:list):
        """Concatenate ``inputs`` along their last axis."""
        last_axis = -1
        return tf.concat(inputs, axis=last_axis)

In [None]:
# Usage
# Feature name -> type; the type selects both the scaler (Scalers) and the
# embedding sub-layer (Embeddings) used for that feature.
types = {"n_dispatched": "continuous", 
         "n_returns_expected": "continuous", 
         "bayes_return_probability_mean": "continuous", 
         "dominant_product_group": "categorical", 
         "day_of_year": "continuous", 
         "day_of_week": "continuous",
         "n_returned": "identity"} # target

# Load data
# NOTE(review): load_data is not defined in the visible cells; confirm where it comes from.
data = load_data() #type Data

# Number of categories for later embedding layer
get_number_of_categories = GetNumberOfCategories(data.train)

# scaler (fitted on the training split only)
scalers = Scalers(types)
scalers.fit(data.train)

# parametrize rolling window: 20 input steps, 20 label steps, shifted by 1
offsets = Offsets(20, 20, 1) 
# The original line was missing the "=" and did not parse.
generate_ds = MakeDatasetFromDataFrame(offsets=offsets, batch_size=32, labels=["n_returned"])

# apply transformations: scale every split, then window it into a dataset
ds = data.map(scalers.transform).map(generate_ds)

### DeepAR

In [6]:
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow.keras.layers import LSTM
from tensorflow import keras
from tensorflow.keras.layers import TimeDistributed

def negative_log_likelihood(y, p_y):
    """Loss function: the negated ``p_y.log_prob(y)``."""
    log_likelihood = p_y.log_prob(y)
    return -log_likelihood


class Gaussian(tf.keras.layers.Layer):
    """Keras layer that turns a parameter tensor into a Normal distribution.

    Channel 0 of the last axis is used as the mean (``loc``); channel 1 is
    passed through softplus and used as the standard deviation (``scale``).
    """

    def __init__(self, name="probabilistic_gauss", **kwargs):
        # Zero-argument super(): the original named an undefined class
        # (GaussProbabilistic), so constructing the layer raised NameError.
        super().__init__(name=name, **kwargs)

    def call(self, xs):
        """Return a ``tfp.distributions.Normal`` parameterized by ``xs``."""
        def distribution(xs):
            # The trailing ``None`` keeps a last axis of size 1 on both parameters.
            loc = xs[..., 0, None]
            scale = tf.math.softplus(xs[..., 1, None])
            return tfp.distributions.Normal(loc=loc, scale=scale)
        return tfp.layers.DistributionLambda(distribution, name='normal_dist')(xs)
    
    
    
class NegativeBinomial(tf.keras.layers.Layer):
    """Keras layer that turns a parameter tensor into a NegativeBinomial distribution.

    Channel 0 of the last axis is passed through softplus and used as
    ``total_count``; channel 1 is used as ``logits``.
    """

    def __init__(self, name="probabilistic_negative_binomial", **kwargs):
        super().__init__(name=name, **kwargs)

    def call(self, xs):
        """Return a ``tfp.distributions.NegativeBinomial`` parameterized by ``xs``."""
        def make_distribution(params):
            # The trailing ``None`` keeps a last axis of size 1 on both parameters.
            return tfp.distributions.NegativeBinomial(
                total_count=tf.math.softplus(params[..., 0, None]),
                logits=params[..., 1, None],
            )

        to_distribution = tfp.layers.DistributionLambda(
            make_distribution, name='negative_binomial_dist'
        )
        return to_distribution(xs)
        
    
    
    
    
class DeepAr(keras.Model):
    """Autoregressive LSTM model with a probabilistic output.

    An LSTM cell encodes the input sequence, a dense layer maps every hidden
    state to two distribution parameters, and ``NegativeBinomial`` turns those
    into a distribution object.

    Args:
        hidden_dim: number of units of the LSTM cell.
        out_dim: stored on the instance; not used by the visible code.
        name: Keras model name.
    """

    def __init__(self, hidden_dim, out_dim=1, name="deepar", **kwargs):
        # Zero-argument super(): the original named an undefined class
        # (DeepArTfp), so constructing the model raised NameError.
        super().__init__(name=name, **kwargs)
        self.hidden_dim = hidden_dim
        self.out_dim = out_dim
        self.lstm_cell = keras.layers.LSTMCell(hidden_dim)
        self.distribution_params = keras.layers.Dense(2, activation="linear")
        self.distribution = NegativeBinomial()

    def encode(self, inputs):
        """Run the LSTM over ``inputs``.

        Returns the full output sequence plus the final hidden and cell
        states ``(xs, h, c)``.
        """
        # The RNN wrapper is rebuilt per call but always wraps the same cell,
        # so the weights are shared across calls.
        rnn = keras.layers.RNN(self.lstm_cell, return_state=True, return_sequences=True)
        xs, h, c = rnn(inputs)
        return xs, h, c

    def call(self, inputs):
        """Forward pass: one output distribution per time step."""
        xs, _, _ = self.encode(inputs)
        xs = TimeDistributed(self.distribution_params)(xs)
        xs = self.distribution(xs)
        return xs

    def sample_trace(self, inputs, warmup_period):
        """Sample single trace from likelihood function.

        The first ``warmup_period`` steps of ``inputs`` are encoded and a
        first prediction is sampled from the final hidden state. Each
        remaining step is then fed through the LSTM cell with the previous
        sample as its last feature. Returns a tensor laid out as
        (batch, time, target) with ``t_final - warmup_period`` time steps.
        """
        predictions = []
        warmup = inputs[:, :warmup_period, :]
        # states == [h, c]; h is the hidden state after the last warm-up step.
        _, *states = self.encode(warmup)
        prediction = self.distribution(self.distribution_params(states[0])).sample(1)[0, ...]
        predictions.append(prediction) # (batch, target)
        t_final = inputs.shape[1]
        # NOTE(review): the loop starts at warmup_period + 1, so the covariates
        # of step warmup_period are never fed to the cell - confirm this
        # alignment between covariates and sampled target is intended.
        for t in range(warmup_period + 1, t_final):
            # All features except the last one; the last slot is replaced by
            # the previous sample.
            covariates = tf.cast(inputs[:, t, :-1], prediction.dtype)
            xs = tf.concat([covariates, prediction], axis=1)
            ys, states = self.lstm_cell(xs, states=states)
            prediction = self.distribution(self.distribution_params(ys)).sample(1)[0, ...]
            predictions.append(prediction)
        predictions = tf.stack(predictions, axis=0) #(time, batch, target)
        predictions = tf.transpose(predictions, [1,0,2]) #(batch, time, target)
        return predictions


In [None]:
# Usage
deep_ar = DeepAr(60)
mse = tf.keras.metrics.MeanSquaredError()
mae = tf.keras.metrics.MeanAbsoluteError()
metrics = [mse, mae]
# NOTE(review): `optimizer` is not defined in the visible cells; it must be
# created before this line runs.
deep_ar.compile(optimizer=optimizer, loss=negative_log_likelihood, metrics=metrics)
# Validate on the validation split of the windowed datasets in `ds`
# (the original referenced an undefined name `tds`).
deep_ar.fit(ds.train, validation_data=ds.validation, epochs=50)