## Metrics
I'll put any custom-made metrics here, for storage.

In [None]:
import sys
import numpy as np  # to use these methods on tensorflow tensors (converted to np arrays)
import tensorflow as tf

In [3]:
def refresh(obj):
    try:
        obj.reset_state()
    except:
        pass
    try:
        obj.reset_states()
    except:
        pass

### Generator function to handle masked sequential data

In [2]:
def gen_metric_per_cat(metric_fn=tf.keras.metrics.CategoricalAccuracy(), mask_layer=None, cat_idx = None):
    """
    A generator function for series' of datapoints that returns a metric
    which takes into account a mask of the inputs
    
    Parameters
    ----------
    metric_fn : metric-type class object
        metric to use per each caetegory
    mask_layer : layer.Masking object
        masking layer used to throw out padding datapoints
    cat_idx : integer, default None
        Sets a particular category to be returned rather than an average of all
    
    Returns
    -------
    metric_per_pt : function
        The generated metric taking into account the values of metric and mask_layer
    """
    
    def metric_per_cat(y_true, y_pred):
        """
        A metric for a series of datapoints, each of which needs its own separate evaluation.

        Parameters
        ----------
        y_true: tensorflow tensor of shape (batch size, series length, num_categories)
            The true values to compare with. For datapoint in the series,
            the category information should be one-hot encoded
            SHOULD BE MASKED AS PER MASK_LAYER'S EXPECTATIONS
        y_pred: tensorflow tensor of shape (batch size, series length, num_categories)
            The predicted values. For datapoint in the series,
            the category information should be expressed in probabilities (fractions of 1)    

        Returns
        -------
        metric_sum: tensorflow tensor of shape (num_categories,)
            The metric, averaged over the number of categories returned

        """
        eps = np.finfo(float).eps  # to avoid div by zero
        # batch or series, who cares
        num_cat = y_true.shape[-1]
        yf_true = y_true.numpy().reshape(-1,num_cat)
        yf_pred = y_pred.numpy().reshape(-1, num_cat)
        num_per_cat = [max(np.sum(yf_true[:,i] == 1), eps) for i in range(num_cat)]  # protect agst div by 0

        if cat_idx is not None:  # only calcuate metric for this single category
            num_per_cat = num_per_cat[cat_idx]
            metric_sum = 0
            
            if mask_layer is not None:
                mask = mask_layer.compute_mask(yf_true)
                y_t = tf.boolean_mask(yf_true, mask)
                y_p = tf.boolean_mask(yf_pred, mask)
                for true, pred in zip(y_t, y_p):  # iterates along first axis
                    refresh(metric_fn)
                    success = (true[cat_idx] != 0) and (metric_fn(true,pred) == 1)  # 1 indicates success
                    metric_sum += float(success)/num_per_cat 
            else:
                for true, pred in zip(yf_true, yf_pred):
                    refresh(metric_fn)
                    success = (true[cat_idx] != 0) and (metric_fn(true,pred) == 1)  # 1 indicates success
                    metric_sum += float(success)/num_per_cat    
            return metric_sum
        

        metrics = np.zeros(shape=(num_cat,))
        if mask_layer is not None:
            mask = mask_layer.compute_mask(yf_true)
            y_t = tf.boolean_mask(yf_true, mask)
            y_p = tf.boolean_mask(yf_pred, mask)
            for true, pred in zip(y_t, y_p):  # iterates along first axis
                for i in range(num_cat): # this is a huge mess
                    refresh(metric_fn)
                    success = (true[i] != 0) and (metric_fn(true,pred) == 1)  # 1 indicates success
                    metrics[i] += float(success)/num_per_cat[i]
        else:
            for true, pred in zip(yf_true, yf_pred):
                for i in range(num_cat): # this is a huge mess
                    refresh(metric_fn)
                    success = (true[i] != 0) and (metric_fn(true,pred) == 1)  # 1 indicates success

                    metrics[i] += float(success)/num_per_cat[i]
        metric_sum = np.sum(metrics)/num_cat # ranges from 0 to 1
        return metric_sum
    metric_per_cat.__name__ = 'metric_per_cat_'+str(cat_idx)
    return metric_per_cat

NameError: name 'tf' is not defined