# Creating a Reconstruction Error Explainer:

### Imports:

In [31]:
import numpy as np
import pandas as pd
import tensorflow as tf

### Reconstruction Error Explainer Class:

In [32]:
class ReconstructionErrorExplainer:
    """
    Decomposed an Autoencoders reconstruction error into:
        - Feature-level errors
        - Percentage contribution per feature
        - Optional behavioral group-level aggregation
    """
    
    def __init__(self, feature_names: list, feature_groups: dict | None=None) -> None:
        """
        Initializes the explainer.
        
        Args:
            feature_names: An ordered list of feature names matching model input order
            feature_groups: A dictionary mapping of group names to feature names
            
        Returns:
            None:
        """
        self.feature_names = feature_names
        self.feature_groups = feature_groups
        self.feature_index_map = {name:idx for idx, name in enumerate(feature_names)}
        
        
    def compute_feature_error(self, input_data: np.ndarray, reconstructed_data: np.ndarray) -> np.ndarray:
        """
        Computes squared reconstruction error per feature.
        
        Args:
            input_data: The original input features of shape: (n_samples, n_features)
            reconstructed_data: The reconstructed features generated by autoencoders of shape: (n_samples, n_features)
            
        Returns:
            np.ndarray: The squared error per feature of shape: (n_samples, n_features)
        """
        return np.square(input_data - reconstructed_data)
    
    
    def compute_total_error(self, feature_error: np.ndarray) -> np.ndarray:
        """
        Computes the total reconstruction error per sample.
        
        Args:
            feature_error: An array containing feature-level errors
            
        Returns:
            np.ndarray: The total reconstruction error per sample
        """
        return np.sum(feature_error, axis=1)
    
    
    def compute_contribution_ratio(self, feature_error: np.ndarray, total_error: np.ndarray) -> np.ndarray:
        """
        Computes the percentage contributions fo each feature to the total error.
        
        Args:
            feature_error: An array containing feature-level errors
            total_errors: An array containing total reconstruction error per sample
            
        Returns:
            np.ndarray: The contribition ratio of the shape: (n_samples, n_features)
        """
        total_error_safe = np.where(total_error == 0, 1e-8, total_error)
        contribution_ratio = feature_error / total_error_safe[:, np.newaxis]
        return contribution_ratio
    
    
    def compute_group_error(self, feature_error: np.ndarray) -> np.ndarray | None:
        """
        Aggregates feature errors by behavioral groups.
        
        Args:
            feature_error: An array containing feature-level errors
            
        Returns:
            np.ndarray: An aggregate array of shape: (n_samples, n_groups)
        """
        if self.feature_groups is None:
            return None
        
        group_errors = []
        
        for group, features in self.feature_groups.items():
            indices = [self.feature_index_map[f] for f in features if f in self.feature_index_map]
            
            group_error = np.sum(feature_error[:, indices], axis=1)
            group_errors.append(group_error)
            
        return np.vstack(group_errors).T
    
    
    def explain(self, input_data: np.ndarray, model: tf.keras.Model) -> dict:
        """
        Generates a full reconstruction explaination.
        
        Args:
            input_data: The input data
            model: A trained autoencoder model
            
        Returns:
            dict: A dictionary containing feature error, total error, contribution ration, and group error
        """
        x_pred = model.predict(input_data, verbose=0)
        
        feature_error = self.compute_feature_error(input_data, x_pred)
        total_error = self.compute_total_error(feature_error)
        contribution_ratio = self.compute_contribution_ratio(feature_error, total_error)
        group_error = self.compute_group_error(feature_error)
        
        error_dict = {
            "feature_error": feature_error,
            "total_error": total_error,
            "contribution_error": contribution_ratio,
            "group_error": group_error
        }
        
        return error_dict

### Importing UEBA-Enhanced Dataset and Trained Autoencoder:

In [33]:
# Load the UEBA-enhanced dataset; the first CSV column is used as the index.
# Forward slashes keep the relative path valid on Windows, Linux and macOS
# (a backslash-separated path only resolves on Windows).
ueba_dataset = pd.read_csv("processed_datasets/ueba_dataset.csv", index_col=0)

In [None]:
# Drop the identifier columns once and derive both the ordered feature names
# and the matching matrix from the same frame, so the two cannot drift apart.
feature_frame = ueba_dataset.drop(columns=["user", "pc", "day"])
feature_names = list(feature_frame.columns)
ueba_matrix = feature_frame.to_numpy()

In [39]:
# Load the saved autoencoder. Forward slashes keep the relative path valid on
# Windows, Linux and macOS (a backslash-separated path only resolves on Windows).
ae = tf.keras.models.load_model("encoders/encoder_model_1/autoencoder_model.keras")

### Generating Full Reconstruction Explanation:

In [36]:
# Build the explainer over the dataset's feature columns; no behavioral
# groups are configured, so group-level aggregation is skipped.
explainer = ReconstructionErrorExplainer(
    feature_names=feature_names,
    feature_groups=None,
)

In [40]:
# Run the autoencoder on the feature matrix and decompose its reconstruction error.
results = explainer.explain(input_data=ueba_matrix, model=ae)

In [45]:
# Display the explanation dictionary as the cell's output.
results

{'feature_error': array([[0.54257391, 1.40332114, 0.27750547, ..., 1.17902104, 0.13667638,
         0.1153994 ],
        [1.11045698, 1.19524966, 0.00328188, ..., 0.02273014, 0.03092399,
         0.15253239],
        [0.98528747, 2.35740055, 0.12684505, ..., 0.180008  , 0.42574556,
         0.81301101],
        ...,
        [0.03874103, 0.70573429, 0.04112582, ..., 0.00595269, 0.10109683,
         0.36450937],
        [0.28353114, 0.92256466, 0.22585872, ..., 0.04736444, 0.03664206,
         0.27288927],
        [0.58773306, 1.82199373, 0.04544549, ..., 0.01182985, 0.15459207,
         0.54032999]]),
 'total_error': array([ 68.20318875,  64.12118575,  82.45024811, ..., 101.69993162,
        101.1093351 , 139.55160915]),
 'contribution_error': array([[7.95525727e-03, 2.05755943e-02, 4.06880488e-03, ...,
         1.72868902e-02, 2.00395883e-03, 1.69199420e-03],
        [1.73180980e-02, 1.86404797e-02, 5.11824561e-05, ...,
         3.54487201e-04, 4.82274130e-04, 2.37881423e-03],
        