From 0b7a9d65f6516b30a342c394a0e5f7d8d12f0d15 Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sat, 22 Nov 2025 11:05:11 +0100 Subject: [PATCH 1/3] Document anomaly detection pipeline architecture --- domains/anomaly-detection/README.md | 27 + .../documentation/Architecture.gv | 194 +++++ .../documentation/Architecture.svg | 702 ++++++++++++++++++ .../anomaly-detection/documentation/README.md | 11 + .../documentation/renderArchitecture.sh | 20 + renovate.json | 1 + scripts/visualization/renderGraphVizSVG.sh | 53 ++ 7 files changed, 1008 insertions(+) create mode 100644 domains/anomaly-detection/README.md create mode 100644 domains/anomaly-detection/documentation/Architecture.gv create mode 100644 domains/anomaly-detection/documentation/Architecture.svg create mode 100644 domains/anomaly-detection/documentation/README.md create mode 100755 domains/anomaly-detection/documentation/renderArchitecture.sh create mode 100755 scripts/visualization/renderGraphVizSVG.sh diff --git a/domains/anomaly-detection/README.md b/domains/anomaly-detection/README.md new file mode 100644 index 000000000..8c5ceb754 --- /dev/null +++ b/domains/anomaly-detection/README.md @@ -0,0 +1,27 @@ +# Anomaly Detection Domain + +This directory contains the implementation and resources related to the Anomaly Detection domain within the Code Graph Analysis Pipeline project. + +## Entry Points + +The following scripts serve as entry points for various anomaly detection tasks and reports. They will be invoked by [AllReports.sh](./../../scripts/reports/compilations/AllReports.sh) an its sub-scripts dynamically by their names. + +- [anomalyDetectionCsv.sh](./anomalyDetectionCsv.sh): Entry point for CSV reports based solely on Graph queries. +- [anomalyDetectionPython.sh](./anomalyDetectionPython.sh): Entry point for Python-based anomaly detection tasks and reports. 
+- [anomalyDetectionVisualization.sh](./anomalyDetectionVisualization.sh): Entry point for Graph visualization reports. +- [anomalyDetectionMarkdown.sh](./anomalyDetectionMarkdown.sh): Entry point for generating the Markdown summary report. + +## Folder Structure + +- [documentation](./documentation): Contains documentation including architecture diagrams. +- [explore](./explore/): Jupyter notebooks for interactive, exploratory anomaly detection analysis. +- [features](./features/): Cypher queries to extract features and run graph algorithms relevant for anomaly detection. +- [graphs](./graphs/): Cypher queries and GraphViz templates for Graph visualizations related to anomaly detection. +- [labels](./labels/): Cypher queries label nodes that represent specific archetypes. +- [queries](./queries/): Cypher queries to identify anomalies based on various (deterministic/explainable) criteria. +- [reset](./reset/): Cypher queries to reset the graph database state related to anomaly detection. +- [summary](./summary/): Markdown templates and resources for generating the summary report. 
+ +## Pipeline Architecture Overview + +![Anomaly Detection Architecture](./documentation/Architecture.svg) \ No newline at end of file diff --git a/domains/anomaly-detection/documentation/Architecture.gv b/domains/anomaly-detection/documentation/Architecture.gv new file mode 100644 index 000000000..24ce04d5d --- /dev/null +++ b/domains/anomaly-detection/documentation/Architecture.gv @@ -0,0 +1,194 @@ +digraph AnomalyDetectionPipeline { + rankdir=LR; + node [fontname="Helvetica", fontsize=10]; + + // Leiden community detection + subgraph cluster_leiden { + label="Leiden Community Detection"; + style=filled; color=lightblue; + node [shape=box, style=filled, fillcolor=white]; + + Tuning_Leiden [label="Tuning\n(Optuna)"]; + Leiden_Gamma [label="gamma", shape=diamond] + Leiden_Theta [label="theta", shape=diamond] + Leiden_Algorithm [label="Leiden Community Detection"]; + CommunityId [label="Community", shape=ellipse]; + } + + // --- Leiden Community Detection relationships --- + Tuning_Leiden -> Leiden_Gamma; + Tuning_Leiden -> Leiden_Theta; + Leiden_Gamma -> Leiden_Algorithm + Leiden_Theta -> Leiden_Algorithm + Leiden_Algorithm -> Tuning_Leiden [label="modularity", style="dashed"] + Leiden_Algorithm -> Tuning_Leiden [label="size", style="dashed"] + Leiden_Algorithm -> CommunityId; + + // Fast Random Projection (FastRP) + subgraph cluster_fastRP { + label="Fast Random Projection (FastRP)"; + style=filled; color=lightpink; + node [shape=box, style=filled, fillcolor=white]; + + Tuning_FastRP [label="Tuning\n(Optuna)"]; + FastRP_Dimension [label="dimension", shape=diamond]; + FastRP_Normalization_Strength [label="normalization strength", shape="diamond"]; + FastRP_Forth_Iteration_Weight [label="forth iteration weight", shape="diamond"]; + FastRP_Algorithm [label="FastRP"]; + NodeEmbeddings [label="Node Embeddings", shape=ellipse]; + } + + // --- FastRP relationships --- + Tuning_FastRP -> FastRP_Dimension; + Tuning_FastRP -> FastRP_Normalization_Strength; + Tuning_FastRP 
-> FastRP_Forth_Iteration_Weight; + FastRP_Dimension -> FastRP_Algorithm; + FastRP_Normalization_Strength -> FastRP_Algorithm; + FastRP_Forth_Iteration_Weight -> FastRP_Algorithm + FastRP_Algorithm -> Tuning_FastRP [label="adjusted mutual info score\n(incl. preview clustering)", style="dashed"] + FastRP_Algorithm -> NodeEmbeddings; + + // Uniform Manifold Approximation and Projection (UMAP) + subgraph cluster_UMAP { + label="Uniform Manifold Approximation and Projection (UMAP)\nDimensionality Reduction for Visualization"; + style=filled; color=lightgrey; + node [shape=box, style=filled, fillcolor=white]; + + UMAP_Algorithm [label="UMAP"]; + UMAP_Coordinates [label="2D Coordinates", shape=ellipse]; + } + + // UMAP relationships + NodeEmbeddings -> UMAP_Algorithm + UMAP_Algorithm -> UMAP_Coordinates + + // HDBSCAN clustering and tuning + subgraph cluster_hdbscan { + label="Hierarchical Density-Based Spatial Clustering (HDBSCAN)"; + style=filled; color=lightgoldenrod; + node [shape=box, style=filled, fillcolor=white]; + + Tuning_HDBSCAN [label="Tuning\n(Optuna)"]; + HDBSCAN_Node [label="HDBSCAN"]; + HDBSCAN_Min_Cluster_Size [label="Min Cluster Size", shape=diamond]; + HDBSCAN_Min_Samples [label="Min Samples", shape=diamond]; + + ClusterLabel [label="Label", shape=ellipse]; + ClusterRadius [label="Radius\n(avg,max)", shape=ellipse]; + ClusterSize [label="Size", shape=ellipse]; + NormDistToMedoid [label="Normalized Distance\nTo Medoid", shape=ellipse]; + ClusterNoise [label="Noise\n(label=-1)", shape=ellipse]; + ClusterProbability [label="Probability", shape=ellipse]; + ClusterApproximationOutlierScore [label="Approximation\nOutlierScore\n(= 1 - Probability)", shape=ellipse]; + } + + // --- Inputs into HDBSCAN --- + CommunityId -> Tuning_HDBSCAN [label="reference"]; + NodeEmbeddings -> HDBSCAN_Node; + + Tuning_HDBSCAN -> HDBSCAN_Min_Cluster_Size + Tuning_HDBSCAN -> HDBSCAN_Min_Samples + HDBSCAN_Min_Cluster_Size -> HDBSCAN_Node; + HDBSCAN_Min_Samples -> HDBSCAN_Node; + + 
HDBSCAN_Node -> Tuning_HDBSCAN [label="adjusted mutual info score", style=dashed]; + + // HDBSCAN outputs (cluster features) + HDBSCAN_Node -> ClusterLabel; + HDBSCAN_Node -> ClusterNoise; + HDBSCAN_Node -> ClusterRadius; + HDBSCAN_Node -> ClusterSize; + HDBSCAN_Node -> NormDistToMedoid; + HDBSCAN_Node -> ClusterProbability; + HDBSCAN_Node -> ClusterApproximationOutlierScore; + + // Graph algorithm based features + subgraph cluster_graph_features { + label="Graph (Algorithm) Features"; + style=filled; color=lightcyan; + node [shape=ellipse, style=filled, fillcolor=white]; + + ArticleRank [label="ArticleRank"]; + PageRank [label="PageRank"]; + PageRank_minus_ArticleRank [label="PageRank -\nArticleRank"]; + BetweennessCentrality [label="Betweenness\nCentrality"]; + LocalClusteringCoefficient [label="Local Clustering\nCoefficient"]; + Degree [label="Degree\n(in, out, sum)"]; + } + + // Anomaly detection model area + subgraph cluster_anomaly { + label="Anomaly Detection Model"; + style=filled; color=lightgreen; penwidth=4; pencolor=green; margin="50,50"; + node [shape=box, style=filled, fillcolor=white]; + + TuningAnomaly [label="Tuning\n(Optuna)"]; + IsolationMinCluster [label="Min Cluster Size", shape=diamond]; + IsolationEstimators [label="n estimators", shape=diamond]; + + ProxyEstimators [label="n estimators", shape=diamond]; + ProxyMaxDepth [label="max depth", shape=diamond]; + + AnomalyStandardizer [label="Standardizer"] + AnomalyPCA [label="Principal Component\nAnalysis (PCA)"] + IsolationForest [label="Isolation Forest\nAnomaly Detector", margin="0.4,0.4"]; + ProxyRandomForest [label="RandomForest\n(Proxy)"]; + AnomalyScore [label="Score", shape=ellipse]; + AnomalyLabel [label="Label", shape=ellipse]; + } + + // Embeddings feed anomaly model + NodeEmbeddings -> AnomalyPCA; + + // HDBSCAN-derived features feed anomaly model + ClusterRadius -> AnomalyStandardizer; + NormDistToMedoid -> AnomalyStandardizer; + ClusterApproximationOutlierScore -> 
AnomalyStandardizer; + + // Graph Algorithm Features feed anomaly model + ArticleRank -> AnomalyStandardizer; + PageRank -> AnomalyStandardizer; + PageRank_minus_ArticleRank -> AnomalyStandardizer; + BetweennessCentrality -> AnomalyStandardizer; + LocalClusteringCoefficient -> AnomalyStandardizer; + Degree -> AnomalyStandardizer; + + // Proxy RandomForest used as a backing/tuning model for the Isolation Forest + TuningAnomaly -> IsolationMinCluster; + TuningAnomaly -> IsolationEstimators; + IsolationMinCluster -> IsolationForest + IsolationEstimators -> IsolationForest + + TuningAnomaly -> ProxyEstimators + TuningAnomaly -> ProxyMaxDepth + ProxyEstimators -> ProxyRandomForest + ProxyMaxDepth -> ProxyRandomForest + + AnomalyStandardizer -> IsolationForest; + AnomalyPCA -> IsolationForest; + IsolationForest -> ProxyRandomForest [label="reference", style="dashed"]; + ProxyRandomForest -> TuningAnomaly [label="f1 score\n(cross validation)", style="dashed"]; + + IsolationForest -> AnomalyLabel + IsolationForest -> AnomalyScore + + // Explainable AI / SHAP + subgraph cluster_explainability { + label="Explainable AI (SHAP)"; + style=filled; color=lavender; + node [shape=note, style=filled, fillcolor=white]; + + SHAP [label="SHAP TreeExplainer"]; + + SHAP_Values [label="Top SHAP Values", shape=ellipse]; + SHAP_Features [label="Top Features", shape=ellipse]; + SHAP_Embedding_Sum [label="Node Embeddings\nSHAP Sum", shape=ellipse]; + } + + // Explainability connections (RandomForest -> SHAP) + ProxyRandomForest -> SHAP; + SHAP -> SHAP_Values; + SHAP -> SHAP_Features; + SHAP -> SHAP_Embedding_Sum; + +} \ No newline at end of file diff --git a/domains/anomaly-detection/documentation/Architecture.svg b/domains/anomaly-detection/documentation/Architecture.svg new file mode 100644 index 000000000..e59b1662b --- /dev/null +++ b/domains/anomaly-detection/documentation/Architecture.svg @@ -0,0 +1,702 @@ + + + + + + +AnomalyDetectionPipeline + + +cluster_leiden + +Leiden Community 
Detection + + +cluster_fastRP + +Fast Random Projection (FastRP) + + +cluster_UMAP + +Uniform Manifold Approximation and Projection (UMAP) +Dimensionality Reduction for Visualization + + +cluster_hdbscan + +Hierarchical Density-Based Spatial Clustering (HDBSCAN) + + +cluster_graph_features + +Graph (Algorithm) Features + + +cluster_anomaly + +Anomaly Detection Model + + +cluster_explainability + +Explainable AI (SHAP) + + + +Tuning_Leiden + +Tuning +(Optuna) + + + +Leiden_Gamma + +gamma + + + +Tuning_Leiden->Leiden_Gamma + + + + + +Leiden_Theta + +theta + + + +Tuning_Leiden->Leiden_Theta + + + + + +Leiden_Algorithm + +Leiden Community Detection + + + +Leiden_Gamma->Leiden_Algorithm + + + + + +Leiden_Theta->Leiden_Algorithm + + + + + +Leiden_Algorithm->Tuning_Leiden + + +modularity + + + +Leiden_Algorithm->Tuning_Leiden + + +size + + + +CommunityId + +Community + + + +Leiden_Algorithm->CommunityId + + + + + +Tuning_HDBSCAN + +Tuning +(Optuna) + + + +CommunityId->Tuning_HDBSCAN + + +reference + + + +Tuning_FastRP + +Tuning +(Optuna) + + + +FastRP_Dimension + +dimension + + + +Tuning_FastRP->FastRP_Dimension + + + + + +FastRP_Normalization_Strength + +normalization strength + + + +Tuning_FastRP->FastRP_Normalization_Strength + + + + + +FastRP_Forth_Iteration_Weight + +forth iteration weight + + + +Tuning_FastRP->FastRP_Forth_Iteration_Weight + + + + + +FastRP_Algorithm + +FastRP + + + +FastRP_Dimension->FastRP_Algorithm + + + + + +FastRP_Normalization_Strength->FastRP_Algorithm + + + + + +FastRP_Forth_Iteration_Weight->FastRP_Algorithm + + + + + +FastRP_Algorithm->Tuning_FastRP + + +adjusted mutual info score +(incl. 
preview clustering) + + + +NodeEmbeddings + +Node Embeddings + + + +FastRP_Algorithm->NodeEmbeddings + + + + + +UMAP_Algorithm + +UMAP + + + +NodeEmbeddings->UMAP_Algorithm + + + + + +HDBSCAN_Node + +HDBSCAN + + + +NodeEmbeddings->HDBSCAN_Node + + + + + +AnomalyPCA + +Principal Component +Analysis (PCA) + + + +NodeEmbeddings->AnomalyPCA + + + + + +UMAP_Coordinates + +2D Coordinates + + + +UMAP_Algorithm->UMAP_Coordinates + + + + + +HDBSCAN_Min_Cluster_Size + +Min Cluster Size + + + +Tuning_HDBSCAN->HDBSCAN_Min_Cluster_Size + + + + + +HDBSCAN_Min_Samples + +Min Samples + + + +Tuning_HDBSCAN->HDBSCAN_Min_Samples + + + + + +HDBSCAN_Node->Tuning_HDBSCAN + + +adjusted mutual info score + + + +ClusterLabel + +Label + + + +HDBSCAN_Node->ClusterLabel + + + + + +ClusterRadius + +Radius +(avg,max) + + + +HDBSCAN_Node->ClusterRadius + + + + + +ClusterSize + +Size + + + +HDBSCAN_Node->ClusterSize + + + + + +NormDistToMedoid + +Normalized Distance +To Medoid + + + +HDBSCAN_Node->NormDistToMedoid + + + + + +ClusterNoise + +Noise +(label=-1) + + + +HDBSCAN_Node->ClusterNoise + + + + + +ClusterProbability + +Probability + + + +HDBSCAN_Node->ClusterProbability + + + + + +ClusterApproximationOutlierScore + +Approximation +OutlierScore +(= 1 - Probability) + + + +HDBSCAN_Node->ClusterApproximationOutlierScore + + + + + +HDBSCAN_Min_Cluster_Size->HDBSCAN_Node + + + + + +HDBSCAN_Min_Samples->HDBSCAN_Node + + + + + +AnomalyStandardizer + +Standardizer + + + +ClusterRadius->AnomalyStandardizer + + + + + +NormDistToMedoid->AnomalyStandardizer + + + + + +ClusterApproximationOutlierScore->AnomalyStandardizer + + + + + +ArticleRank + +ArticleRank + + + +ArticleRank->AnomalyStandardizer + + + + + +PageRank + +PageRank + + + +PageRank->AnomalyStandardizer + + + + + +PageRank_minus_ArticleRank + +PageRank - +ArticleRank + + + +PageRank_minus_ArticleRank->AnomalyStandardizer + + + + + +BetweennessCentrality + +Betweenness +Centrality + + + +BetweennessCentrality->AnomalyStandardizer + + + + + 
+LocalClusteringCoefficient + +Local Clustering +Coefficient + + + +LocalClusteringCoefficient->AnomalyStandardizer + + + + + +Degree + +Degree +(in, out, sum) + + + +Degree->AnomalyStandardizer + + + + + +TuningAnomaly + +Tuning +(Optuna) + + + +IsolationMinCluster + +Min Cluster Size + + + +TuningAnomaly->IsolationMinCluster + + + + + +IsolationEstimators + +n estimators + + + +TuningAnomaly->IsolationEstimators + + + + + +ProxyEstimators + +n estimators + + + +TuningAnomaly->ProxyEstimators + + + + + +ProxyMaxDepth + +max depth + + + +TuningAnomaly->ProxyMaxDepth + + + + + +IsolationForest + +Isolation Forest +Anomaly Detector + + + +IsolationMinCluster->IsolationForest + + + + + +IsolationEstimators->IsolationForest + + + + + +ProxyRandomForest + +RandomForest +(Proxy) + + + +ProxyEstimators->ProxyRandomForest + + + + + +ProxyMaxDepth->ProxyRandomForest + + + + + +AnomalyStandardizer->IsolationForest + + + + + +AnomalyPCA->IsolationForest + + + + + +IsolationForest->ProxyRandomForest + + +reference + + + +AnomalyScore + +Score + + + +IsolationForest->AnomalyScore + + + + + +AnomalyLabel + +Label + + + +IsolationForest->AnomalyLabel + + + + + +ProxyRandomForest->TuningAnomaly + + +f1 score +(cross validation) + + + +SHAP + + + +SHAP TreeExplainer + + + +ProxyRandomForest->SHAP + + + + + +SHAP_Values + +Top SHAP Values + + + +SHAP->SHAP_Values + + + + + +SHAP_Features + +Top Features + + + +SHAP->SHAP_Features + + + + + +SHAP_Embedding_Sum + +Node Embeddings +SHAP Sum + + + +SHAP->SHAP_Embedding_Sum + + + + + + diff --git a/domains/anomaly-detection/documentation/README.md b/domains/anomaly-detection/documentation/README.md new file mode 100644 index 000000000..0432ff144 --- /dev/null +++ b/domains/anomaly-detection/documentation/README.md @@ -0,0 +1,11 @@ +# Documentation for Anomaly Detection Domain + +This directory contains resources and documentation related to the Anomaly Detection domain within the Code Graph Analysis Pipeline project. 
+ +## Generate Architecture Diagram + +To generate the architecture diagram for the Anomaly Detection domain, you can use the [renderArchitecture.sh](./renderArchitecture.sh) script in this directory. It utilizes Graphviz to create a visual representations of the anomaly detection pipeline architecture described in [Architecture.gv](./Architecture.gv) to render a SVG file. + +The generated SVG file will also be added to the summary report Appendix section. + +:warning: Currently, the architecture description in `Architecture.gv` is manually maintained. The same applies to the SVG file, that needs to be regenerated manually when changes are made to the `.gv` file. diff --git a/domains/anomaly-detection/documentation/renderArchitecture.sh b/domains/anomaly-detection/documentation/renderArchitecture.sh new file mode 100755 index 000000000..73ef7142f --- /dev/null +++ b/domains/anomaly-detection/documentation/renderArchitecture.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +# Renders the described Graph in Architecture.gv as a SVG image. +# +# Requires renderGraphVizSVG.sh +# +# Fail on any error ("-e" = exit on first error, "-o pipefail" exist on errors within piped commands) +set -o errexit -o pipefail + +## Get this "scripts/reports" directory if not already set +# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution. +# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes. +# This way non-standard tools like readlink aren't needed. +ANOMALY_DETECTION_DOCS_DIR=${ANOMALY_DETECTION_DOCS_DIR:-$(CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)} # Directory containing documentation for the anomaly detection +# Get the "scripts" directory by taking the path of this script and going one directory up. 
+SCRIPTS_DIR=${SCRIPTS_DIR:-"${ANOMALY_DETECTION_DOCS_DIR}/../../../scripts"} # Repository directory containing the shell scripts +# Get the "scripts/visualization" directory. +VISUALIZATION_SCRIPTS_DIR=${VISUALIZATION_SCRIPTS_DIR:-"${SCRIPTS_DIR}/visualization"} # Repository directory containing the shell scripts for visualization + +source "${VISUALIZATION_SCRIPTS_DIR}/renderGraphVizSVG.sh" "${ANOMALY_DETECTION_DOCS_DIR}/Architecture.gv" \ No newline at end of file diff --git a/renovate.json b/renovate.json index 1fbc4eec6..ba0a808af 100644 --- a/renovate.json +++ b/renovate.json @@ -178,6 +178,7 @@ "fileMatch": [ "^scripts/[^/]*\\.sh$", "^scripts/visualization/[^/]*\\.sh$", + "^domains/anomaly-detection/documentation/[^/]*\\.sh$", "^(workflow-templates|\\.github/workflows)\\/[^/]+\\.ya?ml$", "(^|\\/)action\\.ya?ml$]" ], diff --git a/scripts/visualization/renderGraphVizSVG.sh b/scripts/visualization/renderGraphVizSVG.sh new file mode 100755 index 000000000..164cb7ce2 --- /dev/null +++ b/scripts/visualization/renderGraphVizSVG.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash + +# Renders the given GraphViz file as a SVG image. +# +# Fail on any error ("-e" = exit on first error, "-o pipefail" exist on errors within piped commands) +set -o errexit -o pipefail + +# Local constants +SCRIPT_NAME=$(basename "${0}") + +# Read the first unnamed input argument containing the version of the project +inputGvFileName="${1}" + +if [ -z "${inputGvFileName}" ]; then + echo "${SCRIPT_NAME}: Error: Please specify the GraphViz *.gv file as input parameter." + exit 1 +fi + +if [ ! -f "${inputGvFileName}" ]; then + echo "${SCRIPT_NAME}: Error: GraphViz file not found: ${inputGvFileName}" + exit 1 +fi + +number_of_input_file_lines=$(wc -l < "${inputGvFileName}" | awk '{print $1}') +if [ "${number_of_input_file_lines}" -le 1 ]; then + echo "${SCRIPT_NAME}: Info: Input file is empty. Skipping *.svg file generation." + return 0 +fi + +echo "${SCRIPT_NAME}: Rendering ${inputGvFileName}..." 
+ +graphName=$(basename -- "${inputGvFileName}") +graphName="${graphName%.*}" # Remove file extension +graphName=${graphName//-/_} # Replace all dashes in the graphName by underscores +inputGvFilePath=$(dirname "${inputGvFileName}") + +if command -v "dot" &> /dev/null ; then + echo "${SCRIPT_NAME}: Info: Rendering ${inputGvFileName} using preinstalled GraphViz dot command line interface..." + dot -T svg "${inputGvFilePath}/${graphName}.gv" > "${inputGvFilePath}/${graphName}.svg" + return 0 +fi + +if ! command -v "npx" &> /dev/null ; then + echo "${SCRIPT_NAME}: Error: Command npx (to run npm locally) not found. It's needed for Graph visualization with GraphViz." >&2 + exit 1 +fi + +# Run GraphViz command line interface (CLI) wrapped utilizing WASM (WebAssembly) +# to convert the DOT file to SVG operating system independently. +# Use "npm install" first to create local "node_modules" and be able to run it after that in offline mode. +echo "${SCRIPT_NAME}: Info: Rendering ${inputGvFileName} using npx to run GraphViz CLI Web Assembly Wrapper..." 
npm install @hpcc-js/wasm-graphviz-cli@1.6.0 --silent --no-progress --loglevel=error > /dev/null
+npx --yes @hpcc-js/wasm-graphviz-cli@1.6.0 -T svg "${inputGvFileName}" > "${inputGvFilePath}/${graphName}.svg"
SCRIPTS_DIR=${SCRIPTS_DIR:-"${ANOMALY_DETECTION_SUMMARY_DIR}/../../../scripts"} # Repository directory containing the shell scripts @@ -180,6 +182,7 @@ anomaly_detection_finalize_report() { # Collect static Markdown includes (after cleanup to not remove one-liner) cp -f "${ANOMALY_DETECTION_SUMMARY_DIR}/report_no_dependency_data.template.md" "${report_include_directory}/report_no_dependency_data.md" cp -f "${ANOMALY_DETECTION_SUMMARY_DIR}/report_no_anomaly_detection_treemaps.template.md" "${report_include_directory}/report_no_anomaly_detection_treemaps.md" + cp -f "${ANOMALY_DETECTION_DOCS_DIR}/Architecture.svg" "${FULL_REPORT_DIRECTORY}/AnomalyDetectionArchitecture.svg" # Assemble final report by applying includes to the main template cp -f "${ANOMALY_DETECTION_SUMMARY_DIR}/report.template.md" "${FULL_REPORT_DIRECTORY}/report.template.md" diff --git a/domains/anomaly-detection/summary/report.template.md b/domains/anomaly-detection/summary/report.template.md index 407cfc5a1..164bec860 100644 --- a/domains/anomaly-detection/summary/report.template.md +++ b/domains/anomaly-detection/summary/report.template.md @@ -268,3 +268,7 @@ archetypes: * Cluster Radius (avg, max) * Cluster Size * Node Embedding (PCA 20–35 dims) + +### 6.3 Architecture Diagram + +![Anomaly Detection Architecture](./AnomalyDetectionArchitecture.svg) \ No newline at end of file From a53aa52deca0a9a6193e7dcff4c2157ddd0a716c Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sun, 23 Nov 2025 10:40:59 +0100 Subject: [PATCH 3/3] Add anomaly detector input feature visualization --- ...yDetectionIsolationForestExploration.ipynb | 289 +++++++++++++++++- .../tunedAnomalyDetectionExplained.py | 142 ++++++++- 2 files changed, 419 insertions(+), 12 deletions(-) diff --git a/domains/anomaly-detection/explore/AnomalyDetectionIsolationForestExploration.ipynb b/domains/anomaly-detection/explore/AnomalyDetectionIsolationForestExploration.ipynb index 757966892..c354ba369 100644 --- 
a/domains/anomaly-detection/explore/AnomalyDetectionIsolationForestExploration.ipynb +++ b/domains/anomaly-detection/explore/AnomalyDetectionIsolationForestExploration.ipynb @@ -67,6 +67,7 @@ "from optuna import Study, create_study\n", "\n", "import shap # Explainable AI tool\n", + "import umap\n", "\n", "import matplotlib.pyplot as plot" ] @@ -921,6 +922,7 @@ " cluster_label_column: str = \"clusterLabel\",\n", " cluster_medoid_column: str = \"clusterMedoid\",\n", " cluster_size_column: str = \"clusterSize\",\n", + " cluster_color_map: str = \"tab20\",\n", " anomaly_label_column: str = \"anomalyLabel\",\n", " anomaly_score_column: str = \"anomalyScore\",\n", " size_column: str = \"articleRank\",\n", @@ -929,6 +931,8 @@ " annotate_top_n_anomalies: int = 10,\n", " annotate_top_n_non_anomalies: int = 5,\n", " annotate_top_n_clusters: int = 20,\n", + " percentile_of_distance_to_center: float = 0.8,\n", + " no_cluster_coloring: bool = False,\n", ") -> None:\n", " \n", " if clustering_visualization_dataframe.empty:\n", @@ -966,7 +970,7 @@ "\n", " distances_to_center = calculate_distances_to_center(clustering_visualization_dataframe, x_position_column, y_position_column)\n", " top_anomaly_columns_mask = mask_top_anomaly_columns(clustering_visualization_dataframe, anomaly_score_column, annotate_top_n_anomalies)\n", - " clustering_visualization_dataframe_zoomed = zoom_into_center_while_preserving_masked_rows(clustering_visualization_dataframe, distances_to_center, top_anomaly_columns_mask)\n", + " clustering_visualization_dataframe_zoomed = zoom_into_center_while_preserving_masked_rows(clustering_visualization_dataframe, distances_to_center, top_anomaly_columns_mask, percentile_of_distance_to_center)\n", "\n", " cluster_anomalies = clustering_visualization_dataframe_zoomed[clustering_visualization_dataframe_zoomed[anomaly_label_column] == 1]\n", " cluster_without_anomalies = 
clustering_visualization_dataframe_zoomed[clustering_visualization_dataframe_zoomed[anomaly_label_column] != 1]\n", @@ -982,7 +986,7 @@ " y=cluster_noise[y_position_column],\n", " s=cluster_noise[size_column] * 60 + 2,\n", " color='lightgrey',\n", - " alpha=0.4,\n", + " alpha=0.3,\n", " label='Noise'\n", " )\n", "\n", @@ -991,9 +995,9 @@ " x=cluster_non_noise[x_position_column],\n", " y=cluster_non_noise[y_position_column],\n", " s=cluster_non_noise[size_column] * 60 + 2,\n", - " c=cluster_non_noise[cluster_label_column],\n", - " cmap='tab20',\n", - " alpha=0.7,\n", + " c=cluster_non_noise[cluster_label_column] if not no_cluster_coloring else 'silver',\n", + " cmap=cluster_color_map if not no_cluster_coloring else None,\n", + " alpha=0.5,\n", " label='Clusters'\n", " )\n", "\n", @@ -1085,7 +1089,7 @@ " plot.annotate(\n", " text=f\"#{index + 1}: {truncate(row[code_unit_column])} ({row[anomaly_score_column]:.3f})\",\n", " xy=(row[x_position_column], row[y_position_column]),\n", - " xytext=(5, 5 + (index % 5) * 10),\n", + " xytext=(5, 5 + (index % 5) * 15),\n", " color='red',\n", " **plot_annotation_style\n", " )\n", @@ -1103,12 +1107,210 @@ "plot_anomalies(java_package_anomaly_detection_features, title_prefix=\"Java Package Anomalies\")" ] }, + { + "cell_type": "markdown", + "id": "77dee89a", + "metadata": {}, + "source": [ + "#### 1.4b Plot features with highlighted top anomalies in a 2D scatter plot (UMAP reduction)\n", + "\n", + "This plot visualizes the input features used by the Isolation Forest anomaly detector in a 2D scatter plot. 
Dimensionality reduction is performed with UMAP to illustrate how the detector \"sees\" the data.\n", + "\n", + "- Red: detected anomalies \n", + "- Lightgrey: code units labeled as noise by HDBSCAN \n", + "- Greys: cluster labels \n", + "- Size: Article Rank (larger = more important)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c30a29f8", + "metadata": {}, + "outputs": [], + "source": [ + "def prepare_features_for_2d_visualization(features: np.ndarray, anomaly_detection_results: pd.DataFrame) -> pd.DataFrame:\n", + " \"\"\"\n", + " Reduces the dimensionality of the features down to two dimensions for 2D visualization using UMAP.\n", + " see https://umap-learn.readthedocs.io\n", + " \"\"\"\n", + "\n", + " # Check if features are empty\n", + " if features is None or len(features) == 0:\n", + " print(\"No feature data available\")\n", + " return anomaly_detection_results\n", + "\n", + " # Check if features and anomaly_detection_results have compatible lengths\n", + " if features.shape[0] != anomaly_detection_results.shape[0]:\n", + " raise ValueError(\"Features and anomaly_detection_results must have the same number of samples.\")\n", + "\n", + " # Use UMAP to reduce the dimensionality to 2D for visualization\n", + " umap_reducer = umap.UMAP(n_components=2, min_dist=0.3, random_state=47, n_jobs=1)\n", + " two_dimensional_features = umap_reducer.fit_transform(features)\n", + " \n", + " # Convert to dense numpy array (works for both sparse and dense input)\n", + " feature_coordinates = np.asarray(two_dimensional_features)\n", + "\n", + " anomaly_detection_results['featureVisualizationX'] = feature_coordinates[:, 0]\n", + " anomaly_detection_results['featureVisualizationY'] = feature_coordinates[:, 1]\n", + "\n", + " return anomaly_detection_results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f02b5dec", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_features_with_anomalies(\n", + " 
clustering_visualization_dataframe: pd.DataFrame,\n", + " title_prefix: str,\n", + " code_unit_column: str = \"shortCodeUnitName\",\n", + " cluster_label_column: str = \"clusterLabel\",\n", + " anomaly_label_column: str = \"anomalyLabel\",\n", + " anomaly_score_column: str = \"anomalyScore\",\n", + " size_column: str = \"articleRank\",\n", + " x_position_column: str = 'embeddingVisualizationX',\n", + " y_position_column: str = 'embeddingVisualizationY',\n", + " annotate_top_n_anomalies: int = 10,\n", + " annotate_fully_top_n_anomalies: int = 3,\n", + ") -> None:\n", + " \n", + " if clustering_visualization_dataframe.empty:\n", + " print(\"No projected data to plot available\")\n", + " return\n", + " \n", + " def truncate(text: str, max_length: int = 22):\n", + " if len(text) <= max_length:\n", + " return text\n", + " return text[:max_length - 3] + \"...\"\n", + "\n", + "\n", + " cluster_anomalies = clustering_visualization_dataframe[clustering_visualization_dataframe[anomaly_label_column] == 1]\n", + " cluster_without_anomalies = clustering_visualization_dataframe[clustering_visualization_dataframe[anomaly_label_column] != 1]\n", + " cluster_noise = cluster_without_anomalies[cluster_without_anomalies[cluster_label_column] == -1]\n", + " cluster_non_noise = cluster_without_anomalies[cluster_without_anomalies[cluster_label_column] != -1]\n", + "\n", + " plot.figure(figsize=(10, 10))\n", + " plot.title(f\"{title_prefix} (size={size_column}, red=anomaly, blue=noise)\", pad=20)\n", + "\n", + " # Plot noise (from clustering)\n", + " plot.scatter(\n", + " x=cluster_noise[x_position_column],\n", + " y=cluster_noise[y_position_column],\n", + " s=cluster_noise[size_column] * 20 + 2,\n", + " color='lightblue',\n", + " alpha=0.4,\n", + " label='Noise'\n", + " )\n", + "\n", + " # Plot clusters\n", + " plot.scatter(\n", + " x=cluster_non_noise[x_position_column],\n", + " y=cluster_non_noise[y_position_column],\n", + " s=cluster_non_noise[size_column] * 20 + 2,\n", + " 
color='lightgrey',\n", + " alpha=0.6,\n", + " label='Clusters'\n", + " )\n", + "\n", + " # Plot anomalies\n", + " plot.scatter(\n", + " x=cluster_anomalies[x_position_column],\n", + " y=cluster_anomalies[y_position_column],\n", + " s=cluster_anomalies[size_column] * 10 + 2,\n", + " c=cluster_anomalies[anomaly_score_column],\n", + " cmap=\"Reds\",\n", + " alpha=0.95,\n", + " label='Anomaly',\n", + " )\n", + "\n", + " # Annotate top anomalies\n", + " anomalies = cluster_anomalies.sort_values(by=anomaly_score_column, ascending=False).reset_index(drop=True).head(annotate_top_n_anomalies)\n", + " anomalies_in_reversed_order = anomalies.iloc[::-1] # plot most important annotations last to overlap less important ones\n", + " for dataframe_index, row in anomalies_in_reversed_order.iterrows():\n", + " index = typing.cast(int, dataframe_index)\n", + " text = f\"{index + 1}\"\n", + " xytext = (5, 5)\n", + " if index < annotate_fully_top_n_anomalies:\n", + " text = f\"{text}: {truncate(row[code_unit_column])}\"\n", + " xytext = (5, 5 + (index % 4) * 12)\n", + "\n", + " plot.annotate(\n", + " text=text,\n", + " xy=(row[x_position_column], row[y_position_column]),\n", + " xytext=xytext,\n", + " color='red',\n", + " **plot_annotation_style\n", + " )\n", + "\n", + " plot.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6af9eb9", + "metadata": {}, + "outputs": [], + "source": [ + "java_package_anomaly_detection_features = prepare_features_for_2d_visualization(\n", + " java_package_anomaly_detection_features_prepared,\n", + " java_package_anomaly_detection_features\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a679562", + "metadata": {}, + "outputs": [], + "source": [ + "plot_features_with_anomalies(\n", + " java_package_anomaly_detection_features,\n", + " title_prefix=\"Java Package Anomalies (2D Feature Visualization)\",\n", + " x_position_column='featureVisualizationX',\n", + " 
y_position_column='featureVisualizationY',\n", + " annotate_top_n_anomalies=5,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "f9832cc9", + "metadata": {}, + "source": [ + "##### 1.4b/2 Plot features zoomed with highlighted top anomalies in a 2D scatter plot (UMAP reduction)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "acbe2034", + "metadata": {}, + "outputs": [], + "source": [ + "plot_anomalies(\n", + " java_package_anomaly_detection_features,\n", + " title_prefix=\"Java Package Anomalies (2D Feature Visualization Zoomed)\",\n", + " x_position_column='featureVisualizationX',\n", + " y_position_column='featureVisualizationY',\n", + " annotate_top_n_clusters=0,\n", + " annotate_top_n_non_anomalies=0,\n", + " percentile_of_distance_to_center=0.7,\n", + " no_cluster_coloring=True\n", + ")" + ] + }, { "cell_type": "markdown", "id": "0f1b08b6", "metadata": {}, "source": [ - "#### 1.4b Plot anomalies solely based on embeddings" + "#### 1.4c Plot anomalies solely based on embeddings" ] }, { @@ -1914,12 +2116,83 @@ "plot_anomalies(java_type_anomaly_detection_features, title_prefix=\"Java Type Anomalies\")" ] }, + { + "cell_type": "markdown", + "id": "6eb52ab0", + "metadata": {}, + "source": [ + "#### 2.4b Plot features with highlighted top anomalies in a 2D scatter plot (UMAP reduction)\n", + "\n", + "This plot visualizes the input features used by the Isolation Forest anomaly detector in a 2D scatter plot. 
Dimensionality reduction is performed with UMAP to illustrate how the detector \"sees\" the data.\n", + "\n", + "- Red: detected anomalies \n", + "- Lightblue: code units labeled as noise by HDBSCAN \n", + "- Lightgrey: clusters \n", + "- Size: Article Rank (larger = more important)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "129cced0", + "metadata": {}, + "outputs": [], + "source": [ + "java_type_anomaly_detection_features = prepare_features_for_2d_visualization(\n", + " java_type_anomaly_detection_features_prepared,\n", + " java_type_anomaly_detection_features\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f05ef08c", + "metadata": {}, + "outputs": [], + "source": [ + "plot_features_with_anomalies(\n", + " java_type_anomaly_detection_features,\n", + " title_prefix=\"Java Type Anomalies (2D Feature Visualization)\",\n", + " x_position_column='featureVisualizationX',\n", + " y_position_column='featureVisualizationY',\n", + " annotate_top_n_anomalies=30\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "3472efed", + "metadata": {}, + "source": [ + "##### 2.4b/2 Plot features zoomed with highlighted top anomalies in a 2D scatter plot (UMAP reduction)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c44f04e9", + "metadata": {}, + "outputs": [], + "source": [ + "plot_anomalies(\n", + " java_type_anomaly_detection_features,\n", + " title_prefix=\"Java Type Anomalies (2D Feature Visualization Zoomed)\",\n", + " x_position_column='featureVisualizationX',\n", + " y_position_column='featureVisualizationY',\n", + " annotate_top_n_clusters=0,\n", + " annotate_top_n_non_anomalies=0,\n", + " percentile_of_distance_to_center=0.7,\n", + " no_cluster_coloring=True\n", + ")" + ] + }, { "cell_type": "markdown", "id": "05275be7", "metadata": {}, "source": [ - "#### 2.4.b Plot anomalies solely based on embeddings" + "#### 2.4c Plot anomalies solely based on embeddings" ] }, { diff --git 
a/domains/anomaly-detection/tunedAnomalyDetectionExplained.py b/domains/anomaly-detection/tunedAnomalyDetectionExplained.py index 722aa997d..83daea96c 100755 --- a/domains/anomaly-detection/tunedAnomalyDetectionExplained.py +++ b/domains/anomaly-detection/tunedAnomalyDetectionExplained.py @@ -37,9 +37,10 @@ import shap # Explainable AI tool +import umap # Dimensionality reduction for visualization import matplotlib.pyplot as plot -from visualization import annotate_each, annotate_each_with_index, scale_marker_sizes, zoom_into_center_while_preserving_top_scores +from visualization import annotate_each, annotate_each_with_index, scale_marker_sizes, zoom_into_center_while_preserving_top_scores, plot_annotation_style class Parameters: required_parameters_ = ["projection_node_label"] @@ -149,10 +150,17 @@ def get_file_path(name: str, parameters: Parameters, extension: str = 'svg') -> return name +def get_neo4j_password() -> str: + password = os.environ.get("NEO4J_INITIAL_PASSWORD") + if password is None: + raise RuntimeError("Environment variable NEO4J_INITIAL_PASSWORD is not set. Please set it to the Neo4j password.") + return password + + def get_graph_database_driver() -> Driver: driver = GraphDatabase.driver( uri="bolt://localhost:7687", - auth=("neo4j", os.environ.get("NEO4J_INITIAL_PASSWORD")) + auth=("neo4j", get_neo4j_password()) ) driver.verify_connectivity() return driver @@ -544,6 +552,34 @@ def add_anomaly_detection_results_to_features( return features +def prepare_features_for_2d_visualization(features: np.ndarray, anomaly_detection_results: pd.DataFrame) -> pd.DataFrame: + """ + Reduces the dimensionality of the features down to two dimensions for 2D visualization using UMAP. 
+ see https://umap-learn.readthedocs.io + """ + + # Check if features are empty + if features is None or len(features) == 0: + print("No feature data available") + return anomaly_detection_results + + # Check if features and anomaly_detection_results have compatible lengths + if features.shape[0] != anomaly_detection_results.shape[0]: + raise ValueError("Features and anomaly_detection_results must have the same number of samples.") + + # Use UMAP to reduce the dimensionality to 2D for visualization + umap_reducer = umap.UMAP(n_components=2, min_dist=0.3, random_state=47, n_jobs=1) + two_dimensional_features = umap_reducer.fit_transform(features) + + # Convert to dense numpy array (works for both sparse and dense input) + feature_coordinates = np.asarray(two_dimensional_features) + + anomaly_detection_results['featureVisualizationX'] = feature_coordinates[:, 0] + anomaly_detection_results['featureVisualizationY'] = feature_coordinates[:, 1] + + return anomaly_detection_results + + def get_top_10_anomalies( anomaly_detected_features: pd.DataFrame, anomaly_label_column: str = "anomalyLabel", @@ -609,7 +645,7 @@ def get_common_plot_parameters(data: pd.DataFrame) -> dict: cluster_non_noise = cluster_without_anomalies[cluster_without_anomalies[cluster_label_column] != -1] plot.figure(figsize=(10, 10)) - plot.title(f"{title_prefix} (size={size_column}, main-color=cluster, red=anomaly, green=non-anomaly)", pad=20) + plot.title(f"{title_prefix} Anomalies (size={size_column}, main-color=cluster, red=anomaly, green=non-anomaly)", pad=20) # Plot noise (from clustering) plot.scatter( @@ -678,6 +714,93 @@ def get_common_plot_parameters(data: pd.DataFrame) -> dict: plot.close() +def plot_features_with_anomalies( + features_to_visualize: pd.DataFrame, + title_prefix: str, + plot_file_path: str, + code_unit_column: str = "shortCodeUnitName", + cluster_label_column: str = "clusterLabel", + anomaly_label_column: str = "anomalyLabel", + anomaly_score_column: str = "anomalyScore", + 
size_column: str = "articleRank", + x_position_column: str = 'featureVisualizationX', + y_position_column: str = 'featureVisualizationY', + annotate_top_n_anomalies: int = 5, + annotate_fully_top_n_anomalies: int = 3, +) -> None: + + if features_to_visualize.empty: + print("No projected data to plot available") + return + + def truncate(text: str, max_length: int = 22): + if len(text) <= max_length: + return text + return text[:max_length - 3] + "..." + + features_to_visualize.loc[:, size_column + '_scaled'] = scale_marker_sizes(features_to_visualize[size_column]) + def get_common_plot_parameters(data: pd.DataFrame) -> dict: + return { + "x": data[x_position_column], + "y": data[y_position_column], + "s": data[size_column + '_scaled'], + } + cluster_anomalies = features_to_visualize[features_to_visualize[anomaly_label_column] == 1] + cluster_without_anomalies = features_to_visualize[features_to_visualize[anomaly_label_column] != 1] + cluster_noise = cluster_without_anomalies[cluster_without_anomalies[cluster_label_column] == -1] + cluster_non_noise = cluster_without_anomalies[cluster_without_anomalies[cluster_label_column] != -1] + + plot.figure(figsize=(10, 10)) + plot.title(f"{title_prefix} Anomaly Detection Features (size={size_column}, red=anomaly, blue=noise)", pad=20) + + # Plot noise (from clustering) + plot.scatter( + **get_common_plot_parameters(cluster_noise), + color='lightblue', + alpha=0.4, + label='Noise' + ) + + # Plot clusters + plot.scatter( + **get_common_plot_parameters(cluster_non_noise), + color='lightgrey', + alpha=0.6, + label='Clusters' + ) + + # Plot anomalies + plot.scatter( + **get_common_plot_parameters(cluster_anomalies), + c=cluster_anomalies[anomaly_score_column], + cmap="Reds", + alpha=0.95, + label='Anomaly', + ) + + # Annotate top anomalies + anomalies = cluster_anomalies.sort_values(by=anomaly_score_column, ascending=False).reset_index(drop=True).head(annotate_top_n_anomalies) + anomalies_in_reversed_order = anomalies.iloc[::-1] # 
plot most important annotations last to overlap less important ones + for dataframe_index, row in anomalies_in_reversed_order.iterrows(): + index = typing.cast(int, dataframe_index) + text = f"{index + 1}" + xytext = (5, 5) + if index < annotate_fully_top_n_anomalies: + text = f"{text}: {truncate(row[code_unit_column])}" + xytext = (5, 5 + (index % 4) * 12) + + plot.annotate( + text=text, + xy=(row[x_position_column], row[y_position_column]), + xytext=xytext, + color='red', + **plot_annotation_style + ) + + plot.savefig(plot_file_path) + plot.close() + + DType = typing.TypeVar("DType", bound=np.generic) Numpy_Array = numpy_typing.NDArray[DType] Two_Dimensional_Vector = typing.Annotated[Numpy_Array, typing.Literal[2]] @@ -1050,10 +1173,21 @@ def output_top_shap_explained_global_features_as_markdown_table( plot_anomalies( features_to_visualize=features, - title_prefix="Java Package Anomalies", + title_prefix=parameters.get_title_prefix(), plot_file_path=get_file_path("Anomalies", parameters) ) +features = prepare_features_for_2d_visualization( + features_prepared, + features +) + +plot_features_with_anomalies( + features_to_visualize=features, + title_prefix=parameters.get_title_prefix(), + plot_file_path=get_file_path("AnomalyDetectionFeatures", parameters), +) + if parameters.is_verbose(): feature_importances = pd.Series(anomaly_detection_results.feature_importances, index=feature_names).sort_values(ascending=False) print("tunedAnomalyDetectionExplained: Most influential features for anomaly detection according to the proxy model directly without SHAP (top 10):")