In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Anomaly Detection Exploration\n",
    "\n",
    "This notebook compares three statistical anomaly detection techniques (Z-score, IQR, and moving average) on synthetic time series data with known anomalies."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.metrics import f1_score, precision_score, recall_score\n",
    "\n",
    "# Add project root to path\n",
    "module_path = os.path.abspath(os.path.join('..'))\n",
    "if module_path not in sys.path:\n",
    "    sys.path.append(module_path)\n",
    "\n",
    "from anomaly_detection.utils.data_loader import generate_synthetic_data\n",
    "from anomaly_detection.algorithms.statistical import z_score_detection, iqr_detection, moving_average_detection\n",
    "from anomaly_detection.visualization.plots import plot_time_series_with_anomalies"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generate Synthetic Data for Testing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Seed NumPy's global RNG so a Restart & Run All reproduces the same series.\n",
    "# NOTE(review): assumes generate_synthetic_data draws from np.random's global\n",
    "# state - if it exposes its own seed argument, pass the seed there instead.\n",
    "np.random.seed(42)\n",
    "\n",
    "# Generate synthetic time series data with known anomalies\n",
    "df, true_anomalies = generate_synthetic_data(n_points=1000, anomaly_percentage=0.05)\n",
    "\n",
    "# Report the frame dimensions and the count of true anomaly flags\n",
    "print(f\"Data shape: {df.shape}\")\n",
    "print(f\"Number of true anomalies: {sum(true_anomalies)}\")\n",
    "\n",
    "# Last expression of the cell: rich display of the first few rows\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Visualize the Original Data with True Anomalies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Overlay the known anomaly flags on the series; use_plotly=False is passed\n",
    "# and the figure is shown through plt.show()\n",
    "plot_options = dict(\n",
    "    title=\"Synthetic Time Series with True Anomalies\",\n",
    "    use_plotly=False,\n",
    ")\n",
    "fig = plot_time_series_with_anomalies(df, true_anomalies, **plot_options)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test Different Anomaly Detection Algorithms"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1. Z-Score Method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Z-score cut-off, defined once so the detector call and the plot title cannot drift apart\n",
    "Z_THRESHOLD = 3.0\n",
    "\n",
    "# Detect anomalies using Z-score method\n",
    "z_anomalies = z_score_detection(df['value'], threshold=Z_THRESHOLD)\n",
    "\n",
    "# Plot results\n",
    "fig = plot_time_series_with_anomalies(\n",
    "    df,\n",
    "    z_anomalies,\n",
    "    title=f\"Anomaly Detection using Z-Score Method (threshold={Z_THRESHOLD})\",\n",
    "    use_plotly=False\n",
    ")\n",
    "plt.show()\n",
    "\n",
    "# Score the detections against the known anomaly flags\n",
    "precision = precision_score(true_anomalies, z_anomalies)\n",
    "recall = recall_score(true_anomalies, z_anomalies)\n",
    "f1 = f1_score(true_anomalies, z_anomalies)\n",
    "\n",
    "print(\"Z-Score Detection Performance:\")\n",
    "print(f\"Precision: {precision:.4f}\")\n",
    "print(f\"Recall: {recall:.4f}\")\n",
    "print(f\"F1 Score: {f1:.4f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. IQR Method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# IQR multiplier, defined once so the detector call and the plot title cannot drift apart\n",
    "IQR_K = 1.5\n",
    "\n",
    "# Detect anomalies using IQR method\n",
    "iqr_anomalies = iqr_detection(df['value'], k=IQR_K)\n",
    "\n",
    "# Plot results\n",
    "fig = plot_time_series_with_anomalies(\n",
    "    df,\n",
    "    iqr_anomalies,\n",
    "    title=f\"Anomaly Detection using IQR Method (k={IQR_K})\",\n",
    "    use_plotly=False\n",
    ")\n",
    "plt.show()\n",
    "\n",
    "# Score the detections against the known anomaly flags\n",
    "precision = precision_score(true_anomalies, iqr_anomalies)\n",
    "recall = recall_score(true_anomalies, iqr_anomalies)\n",
    "f1 = f1_score(true_anomalies, iqr_anomalies)\n",
    "\n",
    "print(\"IQR Detection Performance:\")\n",
    "print(f\"Precision: {precision:.4f}\")\n",
    "print(f\"Recall: {recall:.4f}\")\n",
    "print(f\"F1 Score: {f1:.4f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3. Moving Average Method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Moving-average parameters, defined once so the detector call and the plot title cannot drift apart\n",
    "MA_WINDOW = 20\n",
    "MA_THRESHOLD = 2.0\n",
    "\n",
    "# Detect anomalies using Moving Average method\n",
    "ma_anomalies = moving_average_detection(df['value'], window=MA_WINDOW, threshold=MA_THRESHOLD)\n",
    "\n",
    "# Plot results\n",
    "fig = plot_time_series_with_anomalies(\n",
    "    df,\n",
    "    ma_anomalies,\n",
    "    title=f\"Anomaly Detection using Moving Average Method (window={MA_WINDOW}, threshold={MA_THRESHOLD})\",\n",
    "    use_plotly=False\n",
    ")\n",
    "plt.show()\n",
    "\n",
    "# Score the detections against the known anomaly flags\n",
    "precision = precision_score(true_anomalies, ma_anomalies)\n",
    "recall = recall_score(true_anomalies, ma_anomalies)\n",
    "f1 = f1_score(true_anomalies, ma_anomalies)\n",
    "\n",
    "print(\"Moving Average Detection Performance:\")\n",
    "print(f\"Precision: {precision:.4f}\")\n",
    "print(f\"Recall: {recall:.4f}\")\n",
    "print(f\"F1 Score: {f1:.4f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Comparison of Methods"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Compare detection results: one column of anomaly flags per method\n",
    "detections = {\n",
    "    'True Anomalies': true_anomalies,\n",
    "    'Z-Score': z_anomalies,\n",
    "    'IQR': iqr_anomalies,\n",
    "    'Moving Average': ma_anomalies\n",
    "}\n",
    "results = pd.DataFrame(detections)\n",
    "\n",
    "# Summarize detection counts\n",
    "print(\"Number of anomalies detected by each method:\")\n",
    "print(f\"True anomalies: {results['True Anomalies'].sum()}\")\n",
    "for method in ('Z-Score', 'IQR', 'Moving Average'):\n",
    "    print(f\"{method}: {results[method].sum()}\")\n",
    "\n",
    "# Plot comparison of methods: (detections key, panel title, marker colour)\n",
    "panels = [\n",
    "    ('True Anomalies', 'True Anomalies', 'red'),\n",
    "    ('Z-Score', 'Z-Score Detected Anomalies', 'green'),\n",
    "    ('IQR', 'IQR Detected Anomalies', 'purple'),\n",
    "    ('Moving Average', 'Moving Average Detected Anomalies', 'orange'),\n",
    "]\n",
    "fig, axes = plt.subplots(len(panels), 1, figsize=(14, 16), sharex=True)\n",
    "\n",
    "for ax, (column, panel_title, colour) in zip(axes, panels):\n",
    "    # Normalise the flags to a positional boolean array before indexing.\n",
    "    # NOTE(review): the detectors' return type is not visible from this notebook;\n",
    "    # df.iloc[...] raises on a boolean Series mask and would treat 0/1 integers\n",
    "    # as row positions, so the raw values are masked positionally instead.\n",
    "    mask = np.asarray(detections[column], dtype=bool)\n",
    "    ax.plot(df.index, df['value'])\n",
    "    ax.scatter(df.index[mask], df['value'].to_numpy()[mask], color=colour)\n",
    "    ax.set_title(panel_title)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}