# 🚀 Advanced Anomaly Detection Pipeline (Direct Reporting)

This notebook implements a robust anomaly detection pipeline with direct reporting:
1. **Transactional Analysis**: Anomalies detected at the individual record level.
2. **Monthly Analysis**: Anomalies detected in monthly aggregated spending.
3. **7-Day Unit Analysis**: Anomalies detected in consecutive, non-overlapping 7-day aggregates.
4. **Direct Reporting**: Results are extracted directly from their respective analysis dataframes.

In [None]:
import pandas as pd
import numpy as np
import json
from preprocess import preprocess_data
from model_dbscan import detect_anomalies, clean_and_format_records

# Read the raw transaction export and parse 'Txn Date' as day-first dates in
# one chained step, replacing the text column with real datetimes.
df_raw = pd.read_csv('data/RawDataset.csv').assign(
    **{'Txn Date': lambda frame: pd.to_datetime(frame['Txn Date'], dayfirst=True)}
)

print(f"Loaded {df_raw.shape[0]} records.")

## 1️⃣ Individual Transaction Analysis

In [None]:
# Step 1: turn every raw transaction into the representation the detector
# consumes.
df_transactional_final = preprocess_data(df_raw)

# Step 2: score the processed rows and store the labels on the raw frame.
# A label of -1 is what the rest of this notebook treats as "anomalous".
# NOTE(review): assumes the labels come back one per row of df_raw, in the
# same order -- confirm against preprocess_data / detect_anomalies.
# eps / min_samples are presumably DBSCAN parameters (per the module name).
df_raw['is_anomaly'] = detect_anomalies(
    df_transactional_final,
    eps=0.3,
    min_samples=10,
)

print(f"Anomalous individual transactions: {int(df_raw['is_anomaly'].eq(-1).sum())}")

## 2️⃣ Monthly Analysis

In [None]:
# Step 1: bucket the raw transactions on 'Txn Date' with a month-end ('ME')
# grouper. Debit and Credit are summed per bucket; only the first
# Description seen in each bucket is kept.
df_monthly_grouped = (
    df_raw
    .groupby(pd.Grouper(key='Txn Date', freq='ME'))
    .agg(
        Debit=('Debit', 'sum'),
        Credit=('Credit', 'sum'),
        Description=('Description', 'first'),
    )
    .reset_index()
)

# Step 2: preprocess the monthly rows and label each month (-1 is counted as
# anomalous below).
df_monthly_processed = preprocess_data(df_monthly_grouped)
df_monthly_grouped['is_anomaly'] = detect_anomalies(
    df_monthly_processed,
    eps=0.5,
    min_samples=2,
)

print(f"Anomalous Months identified: {int(df_monthly_grouped['is_anomaly'].eq(-1).sum())}")

## 3️⃣ Weekly (7-Day Units) Analysis

In [None]:
# Step 1: bucket the raw transactions on 'Txn Date' into 7-day units
# (freq='7D'). Debit and Credit are summed per unit; only the first
# Description seen in each unit is kept.
df_weekly_grouped = (
    df_raw
    .groupby(pd.Grouper(key='Txn Date', freq='7D'))
    .agg(
        Debit=('Debit', 'sum'),
        Credit=('Credit', 'sum'),
        Description=('Description', 'first'),
    )
    .reset_index()
)

# Step 2: preprocess the 7-day rows and label each unit (-1 is counted as
# anomalous below).
df_weekly_processed = preprocess_data(df_weekly_grouped)
df_weekly_grouped['is_anomaly'] = detect_anomalies(
    df_weekly_processed,
    eps=0.5,
    min_samples=2,
)

print(f"Anomalous 7-day units identified: {int(df_weekly_grouped['is_anomaly'].eq(-1).sum())}")

## 4️⃣ Consolidated Results Extraction
Extracting results directly from their dataframes without complex merges or transaction mapping.

In [None]:
def format_summary(df):
    """Return the rows flagged as anomalous as a list of plain dicts.

    Only rows whose ``is_anomaly`` value equals -1 are kept. If a
    ``Txn Date`` column is present it is rendered as ``YYYY-MM-DD`` strings,
    and the ``is_anomaly`` flag itself is left out of the returned records.
    The input frame is not modified.
    """
    # Work on a copy so the date formatting never touches the caller's frame.
    flagged = df.loc[df['is_anomaly'] == -1].copy()

    if 'Txn Date' in flagged.columns:
        flagged['Txn Date'] = flagged['Txn Date'].dt.strftime('%Y-%m-%d')

    without_flag = flagged.drop(columns=['is_anomaly'])
    return without_flag.to_dict(orient='records')

# Assemble the final report, one entry per analysis granularity. The
# transactional entry is built by the project helper from the index labels of
# the flagged raw rows; the weekly and monthly entries come straight from
# their own grouped frames.
anomaly_report = dict(
    transactional=clean_and_format_records(
        df_raw,
        df_raw.index[df_raw['is_anomaly'].eq(-1)],
    ),
    weekly=format_summary(df_weekly_grouped),
    monthly=format_summary(df_monthly_grouped),
)

# Print how many anomalies each granularity produced.
print(
    "\nFinal Summary:",
    f"Transactional: {len(anomaly_report['transactional'])}",
    f"Weekly Periods: {len(anomaly_report['weekly'])}",
    f"Monthly Periods: {len(anomaly_report['monthly'])}",
    sep="\n",
)

## 5️⃣ Sample Record Output

In [None]:
# Show the first transactional anomaly, when there is one, as indented JSON.
sample_records = anomaly_report['transactional']
if sample_records:
    print("Sample Transactional Anomaly:")
    # Kept as a separate print so the header is emitted even if
    # serialization of the record fails.
    print(json.dumps(sample_records[0], indent=2))