# 🚀 Advanced Anomaly Detection Pipeline (Direct Reporting)

This notebook implements a robust anomaly detection pipeline with direct reporting:
1. **Transactional Analysis**: Anomalies detected at the individual record level.
2. **Monthly Analysis**: Anomalies detected in monthly aggregated spending.
3. **7-Day Unit Analysis**: Anomalies detected in aggregates over consecutive, non-overlapping 7-day bins.
4. **Direct Reporting**: Results are extracted directly from their respective analysis dataframes.

In [1]:
import pandas as pd
import numpy as np
import json
from preprocess import preprocess_data
from model_dbscan import detect_anomalies, clean_and_format_records

# Read the raw export, then parse 'Txn Date' as day-first dates so the
# time-based grouping in later cells works on real datetimes.
df_raw = pd.read_csv('data/RawDataset.csv')
df_raw = df_raw.assign(**{'Txn Date': pd.to_datetime(df_raw['Txn Date'], dayfirst=True)})

print(f"Loaded {df_raw.shape[0]} records.")

Loaded 29578 records.


## 1️⃣ Individual Transaction Analysis

In [2]:
# Step 1: build the model-ready frame from every raw transaction.
# NOTE(review): if this cell is re-run, df_raw already carries 'is_anomaly'
# from the previous run - confirm preprocess_data ignores that column.
df_transactional_final = preprocess_data(df_raw)

# Step 2: store the detector's labels straight on the raw frame.
# NOTE(review): assumes one label per row, in df_raw's row order - confirm.
df_raw['is_anomaly'] = detect_anomalies(df_transactional_final, eps=0.3, min_samples=10)

# Rows labelled -1 are the ones counted as anomalous.
print(f"Anomalous individual transactions: {int(df_raw['is_anomaly'].eq(-1).sum())}")

Anomalous individual transactions: 5


## 2️⃣ Monthly Analysis

In [3]:
# Step 1: collapse the raw transactions into month-end bins.
# Debit and Credit are totalled; Description keeps only the first
# transaction's text in each bin, so it is a sample rather than a summary.
df_monthly_grouped = (
    df_raw
    .groupby(pd.Grouper(key='Txn Date', freq='ME'))
    .agg(
        Debit=('Debit', 'sum'),
        Credit=('Credit', 'sum'),
        Description=('Description', 'first'),
    )
    .reset_index()
)

# Step 2: run the monthly rows through the same preprocessing and detector.
df_monthly_processed = preprocess_data(df_monthly_grouped)
df_monthly_grouped['is_anomaly'] = detect_anomalies(df_monthly_processed, eps=0.5, min_samples=2)

# Rows labelled -1 are the ones counted as anomalous.
print(f"Anomalous Months identified: {int(df_monthly_grouped['is_anomaly'].eq(-1).sum())}")

Anomalous Months identified: 15


## 3️⃣ Weekly (7-Day Units) Analysis

In [4]:
# Step 1: collapse the raw transactions into consecutive 7-day bins.
# Debit and Credit are totalled; Description keeps only the first
# transaction's text in each bin, so it is a sample rather than a summary.
df_weekly_grouped = (
    df_raw
    .groupby(pd.Grouper(key='Txn Date', freq='7D'))
    .agg(
        Debit=('Debit', 'sum'),
        Credit=('Credit', 'sum'),
        Description=('Description', 'first'),
    )
    .reset_index()
)

# Step 2: run the 7-day rows through the same preprocessing and detector.
df_weekly_processed = preprocess_data(df_weekly_grouped)
df_weekly_grouped['is_anomaly'] = detect_anomalies(df_weekly_processed, eps=0.5, min_samples=2)

# Rows labelled -1 are the ones counted as anomalous.
print(f"Anomalous 7-day units identified: {int(df_weekly_grouped['is_anomaly'].eq(-1).sum())}")

Anomalous 7-day units identified: 31


## 4️⃣ Consolidated Results Extraction
Extracting results directly from their dataframes without complex merges or transaction mapping.

In [5]:
def format_summary(df):
    """Return the anomalous rows of ``df`` as a list of JSON-safe dicts.

    Rows whose ``is_anomaly`` value equals -1 are selected from a copy, so the
    caller's frame is never modified.

    Args:
        df: DataFrame with an ``is_anomaly`` column. If a ``Txn Date`` column
            is present it must be datetime-typed, because the ``.dt``
            accessor is used to format it.

    Returns:
        list[dict]: One dict per anomalous row, keyed by column name and
        without the ``is_anomaly`` flag. ``Txn Date`` is rendered as
        ``YYYY-MM-DD``. Missing scalar values (NaN, or NaT turned into NaN by
        the date formatting) are returned as ``None``.

    Raises:
        KeyError: If ``df`` has no ``is_anomaly`` column.
    """
    anomalies = df[df['is_anomaly'] == -1].copy()
    if 'Txn Date' in anomalies.columns:
        anomalies['Txn Date'] = anomalies['Txn Date'].dt.strftime('%Y-%m-%d')
    records = anomalies.drop(columns=['is_anomaly']).to_dict(orient='records')

    def _json_safe(value):
        # json.dumps writes missing floats as a bare NaN token, which is not
        # valid JSON; None serialises to null instead. Non-scalars are left
        # alone because pd.isna would return an array for them.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return None
        return value

    return [
        {column: _json_safe(value) for column, value in record.items()}
        for record in records
    ]

# Row labels of the transactions flagged with -1 in the transactional pass.
flagged_transaction_index = df_raw.index[df_raw['is_anomaly'] == -1]

# One entry per analysis level, each taken straight from its own dataframe.
anomaly_report = {
    "transactional": clean_and_format_records(df_raw, flagged_transaction_index),
    "weekly": format_summary(df_weekly_grouped),
    "monthly": format_summary(df_monthly_grouped),
}

# Print how many anomalies each level contributed.
print("\nFinal Summary:")
for summary_label, report_key in (
    ("Transactional", "transactional"),
    ("Weekly Periods", "weekly"),
    ("Monthly Periods", "monthly"),
):
    print(f"{summary_label}: {len(anomaly_report[report_key])}")


Final Summary:
Transactional: 5
Weekly Periods: 31
Monthly Periods: 15


## 5️⃣ Sample Record Output

In [9]:
# Show the first record of each report section; sections with no anomalies
# are skipped.
for section_key, section_label in (
    ('transactional', 'Transactional'),
    ('weekly', 'Weekly'),
    ('monthly', 'Monthly'),
):
    section_records = anomaly_report[section_key]
    if section_records:
        print(f"Sample {section_label} Anomaly:")
        print(json.dumps(section_records[0], indent=2))

# Bare last expression: the notebook renders the whole report below the samples.
anomaly_report

Sample Transactional Anomaly:
{
  "Txn Date": "2022-05-05",
  "Value Date": "05/05/2022",
  "Description": "NEFT/CR/463226414075/SALARY",
  "Ref No./Cheque No.": "TRANSFER FROM 463226414075",
  "Debit": 733459.54,
  "Credit": NaN,
  "Balance": 36104525.44,
  "payment_type": "neft",
  "category": "salary",
  "is_anomaly": -1
}
Sample Weekly Anomaly:
{
  "Txn Date": "2022-01-01",
  "Debit": 1164824.07,
  "Credit": 2856087.0,
  "Description": "NEFT/CR/952735718030/SALARY"
}
Sample Monthly Anomaly:
{
  "Txn Date": "2022-02-28",
  "Debit": 2392185.93,
  "Credit": 10433176.0,
  "Description": "NEFT/DR/344747805726/ZOMATO"
}


{'transactional': [{'Txn Date': '2022-05-05',
   'Value Date': '05/05/2022',
   'Description': 'NEFT/CR/463226414075/SALARY',
   'Ref No./Cheque No.': 'TRANSFER FROM 463226414075',
   'Debit': 733459.54,
   'Credit': nan,
   'Balance': 36104525.44,
   'payment_type': 'neft',
   'category': 'salary',
   'is_anomaly': -1},
  {'Txn Date': '2022-08-26',
   'Value Date': '26/08/2022',
   'Description': 'IMPS/DR/610546955696/SWIGGY',
   'Ref No./Cheque No.': 'TRANSFER TO 610546955696',
   'Debit': nan,
   'Credit': 526245.57,
   'Balance': 51674880.34,
   'payment_type': 'imps',
   'category': 'food',
   'is_anomaly': -1},
  {'Txn Date': '2022-12-01',
   'Value Date': '01/12/2022',
   'Description': 'IMPS/DR/767182967913/FLIPKART',
   'Ref No./Cheque No.': 'TRANSFER TO 767182967913',
   'Debit': 921220.81,
   'Credit': nan,
   'Balance': 38599069.89,
   'payment_type': 'imps',
   'category': 'shopping',
   'is_anomaly': -1},
  {'Txn Date': '2023-04-24',
   'Value Date': '24/04/2023',
   'Des