# 1️⃣ Imports & Configuration

In this section, we import the necessary libraries and our custom preprocessing and modeling modules. We also set the parameters for the DBSCAN algorithm.

In [1]:
import pandas as pd
import numpy as np
from preprocess import preprocess_data
from model_dbscan import detect_anomalies

# Configurable parameters for DBSCAN. Both are handed to detect_anomalies()
# in the modeling cell as its `eps` and `min_samples` keyword arguments.
# NOTE(review): presumably these are the DBSCAN neighbourhood radius and the
# minimum neighbour count for a core point -- confirm in model_dbscan.
EPS = 0.3
MIN_SAMPLES = 10

# 2️⃣ Load Raw Dataset

We load the raw CSV file into `df_original`. This dataframe keeps the raw, human-readable transaction columns; the cluster labels and anomaly flags are added to it as new columns in section 5.

In [2]:
# Location of the raw transactions export, relative to the notebook.
raw_data_path = 'data/RawDataset.csv'

# Read the CSV once; later cells attach results to this same frame.
df_original = pd.read_csv(raw_data_path)

n_loaded = len(df_original)
print(f"Loaded {n_loaded} transactions from {raw_data_path}")

# Preview the first rows as the cell output.
df_original.head()

Loaded 29578 transactions from data/RawDataset.csv


Unnamed: 0,Txn Date,Value Date,Description,Ref No./Cheque No.,Debit,Credit,Balance,payment_type,category
0,01/01/2022,01/01/2022,NEFT/CR/952735718030/SALARY,TRANSFER FROM 952735718030,,125213.0,431114.0,neft,salary
1,01/01/2022,01/01/2022,POS/DR/711995879155/MYNTRA,TRANSFER TO 711995879155,1665.43,,429448.57,pos,shopping
2,01/01/2022,01/01/2022,IMPS/DR/821490367535/BIGBAZAAR,TRANSFER TO 821490367535,1563.68,,427884.89,imps,others
3,01/01/2022,01/01/2022,NEFT/DR/183967131867/FLIPKART,TRANSFER TO 183967131867,3419.58,,424465.31,neft,shopping
4,01/01/2022,01/01/2022,UPI/DR/858057510226/FLIPKART,TRANSFER TO 858057510226,6077.98,,418387.33,upi,shopping


# 3️⃣ Preprocessing

We pass the original dataframe through our preprocessing pipeline to handle categorical encoding, scaling, and feature engineering.

In [3]:
# Run the project preprocessing pipeline on the raw transactions; the result
# is what the modeling cell feeds to detect_anomalies().
df_processed = preprocess_data(df_original)

# The second shape entry is the number of columns, reported as the feature count.
n_features = df_processed.shape[1]
print(f"Preprocessing complete. Feature count: {n_features}")

# Preview the first processed rows as the cell output.
df_processed.head()

Preprocessing complete. Feature count: 13


Unnamed: 0,Debit,Credit,payment_type_imps,payment_type_neft,payment_type_pos,payment_type_upi,category_food,category_medical,category_others,category_recharge,category_salary,category_shopping,category_travel
0,0.0,0.196161,0,1,0,0,0,0,0,0,1,0,0
1,0.001808,0.0,0,0,1,0,0,0,0,0,0,1,0
2,0.001697,0.0,1,0,0,0,0,0,0,0,0,1,0
3,0.003712,0.0,0,1,0,0,0,0,0,0,0,1,0
4,0.006598,0.0,0,0,0,1,0,0,0,0,0,1,0


# 4️⃣ DBSCAN Modeling

We fit the DBSCAN model on the processed features to obtain one cluster label per transaction. Transactions labelled `-1` are then flagged as anomalies (`1`), and all others as normal (`0`).

In [4]:
# One label per processed row, produced by the project's DBSCAN wrapper using
# the EPS / MIN_SAMPLES settings from the configuration cell.
labels = detect_anomalies(
    df_processed,
    eps=EPS,
    min_samples=MIN_SAMPLES,
)

# Anomaly indicator: 1 where the label equals -1, 0 everywhere else.
is_noise = labels == -1
isAnomaly = is_noise.astype(int)

# 5️⃣ Attach Results to Original Data

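We attach the resulting cluster labels and anomaly flags back to the original dataframe as two new columns. This step assumes the preprocessing pipeline kept every row, in the same order as `df_original`, so that each label lines up with its transaction.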

In [5]:
# Verify row alignment before attaching. The labels were computed from
# df_processed, so a different row count means preprocessing dropped or added
# rows and the labels no longer describe df_original one-to-one.
if len(labels) != len(df_original):
    raise ValueError(
        f"Cannot attach results: got {len(labels)} labels for "
        f"{len(df_original)} transactions."
    )

# A pandas Series is aligned by index on column assignment; a differing index
# would silently yield NaN labels instead of raising, so reject it explicitly.
if isinstance(labels, pd.Series) and not labels.index.equals(df_original.index):
    raise ValueError(
        "Cannot attach results: label index differs from df_original index."
    )

# Add the raw cluster id and the 0/1 anomaly flag as new columns.
df_original["cluster_label"] = labels
df_original["isAnomaly"] = isAnomaly

print("Results attached to df_original.")

Results attached to df_original.


# 6️⃣ Extract Anomalous Transactions

We filter the original dataframe to extract only those transactions marked as anomalies.

In [6]:
# Keep only the rows flagged as anomalies. The explicit copy makes the subset
# independent of df_original, so later edits to it cannot trigger
# SettingWithCopyWarning or be mistaken for edits to the full frame.
anomaly_mask = df_original["isAnomaly"] == 1
df_anomalies = df_original.loc[anomaly_mask].copy()

total_txn = len(df_original)
total_anomalies = len(df_anomalies)

# Guard against an empty dataset: without it the division raises
# ZeroDivisionError before any summary is printed.
percentage = (total_anomalies / total_txn) * 100 if total_txn else 0.0

print(f"Total transactions:  {total_txn}")
print(f"Total anomalies:     {total_anomalies}")
print(f"Percentage anomalies: {percentage:.2f}%")

Total transactions:  29578
Total anomalies:     5
Percentage anomalies: 0.02%


# 7️⃣ Output

We display the anomalous transactions. `df_anomalies` is the final output of this notebook.

In [7]:
# Bare trailing expression: the notebook renders the frame as this cell's output.
df_anomalies

Unnamed: 0,Txn Date,Value Date,Description,Ref No./Cheque No.,Debit,Credit,Balance,payment_type,category,cluster_label,isAnomaly
3687,05/05/2022,05/05/2022,NEFT/CR/463226414075/SALARY,TRANSFER FROM 463226414075,733459.54,,36104525.44,neft,salary,-1,1
7159,26/08/2022,26/08/2022,IMPS/DR/610546955696/SWIGGY,TRANSFER TO 610546955696,,526245.57,51674880.34,imps,food,-1,1
10108,01/12/2022,01/12/2022,IMPS/DR/767182967913/FLIPKART,TRANSFER TO 767182967913,921220.81,,38599069.89,imps,shopping,-1,1
14392,24/04/2023,24/04/2023,IMPS/DR/171635404827/UBER,TRANSFER TO 171635404827,,638316.95,24105482.22,imps,transport,-1,1
21573,21/12/2023,21/12/2023,POS/DR/990787561818/SWIGGY,TRANSFER TO 990787561818,772926.84,,2765841.55,pos,food,-1,1
