# Hyper Spectral Anomaly Detection
## Credit Card Fraud
<sub><sub><sub>Copyright 2025, Battelle Energy Alliance, LLC, ALL RIGHTS RESERVED</sub></sub></sub>

Dataset source: https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud?resource=download

In [None]:
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

path = "../../HSA_Classes"
sys.path.append(path)
from sklearn.preprocessing import MaxAbsScaler
import matplotlib.image as mpimg

from PIL import Image
from loguru import logger
import Model as hsa_model
import DataSet as hsa_dataset
import Viz as hsa_viz
from torch.utils.data import DataLoader

# Remove the handlers currently registered on the loguru logger, then route
# output to a log file that only records CRITICAL-level messages.
logger.remove()
logger.add(sink="HSA_log.log", level="CRITICAL")
import MultiFilter as hsa_multifilter

In [None]:
# Read the credit-card CSV from the working directory and keep only its first
# 10,000 rows as the working subset used by the cells below.
path = "creditcard.csv"
df = pd.read_csv(path)
preprocessed_df = df.iloc[:10000]
print(preprocessed_df.shape)

## Instantiate
Instantiate the model and the dataloader, passing the preprocessed data as a NumPy array.

In [None]:
# Hyperparameters forwarded to HSA_model below.
anomaly_std_tolerance = 1.2
penalty_ratio = 0.75
cutoff_distance = 2
# Plain float on purpose: a trailing comma after the literal would make this a
# one-element tuple, (1e-5,), instead of a number.
converge_toll = 1e-5

batch_size = 1000
iterations = 10000  # not referenced anywhere in this cell

# Build the model from the hyperparameters above, sharing the notebook's logger.
model = hsa_model.HSA_model(
    penalty_ratio=penalty_ratio,
    cutoff_distance=cutoff_distance,
    converge_toll=converge_toll,
    anomaly_std_tolerance=anomaly_std_tolerance,
    logger=logger,
    affinity_matrix_iterations=20,
    lr=2.7,
    multifilter_flag=0,
)
# Wrap the DataFrame subset (converted to a NumPy array) in the project's
# dataset class so that a torch DataLoader can batch it.
dataset = hsa_dataset.HSA_dataset(
    preprocessed_np=preprocessed_df.to_numpy(), logger=logger
)
dataloader = DataLoader(dataset, batch_size=batch_size)

In [None]:
import sys

# Make the parent directory importable so that the Pipeline module resolves.
path = "../"
sys.path.append(path)
import Pipeline
from loguru import logger
import pandas as pd

# Settings handed to HSA_pipeline; the first eight are passed positionally in
# exactly this order.
penalty_ratio = 0.9
cutoff_distance = 1.2
lr = 2.7
anomaly_std_tolerance = 1.2
bin_count = 3
max_spawn_dummies = 30
percent_variance_explained = 1
min_additional_percent_variance_exp = 0
# NOTE(review): this variable used to hold "TRACE" but was never used; the call
# below hard-coded "DEBUG". It now holds the level that was actually in effect
# and is passed through, so there is a single source of truth. Change it here
# if TRACE output is wanted.
logging_level = "DEBUG"

pipe = Pipeline.HSA_pipeline(
    penalty_ratio,
    cutoff_distance,
    lr,
    anomaly_std_tolerance,
    bin_count,
    max_spawn_dummies,
    percent_variance_explained,
    min_additional_percent_variance_exp,
    logger=logger,
    logging_level=logging_level,
    base_directory="./",
    num_workers=0,
    unique_id_str="",
)

# Run the pipeline on the DataFrame subset prepared earlier in the notebook.
results_df = pipe.pipeline(preprocessed_df, 0)

In [None]:
# Show the rows of the pipeline output whose "Class" value is 1 (presumably the
# dataset's fraud label - confirm against the dataset description).
display(results_df.loc[results_df["Class"] == 1])
# The count below covers every row of results_df, not just the rows shown above.
print(f"Number of Anomalous Predictions: {len(results_df)}")