<a href="https://colab.research.google.com/github/Shreyanshd23/Cedit-Card-Fraud-Detection/blob/main/Credit-Card-Anomaly-Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score
from scipy.stats import multivariate_normal

# Gaussian anomaly detection on the credit-card data set.
#
# Pipeline: load the CSV, coerce the feature and label columns to numeric,
# fit a multivariate Gaussian to the rows labelled as normal (Class == 0),
# score every row with the fitted density, flag the lowest-density rows as
# anomalies and report the F1 score against the true labels.

# --- Configuration ---------------------------------------------------------
# Absolute path of the input CSV; adjust it when running in an environment
# where the file lives elsewhere.
DATA_PATH = '/content/creditcard.csv'

# Percentile of the density scores used as the anomaly threshold, on
# np.percentile's 0-100 scale: 0.3 flags the lowest-density 0.3% of rows.
# The report below derives its label from this constant so the two cannot
# drift apart.
THRESHOLD_PERCENTILE = 0.3

# --- Load and clean --------------------------------------------------------
df = pd.read_csv(DATA_PATH)

# Use the PCA features V1..V28. 'Amount' is currently excluded; append
# ['Amount'] to this list to include it.
features = [f'V{i}' for i in range(1, 29)]

# Force every feature column to numeric; unparseable values become NaN.
df[features] = df[features].apply(pd.to_numeric, errors='coerce')

# Same for the label column.
df['Class'] = pd.to_numeric(df['Class'], errors='coerce')

# Rows without a usable label cannot be used for fitting or scoring.
df = df.dropna(subset=['Class'])

# Impute remaining feature NaNs with the per-column median.
df[features] = df[features].fillna(df[features].median())

X = df[features].values
y = df['Class'].values.astype(int)  # integer labels for the boolean mask and f1_score

# --- Fit the density model on normal transactions only ---------------------
X_normal = X[y == 0]
mu = X_normal.mean(axis=0)
cov = np.cov(X_normal, rowvar=False)  # rowvar=False: columns are the variables

# --- Score every row -------------------------------------------------------
# allow_singular=True keeps scipy from raising if the covariance is singular.
rv = multivariate_normal(mean=mu, cov=cov, allow_singular=True)
# NOTE: densities in this many dimensions are extremely small; if values ever
# underflow to 0.0, rv.logpdf is the numerically safer alternative.
probs = rv.pdf(X)

# --- Threshold and evaluate ------------------------------------------------
# The threshold is taken from the same rows that are evaluated below, so the
# reported F1 is an in-sample figure.
epsilon = np.percentile(probs, THRESHOLD_PERCENTILE)
preds = (probs < epsilon).astype(int)  # 1 = flagged as anomaly

f1 = f1_score(y, preds)

# --- Report ----------------------------------------------------------------
print(f"Mean vector shape: {mu.shape}")
print(f"Covariance matrix shape: {cov.shape}")
print(f"Epsilon ({THRESHOLD_PERCENTILE} percentile): {epsilon:.8e}")
print(f"F1 Score: {f1:.4f}")
print("Sample Probabilities:", probs[:10])

Mean vector shape: (28,)
Covariance matrix shape: (28, 28)
Epsilon (0.5 percentile): 9.66828319e-162
F1 Score: 0.5122
Sample Probabilities: [2.28986240e-12 1.26509168e-11 6.12478790e-19 9.10199169e-15
 8.02932820e-13 1.84216021e-11 3.97771554e-12 4.01397628e-22
 3.22633751e-14 4.93935243e-12]
