In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

# --- Data loading and quick inspection ---
# 'transactions.csv' is a placeholder path; point it at the real dataset.
df = pd.read_csv('transactions.csv')

# Preview the top of the frame to sanity-check that the load worked.
head_preview = df.head()
print(head_preview)

# Per-column count of missing entries (isna is the canonical name of isnull).
missing_per_column = df.isna().sum()
print("Missing values:\n", missing_per_column)

# Summary statistics as produced by DataFrame.describe().
summary_stats = df.describe()
print(summary_stats)

# --- Feature preparation ---
# Only 'Amount' and 'Time' are fed to the detector; treating these two columns
# as the informative ones is an assumption carried over from the original
# script and should be revisited for other datasets.
features = ['Amount', 'Time']
X = df[features]

# Standardize the selected columns before modelling.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# --- Isolation Forest anomaly detection ---
# contamination=0.01 tells the model to expect about 1% anomalies;
# random_state is pinned so repeated runs give the same result.
model = IsolationForest(
    n_estimators=100,
    contamination=0.01,
    random_state=42,
)

# Fit on the scaled features, then label the same rows.
# Raw model output: -1 = anomaly, 1 = normal.
raw_labels = model.fit(X_scaled).predict(X_scaled)

# Store a binary flag instead (1 = anomaly, 0 = normal).
df['anomaly'] = pd.Series(raw_labels, index=df.index).map({1: 0, -1: 1})

# With a 0/1 flag, the column sum is the number of flagged rows.
anomaly_count = df['anomaly'].sum()
print("Anomalies detected:", anomaly_count)

# Visualize anomalies
# Scatter plot of Amount against Time, colored by the 0/1 'anomaly' flag.
plt.figure(figsize=(10, 6))
# Key the palette by label value instead of passing a positional list.
# A list assigns colors in the order of the labels actually present and must
# match their count, so a frame holding only one label value would be
# mis-colored (or rejected/warned about, depending on the seaborn version).
# With a dict, normal (0) is always blue and anomaly (1) is always red.
sns.scatterplot(
    x='Time',
    y='Amount',
    hue='anomaly',
    data=df,
    palette={0: 'blue', 1: 'red'},
)
plt.title('Anomaly Detection in Transactions')
plt.show()

# --- Optional evaluation ---
# Runs only when the frame carries a ground-truth 'Class' column.
# NOTE(review): presumably 'Class' uses the same 0/1 encoding as 'anomaly'
# (1 = fraud) -- confirm against the dataset before trusting the metrics.
if 'Class' in df.columns:
    y_true = df['Class']
    y_pred = df['anomaly']

    cm = confusion_matrix(y_true, y_pred)
    print("Confusion Matrix:\n", cm)

    report = classification_report(y_true, y_pred)
    print("\nClassification Report:\n", report)

# --- Persist results ---
# Write the frame, including the new 'anomaly' column, without the index.
df.to_csv('transactions_with_anomalies.csv', index=False)