In [2]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report

# Load the transactions; the 'Class' column is used below as the reference
# label when scoring the detector.
df = pd.read_csv('creditcard.csv')

print("Missing values in each column:")
print(df.isnull().sum())

# Drop incomplete rows before modelling: StandardScaler/IsolationForest are
# fed every non-label column, so a row with missing fields cannot be used.
# NOTE(review): the recorded run shows exactly one missing value in each of
# V7..Class, which looks like a single truncated row -- confirm the CSV is
# complete.
df.dropna(inplace=True)

# Features are every column except the label; the label is kept aside only
# for evaluation (the model itself is fit without it).
X = df.drop('Class', axis=1)
y = df['Class']

# Standardize the features before fitting the detector.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

model = IsolationForest(random_state=42, contamination='auto')
model.fit(X_scaled)

preds = model.predict(X_scaled)  # -1 for anomalies, 1 for normal
df['anomaly_pred'] = preds

# Re-encode the detector output so that 1 marks a flagged row and 0 a normal
# one, making it directly comparable with 'Class'. A vectorized comparison
# replaces the former row-wise apply(lambda ...).
df['anomaly_pred_binary'] = df['anomaly_pred'].eq(-1).astype('int64')
y_pred = df['anomaly_pred_binary']

print("Confusion Matrix:")
cm = confusion_matrix(y, y_pred)
print(cm)

print("\nClassification Report:")
print(classification_report(y, y_pred))

print(f"\nNumber of anomalies detected: {(preds == -1).sum()}")
print(f"Number of normal points: {(preds == 1).sum()}")

Missing values in each column:
Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        1
V8        1
V9        1
V10       1
V11       1
V12       1
V13       1
V14       1
V15       1
V16       1
V17       1
V18       1
V19       1
V20       1
V21       1
V22       1
V23       1
V24       1
V25       1
V26       1
V27       1
V28       1
Amount    1
Class     1
dtype: int64
Confusion Matrix:
[[112041   4656]
 [    35    209]]

Classification Report:
              precision    recall  f1-score   support

         0.0       1.00      0.96      0.98    116697
         1.0       0.04      0.86      0.08       244

    accuracy                           0.96    116941
   macro avg       0.52      0.91      0.53    116941
weighted avg       1.00      0.96      0.98    116941


Number of anomalies detected: 4865
Number of normal points: 112076
