In [3]:
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import LabelEncoder
import pickle
from datetime import datetime

# Read the raw dataset; the steps below add to and overwrite columns of `df`.
df = pd.read_csv('dataset.csv')

# Parse 'Date' into datetimes, then derive the calendar features from it.
df['Date'] = pd.to_datetime(df['Date'])
date_parts = df['Date'].dt
df['day_of_week'] = date_parts.dayofweek
df['day_of_month'] = date_parts.day
df['month'] = date_parts.month

# One LabelEncoder per categorical column, keyed by column name so the fitted
# encoders can be stored next to the model further down.
categorical_cols = ['Type', 'Recipient', 'Category']
label_encoders = {col: LabelEncoder() for col in categorical_cols}
for col, le in label_encoders.items():
    # Values are cast to str before fitting; the resulting integer codes
    # replace the original column in `df`.
    df[col] = le.fit_transform(df[col].astype(str))

# Columns passed to the model, in this order.
features = [
    'Balance Amount',
    'Transaction_Amount',
    'Type',
    'day_of_week',
    'day_of_month',
    'month',
    'Recipient',
    'Category',
]

# Fit the Isolation Forest on the selected columns. random_state is pinned,
# and fit() hands back the estimator itself, so `model` is the fitted forest.
model = IsolationForest(
    n_estimators=100,
    contamination=0.05,
    random_state=42,
).fit(df[features])

# Persist the fitted model together with its encoders in a single pickle.
artifact = {'model': model, 'label_encoders': label_encoders}
with open('anomaly_detection_model.pkl', 'wb') as f:
    pickle.dump(artifact, f)