# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

def load_and_preprocess_data(file_path, *, high_amount_threshold=220,
                             test_size=0.2, random_state=42):
    """Load the fraud dataset, engineer features, and split it for modeling.

    The CSV must provide the columns ``TX_DATETIME``, ``TX_AMOUNT``,
    ``TERMINAL_ID``, ``CUSTOMER_ID`` and ``TX_FRAUD``.

    Args:
        file_path: Location of the CSV, passed straight to ``pd.read_csv``.
        high_amount_threshold: Amounts strictly greater than this value set
            the ``HIGH_AMOUNT`` flag to 1.
        test_size: Fraction of rows held out for the test split.
        random_state: Seed handed to ``train_test_split``.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test, df)`` where ``df`` is
        the full frame including every engineered column.
    """
    df = pd.read_csv(file_path)
    df['TX_DATETIME'] = pd.to_datetime(df['TX_DATETIME'])

    # Calendar day of each transaction, used as the grouping key for the
    # per-day aggregates below. Kept as a standalone Series so that no
    # extra column leaks into the returned frame.
    tx_day = df['TX_DATETIME'].dt.normalize().rename('TX_DAY')

    # Feature 1: high amount transactions.
    df['HIGH_AMOUNT'] = (df['TX_AMOUNT'] > high_amount_threshold).astype(int)

    # Feature 2: share of fraudulent transactions per terminal and day.
    # transform() broadcasts the daily mean back onto every row. The earlier
    # approach merged day-level statistics on the full timestamp, so only
    # rows stamped exactly at midnight found a match and the rest were NaN.
    # NOTE(review): this rate is derived from TX_FRAUD (the target), includes
    # the row's own label, and is computed before the train/test split, so
    # evaluation scores are presumably optimistic. Consider a lagged window
    # that only uses labels known before the transaction.
    df['TERMINAL_FRAUD_RATE'] = (
        df.groupby(['TERMINAL_ID', tx_day])['TX_FRAUD'].transform('mean')
    )

    # Feature 3: per-customer daily activity (transaction count and mean
    # amount); both aggregates share a single groupby.
    customer_daily_amounts = df.groupby(['CUSTOMER_ID', tx_day])['TX_AMOUNT']
    df['CUSTOMER_TX_COUNT'] = customer_daily_amounts.transform('count')
    df['CUSTOMER_TX_AMOUNT_MEAN'] = customer_daily_amounts.transform('mean')

    # Time-based features.
    df['HOUR'] = df['TX_DATETIME'].dt.hour
    df['DAY_OF_WEEK'] = df['TX_DATETIME'].dt.dayofweek

    features = ['TX_AMOUNT', 'HIGH_AMOUNT', 'TERMINAL_FRAUD_RATE',
                'CUSTOMER_TX_COUNT', 'CUSTOMER_TX_AMOUNT_MEAN',
                'HOUR', 'DAY_OF_WEEK']

    X = df[features]
    y = df['TX_FRAUD']

    # Stratify on the label so both splits keep the same fraud ratio.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )

    return X_train, X_test, y_train, y_test, df

def train_model(X_train, y_train):
    """Fit a random forest on the training split and return the fitted model.

    The forest uses 100 trees with unlimited depth, a fixed seed of 42, and
    ``class_weight='balanced'``.
    """
    forest_params = {
        'n_estimators': 100,
        'max_depth': None,
        'min_samples_split': 2,
        'random_state': 42,
        'class_weight': 'balanced',
    }
    classifier = RandomForestClassifier(**forest_params)
    classifier.fit(X_train, y_train)
    return classifier

def evaluate_model(model, X_test, y_test):
    """Report how a fitted classifier performs on the held-out split.

    Prints the classification report and the AUC-ROC score, then shows a
    confusion-matrix heatmap followed by a bar chart of the model's
    feature importances. Nothing is returned.
    """
    predicted_labels = model.predict(X_test)
    # Column 1 of predict_proba is taken as the score of the fraud class.
    fraud_scores = model.predict_proba(X_test)[:, 1]

    # Text metrics.
    print("\nClassification Report:")
    print(classification_report(y_test, predicted_labels))

    auc = roc_auc_score(y_test, fraud_scores)
    print(f"\nAUC-ROC Score: {auc:.4f}")

    # Confusion matrix heatmap; rows are true labels, columns predictions.
    class_names = ['Legitimate', 'Fraud']
    plt.figure(figsize=(8, 6))
    sns.heatmap(
        confusion_matrix(y_test, predicted_labels),
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.tight_layout()
    plt.show()

    # Feature importances, most important first.
    plt.figure(figsize=(10, 6))
    ranked_importances = pd.Series(
        model.feature_importances_, index=X_test.columns
    ).sort_values(ascending=False)
    ranked_importances.plot.bar()
    plt.title('Feature Importances')
    plt.ylabel('Importance')
    plt.tight_layout()
    plt.show()

def main(file_path='fraud_transactions.csv'):
    """Run the pipeline end to end: load data, train, then evaluate.

    Args:
        file_path: Path of the transactions CSV. Defaults to
            ``'fraud_transactions.csv'`` in the working directory.
    """
    # The fully engineered frame is also returned but is not needed here.
    X_train, X_test, y_train, y_test, _ = load_and_preprocess_data(file_path)

    print("Training model...")
    model = train_model(X_train, y_train)

    print("\nEvaluating model...")
    evaluate_model(model, X_test, y_test)

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()