In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
import warnings
from datetime import datetime, timedelta
import random

# Suppress every warning for the rest of the session.
# NOTE(review): this blanket filter also hides pandas / scikit-learn warnings
# that may point at real problems — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

class CreditCardFraudDetector:
    """Random-forest fraud classifier that standardises ``Amount`` and ``Time``.

    The scaler is fitted on the training split only and re-used for every
    later prediction, so all frames passed in are expected to carry the
    columns listed in ``SCALED_COLUMNS`` next to the other model features.
    """

    # Columns that get standardised; all other columns reach the model untouched.
    SCALED_COLUMNS = ['Amount', 'Time']

    def __init__(self):
        """Create an unfitted scaler and an unfitted random-forest model."""
        self.scaler = StandardScaler()
        self.model = RandomForestClassifier(
            n_estimators=200,
            max_depth=15,
            min_samples_split=5,
            min_samples_leaf=2,
            max_features='sqrt',
            class_weight='balanced',  # Handle imbalance
            random_state=42,
            n_jobs=-1
        )

    def preprocess_data(self, X, y):
        """Split into stratified train/test sets and scale ``Amount``/``Time``.

        Args:
            X: Feature DataFrame containing at least the ``SCALED_COLUMNS``.
            y: Class labels aligned with ``X``.

        Returns:
            Tuple ``(X_train, X_test, y_train, y_test)`` with an 80/20 split.
            The caller's ``X`` is left unmodified.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )

        # Work on explicit copies: the split results are selections of X, and
        # assigning into them directly is the chained-assignment pattern that
        # pandas warns about (and the global warning filter would hide).
        X_train = X_train.copy()
        X_test = X_test.copy()

        columns = self.SCALED_COLUMNS
        # Fit only on training data to prevent data leakage
        X_train[columns] = self.scaler.fit_transform(X_train[columns])
        X_test[columns] = self.scaler.transform(X_test[columns])

        return X_train, X_test, y_train, y_test

    def train_and_evaluate(self, X_train, X_test, y_train, y_test):
        """Fit the model on the training split and print test-set metrics.

        Prints the classification report and the ROC AUC score computed from
        the predicted probability of class 1. Returns nothing.
        """
        self.model.fit(X_train, y_train)
        y_pred = self.model.predict(X_test)
        y_pred_proba = self.model.predict_proba(X_test)[:, 1]

        print("\nModel Performance:")
        print("\nClassification Report:")
        print(classification_report(y_test, y_pred))
        print("\nROC AUC Score:", roc_auc_score(y_test, y_pred_proba))

    def predict_transactions(self, transactions):
        """Predict fraud labels and probabilities for unscaled transactions.

        Args:
            transactions: Feature DataFrame in the same layout used for
                training, with raw (unscaled) ``Amount`` and ``Time``.

        Returns:
            Tuple ``(predictions, probabilities)`` where ``probabilities`` is
            the predicted probability of class 1. The input frame is not
            modified.
        """
        X = transactions.copy()
        # Scaling is skipped unless every scaled column is present.
        if all(column in X.columns for column in self.SCALED_COLUMNS):
            columns = self.SCALED_COLUMNS
            X[columns] = self.scaler.transform(X[columns])

        predictions = self.model.predict(X)
        probabilities = self.model.predict_proba(X)[:, 1]

        return predictions, probabilities


class TransactionGenerator:
    """Generate a synthetic credit-card dataset with normal and fraud rows.

    Every transaction is a dict with ``Time`` (seconds since midnight),
    ``Amount``, ``Date``, ``Merchant``, ``Category``, ``Class`` (0 normal,
    1 fraud) and 28 random features ``V1`` .. ``V28``. All randomness comes
    from the module-level ``random`` generator, so seeding it makes the
    output reproducible.
    """

    # Number of anonymous V-features attached to every transaction.
    NUM_V_FEATURES = 28

    # (min, max) amount drawn for a normal purchase in each merchant category.
    NORMAL_AMOUNT_RANGES = {
        'grocery': (30, 200),
        'restaurant': (10, 100),
        'shopping': (20, 500),
        'utilities': (50, 250),
        'entertainment': (10, 75),
    }

    # Templates for fraudulent rows: large amounts in the early-morning hours.
    FRAUD_PATTERNS = (
        {'merchant': 'Unknown Online Store', 'amount_range': (1000, 5000), 'hour_range': (0, 5)},
        {'merchant': 'Foreign ATM', 'amount_range': (800, 2500), 'hour_range': (1, 4)},
        {'merchant': 'Suspicious Marketplace', 'amount_range': (1500, 4000), 'hour_range': (0, 3)},
    )

    def __init__(self, start_date='2024-01-01', num_days=14):
        """Store the simulated date window and the merchant catalogue.

        Args:
            start_date: First simulated day, formatted ``YYYY-MM-DD``.
            num_days: Number of consecutive days to simulate.
        """
        self.start_date = datetime.strptime(start_date, '%Y-%m-%d')
        self.num_days = num_days
        self.merchants = {
            'grocery': ['Walmart', 'Whole Foods', 'Trader Joes', 'Safeway'],
            'restaurant': ['McDonalds', 'Starbucks', 'Chipotle', 'Local Diner'],
            'shopping': ['Amazon', 'Target', 'Best Buy', 'Nike'],
            'utilities': ['Electric Co.', 'Water Corp', 'Internet Service', 'Phone Bill'],
            'entertainment': ['Netflix', 'Cinema', 'Steam Games', 'Spotify']
        }

    def _random_date(self):
        """Return a uniformly chosen day inside the simulated window."""
        return self.start_date + timedelta(days=random.randint(0, self.num_days - 1))

    def generate_normal_transaction(self, date):
        """Generate one legitimate transaction dated ``date``.

        The purchase happens between 07:00 and 23:59, its amount is drawn
        from the category's range and its V-features lie in [-1, 1].
        """
        category = random.choice(list(self.merchants.keys()))
        merchant = random.choice(self.merchants[category])

        hour = random.randint(7, 23)
        minute = random.randint(0, 59)
        seconds_since_midnight = hour * 3600 + minute * 60

        amount = random.uniform(*self.NORMAL_AMOUNT_RANGES[category])

        v_features = [random.uniform(-1, 1) for _ in range(self.NUM_V_FEATURES)]

        return {
            'Time': seconds_since_midnight,
            'Amount': amount,
            'Date': date,
            'Merchant': merchant,
            'Category': category,
            'Class': 0,
            **{f'V{i+1}': v for i, v in enumerate(v_features)}
        }

    def generate_fraud_transaction(self, date):
        """Generate one fraudulent transaction dated ``date``.

        A fraud template fixes the merchant, amount range and hour range;
        V-features are drawn from [-3, 3] and eight of them are doubled so
        they stand out from the normal rows.
        """
        pattern = random.choice(self.FRAUD_PATTERNS)
        hour = random.randint(*pattern['hour_range'])
        minute = random.randint(0, 59)
        seconds_since_midnight = hour * 3600 + minute * 60

        amount = random.uniform(*pattern['amount_range'])

        v_features = [random.uniform(-3, 3) for _ in range(self.NUM_V_FEATURES)]
        for i in random.sample(range(self.NUM_V_FEATURES), 8):
            v_features[i] *= 2

        return {
            'Time': seconds_since_midnight,
            'Amount': amount,
            'Date': date,
            'Merchant': pattern['merchant'],
            'Category': 'suspicious',
            'Class': 1,
            **{f'V{i+1}': v for i, v in enumerate(v_features)}
        }

    def generate_dataset(self, min_transactions=100, fraud_rate=0.2):
        """Generate a dataset with normal and fraudulent transactions.

        Args:
            min_transactions: Lower bound on the number of rows returned.
            fraud_rate: Share of ``min_transactions`` generated as fraud;
                at least five fraud rows are always produced.

        Returns:
            DataFrame of all transactions sorted by the derived ``DateTime``
            column, with a fresh 0..n-1 index.

        Raises:
            ValueError: If the generator was configured with ``num_days < 1``.
        """
        if self.num_days < 1:
            raise ValueError("num_days must be at least 1 to generate a dataset")

        transactions = []
        min_frauds = max(5, int(min_transactions * fraud_rate))

        # Baseline traffic: 5-10 normal purchases on every simulated day.
        for day_offset in range(self.num_days):
            current_date = self.start_date + timedelta(days=day_offset)
            for _ in range(random.randint(5, 10)):
                transactions.append(self.generate_normal_transaction(current_date))

        # The daily baseline alone can fall short of the requested minimum
        # (e.g. 14 days yield at most 140 rows); top up with extra normal
        # purchases on random days so min_transactions is actually honoured.
        shortfall = min_transactions - min_frauds - len(transactions)
        for _ in range(max(0, shortfall)):
            transactions.append(self.generate_normal_transaction(self._random_date()))

        for _ in range(min_frauds):
            transactions.append(self.generate_fraud_transaction(self._random_date()))

        df = pd.DataFrame(transactions)
        df['DateTime'] = pd.to_datetime(df['Date']) + pd.to_timedelta(df['Time'], unit='s')
        df = df.sort_values('DateTime').reset_index(drop=True)

        return df


def analyze_fraudulent_transactions(transactions, predictions, probabilities):
    """Return the transactions predicted as fraud, ranked by probability.

    Args:
        transactions: DataFrame with one row per transaction.
        predictions: Predicted class per row (1 marks fraud).
        probabilities: Predicted fraud probability per row.

    Returns:
        A new DataFrame holding only the rows predicted as fraud, extended
        with ``Predicted_Fraud``, ``Fraud_Probability`` and ``Risk_Level``
        and sorted by descending probability. ``transactions`` itself is
        left unmodified.
    """
    # assign() builds a new frame, so the caller's data is not altered.
    scored = transactions.assign(
        Predicted_Fraud=predictions,
        Fraud_Probability=probabilities,
    )

    fraud_transactions = scored[scored['Predicted_Fraud'] == 1].copy()
    fraud_transactions['Risk_Level'] = pd.cut(
        fraud_transactions['Fraud_Probability'],
        bins=[0, 0.7, 0.9, 1],
        labels=['Medium', 'High', 'Very High'],
        include_lowest=True,  # keep a probability of exactly 0 inside the first bin
    )

    return fraud_transactions.sort_values('Fraud_Probability', ascending=False)


def main():
    """Run the synthetic-data demo: generate, train, evaluate and report.

    Generates 14 days of transactions, trains the detector on a train/test
    split, then scores the complete dataset and prints the rows flagged as
    fraud.
    """
    generator = TransactionGenerator(num_days=14)
    transactions = generator.generate_dataset(min_transactions=200)

    detector = CreditCardFraudDetector()

    # Keep only numeric model features; labels and descriptive columns are dropped.
    X = transactions.drop(['Class', 'Date', 'Merchant', 'DateTime', 'Category'], axis=1)
    y = transactions['Class']

    X_train, X_test, y_train, y_test = detector.preprocess_data(X, y)
    detector.train_and_evaluate(X_train, X_test, y_train, y_test)

    # Scores every row, including the ones the model was trained on, so the
    # count below is not a held-out estimate.
    predictions, probabilities = detector.predict_transactions(X)
    fraud_analysis = analyze_fraudulent_transactions(transactions, predictions, probabilities)

    print(f"\nTotal Transactions: {len(transactions)}")
    print(f"Detected Fraudulent Transactions: {len(fraud_analysis)}")
    print(fraud_analysis)



# Run the demo only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    main()



Model Performance:

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        21
           1       1.00      1.00      1.00         8

    accuracy                           1.00        29
   macro avg       1.00      1.00      1.00        29
weighted avg       1.00      1.00      1.00        29


ROC AUC Score: 1.0

Total Transactions: 145
Detected Fraudulent Transactions: 40
      Time       Amount       Date                Merchant    Category  Class  \
21   19680  4287.877613 2024-01-02    Unknown Online Store  suspicious      1   
81    6600  1796.765091 2024-01-09             Foreign ATM  suspicious      1   
48    7020  3516.469133 2024-01-06  Suspicious Marketplace  suspicious      1   
1     4080  1923.201239 2024-01-01  Suspicious Marketplace  suspicious      1   
70   14340  4330.879155 2024-01-08    Unknown Online Store  suspicious      1   
39     960  3097.705034 2024-01-05    Unknown Online Store  s

In [None]:
class CreditCardFraudDetector:
    """Fraud detector built from a standard scaler and a random forest.

    NOTE(review): only ``__init__`` is part of this class body; the captured
    AttributeError at the end of this notebook indicates that the method
    definitions in the later cells are not bound to the class.
    """

    def __init__(self):
        """Create the unfitted scaler and model; feature names start unset."""
        forest_settings = dict(
            n_estimators=100,
            max_depth=15,
            min_samples_split=5,
            min_samples_leaf=2,
            max_features='sqrt',
            random_state=42,
            n_jobs=-1,
        )
        # Populated later with the training column names.
        self.features = None
        self.model = RandomForestClassifier(**forest_settings)
        self.scaler = StandardScaler()

In [None]:
    def load_and_prepare_data(self, path="/content/creditcard.csv"):
        """
        Load the credit card fraud dataset and split it into features/target.

        Args:
            path: Location of the CSV file. Defaults to the path used by the
                original notebook; the file must contain a ``Class`` column.

        Returns:
            Tuple ``(X, y)`` where ``X`` holds every column except ``Class``
            and ``y`` is the ``Class`` column.

        Side effects:
            Prints the dataset shape and the normalised class distribution,
            and stores the feature column names in ``self.features``.
        """
        df = pd.read_csv(path)

        print("Dataset shape:", df.shape)
        print("\nClass distribution:")
        print(df['Class'].value_counts(normalize=True))

        # Separate features and target
        X = df.drop('Class', axis=1)
        y = df['Class']

        # Remember the feature names for later reporting.
        self.features = X.columns.tolist()

        return X, y


In [None]:
    def plot_data_analysis(self, X, y):
        """
        Draw side-by-side histograms of ``Amount`` and ``Time``, coloured by class.

        ``X`` must contain the ``Amount`` and ``Time`` columns; ``y`` supplies
        the hue. The figure is shown immediately and nothing is returned.
        """
        # (column, panel title, x-axis label) for each subplot, left to right.
        panels = (
            ('Amount', 'Transaction Amount Distribution by Class', 'Amount'),
            ('Time', 'Transaction Time Distribution by Class', 'Time (seconds)'),
        )

        plt.figure(figsize=(15, 5))

        for position, (column, title, x_label) in enumerate(panels, start=1):
            plt.subplot(1, 2, position)
            sns.histplot(data=X, x=column, hue=y, bins=50)
            plt.title(title)
            plt.xlabel(x_label)
            plt.ylabel('Count')

        plt.tight_layout()
        plt.show()


In [None]:
    def preprocess_data(self, X, y, balance_data=True):
        """
        Split the data, scale ``Amount``/``Time`` and optionally balance it.

        Args:
            X: Feature DataFrame containing ``Amount`` and ``Time`` columns.
            y: Class labels aligned with ``X``.
            balance_data: When true, oversample the training split with SMOTE.

        Returns:
            Tuple ``(X_train, X_test, y_train, y_test)`` from a stratified
            80/20 split. The caller's ``X`` is left unmodified.
        """
        scaled_columns = ['Amount', 'Time']

        # Split first so the scaler never sees the test rows.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )

        # Copy before assigning so neither the caller's frame nor a selection
        # of it is written to.
        X_train = X_train.copy()
        X_test = X_test.copy()

        # Fit on the training split only to prevent data leakage, then apply
        # the same transformation to the test split.
        X_train[scaled_columns] = self.scaler.fit_transform(X_train[scaled_columns])
        X_test[scaled_columns] = self.scaler.transform(X_test[scaled_columns])

        if balance_data:
            # Apply SMOTE to balance the training data only; the test split
            # keeps its original class distribution.
            # NOTE(review): SMOTE is not imported in the visible cells —
            # presumably imblearn.over_sampling.SMOTE; confirm.
            smote = SMOTE(random_state=42)
            X_train, y_train = smote.fit_resample(X_train, y_train)
            print("\nBalanced training set shape:", X_train.shape)
            print("Balanced class distribution:")
            print(pd.Series(y_train).value_counts(normalize=True))

        return X_train, X_test, y_train, y_test

In [None]:
    def train_and_evaluate(self, X_train, X_test, y_train, y_test):
        """
        Fit the model on the training split and report test-set metrics.

        Prints the confusion matrix, the classification report and the ROC AUC
        score (computed from the class-1 probabilities), then plots the
        feature importances. Returns nothing.
        """
        print("\nTraining the model...")
        self.model.fit(X_train, y_train)

        # Hard labels feed the matrix/report; class-1 probabilities feed the AUC.
        predicted_labels = self.model.predict(X_test)
        fraud_scores = self.model.predict_proba(X_test)[:, 1]

        print("\nModel Performance:")

        print("\nConfusion Matrix:")
        matrix = confusion_matrix(y_test, predicted_labels)
        print(matrix)

        print("\nClassification Report:")
        report = classification_report(y_test, predicted_labels)
        print(report)

        auc = roc_auc_score(y_test, fraud_scores)
        print("\nROC AUC Score:", auc)

        self.plot_feature_importance()


In [None]:
    def plot_feature_importance(self):
        """
        Plot the ten most important features of the fitted random forest.

        Feature names come from ``self.features``. When that is still ``None``
        (its initial value), the names recorded by the fitted model are used
        if available, otherwise generic ``feature_<i>`` labels. The figure is
        shown immediately and nothing is returned.
        """
        importances = self.model.feature_importances_

        feature_names = self.features
        if feature_names is None:
            # The model exposes feature_names_in_ only in some cases (e.g. when
            # fitted on a DataFrame), hence the getattr default.
            feature_names = getattr(self.model, 'feature_names_in_', None)
        if feature_names is None:
            feature_names = [f'feature_{i}' for i in range(len(importances))]

        importance = pd.DataFrame({
            'feature': list(feature_names),
            'importance': importances
        }).sort_values('importance', ascending=False)

        plt.figure(figsize=(12, 6))
        sns.barplot(data=importance.head(10), x='importance', y='feature')
        plt.title('Top 10 Most Important Features')
        plt.xlabel('Feature Importance')
        plt.tight_layout()
        plt.show()

In [None]:
    def predict_transaction(self, transaction_data):
        """
        Predict whether new transactions are fraudulent.

        Args:
            transaction_data: Feature DataFrame with raw (unscaled) ``Amount``
                and ``Time`` columns plus the other model features.

        Returns:
            Tuple ``(prediction, probability)`` where ``probability`` is the
            predicted probability of class 1. The input frame is not modified.
        """
        # Scale a private copy: writing the scaled values back into the
        # caller's frame would double-scale it on any later call.
        features = transaction_data.copy()
        features[['Amount', 'Time']] = self.scaler.transform(features[['Amount', 'Time']])

        # Make prediction
        prediction = self.model.predict(features)
        probability = self.model.predict_proba(features)[:, 1]

        return prediction, probability

In [None]:
def main():
    """Run the full pipeline on the CSV dataset and print sample predictions.

    Loads the data, plots the class distributions, preprocesses with
    balancing, trains and evaluates the model, then predicts up to five
    held-out transactions.
    """
    # Initialize the fraud detector
    detector = CreditCardFraudDetector()

    # Load and prepare data
    print("Loading and preparing data...")
    X, y = detector.load_and_prepare_data()

    # Plot data analysis
    print("\nGenerating data visualizations...")
    detector.plot_data_analysis(X, y)

    # Keep the unscaled Amount/Time values: preprocessing returns a scaled
    # test set, and predict_transaction applies the scaler itself.
    raw_amount_time = X[['Amount', 'Time']].copy()

    # Preprocess data
    print("\nPreprocessing data...")
    X_train, X_test, y_train, y_test = detector.preprocess_data(X, y, balance_data=True)

    # Train and evaluate model
    detector.train_and_evaluate(X_train, X_test, y_train, y_test)

    # Example: Make predictions on new transactions
    print("\nExample: Predicting new transactions...")
    sample_transactions = X_test.head(5).copy()
    # Restore the raw values so the sample is scaled exactly once.
    sample_transactions[['Amount', 'Time']] = (
        raw_amount_time.loc[sample_transactions.index].to_numpy()
    )
    predictions, probabilities = detector.predict_transaction(sample_transactions)

    results = pd.DataFrame({
        # Sized from the predictions so fewer than five test rows still work.
        'Transaction_ID': range(1, len(predictions) + 1),
        'Predicted_Class': predictions,
        'Fraud_Probability': probabilities
    })
    print("\nPredictions for sample transactions:")
    print(results)

# Run the pipeline only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    main()

Loading and preparing data...


AttributeError: 'CreditCardFraudDetector' object has no attribute 'load_and_prepare_data'