# In [None]:  (notebook cell marker, kept as a comment so the file parses as Python)
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score

# Load and preprocess the data
def preprocess_data(file_path):
    """Load the transactions CSV and split it into train and test sets.

    When the file has no ``TransactionID`` or ``Timestamp`` column, a
    placeholder one is added (IDs ``1..N``; timestamps one minute apart
    starting at 2023-01-01) so the returned frame always carries both.

    Args:
        file_path: Location of the CSV, passed straight to ``pd.read_csv``.
            The file must contain a ``Class`` column, which is used as the
            target.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test, data)``. ``data`` is the
        full loaded frame, including the ID/timestamp columns that are
        excluded from the feature matrices.

    Raises:
        KeyError: If the ``Class`` column is missing from the file.
    """
    # Load dataset
    data = pd.read_csv(file_path)

    # Add a dummy TransactionID and Timestamp if not present
    if 'TransactionID' not in data.columns:
        data['TransactionID'] = range(1, len(data) + 1)
    if 'Timestamp' not in data.columns:
        # 'min' is the supported minute alias; the older 'T' spelling is
        # deprecated and emits a FutureWarning on recent pandas releases.
        data['Timestamp'] = pd.date_range(start='2023-01-01', periods=len(data), freq='min')

    # Separate features and target; the bookkeeping columns are not features
    X = data.drop(['Class', 'TransactionID', 'Timestamp'], axis=1)
    y = data['Class']

    # 80/20 split, stratified on the target so both sides keep the same
    # class proportions; fixed seed for reproducibility.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

    return X_train, X_test, y_train, y_test, data

# Train the fraud detection model
def train_model(X_train, y_train):
    """Fit a 100-tree random forest on the training data and return it.

    Args:
        X_train: Training feature matrix.
        y_train: Training target labels.

    Returns:
        The fitted ``RandomForestClassifier`` (seeded with 42).
    """
    # fit() hands back the estimator itself, so construction and training
    # can be chained into a single expression.
    return RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Evaluate the model
def evaluate_model(model, X_test, y_test):
    """Score a fitted classifier on the held-out data.

    Args:
        model: Fitted estimator exposing ``predict_proba`` and ``predict``.
        X_test: Test feature matrix.
        y_test: True labels for ``X_test``.

    Returns:
        A tuple ``(auc_score, report)``: the ROC AUC computed from the
        second probability column, and the classification report built
        from the hard predictions.
    """
    # Column 1 of predict_proba is the score fed to ROC AUC.
    positive_scores = model.predict_proba(X_test)[:, 1]
    hard_labels = model.predict(X_test)

    return (
        roc_auc_score(y_test, positive_scores),
        classification_report(y_test, hard_labels),
    )

# Identify fraud type based on thresholds
def identify_fraud_type(row):
    """Label a transaction row with a fraud category.

    Rules are checked in priority order and the first match wins:

    1. ``Fraud_Score`` above 0.8      -> ``'Account Takeover'``
    2. ``Amount`` above 1000          -> ``'Unusual Transaction Volume'``
    3. truthy ``Location_Anomaly``    -> ``'Anomalous Location'``
    4. otherwise                      -> ``'Unknown'``

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with ``Fraud_Score``
            and ``Amount`` entries; ``Location_Anomaly`` is optional and is
            treated as False when absent.

    Returns:
        The category name as a string.
    """
    if row['Fraud_Score'] > 0.8:
        return 'Account Takeover'
    if row['Amount'] > 1000:
        return 'Unusual Transaction Volume'

    location_flag = row.get('Location_Anomaly', False)
    return 'Anomalous Location' if location_flag else 'Unknown'

# Save flagged transactions
def save_results(model, X_test, full_data, output_path):
    """Score the test rows and export those the model flags as fraud.

    Each test row is given a ``Fraud_Score`` (second ``predict_proba``
    column), a ``Fraud_Flag`` (``model.predict``), a placeholder
    ``Location_Anomaly`` value and a ``Fraud_Type`` label. Rows with
    ``Fraud_Flag == 1`` are then written to ``<output_path>.csv`` and
    ``<output_path>.json`` (one JSON record per line).

    Args:
        model: Fitted estimator exposing ``predict_proba`` and ``predict``.
        X_test: Test feature matrix; its index must consist of labels
            present in ``full_data``'s index.
        full_data: Full frame the test rows were drawn from; must contain
            the ``TransactionID``, ``Timestamp`` and ``Amount`` columns.
        output_path: Destination path without extension.

    Returns:
        None. The function writes two files and prints a confirmation.
    """
    # X_test.index carries index *labels* inherited from full_data, so the
    # matching rows are fetched by label. Positional .iloc would only be
    # right when full_data happens to have a default 0..N-1 index.
    flagged_transactions = full_data.loc[X_test.index].copy()
    flagged_transactions['Fraud_Score'] = model.predict_proba(X_test)[:, 1]
    flagged_transactions['Fraud_Flag'] = model.predict(X_test)

    # Add a dummy Location_Anomaly column for demonstration. The draw is
    # unseeded, so rows that reach the location rule get a label that can
    # differ between runs.
    flagged_transactions['Location_Anomaly'] = np.random.choice([True, False], size=len(flagged_transactions))

    # Add Fraud Type
    flagged_transactions['Fraud_Type'] = flagged_transactions.apply(identify_fraud_type, axis=1)

    # Keep only the rows the model flagged as fraud
    flagged_transactions = flagged_transactions[flagged_transactions['Fraud_Flag'] == 1]

    # Select the export columns once and reuse them for both formats
    export = flagged_transactions[['TransactionID', 'Fraud_Score', 'Fraud_Type', 'Timestamp']]
    export.to_csv(output_path + '.csv', index=False)
    export.to_json(output_path + '.json', orient='records', lines=True)
    print(f"Flagged transactions saved to {output_path}.csv and {output_path}.json")

if __name__ == "__main__":
    # Hard-coded absolute input path and extension-less output path;
    # save_results appends '.csv' and '.json' to the latter.
    input_file = '/content/creditcard.csv'
    output_file = '/content/flagged_transactions'

    # Load the CSV and obtain the train/test split plus the full frame
    X_train, X_test, y_train, y_test, full_data = preprocess_data(input_file)

    # Fit the classifier on the training portion
    model = train_model(X_train, y_train)

    # Score the held-out portion and print the AUC and per-class report
    auc, report = evaluate_model(model, X_test, y_test)
    print(f"AUC Score: {auc}\n")
    print(f"Classification Report:\n{report}")

    # Export the test rows the model flags as fraud
    save_results(model, X_test, full_data, output_file)
