In [8]:
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def _leading_digits(values):
    """Return the first significant digit (1-9) of each value in *values*.

    *values* must contain only finite, strictly positive numbers.
    """
    digit_text = values.astype(str).str.replace(r'\D', '', regex=True)
    # Strip leading zeros so that e.g. 0.05 ("005") yields 5 instead of 0.
    return digit_text.str.lstrip('0').str[0].astype(int)


def benfords_law_analysis_with_confidence(file, column):
    """Compare a CSV column's leading-digit distribution with Benford's law.

    Args:
        file: A path (``str`` / ``os.PathLike``) to a CSV file, or an object
            whose ``name`` attribute holds that path. ``None`` is reported
            as a missing upload.
        column: Name of the column to analyse.

    Returns:
        A ``(figure, message)`` tuple. ``figure`` is a matplotlib figure with
        the observed frequencies as bars and the Benford frequencies as a
        line, or ``None`` when the analysis could not be run. ``message`` is
        the textual verdict or the reason no analysis was produced.
    """
    import os

    if file is None:
        return None, "Please upload a CSV file."
    if column is None or column == "":
        return None, "Please select a column."

    # Path-like inputs are used directly; other objects are expected to
    # expose the file path through ``.name``.
    csv_path = file if isinstance(file, (str, os.PathLike)) else file.name
    df = pd.read_csv(csv_path)
    if column not in df.columns:
        return None, f"Column '{column}' was not found in the uploaded file."

    data = pd.to_numeric(df[column], errors='coerce').dropna()
    # Only finite, strictly positive values have a leading digit in 1..9.
    data = data[(data > 0) & np.isfinite(data)]
    if data.empty:
        return None, "No valid positive numeric data in this column."

    digits = np.arange(1, 10)
    first_digits = _leading_digits(data)
    # Align on 1..9 so digits that never occur count as frequency 0.
    actual_freq = (
        first_digits.value_counts(normalize=True)
        .sort_index()
        .reindex(digits, fill_value=0)
    )
    benford_series = pd.Series(np.log10(1 + 1 / digits), index=digits)

    # Plot: observed data as bars, Benford's theoretical curve as a line.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(digits, actual_freq, width=0.6, alpha=0.7,
           label="Observed Frequencies (Bar)", color='skyblue', zorder=1)
    ax.plot(digits, benford_series, color='red', marker='o', linewidth=2,
            label="Theoretical Benford Frequencies (Line)", zorder=2)
    ax.set_xlabel('Leading Digit')
    ax.set_ylabel('Frequency')
    ax.set_xticks(digits)
    ax.legend()
    ax.set_title("Benford's Law: Observed Frequencies (Bar) and Theoretical Frequencies (Line)")
    fig.tight_layout()
    # Drop the figure from pyplot's global registry so repeated calls do not
    # accumulate open figures; the returned Figure object stays usable.
    plt.close(fig)

    # Heuristic score: total absolute deviation from Benford, scaled so that
    # a deviation of ``max_deviation`` or more maps to 100%.
    total_deviation = np.sum(np.abs(actual_freq - benford_series))
    max_deviation = 0.5  # Empirical threshold
    confidence = min(1.0, total_deviation / max_deviation) * 100
    fraud_threshold = 30  # If confidence reaches this, flag as fraud
    if confidence >= fraud_threshold:
        fraud_message = f"Fraud detected with {confidence:.1f}% confidence."
    else:
        fraud_message = f"No significant fraud detected (confidence {confidence:.1f}%)."

    return fig, fraud_message

def get_column_names(file):
    """Return the column names of a CSV file as a list.

    Args:
        file: A path (``str`` / ``os.PathLike``) to the CSV file, or an
            object whose ``name`` attribute holds that path.

    Returns:
        The column labels in file order.
    """
    import os

    # Path-like inputs are used directly; for a pathlib.Path, ``.name`` would
    # only be the basename, so it must not be taken from the attribute.
    csv_path = file if isinstance(file, (str, os.PathLike)) else file.name
    # Reading a single row is enough to obtain the header.
    header_frame = pd.read_csv(csv_path, nrows=1)
    return list(header_frame.columns)

# Build the Gradio interface: a CSV upload, a column picker populated from the
# uploaded file, and an "Analyze" button that renders the plot and verdict.
with gr.Blocks() as demo:
    gr.Markdown("# Benford's Law Interactive Analysis with Fraud Confidence")
    file_input = gr.File(label="Upload CSV File")
    column_dropdown = gr.Dropdown(
        label="Select Column",
        choices=[],
        interactive=True,
    )
    analyze_btn = gr.Button("Analyze")
    output_plot = gr.Plot()
    output_text = gr.Textbox(
        label="Fraud Confidence Result",
        interactive=False,
    )

    def update_columns(file):
        """Return a dropdown update listing the uploaded file's columns.

        With no file, the choices are reset to an empty list.
        """
        column_names = [] if file is None else get_column_names(file)
        return gr.update(choices=column_names)

    # Repopulate the column choices whenever the uploaded file changes.
    file_input.change(
        fn=update_columns,
        inputs=file_input,
        outputs=column_dropdown,
    )
    # Run the analysis on demand and show both the plot and the message.
    analyze_btn.click(
        fn=benfords_law_analysis_with_confidence,
        inputs=[file_input, column_dropdown],
        outputs=[output_plot, output_text],
    )

demo.launch()


* Running on local URL:  http://127.0.0.1:7864

To create a public link, set `share=True` in `launch()`.


