# Analyzing Government Spending with Papilon

Use Papilon to explore patterns, waste, and possible fraud in government budgets using entropy, causal inference, and scenario simulation.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from papilon import (
    shannon_entropy,
    analyze_relationships,
    simulate_kde_scenarios,
    infer_causal_structure,
    grid_search_optimize
)

# Simulated government spending figures, one list per category; each list
# position becomes one row of the frame. Replace with a real dataset as needed.
data = dict(
    education=[500, 520, 490, 530, 505],
    healthcare=[600, 610, 620, 605, 615],
    defense=[700, 680, 690, 710, 705],
    infrastructure=[300, 310, 295, 320, 315],
    admin=[200, 205, 210, 215, 220],
)

# Build the frame and append the row-wise sum of every category column.
# The lambda receives the frame before 'total_spending' exists, so the total
# covers only the five category columns.
df = pd.DataFrame(data).assign(
    total_spending=lambda frame: frame.sum(axis=1)
)
df.head()


##  Entropy Analysis
Evaluate the uncertainty of each spending category to identify irregularities or unpredictable allocations.

In [None]:
# Category columns to analyse; the derived 'total_spending' column is left out.
columns = [
    'education',
    'healthcare',
    'defense',
    'infrastructure',
    'admin',
]

# Walk the selected columns as (name, series) pairs and report one entropy
# value per category, formatted to four decimal places.
for col, category_values in df[columns].items():
    entropy = shannon_entropy(category_values)
    print(f"{col} Entropy: {entropy:.4f}")


##  Correlation and Relationships
Check for suspicious or unexplained relationships between categories.

In [None]:
# Restrict the analysis to the category columns (no derived total).
category_frame = df[columns]
# NOTE(review): presumably computes pairwise correlations between categories;
# confirm the return value and any plotting side effects against papilon's docs.
analyze_relationships(category_frame, method='correlation')


##  Simulate Spending Scenarios
Model alternate budget scenarios based on KDE simulation.

In [None]:
# Inputs for the simulation: the per-category columns and the observed totals.
category_inputs = df[columns]
observed_totals = df['total_spending']

# Request 1000 simulated samples.
# NOTE(review): the result is assumed to be a pandas object, since
# `.plot(kind='kde')` is called on it below; confirm against papilon's docs.
simulated = simulate_kde_scenarios(
    category_inputs,
    observed_totals,
    n_samples=1000,
)

# Density plot of the simulated values, without a legend.
simulated.plot(
    kind='kde',
    title='Simulated Total Spending Distribution',
    legend=False,
)
plt.xlabel('Total Spending')
plt.show()


##  Causal Inference
Reveal potential drivers and dependencies of total government spending.

In [None]:
# Analyse the category columns together with the derived total.
causal_columns = [*columns, 'total_spending']
# NOTE(review): the returned object is assumed to expose `.draw()` for
# rendering the inferred structure; confirm against papilon's docs.
graph = infer_causal_structure(df[causal_columns])
graph.draw()


##  Optimization: Minimize Waste
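Define a custom waste score (penalizing admin and defense spending, crediting education and healthcare) and search the allowed range of each category for the most efficient configuration, which helps flag overallocated sectors.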

In [None]:
def waste_score(config):
    """Return a heuristic waste score for one spending configuration.

    Admin and defense spending raise the score (weights 1.5 and 1.2), while
    education and healthcare spending lower it (weights 0.8 and 0.7).

    Args:
        config: Mapping with the keys 'admin', 'defense', 'education' and
            'healthcare', each holding a numeric spending amount.

    Returns:
        The weighted score; higher values mean more spending in the
        penalized categories relative to the credited ones.

    Raises:
        KeyError: If `config` is a dict and one of the four keys is missing.
    """
    # Accumulate term by term, in the same order as a single left-to-right
    # expression, so floating-point results are unchanged.
    score = config['admin'] * 1.5
    score = score + config['defense'] * 1.2
    score = score - config['education'] * 0.8
    score = score - config['healthcare'] * 0.7
    return score

# (low, high) search range for each category that waste_score reads.
bounds = dict(
    education=(400, 600),
    healthcare=(500, 700),
    defense=(600, 800),
    admin=(180, 250),
)

# NOTE(review): presumably evaluates waste_score over a grid spanning these
# bounds with 10 steps per dimension; confirm the meaning of `resolution` and
# whether the search minimizes or maximizes against papilon's docs.
result = grid_search_optimize(waste_score, bounds, resolution=10)
print("Optimal Spending Configuration to Reduce Waste:")
print(result)


##  Summary
- Entropy helps uncover volatile allocations
- Correlations may indicate unexplained dependencies between categories
- Causal graphs suggest potential drivers of total spending
- Optimization helps minimize potential waste