# FOIA Accelerate — Demo Notebook
This notebook demonstrates the four prototype modules on synthetic data.
> Note: This is **not** a production system and should only be used on non-sensitive data.


In [None]:
import pandas as pd
from src.routing_classifier import RoutingClassifier
from src.pii_redaction import detect_pii, redact_text
from src.deduplication import cluster_near_duplicates
from src.summarizer import summarize
from src.utils import set_seed
# Seed with a fixed value before anything else runs.
# NOTE(review): presumably this makes the random parts of the demo repeatable — confirm what set_seed covers in src.utils.
set_seed(42)
# Load the sample requests. The path is relative, so the notebook must be run
# from a working directory that contains data/. Later cells read the 'text'
# and 'label' columns of this frame.
df = pd.read_csv('data/sample_requests.csv')
# Trailing expression: the notebook renders the first rows as the cell output.
df.head()

## 1) Train routing classifier (baseline)

In [None]:
# Only rows that actually carry a label can be used for training, so build a
# boolean mask over the 'label' column first.
mask = df['label'].notna()
rc = RoutingClassifier()
# Select each column, then filter it with the mask, so texts and labels stay
# aligned on the same rows.
rc.fit(df['text'][mask], df['label'][mask])
# Try the fitted model on a single request; the prediction is the cell output.
rc.predict(['Budget docs for FY22'])

## 2) Redaction suggestions

In [None]:
# Synthetic sample containing an email address, a date and an SSN-formatted number.
sample = 'Contact me at jane.doe@example.com on 03/14/2024. SSN 123-45-6789.'
# NOTE(review): the structure of `findings` is defined in src.pii_redaction and is not visible here.
findings = detect_pii(sample)
# Trailing expression: a 3-tuple of the original text, the findings, and the
# result of redact_text, displayed together so they can be compared.
sample, findings, redact_text(sample, findings)

## 3) Deduplication clustering

In [None]:
# Take every value of the 'text' column as a plain Python list (no filtering).
texts = df['text'].tolist()
# NOTE(review): whether 0.35 is a similarity floor or a distance ceiling depends on
# cluster_near_duplicates — confirm in src.deduplication before tuning it.
clusters = cluster_near_duplicates(texts, threshold=0.35)
# Trailing expression: show the clustering result as the cell output.
clusters

## 4) Extractive summary

In [None]:
# Four-sentence demo paragraph. The adjacent string literals inside the
# parentheses are concatenated into one string; the trailing space on the
# first three pieces keeps the sentences separated.
long_text = (
  'The agency received a significant number of requests this year. '
  'Processing times increased due to staffing shortages. '
  'However, new tooling improved triage. '
  'Future investments could reduce the backlog.'
)
# Trailing expression: the value returned by summarize is the cell output.
# NOTE(review): presumably max_sentences=2 caps the summary at two sentences taken
# from the input — confirm in src.summarizer.
summarize(long_text, max_sentences=2)