# Test FilterPipeline

Quick test of the new configuration-based filtering pipeline.

In [1]:
import html
import os
from datetime import datetime
from pathlib import Path

from dotenv import load_dotenv

from scout.contexts.scraping.scrapers import BoozScraper
from scout.contexts.filtering import FilterPipeline


In [2]:
# Pull variables from a .env file (if one is found) into the process environment.
load_dotenv()

# CONFIG_PATH is the base directory that config file names are joined onto.
# Path(None) fails with an opaque TypeError, so check for a missing variable
# explicitly and say what needs to be set.
_config_path_env = os.getenv("CONFIG_PATH")
if _config_path_env is None:
    raise RuntimeError(
        "CONFIG_PATH is not set; define it in the environment or in a .env file"
    )
CONFIG_PATH = Path(_config_path_env)

In [None]:
# Create the two objects the rest of the notebook works with: the scraper and
# a filter pipeline configured from demo.yaml under CONFIG_PATH.
bs = BoozScraper()
demo_config = CONFIG_PATH / "demo.yaml"
pipeline = FilterPipeline(demo_config)

# Print the scraper's cache summary.
bs.print_cache_summary()

In [4]:
# Ask the pipeline for the SQL query derived from its config, then echo it
# under a heading, followed by one blank line.
query = pipeline.build_sql_query()
print("Generated SQL query:", query, "", sep="\n")

Generated SQL query:
SELECT * FROM listings WHERE ( max_salary >= 200000 OR max_salary = 0 ) AND ( date_posted >= '2025-10-07' ) AND (  ( location LIKE '%MD%' )  OR  ( ( location LIKE '%DC%' OR location LIKE '%VA%' ) AND remote = 'Hybrid' )  OR ( remote = 'Yes' ) )



In [5]:
# Load with SQL filters: pass the generated query to the scraper's database
# import so the first round of filtering happens in SQL, then report how many
# rows came back.
df = bs.import_db_as_df(query=query)
print(f"After SQL filtering: {len(df)} jobs")

After SQL filtering: 17 jobs


  df = pd.read_sql_query(query, conn)


In [6]:
# Apply all remaining filters from config to the SQL-filtered frame.
# verbose=True is passed so the pipeline reports its progress (the saved output
# shows the job count after each filtering stage).
df_filtered = pipeline.apply_filters(df, verbose=True)

Starting with 17 jobs
After keyword filtering: 6 jobs
After 'Description' red flag filtering: 6 jobs
After 'Job Title' red flag filtering: 6 jobs

Filtered out 11 jobs (64.7%)


In [7]:
# Display results: print a short summary block for every job left after filtering.

# Take "now" once so every posting's age is measured against the same instant.
now = datetime.today()

for _, job in df_filtered.iterrows():
    age_days = (now - job["Date Posted"]).days
    # Titles can carry HTML entities (the saved output shows "&amp;");
    # decode them so they read naturally. str() keeps non-string values printable.
    title = html.unescape(str(job["Job Title"]))
    print(f"\n{'='*60}")
    print(f"Title: {title}")
    print(f"Salary: ${job['Min Salary']:,} - ${job['Max Salary']:,}")
    print(f"Location: {job['Location']}")
    print(f"Posted: {age_days} days ago")
    print(f"URL: {job['url']}")


Title: AI Solution Architect
Salary: $99,000 - $225,000
Location: McLean,VA,US
Posted: 3 days ago
URL: https://careers.boozallen.com/jobs/JobDetail?jobId=116760

Title: MLOps Engineer, Senior
Salary: $99,000 - $225,000
Location: Chantilly,VA,US
Posted: 6 days ago
URL: https://careers.boozallen.com/jobs/JobDetail?jobId=116638

Title: MLOps Engineer, Senior
Salary: $99,000 - $225,000
Location: Chantilly,VA,US
Posted: 6 days ago
URL: https://careers.boozallen.com/jobs/JobDetail?jobId=116643

Title: Model and Simulation Engineer, Lead
Salary: $99,000 - $225,000
Location: Arlington,VA,US
Posted: 7 days ago
URL: https://careers.boozallen.com/jobs/JobDetail?jobId=116608

Title: Agentic AI &amp; Machine Learning Engineer
Salary: $99,000 - $225,000
Location: Bethesda,MD,US
Posted: 7 days ago
URL: https://careers.boozallen.com/jobs/JobDetail?jobId=116609

Title: AI and ML Engineer
Salary: $99,000 - $225,000
Location: Ashburn,VA,US
Posted: 7 days ago
URL: https://careers.boozallen.com/jobs/JobDe

In [12]:
# Bare expression on the last line of the cell, so the notebook renders the
# filtered DataFrame as the cell's output.
# NOTE(review): the saved output below was produced at execution count 12 and
# lists 10 rows, while the pipeline run above reported 6 jobs. It looks stale;
# re-run the notebook top to bottom before relying on it.
df_filtered

Unnamed: 0,Job Title,Job Number (Public),Job Number (metadata?),Description,Date Posted,Location,Remote Work,url,Min Salary,Max Salary,status,last_checked,Description_Good,Job_Title_OK,Clearance Required
59,"AWS Engineer, Senior",R0227786,116667,"AWS Engineer, Senior\n\n**The Opportunity:**\n...",2025-10-08,"Ashburn,VA,US",Hybrid,https://careers.boozallen.com/jobs/JobDetail?j...,86800,198000,,,True,True,False
62,Agentic AI &amp; Machine Learning Engineer,R0227765,116609,Agentic AI & Machine Learning Engineer\n\n**Th...,2025-10-07,"Bethesda,MD,US",Hybrid,https://careers.boozallen.com/jobs/JobDetail?j...,99000,225000,,,True,True,False
63,AI and ML Engineer,R0227780,116611,AI and ML Engineer\n\n**The Opportunity:**\n\n...,2025-10-07,"Ashburn,VA,US",Hybrid,https://careers.boozallen.com/jobs/JobDetail?j...,99000,225000,,,True,True,False
65,AI and ML Engineer,R0227603,116506,AI and ML Engineer\n\n**The Opportunity:**\n\n...,2025-10-06,"Ashburn,VA,US",Hybrid,https://careers.boozallen.com/jobs/JobDetail?j...,99000,225000,,,True,True,False
67,Product Software Engineer,R0227584,116485,Product Software Engineer\n\n**The Opportunity...,2025-10-03,"McLean,VA,US",Hybrid,https://careers.boozallen.com/jobs/JobDetail?j...,86800,198000,,,True,True,False
69,Product AI Engineer,R0227588,116488,Product AI Engineer\n\n**The Opportunity:** \...,2025-10-03,"McLean,VA,US",Hybrid,https://careers.boozallen.com/jobs/JobDetail?j...,86800,198000,,,True,True,False
70,Product DevSecOps Engineer,R0227589,116489,Product DevSecOps Engineer\n\n**The Opportunit...,2025-10-03,"McLean,VA,US",Hybrid,https://careers.boozallen.com/jobs/JobDetail?j...,86800,198000,,,True,True,False
88,AI Solution Architect,R0226854,116265,AI Solution Architect\n\n**Key Role:**\n\nDesi...,2025-09-29,"Arlington,VA,US",Hybrid,https://careers.boozallen.com/jobs/JobDetail?j...,99000,225000,,,True,True,False
95,Power Platform Integrations Developer,R0227074,116217,Power Platform Integrations Developer\n\n**The...,2025-09-26,"Bethesda,MD,US",Hybrid,https://careers.boozallen.com/jobs/JobDetail?j...,86800,198000,,,True,True,False
99,Robotics Engineer and Interdisciplinary Scientist,R0226839,116024,Robotics Engineer and Interdisciplinary Scient...,2025-09-24,"Washington,DC,US",Hybrid,https://careers.boozallen.com/jobs/JobDetail?j...,99000,225000,,,True,True,False
