In [8]:
import pandas as pd
import json

def build_single_filer_tax_json(csv_path: str) -> dict:
    # Load the CSV
    df = pd.read_csv(csv_path)

    # Normalize state names
    df['state'] = (
        df['Unnamed: 0']
        .ffill()
        .str.replace(r'\s*\(.*$', '', regex=True)
        .str.strip()
    )

    # Keep only relevant columns
    df_single = df[['state', 'Single Filer', 'Unnamed: 2', 'Unnamed: 3']].copy()
    df_single.columns = ['state', 'rate', 'symbol', 'threshold']

    # Drop rows with missing rate
    df_single = df_single.dropna(subset=['rate'])

    # Group into a dictionary
    state_tax_dict = (
        df_single
        .groupby('state')
        .apply(lambda group: group[['rate', 'symbol', 'threshold']].to_dict(orient='records'))
        .to_dict()
    )

    return state_tax_dict

# Example usage
csv_path = 'state_income_tax_2025.csv'  # replace with your actual path
tax_data = build_single_filer_tax_json(csv_path)

# Optionally save to JSON file
with open('single_filer_tax_data.json', 'w') as f:
    json.dump(tax_data, f, indent=2)

# Print a sample
for state, brackets in list(tax_data.items())[:5]:
    print(f"{state}:\n{brackets}\n")



:
[{'rate': '0.04', 'symbol': '>', 'threshold': '500'}, {'rate': '0.05', 'symbol': '>', 'threshold': '3000'}, {'rate': '0.039', 'symbol': '>', 'threshold': '4500'}, {'rate': '0.02', 'symbol': '>', 'threshold': '10756'}, {'rate': '0.04', 'symbol': '>', 'threshold': '25499'}, {'rate': '0.06', 'symbol': '>', 'threshold': '40245'}, {'rate': '0.08', 'symbol': '>', 'threshold': '55866'}, {'rate': '0.093', 'symbol': '>', 'threshold': '70606'}, {'rate': '0.103', 'symbol': '>', 'threshold': '360659'}, {'rate': '0.113', 'symbol': '>', 'threshold': '432787'}, {'rate': '0.123', 'symbol': '>', 'threshold': '721314'}, {'rate': '0.133', 'symbol': '>', 'threshold': '1000000'}, {'rate': '0.045', 'symbol': '>', 'threshold': '10000'}, {'rate': '0.055', 'symbol': '>', 'threshold': '50000'}, {'rate': '0.06', 'symbol': '>', 'threshold': '100000'}, {'rate': '0.065', 'symbol': '>', 'threshold': '200000'}, {'rate': '0.069', 'symbol': '>', 'threshold': '250000'}, {'rate': '0.0699', 'symbol': '>', 'threshold': '

  .apply(lambda group: group[['rate', 'symbol', 'threshold']].to_dict(orient='records'))


In [9]:
def build_single_filer_tax_json(csv_path: str) -> dict:
    df = pd.read_csv(csv_path)

    # Normalize state names
    df['state'] = (
        df['Unnamed: 0']
        .ffill()
        .str.replace(r'\s*\(.*$', '', regex=True)
        .str.strip()
    )

    # Keep only valid state rows (remove blanks and header rows)
    df = df[df['state'].notna() & (df['state'] != '') & (df['state'].str.lower() != 'state')]

    # Keep only relevant columns
    df_single = df[['state', 'Single Filer', 'Unnamed: 2', 'Unnamed: 3']].copy()
    df_single.columns = ['state', 'rate', 'symbol', 'threshold']

    # Drop rows without a rate value
    df_single = df_single.dropna(subset=['rate'])

    # Group into a dictionary
    state_tax_dict = (
        df_single
        .groupby('state')
        .apply(lambda g: g[['rate', 'symbol', 'threshold']].to_dict(orient='records'))
        .to_dict()
    )

    return state_tax_dict


In [10]:
import pandas as pd
import json

def build_and_save_single_filer_tax_json(csv_path: str, json_path: str):
    # Step 1: Load CSV
    df = pd.read_csv(csv_path)

    # Step 2: Normalize the 'state' column
    df['state'] = (
        df['Unnamed: 0']
        .ffill()
        .str.replace(r'\s*\(.*$', '', regex=True)
        .str.strip()
    )

    # Step 3: Remove invalid state entries (like blanks or "State" row)
    df = df[df['state'].notna() & (df['state'] != '') & (df['state'].str.lower() != 'state')]

    # Step 4: Extract relevant columns
    df_single = df[['state', 'Single Filer', 'Unnamed: 2', 'Unnamed: 3']].copy()
    df_single.columns = ['state', 'rate', 'symbol', 'threshold']

    # Step 5: Drop rows with missing rates
    df_single = df_single.dropna(subset=['rate'])

    # Step 6: Group by state and convert to dictionary
    tax_dict = (
        df_single
        .groupby('state')
        .apply(lambda g: g[['rate', 'symbol', 'threshold']].to_dict(orient='records'))
        .to_dict()
    )

    # Step 7: Save to JSON
    with open(json_path, 'w') as f:
        json.dump(tax_dict, f, indent=2)

    print(f"✅ JSON saved to {json_path}")

# Example usage
build_and_save_single_filer_tax_json("state_income_tax_2025.csv", "single_filer_clean.json")


✅ JSON saved to single_filer_clean.json


  .apply(lambda g: g[['rate', 'symbol', 'threshold']].to_dict(orient='records'))
