# 📓 California County Employment Dataset from BLS LAUS

This notebook builds a **county-level dataset** for California using BLS Local Area Unemployment Statistics (LAUS) files.

We’ll take the raw LAUS files (`la.area`, `la.area_type`, `la.series`, and `la.data.11.California`) and stitch them together into a clean dataset:

| county | year | month | employment | unemployment | labor_force | unemployment_rate |


In [4]:
# --- 1. Setup ---
import pandas as pd

In [8]:
# --- 2. Load Data ---

# Supporting lookup files
la_area = pd.read_csv("la.area", sep="\t", dtype=str)
la_area_type = pd.read_csv("la.area_type", sep="\t", dtype=str)

# Load la.series with explicit headers
la_series = pd.read_csv("la.series", 
                        sep="\t", 
                        header=None, 
                        names=[
                            "series_id", "area_type_code", "area_code", 
                            "measure_code", "seasonal", "srd_code", 
                            "series_title", "footnote_codes", 
                            "begin_year", "begin_period", 
                            "end_year", "end_period"
                        ],
                        dtype=str)

# Load Michigan data with explicit headers
la_data = pd.read_csv("la.data.11.California", 
                      sep="\t", 
                      header=None, 
                      names=[
                          "series_id", "year", "period", "value", "footnote_codes"
                      ],
                      dtype=str)

print("Files loaded with explicit headers.")

Files loaded with explicit headers.


In [18]:
# --- 3. Identify California Counties ---

# Counties = area_type_code "F"
ca_counties = la_area[
    (la_area["area_type_code"] == "F") &
    (la_area["area_code"].str.startswith("CN06"))  # California counties
]

print("California counties found:", len(ca_counties))
ca_counties.head()

California counties found: 58


Unnamed: 0,area_type_code,area_code,area_text,display_level,selectable,sort_sequence
1399,F,CN0600100000000,"Alameda County, CA",0,T,431
1400,F,CN0600300000000,"Alpine County, CA",0,T,432
1401,F,CN0600500000000,"Amador County, CA",0,T,433
1402,F,CN0600700000000,"Butte County, CA",0,T,434
1403,F,CN0600900000000,"Calaveras County, CA",0,T,435


In [26]:
# --- 4. Link Counties to Series ---

ca_series = la_series[la_series["area_code"].isin(ca_counties["area_code"])]

print("Series linked to California counties:", len(ca_series))
ca_series.head()

Series linked to California counties: 232


Unnamed: 0,series_id,area_type_code,area_code,measure_code,seasonal,srd_code,series_title,footnote_codes,begin_year,begin_period,end_year,end_period
1951,LAUCN060010000000003,F,CN0600100000000,3,U,6,"Unemployment Rate: Alameda County, CA (U)",,1990,M01,2025,M02
1952,LAUCN060010000000004,F,CN0600100000000,4,U,6,"Unemployment: Alameda County, CA (U)",,1990,M01,2025,M02
1953,LAUCN060010000000005,F,CN0600100000000,5,U,6,"Employment: Alameda County, CA (U)",,1990,M01,2025,M02
1954,LAUCN060010000000006,F,CN0600100000000,6,U,6,"Labor Force: Alameda County, CA (U)",,1990,M01,2025,M02
1955,LAUCN060030000000003,F,CN0600300000000,3,U,6,"Unemployment Rate: Alpine County, CA (U)",,1990,M01,2025,M02


In [28]:
# --- 5. Merge Data + Series Metadata ---
# Triple merge: la.data + la.series + la.area
ca_data = (
    la_data
    .merge(ca_series, on="series_id")
    .merge(ca_counties, on="area_code")   # brings in area_text
)

ca_data["year"] = ca_data["year"].astype(int)
ca_data["month"] = ca_data["period"].str[1:].astype(int)
ca_data["value"] = pd.to_numeric(ca_data["value"], errors="coerce")

# --- 6. Pivot to Wide Format ---
ca_pivot = ca_data.pivot_table(
    index=["area_text", "year", "month"],
    columns="measure_code",
    values="value"
).reset_index()

ca_pivot.rename(columns={
    "03": "unemployment_rate",
    "04": "labor_force",
    "05": "employment",
    "06": "unemployment"
}, inplace=True)

# --- 7. Final Dataset ---
ca_final = ca_pivot[[
    "area_text", "year", "month", "employment", "unemployment", "labor_force", "unemployment_rate"
]].rename(columns={"area_text": "county"})

ca_final.head()

measure_code,county,year,month,employment,unemployment,labor_force,unemployment_rate
0,"Alameda County, CA",1990,1,657058.0,683503.0,26445.0,3.9
1,"Alameda County, CA",1990,2,650679.0,676039.0,25360.0,3.8
2,"Alameda County, CA",1990,3,649735.0,673971.0,24236.0,3.6
3,"Alameda County, CA",1990,4,645659.0,671250.0,25591.0,3.8
4,"Alameda County, CA",1990,5,646698.0,671949.0,25251.0,3.8


In [32]:
# --- 8. Save to File ---

ca_final.to_csv("california_county_employment.csv", index=False)
ca_final.to_excel("california_county_employment.xlsx", index=False)

print("✅ Exported California county dataset")

✅ Exported California county dataset
