In [2]:
import pandas as pd
import requests
import json
from io import StringIO
from config import bls_api_key

# --- 1. Load CSV Data for Women ---
# Quarterly values for series LEU0252882700 (2014 Q01 through 2024 Q04),
# embedded inline so this step needs neither a file nor a network call.
csv_data = """
Series id,Year,Period,Value
LEU0252882700,2014,Q01,722
LEU0252882700,2014,Q02,716
LEU0252882700,2014,Q03,715
LEU0252882700,2014,Q04,724
LEU0252882700,2015,Q01,730
LEU0252882700,2015,Q02,726
LEU0252882700,2015,Q03,721
LEU0252882700,2015,Q04,729
LEU0252882700,2016,Q01,750
LEU0252882700,2016,Q02,744
LEU0252882700,2016,Q03,745
LEU0252882700,2016,Q04,758
LEU0252882700,2017,Q01,765
LEU0252882700,2017,Q02,780
LEU0252882700,2017,Q03,767
LEU0252882700,2017,Q04,769
LEU0252882700,2018,Q01,783
LEU0252882700,2018,Q02,780
LEU0252882700,2018,Q03,796
LEU0252882700,2018,Q04,794
LEU0252882700,2019,Q01,806
LEU0252882700,2019,Q02,814
LEU0252882700,2019,Q03,825
LEU0252882700,2019,Q04,843
LEU0252882700,2020,Q01,857
LEU0252882700,2020,Q02,913
LEU0252882700,2020,Q03,902
LEU0252882700,2020,Q04,894
LEU0252882700,2021,Q01,900
LEU0252882700,2021,Q02,899
LEU0252882700,2021,Q03,916
LEU0252882700,2021,Q04,930
LEU0252882700,2022,Q01,939
LEU0252882700,2022,Q02,943
LEU0252882700,2022,Q03,971
LEU0252882700,2022,Q04,975
LEU0252882700,2023,Q01,996
LEU0252882700,2023,Q02,993
LEU0252882700,2023,Q03,1005
LEU0252882700,2023,Q04,1031
LEU0252882700,2024,Q01,1021
LEU0252882700,2024,Q02,1017
LEU0252882700,2024,Q03,1054
LEU0252882700,2024,Q04,1083
"""

# Parse the embedded text (stripped of its leading/trailing newlines), tag every
# row with its group label, and rename the id column to "SeriesID". Written as
# a single chain so the frame is built fresh each time the cell runs.
women_df = (
    pd.read_csv(StringIO(csv_data.strip()))
    .assign(Group='Women')
    .rename(columns={'Series id': 'SeriesID'})
)

# --- 2. BLS API Data for All & Men (2014–2024) ---
BLS_API_URL = "https://api.bls.gov/publicAPI/v2/timeseries/data/"

# Group label -> BLS series id requested from the API.
series_ids = {
    "All": "LEU0252881500",
    "Men": "LEU0252881800"
}
# Reverse lookup (series id -> group label) so each returned series is mapped
# back to its label directly instead of rescanning series_ids per series.
group_by_series_id = {sid: group for group, sid in series_ids.items()}

headers = {'Content-Type': 'application/json'}
payload = {
    "seriesid": list(series_ids.values()),
    "startyear": "2014",
    "endyear": "2024",
    "registrationkey": bls_api_key
}

# The timeout keeps "Run All" from hanging on a stalled connection, and
# raise_for_status() surfaces HTTP-level errors before the body is parsed.
response = requests.post(
    BLS_API_URL, headers=headers, data=json.dumps(payload), timeout=30
)
response.raise_for_status()
api_data = response.json()

# The response body carries its own "status" field. Fail loudly when it is not
# a success: continuing would leave api_df empty and the combined table would
# silently contain only the Women rows.
if api_data.get("status") != "REQUEST_SUCCEEDED":
    raise RuntimeError(
        f"BLS API request failed: status={api_data.get('status')!r}, "
        f"message={api_data.get('message')!r}"
    )

# Parse BLS API results into one flat record per (series, quarter).
records = []
for series in api_data["Results"]["series"]:
    sid = series["seriesID"]
    group = group_by_series_id[sid]  # KeyError here means an unrequested series came back
    for item in series["data"]:
        # Keep only observations whose period code starts with "Q" (quarterly),
        # matching the Q01–Q04 codes used by the inline Women data.
        if item["period"].startswith("Q"):
            records.append({
                "SeriesID": sid,
                "Group": group,
                "Year": int(item["year"]),
                "Period": item["period"],
                "Value": int(item["value"])
            })

api_df = pd.DataFrame(records)

# --- 3. Combine all data, 4. sort and display ---
# Stack the API rows (All, Men) on top of the inline Women rows, order the
# result by group, then year, then quarter code, and renumber the index 0..n-1.
combined_df = (
    pd.concat([api_df, women_df], ignore_index=True)
    .sort_values(by=["Group", "Year", "Period"])
    .reset_index(drop=True)
)

# Bare last expression -> rich display of the combined frame.
combined_df

Unnamed: 0,SeriesID,Group,Year,Period,Value
0,LEU0252881500,All,2014,Q01,796
1,LEU0252881500,All,2014,Q02,780
2,LEU0252881500,All,2014,Q03,790
3,LEU0252881500,All,2014,Q04,799
4,LEU0252881500,All,2015,Q01,808
...,...,...,...,...,...
127,LEU0252882700,Women,2023,Q04,1031
128,LEU0252882700,Women,2024,Q01,1021
129,LEU0252882700,Women,2024,Q02,1017
130,LEU0252882700,Women,2024,Q03,1054


In [3]:
# Write the combined table to a CSV file in the working directory;
# index=False leaves the row index out of the file.
combined_df.to_csv(path_or_buf="wage_data_2014_2024.csv", index=False)

## Approach
- Review the datasets available from BLS.
- Pull median wage series by gender (All, Men, Women).

## Visualizations

- line chart of all median wage data from 2014-2024
    - Filter (All, Men, Women)

In [4]:
import requests
import json

# The key is supplied by the imported bls_api_key rather than being hardcoded
# in the notebook.
BLS_API_KEY = bls_api_key

# Exploratory list of series to inspect. The trailing labels are the original
# author's notes and have not been verified.
# NOTE(review): in the saved output, LNU02000001 / LNU02000002 return monthly
# values around 85,000 and 76,000, which look like employment levels rather
# than weekly earnings, and the two LEU series return ~800 and ~380 — confirm
# every id against the BLS series catalog before treating these as wage data.
series_ids = [
    "LNU02000001",  # Men
    "LNU02000002",  # Women
    "LEU0252912500",  # Bachelor's degree or higher, 25+
    "LEU0252883700",  # Age 25 to 34
    # Add more series for job titles, etc.
]

headers = {'Content-type': 'application/json'}
data = json.dumps({
    "seriesid": series_ids,
    "startyear": "2023",
    "endyear": "2024",
    "registrationkey": BLS_API_KEY
})

# The timeout prevents an indefinite hang; raise_for_status() reports HTTP
# errors before the body is parsed.
response = requests.post(
    'https://api.bls.gov/publicAPI/v2/timeseries/data/',
    data=data,
    headers=headers,
    timeout=30,
)
response.raise_for_status()
json_data = response.json()

# Stop with a readable error instead of an opaque KeyError on 'Results' when
# the API reports that the request was not processed.
if json_data.get('status') != 'REQUEST_SUCCEEDED':
    raise RuntimeError(
        f"BLS API request failed: status={json_data.get('status')!r}, "
        f"message={json_data.get('message')!r}"
    )

# Print result
for series in json_data['Results']['series']:
    print(f"Series ID: {series['seriesID']}")
    # First 5 entries as returned; in the saved output these are the most
    # recent observations (the newest period is listed first).
    for item in series['data'][:5]:
        print(f"{item['year']} {item['periodName']}: {item['value']}")
    print("\n")

Series ID: LNU02000001
2024 December: 85139
2024 November: 85296
2024 October: 85770
2024 September: 85754
2024 August: 85379


Series ID: LNU02000002
2024 December: 76155
2024 November: 76161
2024 October: 76169
2024 September: 76292
2024 August: 75970


Series ID: LEU0252912500
2024 4th Quarter: 818
2024 3rd Quarter: 804
2024 2nd Quarter: 792
2024 1st Quarter: 791
2023 4th Quarter: 789


Series ID: LEU0252883700
2024 4th Quarter: 382
2024 3rd Quarter: 376
2024 2nd Quarter: 372
2024 1st Quarter: 373
2023 4th Quarter: 377


