In [1]:
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import time


In [2]:
#API KEYS
bls_api = '476d935074304bb5921d6c11f03abcc3'
bea_api = '1877DB8B-6730-4F80-B187-242ACB99D57C'

In [3]:
#URLS

url_bea = "https://apps.bea.gov/api/data"
url_bls = "https://api.bls.gov/publicAPI/v2/timeseries/data/"

In [4]:
#Datos para demsempleo por Estado
estados = {
    "Alabama": "LAUST010000000000003",
    "Alaska": "LAUST020000000000003",
    "Arizona": "LAUST040000000000003",
    "Arkansas": "LAUST050000000000003",
    "California": "LAUST060000000000003",
    "Colorado": "LAUST080000000000003",
    "Connecticut": "LAUST090000000000003",
    "Delaware": "LAUST100000000000003",
    "Florida": "LAUST120000000000003",
    "Georgia": "LAUST130000000000003",
    "Hawaii": "LAUST150000000000003",
    "Idaho": "LAUST160000000000003",
    "Illinois": "LAUST170000000000003",
    "Indiana": "LAUST180000000000003",
    "Iowa": "LAUST190000000000003",
    "Kansas": "LAUST200000000000003",
    "Kentucky": "LAUST210000000000003",
    "Louisiana": "LAUST220000000000003",
    "Maine": "LAUST230000000000003",
    "Maryland": "LAUST240000000000003",
    "Massachusetts": "LAUST250000000000003",
    "Michigan": "LAUST260000000000003",
    "Minnesota": "LAUST270000000000003",
    "Mississippi": "LAUST280000000000003",
    "Missouri": "LAUST290000000000003",
    "Montana": "LAUST300000000000003",
    "Nebraska": "LAUST310000000000003",
    "Nevada": "LAUST320000000000003",
    "New Hampshire": "LAUST330000000000003",
    "New Jersey": "LAUST340000000000003",
    "New Mexico": "LAUST350000000000003",
    "New York": "LAUST360000000000003",
    "North Carolina": "LAUST370000000000003",
    "North Dakota": "LAUST380000000000003",
    "Ohio": "LAUST390000000000003",
    "Oklahoma": "LAUST400000000000003",
    "Oregon": "LAUST410000000000003",
    "Pennsylvania": "LAUST420000000000003",
    "Rhode Island": "LAUST440000000000003",
    "South Carolina": "LAUST450000000000003",
    "South Dakota": "LAUST460000000000003",
    "Tennessee": "LAUST470000000000003",
    "Texas": "LAUST480000000000003",
    "Utah": "LAUST490000000000003",
    "Vermont": "LAUST500000000000003",
    "Virginia": "LAUST510000000000003",
    "Washington": "LAUST530000000000003",
    "West Virginia": "LAUST540000000000003",
    "Wisconsin": "LAUST550000000000003",
    "Wyoming": "LAUST560000000000003"
}

In [5]:
meses = {
    "January": 1, "February": 2, "March": 3, "April": 4,
    "May": 5, "June": 6, "July": 7, "August": 8,
    "September": 9, "October": 10, "November": 11, "December": 12
}

In [6]:
rangos = [(2000, 2009), (2010, 2019), (2020, 2025)]

In [7]:
#Payload
registros = []

series = list(estados.values())

for start, end in rangos:
    for i in range(0, len(series), 25):  # La API solo acepta 25 series por consulta
        bloque = series[i:i+25]
        payload = {
            "seriesid": bloque,
            "startyear": str(start),
            "endyear": str(end)
        }
        url = "https://api.bls.gov/publicAPI/v2/timeseries/data/"
        response = requests.post(url, json=payload)
        data = response.json()

        # Procesar cada serie
        for serie in data["Results"]["series"]:
            estado = [k for k, v in estados.items() if v == serie["seriesID"]][0]
            for d in serie["data"]:
                if d["periodName"] in meses:
                    registros.append({
                        "Estado": estado,
                        "Año": int(d["year"]),
                        "Mes": meses[d["periodName"]],
                        "TasaDesempleo": float(d["value"])
                    })
        time.sleep(0.5)  # pequeño delay para no saturar la API


In [8]:
df = pd.DataFrame(registros)
df["Fecha"] = pd.to_datetime(df["Año"].astype(str) + "-" + df["Mes"].astype(str).str.zfill(2))
df = df.sort_values(["Estado", "Fecha"]).reset_index(drop=True)
df = df[df['Estado'] != 'New York city']
df = df[df['Estado'] != 'Los Angeles County']
df = df[['Estado','Fecha','TasaDesempleo']]
df['U_3m'] = df.groupby('Estado')['TasaDesempleo'].transform(lambda x: x.rolling(3, min_periods = 3).mean().round(2))
df['U_min12']= df.groupby('Estado')['U_3m'].transform(lambda x: x.rolling(12, min_periods = 12).min().round(2))
df['SahmRule'] = (df['U_3m']-df['U_min12']).round(2)
df['Dummy'] = (df['SahmRule'] >=0.5).astype(int)
df['Trim'] = df['Fecha'].dt.to_period('Q').astype(str)
df

Unnamed: 0,Estado,Fecha,TasaDesempleo,U_3m,U_min12,SahmRule,Dummy,Trim
0,Alabama,2000-01-01,5.1,,,,0,2000Q1
1,Alabama,2000-02-01,5.1,,,,0,2000Q1
2,Alabama,2000-03-01,4.7,4.97,,,0,2000Q1
3,Alabama,2000-04-01,4.1,4.63,,,0,2000Q2
4,Alabama,2000-05-01,4.3,4.37,,,0,2000Q2
...,...,...,...,...,...,...,...,...
15395,Wyoming,2025-04-01,3.0,3.47,2.97,0.50,1,2025Q2
15396,Wyoming,2025-05-01,3.2,3.30,2.97,0.33,0,2025Q2
15397,Wyoming,2025-06-01,3.7,3.30,2.97,0.33,0,2025Q2
15398,Wyoming,2025-07-01,2.8,3.23,2.97,0.26,0,2025Q3


In [9]:
#Parametros para GDP

params = {
    "UserID": bea_api,
    "method": "GetData",
    "datasetname": "Regional",
    "TableName": "SQGDP1",   
    "Frequency": "Q",
    "Year": "ALL",    
    'GeoFips':'STATE',     
    "ResultFormat": "JSON",
    'LineCode':1
}

In [10]:
#Request GDP
response = requests.get(url_bea, params=params)
response.status_code == 200

True

In [11]:
gdp = response.json()
gdp

{'BEAAPI': {'Request': {'RequestParam': [{'ParameterName': 'USERID',
     'ParameterValue': '1877DB8B-6730-4F80-B187-242ACB99D57C'},
    {'ParameterName': 'METHOD', 'ParameterValue': 'GETDATA'},
    {'ParameterName': 'DATASETNAME', 'ParameterValue': 'REGIONAL'},
    {'ParameterName': 'TABLENAME', 'ParameterValue': 'SQGDP1'},
    {'ParameterName': 'FREQUENCY', 'ParameterValue': 'Q'},
    {'ParameterName': 'YEAR', 'ParameterValue': 'ALL'},
    {'ParameterName': 'GEOFIPS', 'ParameterValue': 'STATE'},
    {'ParameterName': 'RESULTFORMAT', 'ParameterValue': 'JSON'},
    {'ParameterName': 'LINECODE', 'ParameterValue': '1'}]},
  'Results': {'Statistic': 'Real Gross Domestic Product (GDP)',
   'UnitOfMeasure': 'Millions of chained 2017 dollars',
   'PublicTable': 'SQGDP1 State quarterly gross domestic product (GDP) summary',
   'UTCProductionTime': '2025-10-29T16:28:01.387',
   'NoteRef': '1',
   'Dimensions': [{'Name': 'Code', 'DataType': 'string', 'IsValue': '0'},
    {'Name': 'GeoFips', 'Da

In [12]:
records = gdp['BEAAPI']['Results']['Data']
records

[{'Code': 'SQGDP1-1',
  'GeoFips': '00000',
  'GeoName': 'United States',
  'TimePeriod': '2005Q1',
  'CL_UNIT': 'Millions of chained 2017 dollars',
  'UNIT_MULT': '6',
  'DataValue': '15844727.0',
  'NoteRef': '1'},
 {'Code': 'SQGDP1-1',
  'GeoFips': '00000',
  'GeoName': 'United States',
  'TimePeriod': '2005Q2',
  'CL_UNIT': 'Millions of chained 2017 dollars',
  'UNIT_MULT': '6',
  'DataValue': '15922782.0',
  'NoteRef': '1'},
 {'Code': 'SQGDP1-1',
  'GeoFips': '00000',
  'GeoName': 'United States',
  'TimePeriod': '2005Q3',
  'CL_UNIT': 'Millions of chained 2017 dollars',
  'UNIT_MULT': '6',
  'DataValue': '16047587.0',
  'NoteRef': '1'},
 {'Code': 'SQGDP1-1',
  'GeoFips': '00000',
  'GeoName': 'United States',
  'TimePeriod': '2005Q4',
  'CL_UNIT': 'Millions of chained 2017 dollars',
  'UNIT_MULT': '6',
  'DataValue': '16136734.0',
  'NoteRef': '1'},
 {'Code': 'SQGDP1-1',
  'GeoFips': '00000',
  'GeoName': 'United States',
  'TimePeriod': '2006Q1',
  'CL_UNIT': 'Millions of chaine

In [13]:
gdp_data = pd.DataFrame(records)[['GeoName','TimePeriod','DataValue']]
gdp_data[gdp_data['GeoName']=='United States']
gdp_data

Unnamed: 0,GeoName,TimePeriod,DataValue
0,United States,2005Q1,15844727.0
1,United States,2005Q2,15922782.0
2,United States,2005Q3,16047587.0
3,United States,2005Q4,16136734.0
4,United States,2006Q1,16353835.0
...,...,...,...
4915,Far West,2024Q2,4606652.1
4916,Far West,2024Q3,4649588.3
4917,Far West,2024Q4,4679775.7
4918,Far West,2025Q1,4674104.5


In [14]:
gdp_data["DataValue"] = pd.to_numeric(gdp_data["DataValue"], errors="coerce")
gdp_data.rename(columns={'GeoName':'Estado','TimePeriod':'Trim','DataValue':'GDP'}, inplace=True)
gdp_data

Unnamed: 0,Estado,Trim,GDP
0,United States,2005Q1,15844727.0
1,United States,2005Q2,15922782.0
2,United States,2005Q3,16047587.0
3,United States,2005Q4,16136734.0
4,United States,2006Q1,16353835.0
...,...,...,...
4915,Far West,2024Q2,4606652.1
4916,Far West,2024Q3,4649588.3
4917,Far West,2024Q4,4679775.7
4918,Far West,2025Q1,4674104.5


In [15]:
base_cox = pd.merge(
    df,
    gdp_data[['Estado','Trim','GDP']],
    on=['Estado','Trim'],
    how='left'
)
base_cox

Unnamed: 0,Estado,Fecha,TasaDesempleo,U_3m,U_min12,SahmRule,Dummy,Trim,GDP
0,Alabama,2000-01-01,5.1,,,,0,2000Q1,
1,Alabama,2000-02-01,5.1,,,,0,2000Q1,
2,Alabama,2000-03-01,4.7,4.97,,,0,2000Q1,
3,Alabama,2000-04-01,4.1,4.63,,,0,2000Q2,
4,Alabama,2000-05-01,4.3,4.37,,,0,2000Q2,
...,...,...,...,...,...,...,...,...,...
15395,Wyoming,2025-04-01,3.0,3.47,2.97,0.50,1,2025Q2,39982.7
15396,Wyoming,2025-05-01,3.2,3.30,2.97,0.33,0,2025Q2,39982.7
15397,Wyoming,2025-06-01,3.7,3.30,2.97,0.33,0,2025Q2,39982.7
15398,Wyoming,2025-07-01,2.8,3.23,2.97,0.26,0,2025Q3,


In [16]:
#Parametros Personla Income
params_pi = {
    "UserID": bea_api,
    "method": "GetData",
    "datasetname": "Regional",
    "TableName": "SQINC1",   
    "Frequency": "Q",
    "Year": "ALL",    
    'GeoFips':'STATE',     
    "ResultFormat": "JSON",
    'LineCode':1
}

In [17]:
#Request 
response_pi = requests.get(url_bea, params=params_pi)
response_pi.status_code == 200

True

In [18]:
pi = response_pi.json()
pi

{'BEAAPI': {'Request': {'RequestParam': [{'ParameterName': 'USERID',
     'ParameterValue': '1877DB8B-6730-4F80-B187-242ACB99D57C'},
    {'ParameterName': 'METHOD', 'ParameterValue': 'GETDATA'},
    {'ParameterName': 'DATASETNAME', 'ParameterValue': 'REGIONAL'},
    {'ParameterName': 'TABLENAME', 'ParameterValue': 'SQINC1'},
    {'ParameterName': 'FREQUENCY', 'ParameterValue': 'Q'},
    {'ParameterName': 'YEAR', 'ParameterValue': 'ALL'},
    {'ParameterName': 'GEOFIPS', 'ParameterValue': 'STATE'},
    {'ParameterName': 'RESULTFORMAT', 'ParameterValue': 'JSON'},
    {'ParameterName': 'LINECODE', 'ParameterValue': '1'}]},
  'Results': {'Statistic': 'Personal income',
   'UnitOfMeasure': 'Millions of dollars',
   'PublicTable': 'SQINC1 State quarterly personal income summary: personal income, population, per capita personal income',
   'UTCProductionTime': '2025-10-29T16:51:22.483',
   'NoteRef': ' ',
   'Dimensions': [{'Name': 'Code', 'DataType': 'string', 'IsValue': '0'},
    {'Name': '

In [19]:
records_pi = pi['BEAAPI']['Results']['Data']
records_pi

[{'Code': 'SQINC1-1',
  'GeoFips': '00000',
  'GeoName': 'United States',
  'TimePeriod': '1971Q4',
  'CL_UNIT': 'Millions of dollars',
  'UNIT_MULT': '6',
  'DataValue': '952246.9'},
 {'Code': 'SQINC1-1',
  'GeoFips': '00000',
  'GeoName': 'United States',
  'TimePeriod': '1972Q1',
  'CL_UNIT': 'Millions of dollars',
  'UNIT_MULT': '6',
  'DataValue': '977447.2'},
 {'Code': 'SQINC1-1',
  'GeoFips': '00000',
  'GeoName': 'United States',
  'TimePeriod': '1972Q2',
  'CL_UNIT': 'Millions of dollars',
  'UNIT_MULT': '6',
  'DataValue': '996338.5'},
 {'Code': 'SQINC1-1',
  'GeoFips': '00000',
  'GeoName': 'United States',
  'TimePeriod': '1972Q3',
  'CL_UNIT': 'Millions of dollars',
  'UNIT_MULT': '6',
  'DataValue': '1023328.8'},
 {'Code': 'SQINC1-1',
  'GeoFips': '00000',
  'GeoName': 'United States',
  'TimePeriod': '1972Q4',
  'CL_UNIT': 'Millions of dollars',
  'UNIT_MULT': '6',
  'DataValue': '1068517.5'},
 {'Code': 'SQINC1-1',
  'GeoFips': '00000',
  'GeoName': 'United States',
  'T

In [20]:
data_pi = pd.DataFrame(records_pi)[['GeoName','TimePeriod','DataValue']]
data_pi

Unnamed: 0,GeoName,TimePeriod,DataValue
0,United States,1971Q4,952246.9
1,United States,1972Q1,977447.2
2,United States,1972Q2,996338.5
3,United States,1972Q3,1023328.8
4,United States,1972Q4,1068517.5
...,...,...,...
18595,Far West *,2024Q2,4739480.1
18596,Far West *,2024Q3,4788860.9
18597,Far West *,2024Q4,4876382.7
18598,Far West *,2025Q1,4932645.0


In [21]:
data_pi['DataValue'] = pd.to_numeric(data_pi['DataValue'],errors='coerce')
data_pi.rename(columns={'GeoName':'Estado','TimePeriod':'Trim','DataValue':'Personal_Income'}, inplace=True)
data_pi["Estado"] = data_pi["Estado"].str.replace("*", "", regex=False).str.strip()
data_pi


Unnamed: 0,Estado,Trim,Personal_Income
0,United States,1971Q4,952246.9
1,United States,1972Q1,977447.2
2,United States,1972Q2,996338.5
3,United States,1972Q3,1023328.8
4,United States,1972Q4,1068517.5
...,...,...,...
18595,Far West,2024Q2,4739480.1
18596,Far West,2024Q3,4788860.9
18597,Far West,2024Q4,4876382.7
18598,Far West,2025Q1,4932645.0


In [22]:
base_cox = pd.merge(
    base_cox,
    data_pi[['Estado','Trim','Personal_Income']],
    on=['Estado','Trim'],
    how='left'
)

In [23]:
base_cox

Unnamed: 0,Estado,Fecha,TasaDesempleo,U_3m,U_min12,SahmRule,Dummy,Trim,GDP,Personal_Income
0,Alabama,2000-01-01,5.1,,,,0,2000Q1,,105965.0
1,Alabama,2000-02-01,5.1,,,,0,2000Q1,,105965.0
2,Alabama,2000-03-01,4.7,4.97,,,0,2000Q1,,105965.0
3,Alabama,2000-04-01,4.1,4.63,,,0,2000Q2,,107747.9
4,Alabama,2000-05-01,4.3,4.37,,,0,2000Q2,,107747.9
...,...,...,...,...,...,...,...,...,...,...
15395,Wyoming,2025-04-01,3.0,3.47,2.97,0.50,1,2025Q2,39982.7,52880.9
15396,Wyoming,2025-05-01,3.2,3.30,2.97,0.33,0,2025Q2,39982.7,52880.9
15397,Wyoming,2025-06-01,3.7,3.30,2.97,0.33,0,2025Q2,39982.7,52880.9
15398,Wyoming,2025-07-01,2.8,3.23,2.97,0.26,0,2025Q3,,


In [24]:
base_cox = base_cox[base_cox['Trim']>='2005-01-01']
base_cox = base_cox[base_cox['Trim']<'2015-01-01']
base_cox['GDP'] = np.log(base_cox['GDP'])
base_cox['Personal_Income']=np.log(base_cox['Personal_Income']) 
base_cox

Unnamed: 0,Estado,Fecha,TasaDesempleo,U_3m,U_min12,SahmRule,Dummy,Trim,GDP,Personal_Income
60,Alabama,2005-01-01,5.4,5.13,5.07,0.06,0,2005Q1,12.184200,11.797564
61,Alabama,2005-02-01,5.4,5.23,5.07,0.16,0,2005Q1,12.184200,11.797564
62,Alabama,2005-03-01,4.6,5.13,5.07,0.06,0,2005Q1,12.184200,11.797564
63,Alabama,2005-04-01,4.1,4.70,4.70,0.00,0,2005Q2,12.193473,11.814656
64,Alabama,2005-05-01,4.0,4.23,4.23,0.00,0,2005Q2,12.193473,11.814656
...,...,...,...,...,...,...,...,...,...,...
15267,Wyoming,2014-08-01,4.1,4.27,4.17,0.10,0,2014Q3,10.585187,10.403726
15268,Wyoming,2014-09-01,3.8,4.07,4.07,0.00,0,2014Q3,10.585187,10.403726
15269,Wyoming,2014-10-01,3.9,3.93,3.93,0.00,0,2014Q4,10.602053,10.415817
15270,Wyoming,2014-11-01,4.1,3.93,3.93,0.00,0,2014Q4,10.602053,10.415817
