In [2]:
import os
import pandas as pd
import requests

In [56]:
# Upper bound (seconds) on how long a single download may block the notebook.
_REQUEST_TIMEOUT_SECONDS = 60


def _split_fields(line, csv=False, contains_comma=False):
    """Split one raw text line into its list of field strings.

    Carriage returns are removed first. Tab-separated lines (``csv=False``)
    are split on tabs and returned as-is. CSV lines are split on ``'",'``
    when ``contains_comma`` is true (so commas inside quoted fields survive)
    and on a bare comma otherwise; double quotes are then stripped from
    every field.
    """
    line = line.replace('\r', '')
    if not csv:
        return line.split('\t')
    separator = '",' if contains_comma else ','
    return [field.replace('"', '') for field in line.split(separator)]


def parse_data(url, csv=False, columns_headers = True, columns=None, contains_comma=False ):
    """Download a delimited text file and return it as a DataFrame of strings.

    Parameters
    ----------
    url : str
        Address of the file to download.
    csv : bool, default False
        ``False`` splits each line on tabs; ``True`` splits on commas and
        strips double quotes from the fields.
    columns_headers : bool, default True
        When true, the first line of the file supplies the column names and
        is removed from the data. When false, every line (including the
        first) is kept as data and ``columns`` supplies the names.
    columns : list of str, optional
        Column names used when ``columns_headers`` is false. If omitted (or
        empty) in that case, pandas' default integer labels are kept.
    contains_comma : bool, default False
        Only meaningful with ``csv=True``: split on ``'",'`` instead of
        ``','`` so that commas inside quoted fields are preserved.

    Returns
    -------
    pandas.DataFrame
        One row per non-empty line; empty fields become missing values.
        Columns and rows that are entirely missing are dropped, and the
        surviving rows keep their original positional index.

    Raises
    ------
    Exception
        Whatever ``requests.get`` / ``response.raise_for_status()`` raise on
        a timeout or an HTTP error status.
    ValueError
        Raised by pandas when the number of column names does not match the
        number of columns left after dropping all-empty ones.
    """
    headers = {"User-Agent": "Mozilla/5.0"}

    # A timeout keeps a stalled server from hanging the kernel, and the status
    # check stops an HTML error page from being parsed as if it were data.
    response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()

    lines = response.text.split('\n')

    if columns_headers:
        columns = _split_fields(lines[0], csv, contains_comma)
        lines = lines[1:]

    # Blank fields are turned into None here, in plain Python, rather than via
    # DataFrame.replace('', None): older pandas releases interpret value=None
    # as "forward-fill" instead of "replace with a missing value".
    rows = [
        [field if field != '' else None
         for field in _split_fields(line, csv, contains_comma)]
        for line in lines
    ]

    df = pd.DataFrame(rows)
    # All-empty columns (e.g. produced by a trailing delimiter) must go before
    # the names are attached, otherwise the lengths would not line up.
    df = df.dropna(how='all', axis=1)
    if columns is not None and len(columns) > 0:
        df.columns = columns
    # Blank lines (including the one after the final newline) become all-NA rows.
    df = df.dropna(how='all', axis=0)

    return df

# Employment & Unemployment

### National Employment, Hours, and Earnings

In [4]:
# Download the `ce.seasonal` lookup file and parse it with parse_data's
# defaults (tab-separated, first line used as the column names).
seasonal = 'https://download.bls.gov/pub/time.series/ce/ce.seasonal'
df_seasonal = parse_data(seasonal)
# Bare last expression so the notebook renders the resulting DataFrame.
df_seasonal

Unnamed: 0,seasonal_code,seasonal_text
0,S,Seasonally Adjusted
1,U,Not Seasonally Adjusted


In [5]:
# Download the `ce.supersector` lookup file and parse it with parse_data's
# defaults (tab-separated, first line used as the column names).
# NOTE(review): `supersector` / `df_supersector` are assigned again by the
# `sm.supersector` cell, so their value depends on which cell ran last.
supersector = 'https://download.bls.gov/pub/time.series/ce/ce.supersector'
df_supersector = parse_data(supersector)
# Bare last expression so the notebook renders the resulting DataFrame.
df_supersector

Unnamed: 0,supersector_code,supersector_name
0,0,Total nonfarm
1,5,Total private
2,6,Goods-producing
3,7,Service-providing
4,8,Private service-providing
5,10,Mining and logging
6,20,Construction
7,30,Manufacturing
8,31,Durable Goods
9,32,Nondurable Goods


In [89]:
# Download the `la.area` lookup file and parse it with parse_data's defaults
# (tab-separated, first line used as the column names).
# NOTE(review): this URL points at the `la` directory while the neighbouring
# cells in this section read from `ce` - confirm it belongs under this heading.
# NOTE(review): `area` / `df_area` are assigned again by the `sm`, `sa` and
# `cew` area cells, so their value depends on which cell ran last.
area = 'https://download.bls.gov/pub/time.series/la/la.area'
df_area = parse_data(area)
# Bare last expression so the notebook renders the resulting DataFrame.
df_area

Unnamed: 0,area_type_code,area_code,area_text,display_level,selectable,sort_sequence
0,A,ST0100000000000,Alabama,0,T,1
1,A,ST0200000000000,Alaska,0,T,149
2,A,ST0400000000000,Arizona,0,T,193
3,A,ST0500000000000,Arkansas,0,T,257
4,A,ST0600000000000,California,0,T,383
...,...,...,...,...,...,...
8376,N,RD8500000000000,South Atlantic division,0,T,8377
8377,N,RD8600000000000,East South Central division,0,T,8378
8378,N,RD8700000000000,West South Central division,0,T,8379
8379,N,RD8800000000000,Mountain division,0,T,8380


In [6]:
# Download the `ce.industry` lookup file and parse it with parse_data's
# defaults (tab-separated, first line used as the column names).
# NOTE(review): `industry` / `df_industry` are assigned again by the `ee`,
# `sm`, `sa` and `cew` industry cells, so their value depends on which cell
# ran last.
industry = 'https://download.bls.gov/pub/time.series/ce/ce.industry'
df_industry = parse_data(industry)
# Bare last expression so the notebook renders the resulting DataFrame.
df_industry

Unnamed: 0,industry_code,naics_code,publishing_status,industry_name,display_level,selectable,sort_sequence
0,00000000,-,B,Total nonfarm,0,T,1
1,05000000,-,A,Total private,1,T,2
2,06000000,-,A,Goods-producing,1,T,3
3,07000000,-,B,Service-providing,1,T,4
4,08000000,-,A,Private service-providing,1,T,5
...,...,...,...,...,...,...,...
845,90932221,-,B,Local government utilities,5,T,846
846,90932480,-,B,Local government transportation,5,T,847
847,90932622,-,B,Local hospitals,5,T,848
848,90932920,-,B,Local government general administration,5,T,849


In [7]:
# Download the `ce.datatype` lookup file and parse it with parse_data's
# defaults (tab-separated, first line used as the column names).
# NOTE(review): `data_type` / `df_data_type` are assigned again by the `ee`,
# `sm`, `sa` and `cew` data-type cells, so their value depends on which cell
# ran last.
data_type = 'https://download.bls.gov/pub/time.series/ce/ce.datatype'
df_data_type = parse_data(data_type)
# Bare last expression so the notebook renders the resulting DataFrame.
df_data_type

Unnamed: 0,data_type_code,data_type_text
0,1,"ALL EMPLOYEES, THOUSANDS"
1,2,AVERAGE WEEKLY HOURS OF ALL EMPLOYEES
2,3,AVERAGE HOURLY EARNINGS OF ALL EMPLOYEES
3,4,AVERAGE WEEKLY OVERTIME HOURS OF ALL EMPLOYEES
4,6,"PRODUCTION AND NONSUPERVISORY EMPLOYEES, THOUS..."
5,7,AVERAGE WEEKLY HOURS OF PRODUCTION AND NONSUPE...
6,8,AVERAGE HOURLY EARNINGS OF PRODUCTION AND NONS...
7,9,AVERAGE WEEKLY OVERTIME HOURS OF PRODUCTION AN...
8,10,"WOMEN EMPLOYEES, THOUSANDS"
9,11,AVERAGE WEEKLY EARNINGS OF ALL EMPLOYEES


### National Employment, Hours, and Earnings (SIC basis)

In [60]:
# Download the `ee.industry` lookup file and parse it as tab-separated text,
# supplying the seven column names by hand instead of reading them from the
# file's first line (columns_headers=False).
# NOTE(review): with columns_headers=False parse_data keeps the first line as
# data; the recorded output shows the file's own header text in row 0 -
# confirm whether that row should be dropped.
# NOTE(review): `industry` / `df_industry` are also assigned by the `ce`,
# `sm`, `sa` and `cew` industry cells, so their value depends on which cell
# ran last.
industry = 'https://download.bls.gov/pub/time.series/ee/ee.industry'
df_industry = parse_data(industry, columns_headers=False, 
                         columns=['industry_code', 
                                  'SIC_code', 
                                  'publishing_status', 
                                  'industry_name',
                                  'display_level',
                                  'selectable',
                                  'sort_sequence'])
# Bare last expression so the notebook renders the resulting DataFrame.
df_industry

Unnamed: 0,industry_code,SIC_code,publishing_status,industry_name,display_level,selectable,sort_sequence
0,industry_code,SIC_code,publishing_status,industry_name,,,
2,000000,,B,Total nonfarm,1,T,1
3,005000,,A,Total private,1,T,2
4,005100,,A,Goods-producing,1,T,3
5,100000,10-14,A,Mining,2,T,4
...,...,...,...,...,...,...,...
666,958060,,B,Local government hospitals,4,T,665
667,958200,,B,Local government education,3,T,666
668,958300,,C,Local government social services,5,T,667
669,958500,,C,"Local gov. serv., ex. hosp., educ., soc. srv.",5,T,668


In [11]:
# Download the `ee.datatype` lookup file and parse it with parse_data's
# defaults (tab-separated, first line used as the column names).
# NOTE(review): `data_type` / `df_data_type` are also assigned by the `ce`,
# `sm`, `sa` and `cew` data-type cells, so their value depends on which cell
# ran last.
data_type = 'https://download.bls.gov/pub/time.series/ee/ee.datatype'
df_data_type = parse_data(data_type)
# Bare last expression so the notebook renders the resulting DataFrame.
df_data_type

Unnamed: 0,data_type_code,data_type_text
1,1,"ALL EMPLOYEES, THOUSANDS"
2,2,"WOMEN WORKERS, THOUSANDS"
3,3,"PRODUCTION WORKERS, THOUSANDS"
4,4,AVERAGE WEEKLY EARNINGS OF PRODUCTION WORKERS
5,5,AVERAGE WEEKLY HOURS OF PRODUCTION WORKERS
6,6,AVERAGE HOURLY EARNINGS OF PRODUCTION WORKERS
7,7,AVERAGE WEEKLY OVERTIME OF PRODUCTION WORKERS
8,40,"INDEXES OF AGGREGATE WEEKLY HOURS, 1982=100"
9,45,"INDEXES OF AGGREGATE WEEKLY PAYROLLS, 1982=100"
10,49,"AVERAGE HOURLY EARNINGS, 1982 DOLLARS"


### State and Area Employment, Hours, and Earnings

In [12]:
# Download the `sm.state` lookup file and parse it with parse_data's defaults
# (tab-separated, first line used as the column names).
# NOTE(review): `state` / `df_state` are assigned again by the `sa.state`
# cell, so their value depends on which cell ran last.
state = 'https://download.bls.gov/pub/time.series/sm/sm.state'
df_state = parse_data(state)
# Bare last expression so the notebook renders the resulting DataFrame.
df_state

Unnamed: 0,state_code,state_name
0,0,All States
1,1,Alabama
2,2,Alaska
3,4,Arizona
4,5,Arkansas
5,6,California
6,8,Colorado
7,9,Connecticut
8,10,Delaware
9,11,District of Columbia


In [13]:
# Download the `sm.area` lookup file and parse it with parse_data's defaults
# (tab-separated, first line used as the column names).
# NOTE(review): `area` / `df_area` are also assigned by the `la`, `sa` and
# `cew` area cells, so their value depends on which cell ran last.
area = 'https://download.bls.gov/pub/time.series/sm/sm.area'
df_area = parse_data(area)
# Bare last expression so the notebook renders the resulting DataFrame.
df_area

Unnamed: 0,area_code,area_name
0,00000,Statewide
1,10180,"Abilene, TX"
2,10380,"Aguadilla-Isabela, PR"
3,10420,"Akron, OH"
4,10500,"Albany, GA"
...,...,...
442,94781,"Calvert-Charles-Prince George's, MD"
443,94783,"Northern Virginia, VA"
444,97961,"Philadelphia City, PA"
445,97962,"Delaware County, PA"


In [14]:
# Download the `sm.supersector` lookup file and parse it with parse_data's
# defaults (tab-separated, first line used as the column names).
# NOTE(review): `supersector` / `df_supersector` are also assigned by the
# `ce.supersector` cell, so their value depends on which cell ran last.
supersector = 'https://download.bls.gov/pub/time.series/sm/sm.supersector'
df_supersector = parse_data(supersector)
# Bare last expression so the notebook renders the resulting DataFrame.
df_supersector

Unnamed: 0,supersector_code,supersector_name
0,0,Total Nonfarm
1,5,Total Private
2,6,Goods Producing
3,7,Service-Providing
4,8,Private Service Providing
5,10,Mining and Logging
6,15,"Mining, Logging, and Construction"
7,20,Construction
8,30,Manufacturing
9,31,Durable Goods


In [15]:
# Download the `sm.industry` lookup file and parse it with parse_data's
# defaults (tab-separated, first line used as the column names).
# NOTE(review): `industry` / `df_industry` are also assigned by the `ce`,
# `ee`, `sa` and `cew` industry cells, so their value depends on which cell
# ran last.
industry = 'https://download.bls.gov/pub/time.series/sm/sm.industry'
df_industry = parse_data(industry)
# Bare last expression so the notebook renders the resulting DataFrame.
df_industry

Unnamed: 0,industry_code,industry_name
0,00000000,Total Nonfarm
1,05000000,Total Private
2,06000000,Goods Producing
3,07000000,Service-Providing
4,08000000,Private Service Providing
...,...,...
336,90932480,Transportation
337,90932622,Local Government Hospitals
338,90936111,Local Government Elementary and Secondary Educ...
339,90940000,Total State and Local Government


In [16]:
# Download the `sm.data_type` lookup file and parse it with parse_data's
# defaults (tab-separated, first line used as the column names).
# NOTE(review): `data_type` / `df_data_type` are also assigned by the `ce`,
# `ee`, `sa` and `cew` data-type cells, so their value depends on which cell
# ran last.
data_type = 'https://download.bls.gov/pub/time.series/sm/sm.data_type'
df_data_type = parse_data(data_type)
# Bare last expression so the notebook renders the resulting DataFrame.
df_data_type

Unnamed: 0,data_type_code,data_type_text
0,1,"All Employees, In Thousands"
1,2,Average Weekly Hours of All Employees
2,3,"Average Hourly Earnings of All Employees, In D..."
3,6,"Production or Nonsupervisory Employees, In Tho..."
4,7,Average Weekly Hours of Production Employees
5,8,Average Hourly Earnings of Production Employee...
6,11,"Average Weekly Earnings of All Employees, In D..."
7,21,"Diffusion Indexes, 1-month span, seasonally ad..."
8,22,"Diffusion Indexes, 3-month span, seasonally ad..."
9,23,"Diffusion Indexes, 6-month span, seasonally ad..."


### State and Area Employment, Hours, and Earnings (SIC basis)

In [17]:
# Download the `sa.state` lookup file and parse it with parse_data's defaults
# (tab-separated, first line used as the column names).
# NOTE(review): `state` / `df_state` are also assigned by the `sm.state`
# cell, so their value depends on which cell ran last.
state = 'https://download.bls.gov/pub/time.series/sa/sa.state'
df_state = parse_data(state)
# Bare last expression so the notebook renders the resulting DataFrame.
df_state

Unnamed: 0,state_code,state_name
1,1,Alabama
2,2,Alaska
3,4,Arizona
4,5,Arkansas
5,6,California
6,8,Colorado
7,9,Connecticut
8,10,Delaware
9,11,District of Columbia
10,12,Florida


In [18]:
# Download the `sa.area` lookup file and parse it with parse_data's defaults
# (tab-separated, first line used as the column names).
# NOTE(review): `area` / `df_area` are also assigned by the `la`, `sm` and
# `cew` area cells, so their value depends on which cell ran last.
area = 'https://download.bls.gov/pub/time.series/sa/sa.area'
df_area = parse_data(area)
# Bare last expression so the notebook renders the resulting DataFrame.
df_area

Unnamed: 0,area_code,area_name
1,0000,Statewide
2,0040,Abilene
3,0080,Akron
4,0120,Albany
5,0160,Albany-Schenectady-Troy
...,...,...
328,9140,Williamsport
329,9160,Wilmington
330,9240,Worcester
331,9280,York


In [19]:
# Download the `sa.industry` lookup file and parse it with parse_data's
# defaults (tab-separated, first line used as the column names).
# NOTE(review): `industry` / `df_industry` are also assigned by the `ce`,
# `ee`, `sm` and `cew` industry cells, so their value depends on which cell
# ran last.
industry = 'https://download.bls.gov/pub/time.series/sa/sa.industry'
df_industry = parse_data(industry)
# Bare last expression so the notebook renders the resulting DataFrame.
df_industry

Unnamed: 0,industry_code,industry_name
1,000000,Total Non-Farm
2,000015,Total Private
3,000045,Goods Producing
4,000055,Service Producing
5,000060,*Total Nonmanufacturing
...,...,...
601,939032,Special Districts (Non Educational)
602,939133,Exec. & Legis. Offices Combined
603,940001,Total State And Local Government
604,948202,*State And Local Government Education


In [59]:
# Download the `sa.data_type` lookup file and parse it with parse_data's
# defaults (tab-separated, first line used as the column names).
# NOTE(review): `data_type` / `df_data_type` are also assigned by the `ce`,
# `ee`, `sm` and `cew` data-type cells, so their value depends on which cell
# ran last.
data_type = 'https://download.bls.gov/pub/time.series/sa/sa.data_type'
df_data_type = parse_data(data_type)
# Bare last expression so the notebook renders the resulting DataFrame.
df_data_type

Unnamed: 0,data_type_code,data_type_text
0,1,"All Employees, In Thousands"
1,2,"Women Workers, In Thousands"
2,3,"Production Workers, In Thousands"
3,4,"Average Weekly Earnings, In Dollars"
4,5,Average Weekly Hours
5,6,"Average Hourly Earnings, In Dollars"
6,7,Average Overtime Hours


### State and County Employment and Wages from Quarterly Census of Employment and Wages

In [27]:
# Download the QCEW area-titles CSV and parse it in comma mode (csv=True),
# supplying three column names by hand instead of reading the header line.
# NOTE(review): without contains_comma=True every comma is a separator, so a
# title that contains a comma is split across `area_title` and `state` (the
# recorded output shows e.g. "WI CSA" in `state`) - confirm this is intended.
# NOTE(review): with columns_headers=False the file's own header line stays in
# the data; the recorded output shows it as row 0.
# NOTE(review): `area` / `df_area` are also assigned by the `la`, `sm` and
# `sa` area cells, so their value depends on which cell ran last.
area = 'https://www.bls.gov/cew/classifications/areas/area-titles-csv.csv'
df_area = parse_data(area, csv=True, columns_headers=False, 
                          columns=['area_code', 'area_title', 'state'])
# Bare last expression so the notebook renders the resulting DataFrame.
df_area

Unnamed: 0,area_code,area_title,state
0,area_fips,area_title,
1,US000,U.S. TOTAL,
2,USCMS,U.S. Combined Statistical Areas (combined),
3,USMSA,U.S. Metropolitan Statistical Areas (combined),
4,USNMS,U.S. Nonmetropolitan Area Counties (combined),
...,...,...,...
4721,CS554,Wausau-Stevens Point-Wisconsin Rapids,WI CSA
4722,CS556,Wichita-Arkansas City-Winfield,KS CSA
4723,CS558,Williamsport-Lock Haven,PA CSA
4724,CS564,York-Hanover-Gettysburg,PA CSA


In [28]:
# Download the QCEW data-type titles CSV and parse it in comma mode
# (csv=True), taking the column names from the file's first line.
# NOTE(review): `data_type` / `df_data_type` are also assigned by the `ce`,
# `ee`, `sm` and `sa` data-type cells, so their value depends on which cell
# ran last.
data_type = 'https://www.bls.gov/cew/classifications/datatype/datatype-titles-csv.csv'
df_data_type = parse_data(data_type, csv=True)
# Bare last expression so the notebook renders the resulting DataFrame.
df_data_type

Unnamed: 0,data_type,title
0,0,(Filler used when multiple datatypes are on th...
1,1,All Employees
2,2,Number of Establishments
3,3,Total Wages (in thousands)
4,4,Average Weekly Wage
5,5,Average Annual Pay
6,6,(Archaic -- formerly used for quarterly averag...


In [29]:
# Download the QCEW establishment-size titles CSV and parse it in comma mode
# (csv=True), taking the column names from the file's first line.
size = 'https://www.bls.gov/cew/classifications/size/size-titles-csv.csv'
df_size = parse_data(size, csv=True)
# Bare last expression so the notebook renders the resulting DataFrame.
df_size

Unnamed: 0,size_code,size_title
0,0,All establishment sizes
1,1,Fewer than 5 employees per establishment
2,2,5 to 9 employees per establishment
3,3,10 to 19 employees per establishment
4,4,20 to 49 employees per establishment
5,5,50 to 99 employees per establishment
6,6,100 to 249 employees per establishment
7,7,250 to 499 employees per establishment
8,8,500 to 999 employees per establishment
9,9,1000 or more employees per establishment


In [58]:
# Download the QCEW ownership titles CSV and parse it in comma mode
# (csv=True), taking the column names from the file's first line.
ownership = 'https://www.bls.gov/cew/classifications/ownerships/ownership-titles-csv.csv'
df_ownership = parse_data(ownership, csv=True)
# Bare last expression so the notebook renders the resulting DataFrame.
df_ownership

Unnamed: 0,own_code,own_title
0,0,Total Covered
1,1,Federal Government
2,2,State Government
3,3,Local Government
4,4,International Government
5,5,Private
6,8,Total Government
7,9,Total U.I. Covered (Excludes Federal Government)


In [57]:
# Download the QCEW industry titles CSV and parse it in comma mode.
# contains_comma=True makes parse_data split on `",` rather than on every
# comma, so titles such as "10 Total, all industries" stay in one field.
# NOTE(review): `industry` / `df_industry` are also assigned by the `ce`,
# `ee`, `sm` and `sa` industry cells, so their value depends on which cell
# ran last.
industry = 'https://www.bls.gov/cew/classifications/industry/industry-titles.csv'
df_industry = parse_data(industry, csv=True, contains_comma=True )
# Bare last expression so the notebook renders the resulting DataFrame.
df_industry

Unnamed: 0,industry_code,industry_title
0,10,"10 Total, all industries"
1,101,101 Goods-producing
2,1011,1011 Natural resources and mining
3,1012,1012 Construction
4,1013,1013 Manufacturing
...,...,...
2673,99,NAICS 99 Unclassified
2674,999,NAICS 999 Unclassified
2675,9999,NAICS 9999 Unclassified
2676,99999,NAICS 99999 Unclassified
