API limitations:
 - 50 series allowed per pull
 - 20 years allowed per pull
 - approx 5000 PPI series with some series having data starting in the 1960s means approx 300 pulls required - 100 for all series, * 3 for 20 year selections out to the 60s
 - daily limit of 500 pulls means this can only be ran once a day

In [1]:
import requests
import json
import pandas as pd
from bls_definitions import bls_ppi_codes
import datetime
import os

In [2]:
key = os.environ['BLS_API_KEY']
url = "https://api.bls.gov/publicAPI/v2/timeseries/data/"
headers = {'Content-type' : 'application/json'}

# Looping all codes in sets of 50 over three 20 year periods

In [3]:
# create date ranges to loop on
endyear = datetime.datetime.now().year

date_ranges = {0 : (endyear-19, endyear),
               1 : (endyear-39, endyear-20),
               2 : (endyear-59, endyear-40)}

In [4]:
bls_ppi_code_segments = [list(bls_ppi_codes.keys())[x:x+50]for x in range(0, len(list(bls_ppi_codes.keys())), 50)]

In [5]:
master_dfs = {}
master_missing_series = {}
segment_no = 0

# loop over sets of 50 codes
for i in bls_ppi_code_segments:
    dfs_diff_periods = {}
    missing_series = {}
    for j in date_ranges:
        # create variatble selection dictionary
        selection_dict = {"seriesid": i, "startyear":date_ranges[j][0], "endyear":date_ranges[j][1], "registrationkey" : key}
        selection = json.dumps(selection_dict)

        # send request to BLS
        p = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/', data=selection, headers=headers)
        p = p.json()['Results']['series']

        # create dictionaries to hold values and index
        data = {}
        index = {}

        # save time series and index for each variable
        for k in p:
            data[k['seriesID']] = pd.Series([l['value'] for l in k['data']]).astype(float).iloc[::-1].values
            index[k['seriesID']] = [f"{l['year']}-{l['period'][1:]}-01" for l in k['data']][::-1]

        # identify where codes are missing in each period window
        missing_codes = []
        for k in p:
            if len(k['data']) == 0:
                missing_codes.append(k['seriesID'])
        missing_series[j] = missing_codes

        # convert series into dataframes and then combine into one dataframe for the period window
        dfs = {}
        for k in data.keys():
            dfs[k] = pd.DataFrame(data = data[k], index = pd.to_datetime(index[k]))

        final = pd.concat(dfs,axis=1)
        final.columns = final.columns.get_level_values(0)

        dfs_diff_periods[j] = final
    
    # combine dfs of series into one dataframe
    segment_df = pd.concat(dfs_diff_periods)
    segment_df.index = segment_df.index.droplevel(0)
    segment_df = segment_df.sort_index()

    # export segment df to a parquet file format for reduced file size storage
    # once API is run the dat is then saved - no need to rerun every time
    segment_df.to_parquet('C:\\DSWG_PPI\\api_pulls\\'+str(segment_no)+'api.gzip', compression='gzip')
    
    # save segment df to master dictionary
    master_dfs[segment_no] = segment_df

    # identify codes with no values across all periods
    missing_sets = {}
    for j in missing_series.keys():
        missing_sets[j] = set(missing_series[j])

    variables_no_data = missing_sets[0].intersection(missing_sets[1]).intersection(missing_sets[2])

    master_missing_series[segment_no] = variables_no_data

    print(segment_no)
    segment_no += 1
    

  data[k['seriesID']] = pd.Series([l['value'] for l in k['data']]).astype(float).iloc[::-1].values


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


In [6]:
p

[{'seriesID': 'PCU7211207211202', 'data': []},
 {'seriesID': 'PCU721120721120201', 'data': []},
 {'seriesID': 'PCU721120721120202', 'data': []},
 {'seriesID': 'PCU7211207211205', 'data': []},
 {'seriesID': 'PCU7211207211206', 'data': []},
 {'seriesID': 'PCU721120721120P', 'data': []},
 {'seriesID': 'PCU721120721120SM1', 'data': []},
 {'seriesID': 'PCU8113--8113--', 'data': []},
 {'seriesID': 'PCU81131-81131-', 'data': []},
 {'seriesID': 'PCU811310811310', 'data': []},
 {'seriesID': 'PCU8113108113105', 'data': []},
 {'seriesID': 'PCU8113108113106', 'data': []},
 {'seriesID': 'PCU8113108113107', 'data': []},
 {'seriesID': 'PCU8113108113108', 'data': []},
 {'seriesID': 'PCU811310811310P', 'data': []},
 {'seriesID': 'PCU811310811310SM', 'data': []},
 {'seriesID': 'PCU924126924126', 'data': []},
 {'seriesID': 'PCU9241269241261', 'data': []},
 {'seriesID': 'PCU9241269241262', 'data': []},
 {'seriesID': 'PCU9241269241263', 'data': []},
 {'seriesID': 'PCU9241269241264', 'data': []},
 {'seriesI

In [15]:
date_ranges[0]

(2004, 2023)

In [13]:
selection_dict = {"seriesid": ['PCU3391143391143'], 'startyear' : '2015', 'endyear' : '2023', "registrationkey" : key}
selection = json.dumps(selection_dict)

# send request to BLS
p = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/', data=selection, headers=headers)
p = p.json()['Results']['series']

In [14]:
headers

{'Content-type': 'application/json'}

In [15]:
selection

'{"seriesid": ["PCU3391143391143"], "startyear": "2015", "endyear": "2023", "registrationkey": "9648e84bbf0f4c38b01689279269a05e"}'

In [16]:
p

[{'seriesID': 'PCU3391143391143',
  'data': [{'year': '2019',
    'period': 'M02',
    'periodName': 'February',
    'latest': 'true',
    'value': '506.7',
    'footnotes': [{}]},
   {'year': '2019',
    'period': 'M01',
    'periodName': 'January',
    'value': '490.3',
    'footnotes': [{}]},
   {'year': '2018',
    'period': 'M12',
    'periodName': 'December',
    'value': '469.0',
    'footnotes': [{}]},
   {'year': '2018',
    'period': 'M11',
    'periodName': 'November',
    'value': '435.4',
    'footnotes': [{}]},
   {'year': '2018',
    'period': 'M10',
    'periodName': 'October',
    'value': '426.7',
    'footnotes': [{}]},
   {'year': '2018',
    'period': 'M09',
    'periodName': 'September',
    'value': '402.9',
    'footnotes': [{}]},
   {'year': '2018',
    'period': 'M08',
    'periodName': 'August',
    'value': '386.1',
    'footnotes': [{}]},
   {'year': '2018',
    'period': 'M07',
    'periodName': 'July',
    'value': '400.8',
    'footnotes': [{}]},
   {'ye