In [1]:
import requests
import pandas as pd
import numpy as np
import time

In [2]:
from credentials import fred_api_key

In [3]:
# Local alias for the key imported from the credentials module; this is the
# value handed to fred() as its api_key argument further down.
api_key = fred_api_key

In [4]:
def fred(series_id, api_key, date_list):
    """
    Retrieve economic data from the Federal Reserve Economic Data (FRED) API and return it as a DataFrame.

    Every (start, end) window in ``date_list`` is downloaded from the
    ``series/observations`` endpoint. Each request asks for at most 2000 rows;
    further pages are requested with an increasing ``offset`` until a page comes
    back shorter than the page size, so a window holding more than 2000
    observations is returned in full instead of being cut off.

    Parameters:
        series_id (str): The unique identifier of the economic series on FRED.
        api_key (str): Your personal API key to access FRED API. Get it from https://fred.stlouisfed.org/.
        date_list (list): A list of tuples containing start and end dates for data retrieval.
                          Each tuple should be in the format ('YYYY-MM-DD', 'YYYY-MM-DD').

    Returns:
        pandas.DataFrame: A DataFrame indexed by ``date`` (datetime) with a single
        numeric column named after ``series_id``. Values that cannot be parsed
        as numbers become NaN. If no observations are returned at all (including
        an empty ``date_list``), an empty frame with the same layout is returned.

    Raises:
        ValueError: If the API response status code is not 200.
    """
    endpoint = 'https://api.stlouisfed.org/fred/series/observations'
    page_size = 2000
    column_name = f'{series_id}'

    observations = []  # Raw observation dicts from every window and page
    for start_date, end_date in date_list:
        offset = 0
        while True:
            params = {
                'series_id': series_id,
                'api_key': api_key,
                'file_type': 'json',
                'limit': page_size,
                'offset': offset,
                'observation_start': start_date,
                'observation_end': end_date
            }
            # A timeout keeps a stalled connection from hanging the notebook forever.
            response = requests.get(endpoint, params=params, timeout=30)

            if response.status_code != 200:
                raise ValueError(f"Error: {response.status_code} - {response.text}")

            page = response.json()['observations']
            observations.extend(page)

            # A short (or empty) page means this window is exhausted.
            if len(page) < page_size:
                break
            offset += page_size

    if not observations:
        # Nothing to parse: return the documented layout instead of failing on
        # missing 'date' / 'value' columns.
        return pd.DataFrame(
            columns=[column_name],
            index=pd.DatetimeIndex([], name='date'),
            dtype='float64',
        )

    frame = pd.DataFrame(observations).drop(
        columns=['realtime_start', 'realtime_end'], errors='ignore'
    )
    frame['date'] = pd.to_datetime(frame['date'])
    # Non-numeric placeholders in 'value' are coerced to NaN.
    frame['value'] = pd.to_numeric(frame['value'], errors='coerce')

    return frame.set_index('date').rename(columns={'value': column_name})


In [5]:
# For many of the data series FRED has a daily limit of 2000 rows. Therefore, I split the data into ranges below so that
# collect all the data from a series.
date_list = [
    ('1993-01-01', '1998-12-31'), ('1999-01-01', '2004-12-31'),
    ('2005-01-01', '2010-12-31'), ('2011-01-01', '2016-12-31'),
    ('2017-01-01', '2022-12-31'), ('2023-01-01', '2029-12-31'),
] 

In [6]:
# Wall-clock timing for the whole download-and-combine step.
start_time = time.time()

# FRED series ids and, position by position, the column name each one gets in
# the combined frame.
# NOTE(review): '10_year_treasury_yeild' is misspelled; it is left unchanged
# because the name becomes a column of the frame that is pickled later, so
# fixing it means updating whatever reads that file as well.
series_id_list = [
    'DFF',
    'GDPC1',
    'UNRATE',
    'CORESTICKM159SFRBATL',
    'MRTSSM44X72USS',
    'IRLTLT01USM156N',
    'T10Y3M',
]
variable_names = [
    'ffr',
    'real_gdp',
    'unemployment_rate',
    'sticky_cpi',
    'retail_sales',
    '10_year_treasury_yeild',
    '10year_3month_yield_spread',
]

# One date-indexed, single-column frame per series.
df_list = [fred(fred_id, api_key, date_list) for fred_id in series_id_list]

# Align the frames side by side on their date index, then swap the FRED ids for
# the readable column names.
economic_data = pd.concat(df_list, axis=1).rename(
    columns=dict(zip(series_id_list, variable_names))
)

end_time = time.time()
execution_time = end_time - start_time
print("Execution time:", execution_time, "seconds")

Execution time: 10.964179992675781 seconds


In [7]:
# Preview the first rows of the combined frame; columns are NaN on dates where
# a series has no observation.
economic_data.head()

Unnamed: 0_level_0,ffr,real_gdp,unemployment_rate,sticky_cpi,retail_sales,10_year_treasury_yeild,10year_3month_yield_spread
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1993-01-01,2.66,9857.185,7.3,3.547355,175108.0,6.6,
1993-01-02,2.66,,,,,,
1993-01-03,2.66,,,,,,
1993-01-04,3.6,,,,,,3.41
1993-01-05,3.33,,,,,,3.43


In [8]:
# Persist the combined frame as a pickle. The path is relative to the kernel's
# working directory, and the ../data directory must already exist.
economic_data.to_pickle('../data/fred.pkl')