In [1]:
import requests
import pandas as pd

# Replace with your API key
API_KEY = 

In [2]:
series_ids = {
    'cpi_fah': 'CUSR0000SAF11',                    # Monthly index (1982-1984=100)
    'grocery_sales': 'RSGCS',                      # Monthly millions of dollars
    'rdi': 'DSPIC96',                              # Monthly dollars Chained 2017
    'home_price': 'CSUSHPINSA',                    # Monthly index (Jan 2000 = 100)
    'cons_sent': 'UMCSENT',                        # Monthly index (1966 Q1 = 100)
    'unrate': 'UNRATE',                            # Monthly seasonally adjusted percent
    'initial_claims': 'ICSA',                      # Weekly claims number - sum into months
    'gdp': 'GDPC1',                                # Quarterly dollars Chained 2017
    'oil_prices': 'DCOILWTICO',                    # Daily oil prices - West Texas Intermediate - avg into months
    'ppi_food_feed':'WPU02',                       # Monthly index (1982=100)
    'ppi_food_final':'PPIDFS',                     # Monthly index (1982=100)
    'hourly_wages':'CES0500000003',                # Monthly dollars per hour
    'ppi_food_mfg':'PCU311311',                    # Monthly index (Dec 1984=100) Not seasonally adjusted
    'ppi_cons_food':'WPSFD4111'                    # Monthly index (1982=100)
}

In [3]:
# Define storage dictionary
df_dict = {}

# Define the date range
start_date = "2000-01-01"
end_date = "2025-01-01"

for selected_key, series_id in series_ids.items():
    print(f"🔄 Fetching data for {selected_key} ({series_id})")

    url = f"https://api.stlouisfed.org/fred/series/observations?series_id={series_id}&api_key={API_KEY}&file_type=json&observation_start={start_date}&observation_end={end_date}"
    response = requests.get(url)
    data = response.json()

    observations = data.get("observations", [])

    if observations:
        df = pd.DataFrame(observations)
        df["date"] = pd.to_datetime(df["date"])
        df["value"] = pd.to_numeric(df["value"], errors='coerce')
        df.rename(columns={"value": selected_key}, inplace=True)
        df.drop(columns=['realtime_start', 'realtime_end'], inplace=True, errors='ignore')

        # 🔴 Debugging: Print before storing
        print(f"📌 Before storing: df_dict keys = {df_dict.keys()}")

        df_dict[selected_key] = df  # ✅ Store in dictionary

        # 🔴 Debugging: Print after storing
        print(f"✅ After storing {selected_key}: df_dict keys = {df_dict.keys()}")

    else:
        print(f"⚠ No data found for {selected_key}, skipping.")

print("\n📌 Final keys in df_dict:", df_dict.keys())  # ✅ Should list all variable names


🔄 Fetching data for cpi_fah (CUSR0000SAF11)
📌 Before storing: df_dict keys = dict_keys([])
✅ After storing cpi_fah: df_dict keys = dict_keys(['cpi_fah'])
🔄 Fetching data for grocery_sales (RSGCS)
📌 Before storing: df_dict keys = dict_keys(['cpi_fah'])
✅ After storing grocery_sales: df_dict keys = dict_keys(['cpi_fah', 'grocery_sales'])
🔄 Fetching data for rdi (DSPIC96)
📌 Before storing: df_dict keys = dict_keys(['cpi_fah', 'grocery_sales'])
✅ After storing rdi: df_dict keys = dict_keys(['cpi_fah', 'grocery_sales', 'rdi'])
🔄 Fetching data for home_price (CSUSHPINSA)
📌 Before storing: df_dict keys = dict_keys(['cpi_fah', 'grocery_sales', 'rdi'])
✅ After storing home_price: df_dict keys = dict_keys(['cpi_fah', 'grocery_sales', 'rdi', 'home_price'])
🔄 Fetching data for cons_sent (UMCSENT)
📌 Before storing: df_dict keys = dict_keys(['cpi_fah', 'grocery_sales', 'rdi', 'home_price'])
✅ After storing cons_sent: df_dict keys = dict_keys(['cpi_fah', 'grocery_sales', 'rdi', 'home_price', 'cons_se

In [4]:
print("📌 Keys in df_dict:", df_dict.keys())  # Should list all variables
for key, df in df_dict.items():
    print(f"🔍 {key}: Shape={df.shape}, Columns={df.columns.tolist()}")

📌 Keys in df_dict: dict_keys(['cpi_fah', 'grocery_sales', 'rdi', 'home_price', 'cons_sent', 'unrate', 'initial_claims', 'gdp', 'oil_prices', 'ppi_food_feed', 'ppi_food_final', 'hourly_wages', 'ppi_food_mfg', 'ppi_cons_food'])
🔍 cpi_fah: Shape=(301, 2), Columns=['date', 'cpi_fah']
🔍 grocery_sales: Shape=(301, 2), Columns=['date', 'grocery_sales']
🔍 rdi: Shape=(301, 2), Columns=['date', 'rdi']
🔍 home_price: Shape=(300, 2), Columns=['date', 'home_price']
🔍 cons_sent: Shape=(301, 2), Columns=['date', 'cons_sent']
🔍 unrate: Shape=(301, 2), Columns=['date', 'unrate']
🔍 initial_claims: Shape=(1305, 2), Columns=['date', 'initial_claims']
🔍 gdp: Shape=(100, 2), Columns=['date', 'gdp']
🔍 oil_prices: Shape=(6523, 2), Columns=['date', 'oil_prices']
🔍 ppi_food_feed: Shape=(301, 2), Columns=['date', 'ppi_food_feed']
🔍 ppi_food_final: Shape=(183, 2), Columns=['date', 'ppi_food_final']
🔍 hourly_wages: Shape=(227, 2), Columns=['date', 'hourly_wages']
🔍 ppi_food_mfg: Shape=(301, 2), Columns=['date', 'pp

In [5]:
# Ensure all datasets are resampled to Monthly (ME)
for key, df in df_dict.items():
    df.set_index("date", inplace=True)  # Ensure 'date' is the index
    df = df.resample("ME").ffill()  # Convert to monthly, filling forward
    df_dict[key] = df  # Store back in dictionary


In [6]:
# Start with the first DataFrame in the dictionary
final_df = list(df_dict.values())[0]  # Take the first dataset as the base

# Merge the rest
for key in list(df_dict.keys())[1:]:
    final_df = final_df.merge(df_dict[key], on="date", how="outer")

final_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 301 entries, 2000-01-31 to 2025-01-31
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   cpi_fah         301 non-null    float64
 1   grocery_sales   301 non-null    float64
 2   rdi             301 non-null    float64
 3   home_price      300 non-null    float64
 4   cons_sent       301 non-null    float64
 5   unrate          301 non-null    float64
 6   initial_claims  300 non-null    float64
 7   gdp             298 non-null    float64
 8   oil_prices      289 non-null    float64
 9   ppi_food_feed   301 non-null    float64
 10  ppi_food_final  183 non-null    float64
 11  hourly_wages    227 non-null    float64
 12  ppi_food_mfg    301 non-null    float64
 13  ppi_cons_food   301 non-null    float64
dtypes: float64(14)
memory usage: 35.3 KB


In [7]:
final_df

Unnamed: 0_level_0,cpi_fah,grocery_sales,rdi,home_price,cons_sent,unrate,initial_claims,gdp,oil_prices,ppi_food_feed,ppi_food_final,hourly_wages,ppi_food_mfg,ppi_cons_food
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2000-01-31,165.300,32239.0,9799.9,100.000,112.0,4.0,285000.0,13878.147,27.65,131.000,,,126.700,135.500
2000-02-29,166.100,32567.0,9837.9,100.571,111.3,4.1,280000.0,13878.147,30.57,131.700,,,127.200,136.400
2000-03-31,166.400,33117.0,9864.0,101.466,107.1,4.0,272000.0,13878.147,26.86,132.100,,,127.400,136.400
2000-04-30,166.600,33541.0,9913.7,102.541,109.2,3.8,291000.0,14130.908,25.71,133.200,,,128.100,137.500
2000-05-31,167.400,33221.0,9954.5,103.702,110.7,4.0,280000.0,14130.908,29.03,134.300,,,129.300,138.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-09-30,307.201,75140.0,17519.6,324.779,70.1,4.1,225000.0,23400.294,68.75,267.145,154.010,35.33,261.715,277.075
2024-10-31,307.798,75403.0,17584.7,324.211,70.5,4.1,218000.0,23536.293,69.58,266.152,153.114,35.48,260.642,274.644
2024-11-30,308.881,75235.0,17618.8,323.690,71.8,4.2,225000.0,,68.26,267.806,156.339,35.61,262.165,281.448
2024-12-31,309.754,76036.0,17646.1,323.219,74.0,4.1,211000.0,,72.44,267.890,156.930,35.70,262.290,282.621


In [8]:
final_df = final_df.ffill()
final_df = final_df.bfill()

In [9]:
final_df

Unnamed: 0_level_0,cpi_fah,grocery_sales,rdi,home_price,cons_sent,unrate,initial_claims,gdp,oil_prices,ppi_food_feed,ppi_food_final,hourly_wages,ppi_food_mfg,ppi_cons_food
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2000-01-31,165.300,32239.0,9799.9,100.000,112.0,4.0,285000.0,13878.147,27.65,131.000,100.300,20.05,126.700,135.500
2000-02-29,166.100,32567.0,9837.9,100.571,111.3,4.1,280000.0,13878.147,30.57,131.700,100.300,20.05,127.200,136.400
2000-03-31,166.400,33117.0,9864.0,101.466,107.1,4.0,272000.0,13878.147,26.86,132.100,100.300,20.05,127.400,136.400
2000-04-30,166.600,33541.0,9913.7,102.541,109.2,3.8,291000.0,14130.908,25.71,133.200,100.300,20.05,128.100,137.500
2000-05-31,167.400,33221.0,9954.5,103.702,110.7,4.0,280000.0,14130.908,29.03,134.300,100.300,20.05,129.300,138.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-09-30,307.201,75140.0,17519.6,324.779,70.1,4.1,225000.0,23400.294,68.75,267.145,154.010,35.33,261.715,277.075
2024-10-31,307.798,75403.0,17584.7,324.211,70.5,4.1,218000.0,23536.293,69.58,266.152,153.114,35.48,260.642,274.644
2024-11-30,308.881,75235.0,17618.8,323.690,71.8,4.2,225000.0,23536.293,68.26,267.806,156.339,35.61,262.165,281.448
2024-12-31,309.754,76036.0,17646.1,323.219,74.0,4.1,211000.0,23536.293,72.44,267.890,156.930,35.70,262.290,282.621


In [10]:
final_df.to_csv('grocery_dataset.csv', index = True)