In [37]:
import requests
import pandas as pd

# Read API key from file
with open("fred_api_key.txt", "r") as file:
    API_KEY = file.read().strip()  

# Define storage dictionary
df_dict = {}

# Define the date range
start_date = "2000-01-01"
end_date = "2025-01-01"

# Dictionary of FRED series IDs
series_ids = {
    'gdp': 'GDPC1',
    'cpi_fah': 'CUSR0000SAF11',
    'unrate': 'UNRATE',
    'grocery_sales': 'RSGCS',
    'home_price': 'CSUSHPINSA',
    'cons_sent': 'UMCSENT',
    'initial_claims': 'ICSA',
    'oil_prices': 'DCOILWTICO',
    'ppi_final_food': 'PPIDFS',
    'ppi_food_feed' : 'WPU02',
    'ppi_fin_cons_food': 'WPSFD4111',
    'ppi_food_mfg' : 'PCU311311',
    'ppi_grocery' : 'PCU445110445110',
    'wages_retail' : 'CES4200000003'
}

# üîÑ Loop through all variables and store them in df_dict
for selected_key, series_id in series_ids.items():
    print(f"üîÑ Fetching data for {selected_key} ({series_id})")

    url = f"https://api.stlouisfed.org/fred/series/observations?series_id={series_id}&api_key={API_KEY}&file_type=json&observation_start={start_date}&observation_end={end_date}"
    response = requests.get(url)
    data = response.json()

    observations = data.get("observations", [])

    if observations:
        df = pd.DataFrame(observations)

        # ‚úÖ Ensure 'date' is retained and formatted
        if "date" not in df.columns:
            print(f"‚ö† WARNING: 'date' column missing in {selected_key}, skipping.")
            continue  # Skip dataset if 'date' is missing

        df["date"] = pd.to_datetime(df["date"])
        df["value"] = pd.to_numeric(df["value"], errors='coerce')

        # ‚úÖ Keep only 'date' and 'value' before renaming
        df = df[["date", "value"]]
        df.rename(columns={"value": selected_key}, inplace=True)

        df_dict[selected_key] = df.copy()  # ‚úÖ Store in dictionary

        # üîç Debugging
        print(f"‚úÖ Stored {selected_key}: {df.shape} rows")

    else:
        print(f"‚ö† No data found for {selected_key}, skipping.")

# ‚úÖ Final Debugging Check
print("\nüìå Final keys in df_dict:", df_dict.keys())

# üîç Check columns after fix
for key, df in df_dict.items():
    print(f"üîç {key} Columns = {df.columns}")


üîÑ Fetching data for gdp (GDPC1)
‚úÖ Stored gdp: (100, 2) rows
üîÑ Fetching data for cpi_fah (CUSR0000SAF11)
‚úÖ Stored cpi_fah: (301, 2) rows
üîÑ Fetching data for unrate (UNRATE)
‚úÖ Stored unrate: (301, 2) rows
üîÑ Fetching data for grocery_sales (RSGCS)
‚úÖ Stored grocery_sales: (301, 2) rows
üîÑ Fetching data for home_price (CSUSHPINSA)
‚úÖ Stored home_price: (300, 2) rows
üîÑ Fetching data for cons_sent (UMCSENT)
‚úÖ Stored cons_sent: (301, 2) rows
üîÑ Fetching data for initial_claims (ICSA)
‚úÖ Stored initial_claims: (1305, 2) rows
üîÑ Fetching data for oil_prices (DCOILWTICO)
‚úÖ Stored oil_prices: (6523, 2) rows
üîÑ Fetching data for ppi_final_food (PPIDFS)
‚úÖ Stored ppi_final_food: (183, 2) rows
üîÑ Fetching data for ppi_food_feed (WPU02)
‚úÖ Stored ppi_food_feed: (301, 2) rows
üîÑ Fetching data for ppi_fin_cons_food (WPSFD4111)
‚úÖ Stored ppi_fin_cons_food: (301, 2) rows
üîÑ Fetching data for ppi_food_mfg (PCU311311)
‚úÖ Stored ppi_food_mfg: (301, 2) rows
üîÑ 

In [38]:
# Check which DataFrames do NOT have 'date' as a column
for key, df in df_dict.items():
    print(f"üîç Checking {key}: Columns = {df.columns}")


üîç Checking gdp: Columns = Index(['date', 'gdp'], dtype='object')
üîç Checking cpi_fah: Columns = Index(['date', 'cpi_fah'], dtype='object')
üîç Checking unrate: Columns = Index(['date', 'unrate'], dtype='object')
üîç Checking grocery_sales: Columns = Index(['date', 'grocery_sales'], dtype='object')
üîç Checking home_price: Columns = Index(['date', 'home_price'], dtype='object')
üîç Checking cons_sent: Columns = Index(['date', 'cons_sent'], dtype='object')
üîç Checking initial_claims: Columns = Index(['date', 'initial_claims'], dtype='object')
üîç Checking oil_prices: Columns = Index(['date', 'oil_prices'], dtype='object')
üîç Checking ppi_final_food: Columns = Index(['date', 'ppi_final_food'], dtype='object')
üîç Checking ppi_food_feed: Columns = Index(['date', 'ppi_food_feed'], dtype='object')
üîç Checking ppi_fin_cons_food: Columns = Index(['date', 'ppi_fin_cons_food'], dtype='object')
üîç Checking ppi_food_mfg: Columns = Index(['date', 'ppi_food_mfg'], dtype='object')


In [39]:
# Define frequency categories
daily_series = ['oil_prices']
weekly_series = ['initial_claims']
quarterly_series = ['gdp']
annual_series = []  # If any annual series are added later

# ‚úÖ Resample all datasets to Monthly (Month End = "ME")
for key, df in df_dict.items():
    df = df.copy()  # Ensure no modification to the original
    df.set_index("date", inplace=True)

    # Sort to ensure chronological order
    df.sort_index(inplace=True)

    # ‚úÖ Apply correct resampling method
    if key in daily_series:
        df = df.resample("ME").mean()  # Daily ‚Üí Monthly (average)
    elif key in weekly_series:
        df = df.resample("ME").sum()  # Weekly ‚Üí Monthly (sum)
    elif key in quarterly_series:
        df = df.resample("ME").ffill()  # Quarterly ‚Üí Monthly (forward fill)
    elif key in annual_series:
        df = df.resample("ME").ffill()  # Annual ‚Üí Monthly (forward fill)
    else:
        df = df.resample("ME").ffill()  # Default forward-fill for monthly data

    # ‚úÖ Force frequency assignment
    df = df.asfreq("ME")  # Ensure index is consistently Monthly-End

    df_dict[key] = df  # Store back in dictionary

    # ‚úÖ Debugging: Check resampling worked
    print(f"‚úÖ Resampled {key}: {df.index.freq} | Shape: {df.shape}")




‚úÖ Resampled gdp: <MonthEnd> | Shape: (298, 1)
‚úÖ Resampled cpi_fah: <MonthEnd> | Shape: (301, 1)
‚úÖ Resampled unrate: <MonthEnd> | Shape: (301, 1)
‚úÖ Resampled grocery_sales: <MonthEnd> | Shape: (301, 1)
‚úÖ Resampled home_price: <MonthEnd> | Shape: (300, 1)
‚úÖ Resampled cons_sent: <MonthEnd> | Shape: (301, 1)
‚úÖ Resampled initial_claims: <MonthEnd> | Shape: (300, 1)
‚úÖ Resampled oil_prices: <MonthEnd> | Shape: (301, 1)
‚úÖ Resampled ppi_final_food: <MonthEnd> | Shape: (183, 1)
‚úÖ Resampled ppi_food_feed: <MonthEnd> | Shape: (301, 1)
‚úÖ Resampled ppi_fin_cons_food: <MonthEnd> | Shape: (301, 1)
‚úÖ Resampled ppi_food_mfg: <MonthEnd> | Shape: (301, 1)
‚úÖ Resampled ppi_grocery: <MonthEnd> | Shape: (301, 1)
‚úÖ Resampled wages_retail: <MonthEnd> | Shape: (227, 1)


In [40]:
for key, df in df_dict.items():
    print(f"üìå {key}: {df.index.freq} | Shape: {df.shape}")

üìå gdp: <MonthEnd> | Shape: (298, 1)
üìå cpi_fah: <MonthEnd> | Shape: (301, 1)
üìå unrate: <MonthEnd> | Shape: (301, 1)
üìå grocery_sales: <MonthEnd> | Shape: (301, 1)
üìå home_price: <MonthEnd> | Shape: (300, 1)
üìå cons_sent: <MonthEnd> | Shape: (301, 1)
üìå initial_claims: <MonthEnd> | Shape: (300, 1)
üìå oil_prices: <MonthEnd> | Shape: (301, 1)
üìå ppi_final_food: <MonthEnd> | Shape: (183, 1)
üìå ppi_food_feed: <MonthEnd> | Shape: (301, 1)
üìå ppi_fin_cons_food: <MonthEnd> | Shape: (301, 1)
üìå ppi_food_mfg: <MonthEnd> | Shape: (301, 1)
üìå ppi_grocery: <MonthEnd> | Shape: (301, 1)
üìå wages_retail: <MonthEnd> | Shape: (227, 1)


In [41]:
# Start with the first DataFrame in df_dict
final_df = list(df_dict.values())[0]

# ‚úÖ Merge all datasets on 'date' using 'outer' join
for key in list(df_dict.keys())[1:]:
    final_df = final_df.merge(df_dict[key], on="date", how="outer")

# ‚úÖ Final Debugging
print("üìå Final DataFrame Shape:", final_df.shape)
print("üìå Final DataFrame Index Frequency:", final_df.index.freq)

# ‚úÖ Save to CSV (optional)
final_df.to_csv("fred_data_resampled.csv", index=True)

# ‚úÖ Preview results
print(final_df.head())


üìå Final DataFrame Shape: (301, 14)
üìå Final DataFrame Index Frequency: None
                  gdp  cpi_fah  unrate  grocery_sales  home_price  cons_sent  \
date                                                                           
2000-01-31  13878.147    165.3     4.0        32239.0     100.000      112.0   
2000-02-29  13878.147    166.1     4.1        32567.0     100.571      111.3   
2000-03-31  13878.147    166.4     4.0        33117.0     101.466      107.1   
2000-04-30  14130.908    166.6     3.8        33541.0     102.541      109.2   
2000-05-31  14130.908    167.4     4.0        33221.0     103.702      110.7   

            initial_claims  oil_prices  ppi_final_food  ppi_food_feed  \
date                                                                    
2000-01-31       1442000.0   27.259474             NaN          131.0   
2000-02-29       1175000.0   29.366000             NaN          131.7   
2000-03-31       1099000.0   29.841739             NaN          13

In [42]:
# Count total missing values per column
missing_counts = final_df.isna().sum()

# Show only columns with missing values
missing_counts = missing_counts[missing_counts > 0]

missing_counts


gdp                 3
home_price          1
initial_claims      1
oil_prices          1
ppi_final_food    118
wages_retail       74
dtype: int64

In [43]:
final_df

Unnamed: 0_level_0,gdp,cpi_fah,unrate,grocery_sales,home_price,cons_sent,initial_claims,oil_prices,ppi_final_food,ppi_food_feed,ppi_fin_cons_food,ppi_food_mfg,ppi_grocery,wages_retail
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2000-01-31,13878.147,165.300,4.0,32239.0,100.000,112.0,1442000.0,27.259474,,131.000,135.500,126.700,101.600,
2000-02-29,13878.147,166.100,4.1,32567.0,100.571,111.3,1175000.0,29.366000,,131.700,136.400,127.200,106.700,
2000-03-31,13878.147,166.400,4.0,33117.0,101.466,107.1,1099000.0,29.841739,,132.100,136.400,127.400,106.200,
2000-04-30,14130.908,166.600,3.8,33541.0,102.541,109.2,1358000.0,25.722105,,133.200,137.500,128.100,106.300,
2000-05-31,14130.908,167.400,4.0,33221.0,103.702,110.7,1129000.0,28.788182,,134.300,138.000,129.300,103.600,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-09-30,23400.294,307.201,4.1,75140.0,324.779,70.1,897000.0,70.236000,154.010,267.145,277.075,261.715,275.649,24.59
2024-10-31,23536.293,307.798,4.1,75403.0,324.211,70.5,948000.0,71.985000,153.114,266.152,274.644,260.642,278.565,24.73
2024-11-30,,308.881,4.2,75235.0,323.690,71.8,1095000.0,69.950000,156.339,267.806,281.448,262.165,278.043,24.93
2024-12-31,,309.754,4.1,76036.0,323.219,74.0,893000.0,70.118095,156.930,267.890,282.621,262.290,276.210,24.88
