# Economic Indicators Data Collection

## Setup and Imports

In [1]:
# Import essential libraries for data handling, numerical operations, 
# and web data retrieval
import pandas as pd
import numpy as np
from datetime import datetime
import pandas_datareader as web
import yfinance as yf

## Set random seeds for reproducibility
Establish random seeds to ensure consistent and reproducible results across executions

In [2]:
# Set random seeds for reproducibility
np.random.seed(42)

## Define date range for the economic data collection
Specify the temporal scope for collecting historical and current economic data

In [3]:
# Define date range for the data collection
start_date = datetime(1990, 1, 1)
end_date = datetime(2025, 4, 9)  # Recent date

## Fetch Economic Indicators Data from FRED
In this section, key economic indicators are fetched from FRED database and saved to CSV. A comprehensive set of macroeconomic indicators are included in the data collection.

In [4]:
def fetch_economic_data():
    """
    Fetches economic indicators from FRED and S&P 500 from Yahoo Finance, then saves to CSV.
    """
    indicators = {
        'UNRATE': 'unemployment',                     # Unemployment Rate
        'T10Y2Y': 'yield_spread',                     # 10-Year minus 2-Year Yield Spread
        'INDPRO': 'industrial_prod',                  # Industrial Production Index
        'UMCSENT': 'consumer_conf',                   # Consumer Sentiment Index
        'USSLIND': 'LEI',                             # Leading Economic Index
        'CPIAUCSL': 'CPI',                            # Consumer Price Index
        'GDPC1': 'GDP_Growth',                        # Real GDP (quarterly, will interpolate)
        'RECPROUSM156N': 'recession_probabilities',   # Recession Probabilities
        'FEDFUNDS': 'fed_funds_rate',                 # Federal Funds Effective Rate
        'TWEXB': 'currency_strength',                 # Trade Weighted U.S. Dollar Index
        'HOUST': 'housing_starts',                    # Housing Starts
        'PCEPI': 'personal_consumption_expenses',     # PCE Price Index
        'PPIACO': 'PPI'                               # Producer Price Index
    }

    print(f"Fetching {len(indicators)} economic indicators from FRED...")

    # Fetch macro indicators from FRED
    fred_data = web.DataReader(list(indicators.keys()), 'fred', start_date, end_date)
    fred_data.columns = [indicators[k] for k in indicators.keys()]

    # Interpolate quarterly GDP to daily
    fred_data['GDP_Growth'] = fred_data['GDP_Growth'].interpolate(method='linear')

    # Fetch S&P 500 from Yahoo Finance
    print("Fetching S&P 500 data from Yahoo Finance...")
    sp500 = yf.download('^GSPC', start=start_date, end=end_date, group_by='column')
    sp500.index = sp500.index.normalize()  # Align timestamp to date only (remove time)
    sp500.columns = ['_'.join(col).strip() if isinstance(col, tuple) else col for col in sp500.columns.values]
    sp500 = sp500[['Close_^GSPC']].rename(columns={'Close_^GSPC': 'SP500'})
    


    # Merge on date
    combined = fred_data.join(sp500, how='left')

    # Forward-fill and clean
    combined = combined.ffill().dropna(how='all')

    # Save to CSV
    combined.index.name = 'DATE'
    combined.to_csv('data/economic_indicators.csv')

    print(f"Data collected from {combined.index.min()} to {combined.index.max()}")
    print("Data saved to 'data/economic_indicators.csv'")

    return combined

## Main Execution Function
This section defines the `main()` function, which orchestrates the entire data collection workflow. It handles Reddit API authentication and data collection from specified subreddits.

In [5]:
def main():
    # Call the fetch function to download and save data
    fetch_economic_data()

    # Now load the newly saved CSV
    economic_indicators = pd.read_csv('./data/economic_indicators.csv', 
                                      index_col='DATE', 
                                      parse_dates=True)

    # Display basic information
    print("\nDataset shape:", economic_indicators.shape)
    print("\nFirst 5 rows:")
    print(economic_indicators.head())

if __name__ == "__main__":
    main()


Fetching 13 economic indicators from FRED...
Fetching S&P 500 data from Yahoo Finance...
YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed

Data collected from 1990-01-01 00:00:00 to 2025-04-08 00:00:00
Data saved to 'data/economic_indicators.csv'

Dataset shape: (9323, 14)

First 5 rows:
            unemployment  yield_spread  industrial_prod  consumer_conf   LEI  \
DATE                                                                           
1990-01-01           5.4           NaN          61.6352           93.0  1.71   
1990-01-02           5.4          0.07          61.6352           93.0  1.71   
1990-01-03           5.4          0.05          61.6352           93.0  1.71   
1990-01-04           5.4          0.06          61.6352           93.0  1.71   
1990-01-05           5.4          0.09          61.6352           93.0  1.71   

              CPI    GDP_Growth  recession_probabilities  fed_funds_rate  \
DATE                                                                       
1990-01-01  127.5  10047.386000                     0.18            8.23   
1990-01-02  127.5  10047.947062                     0.18     


