# Economic Indicators Data Collection

## Setup and Imports

In [1]:
# Import essential libraries for data handling, numerical operations, 
# and web data retrieval
import pandas as pd
import numpy as np
from datetime import datetime
import pandas_datareader as web

## Set random seeds for reproducibility
Establish random seeds to ensure consistent and reproducible results across executions

In [2]:
# Seed NumPy's global random number generator with a fixed value
np.random.seed(seed=42)

## Define date range for the economic data collection
Specify the temporal scope for collecting historical and current economic data

In [3]:
# Collection window handed to the FRED reader.
# The end date is a hard-coded snapshot date, not computed at run time.
start_date = datetime(year=1990, month=1, day=1)
end_date = datetime(year=2025, month=4, day=1)

## Fetch Economic Indicators Data from FRED
In this section, key economic indicators are fetched from the FRED database and saved to CSV. A comprehensive set of macroeconomic indicators is included in the data collection.

In [4]:
def fetch_economic_data(output_path='data/economic_indicators.csv', start=None, end=None):
    """
    Fetches economic indicators from FRED database and saves to CSV

    Args:
        output_path: Destination CSV file. Missing parent directories are
            created before writing.
        start: Start of the requested date range. Defaults to the
            module-level ``start_date``.
        end: End of the requested date range. Defaults to the module-level
            ``end_date``.

    Returns:
        The DataFrame that was written to ``output_path``: one column per
        indicator (named with the labels below), GDP linearly interpolated,
        remaining gaps forward-filled, and rows that are entirely NaN dropped.
        Values before a series' first observation are left as NaN.
    """
    import os  # local import: the file's import cell does not bring in os

    # FRED series code -> column label used in the saved CSV
    # NOTE(review): some of these series may be discontinued on FRED; because gaps
    # are forward-filled below, stale values would be carried forward silently -- confirm coverage.
    indicators = {
        'UNRATE': 'unemployment',                     # Unemployment Rate
        'T10Y2Y': 'yield_spread',                     # 10-Year minus 2-Year Yield Spread
        'INDPRO': 'industrial_prod',                  # Industrial Production Index
        'UMCSENT': 'consumer_conf',                   # Consumer Sentiment Index
        'SP500': 'SP500',                             # S&P 500 Index
        'USSLIND': 'LEI',                             # Leading Economic Index
        'CPIAUCSL': 'CPI',                            # Consumer Price Index
        'GDPC1': 'GDP_Growth',                        # Real GDP level (quarterly); label kept as-is for the CSV schema
        'RECPROUSM156N': 'recession_probabilities',   # Recession Probabilities
        'FEDFUNDS': 'fed_funds_rate',                 # Federal Funds Effective Rate
        'TWEXB': 'currency_strength',                 # Trade Weighted U.S. Dollar Index
        'HOUST': 'housing_starts',                    # Housing Starts
        'PCEPI': 'personal_consumption_expenses'      # PCE Price Index
    }

    # Fall back to the notebook-level date range when no explicit bounds are given
    if start is None:
        start = start_date
    if end is None:
        end = end_date

    print(f"Fetching {len(indicators)} economic indicators from FRED...")

    # Fetch data from FRED
    data = web.DataReader(list(indicators), 'fred', start, end)

    # Relabel by series code rather than by position, so each column gets the
    # right name regardless of the order in which the reader returns them
    data = data.rename(columns=indicators)

    # GDP is quarterly while other series are higher frequency: fill the rows
    # between quarterly observations by linear interpolation
    data['GDP_Growth'] = data['GDP_Growth'].interpolate(method='linear')

    # Forward fill missing values and only drop rows where ALL values are NaN
    data = data.ffill().dropna(how='all')

    # Make sure the target directory exists; to_csv does not create it
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save to CSV
    data.to_csv(output_path)

    print(f"Data collected from {data.index.min()} to {data.index.max()}")
    print(f"Data saved to '{output_path}'")

    return data

## Main Execution Function
This section defines the `main()` function, which loads the saved economic indicators CSV (using the `DATE` column as the index) and displays the dataset's shape and first five rows.

In [5]:
def main():
    """
    Loads the saved economic indicators CSV and prints a short summary

    Reads './data/economic_indicators.csv' with the 'DATE' column parsed as a
    date index. If the file does not exist yet, fetch_economic_data() is
    called first to download the indicators and create it. Prints the dataset
    shape and the first five rows; returns nothing.
    """
    import os  # local import: the file's import cell does not bring in os

    csv_path = './data/economic_indicators.csv'

    # The CSV is produced by fetch_economic_data(); create it on a first run
    # instead of failing with FileNotFoundError
    if not os.path.exists(csv_path):
        fetch_economic_data()

    # Load the saved data and set 'DATE' as index
    economic_indicators = pd.read_csv(csv_path,
                                      index_col='DATE',
                                      parse_dates=True)

    # Display basic information
    print("\nDataset shape:", economic_indicators.shape)
    print("\nFirst 5 rows:")
    print(economic_indicators.head())


if __name__ == "__main__":
    main()


Dataset shape: (9318, 13)

First 5 rows:
            unemployment  yield_spread  industrial_prod  consumer_conf  SP500  \
DATE                                                                            
1990-01-01           5.4           NaN          61.6352           93.0    NaN   
1990-01-02           5.4          0.07          61.6352           93.0    NaN   
1990-01-03           5.4          0.05          61.6352           93.0    NaN   
1990-01-04           5.4          0.06          61.6352           93.0    NaN   
1990-01-05           5.4          0.09          61.6352           93.0    NaN   

             LEI    CPI    GDP_Growth  recession_probabilities  \
DATE                                                             
1990-01-01  1.71  127.5  10047.386000                      0.2   
1990-01-02  1.71  127.5  10047.947062                      0.2   
1990-01-03  1.71  127.5  10048.508123                      0.2   
1990-01-04  1.71  127.5  10049.069185                      0