In [7]:
import pandas as pd
import logging
from pathlib import Path

# Configure root logging for the notebook and create this module's logger.
# logging.basicConfig() does nothing when the root logger already has handlers,
# so a later basicConfig() call with a different format is ignored unless it
# passes force=True.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def prepare_data(stock_ticker='AAPL', fred_files=None, stock_file=None, lags=3, test_size=0.2):
    """
    Merge stock and FRED data, create lagged features, and split into train/test sets.

    Both sources are snapped to month-start timestamps, inner-joined on that date,
    stripped of rows with missing values, and sorted chronologically. Every column
    of the merged frame is lagged by 1..``lags`` rows; the target is the ``Close``
    of the following row. The split is positional (no shuffling): the leading
    ``1 - test_size`` share of rows becomes the training set.

    Args:
        stock_ticker (str): Stock ticker (e.g., 'AAPL'). Not used by the function
            body; kept so existing calls keep working.
        fred_files (list): FRED CSV files, given as ``str`` or ``pathlib.Path``.
            Each file must contain a ``date`` column and a value column named
            after the file stem (e.g. ``00XALCATM086NEST.csv`` ->
            ``00XALCATM086NEST``).
        stock_file (str | Path): Stock CSV containing ``Date`` and ``Close`` columns.
        lags (int): Number of lag periods for features.
        test_size (float): Proportion of data for test set.

    Returns:
        tuple: (X_train, X_test, y_train, y_test, feature_names)

    Raises:
        ValueError: If ``stock_file`` is None or ``fred_files`` is empty.
        TypeError: If ``fred_files`` is None.
    """
    if stock_file is None:
        raise ValueError("stock_file is required: pass the path to the stock CSV")
    if fred_files is None:
        raise TypeError("fred_files is required: pass a list of FRED CSV paths")
    if len(fred_files) == 0:
        raise ValueError("fred_files is empty: pass at least one FRED CSV path")

    # Load stock data; only Close is used downstream.
    stock_df = pd.read_csv(stock_file)
    stock_df['date'] = pd.to_datetime(stock_df['Date']).dt.to_period('M').dt.to_timestamp()
    stock_df = stock_df[['date', 'Close']].set_index('date')

    # Load FRED data, one value column per file, aligned on the raw observation date.
    fred_frames = []
    for fred_file in fred_files:
        fred_path = Path(fred_file)  # accept plain strings as well as Path objects
        series_id = fred_path.stem  # e.g., '00XALCATM086NEST'
        series_df = pd.read_csv(fred_path)
        series_df['date'] = pd.to_datetime(series_df['date'])
        fred_frames.append(series_df[['date', series_id]].set_index('date'))
    fred_df = pd.concat(fred_frames, axis=1).reset_index()

    # Snap FRED dates to month start so they line up with the stock index.
    fred_df['date'] = pd.to_datetime(fred_df['date']).dt.to_period('M').dt.to_timestamp()
    fred_df = fred_df.set_index('date')

    # shift()-based lags and the positional split below both assume ascending
    # dates, so enforce chronological order instead of trusting the CSV row order.
    data = stock_df.join(fred_df, how='inner').dropna().sort_index(kind='mergesort')

    # Create lagged features from the merged base columns only.
    base_columns = list(data.columns)
    feature_names = []
    for col in base_columns:
        for lag in range(1, lags + 1):
            lagged_name = f'{col}_lag{lag}'
            data[lagged_name] = data[col].shift(lag)
            feature_names.append(lagged_name)
    data['target'] = data['Close'].shift(-1)  # Next month's Close price
    data = data.dropna()

    # Split train/test by position: earliest rows train, latest rows test.
    train_size = int(len(data) * (1 - test_size))
    train = data.iloc[:train_size]
    test = data.iloc[train_size:]

    X_train = train[feature_names]
    y_train = train['target']
    X_test = test[feature_names]
    y_test = test['target']

    logger.info(f"Prepared data: {X_train.shape[0]} train samples, {X_test.shape[0]} test samples, {len(feature_names)} features")
    return X_train, X_test, y_train, y_test, feature_names

if __name__ == "__main__":
    # Demo run: the AAPL monthly CSV joined with three FRED series files.
    stock_file = Path("data/av_monthly/AAPL_monthly.csv")
    fred_files = [
        Path(csv_path)
        for csv_path in (
            "data/fred_economic_indicators/00XALCATM086NEST.csv",
            "data/fred_economic_indicators/00XALCBEM086NEST.csv",
            "data/fred_economic_indicators/00XALCCZM086NEST.csv",
        )
    ]
    X_train, X_test, y_train, y_test, feature_names = prepare_data(
        stock_file=stock_file,
        fred_files=fred_files,
    )
    print(f"Features: {feature_names}")


2025-05-29 10:30:01,949 - __main__ - INFO - Prepared data: 240 train samples, 60 test samples, 12 features


Features: ['Close_lag1', 'Close_lag2', 'Close_lag3', '00XALCATM086NEST_lag1', '00XALCATM086NEST_lag2', '00XALCATM086NEST_lag3', '00XALCBEM086NEST_lag1', '00XALCBEM086NEST_lag2', '00XALCBEM086NEST_lag3', '00XALCCZM086NEST_lag1', '00XALCCZM086NEST_lag2', '00XALCCZM086NEST_lag3']


In [8]:
# Same inputs as the demo run: three FRED series files plus the AAPL monthly CSV.
fred_files = list(map(Path, (
    "data/fred_economic_indicators/00XALCATM086NEST.csv",
    "data/fred_economic_indicators/00XALCBEM086NEST.csv",
    "data/fred_economic_indicators/00XALCCZM086NEST.csv",
)))
stock_file = Path("data/av_monthly/AAPL_monthly.csv")
X_train, X_test, y_train, y_test, feature_names = prepare_data(
    stock_file=stock_file,
    fred_files=fred_files,
)
print(f"Features: {feature_names}")

2025-05-29 10:30:09,918 - __main__ - INFO - Prepared data: 240 train samples, 60 test samples, 12 features


Features: ['Close_lag1', 'Close_lag2', 'Close_lag3', '00XALCATM086NEST_lag1', '00XALCATM086NEST_lag2', '00XALCATM086NEST_lag3', '00XALCBEM086NEST_lag1', '00XALCBEM086NEST_lag2', '00XALCBEM086NEST_lag3', '00XALCCZM086NEST_lag1', '00XALCCZM086NEST_lag2', '00XALCCZM086NEST_lag3']


In [11]:
import pandas as pd

# Load the stock list (Company / Symbol / Weight columns per the recorded output)
# and print every ticker symbol.
# NOTE(review): this is an absolute, machine-specific path; consider resolving it
# from a configurable data directory so the notebook runs elsewhere.
# The frame gets a real name instead of `_`, which IPython uses for the last output.
heavy_stocks = pd.read_csv("C:/Users/Steel/Desktop/Projects/intel-sweep/intel-sweep/src/data/heavy-stocks.csv")
symbols = heavy_stocks['Symbol'].tolist()
for symbol in symbols:
    print(symbol)

Unnamed: 0,Company,Symbol,Weight
0,Nvidia,NVDA,6.20%
1,Microsoft,MSFT,6.18%
2,Apple Inc.,AAPL,5.42%
3,Amazon,AMZN,3.98%
4,Alphabet Inc. (Class C),GOOG,3.80%
...,...,...,...
498,Invesco,IVZ,0.01%
499,Mohawk Industries,MHK,0.01%
500,Apa Corporation,APA,0.01%
501,Caesars Entertainment,CZR,0.01%


In [13]:
import os
import time
import logging
from pathlib import Path
import pandas as pd
from alpha_vantage.timeseries import TimeSeries
from tenacity import retry, stop_after_attempt, wait_exponential

# Setup logging
# NOTE(review): logging.basicConfig() does nothing when the root logger already
# has handlers. If an earlier cell in the same kernel configured logging, this
# format is not applied -- the recorded output of the fetch run still shows the
# logger-name field, which this format string does not contain. Pass force=True
# (Python 3.8+) if this format is meant to take over.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


class AlphaVantageStockFetcher:
    """Download monthly adjusted price history from Alpha Vantage, one CSV per ticker."""

    # Column labels in the Alpha Vantage frame -> header names written to the CSV.
    _COLUMN_NAMES = {
        'date': 'Date',
        '1. open': 'Open',
        '2. high': 'High',
        '3. low': 'Low',
        '4. close': 'Close',
        '5. adjusted close': 'Adj Close',
        '6. volume': 'Volume'
    }
    # Columns kept in the output, in CSV order.
    _OUTPUT_COLUMNS = ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']

    def __init__(self, tickers, api_key, output_dir="av_monthly", request_delay=12):
        """
        Args:
            tickers (list[str]): List of ticker symbols.
            api_key (str): Alpha Vantage API key.
            output_dir (str): Directory to save CSV files (created if missing).
            request_delay (float): Seconds to pause after each ticker in ``run()``.
                Defaults to 12, matching the previously hard-coded pause.
        """
        self.tickers = tickers
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.request_delay = request_delay
        self.ts = TimeSeries(key=api_key, output_format='pandas')

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=1, max=4))
    def fetch_monthly_data(self, ticker):
        """
        Fetch monthly adjusted stock data from Alpha Vantage.

        The call is attempted up to three times with exponential backoff. When
        every attempt fails, tenacity raises ``RetryError`` with the last
        underlying exception chained as ``__cause__``.

        Args:
            ticker (str): Ticker symbol to request.

        Returns:
            pandas.DataFrame: Columns Date (``YYYY-MM-DD`` string), Open, High,
            Low, Close, Adj Close, Volume; sorted by Date ascending and rounded
            to 4 decimals.

        Raises:
            ValueError: On an attempt that returns an empty frame (reaches the
                caller wrapped in ``tenacity.RetryError`` after the last attempt).
        """
        logger.info(f"Fetching monthly data for {ticker}")
        data, _meta = self.ts.get_monthly_adjusted(symbol=ticker)
        if data.empty:
            raise ValueError(f"No data returned for {ticker}")

        # Rename columns for consistency, without mutating the frame the client returned.
        data = data.reset_index().rename(columns=self._COLUMN_NAMES)

        # Keep relevant columns. The explicit copy makes the Date assignment below
        # write to an independent frame rather than a slice (avoids
        # SettingWithCopyWarning).
        data = data[self._OUTPUT_COLUMNS].copy()
        data['Date'] = pd.to_datetime(data['Date']).dt.strftime('%Y-%m-%d')
        # ISO-formatted date strings sort lexicographically in chronological order.
        return data.sort_values('Date').round(4)

    def run(self):
        """
        Fetch every ticker and write ``<TICKER>_monthly.csv`` into ``output_dir``.

        A failure for one ticker is logged and recorded, then processing moves
        on to the next ticker; the failed tickers are reported in a single
        warning at the end.
        """
        failed = []
        for ticker in self.tickers:
            try:
                df = self.fetch_monthly_data(ticker)
                output_file = self.output_dir / f"{ticker.upper()}_monthly.csv"
                df.to_csv(output_file, index=False)
                logger.info(f"Saved data to {output_file}")
            except Exception as e:
                # tenacity's RetryError prints only an opaque Future repr; the real
                # failure is chained as __cause__, so include it in the log line.
                cause = e.__cause__
                detail = f"{e} (caused by {cause!r})" if cause is not None else f"{e}"
                logger.error(f"Error fetching {ticker}: {detail}")
                failed.append(ticker)
            # Pause after every ticker, including the last one: callers that create
            # one fetcher per ticker rely on this pause to space out their requests.
            time.sleep(self.request_delay)  # Alpha Vantage allows 5 requests per minute on free tier

        if failed:
            logger.warning(f"Failed tickers: {failed}")



In [14]:

if __name__ == "__main__":
    # Read the Alpha Vantage key from the environment instead of storing it in the notebook.
    # NOTE(review): the key that used to be hard-coded here has been exposed in this
    # file and should be revoked and replaced.
    API_KEY = os.environ.get("ALPHAVANTAGE_API_KEY")
    if not API_KEY:
        raise RuntimeError("Set the ALPHAVANTAGE_API_KEY environment variable before running this cell")

    # To fetch a longer list, load the symbols from a CSV with a 'Symbol' column instead.
    symbols = ['AAPL', 'MSFT', 'GOOG']

    # One fetcher handles the whole list: run() iterates the tickers, pauses after
    # each request, and reports all failed tickers together at the end.
    fetcher = AlphaVantageStockFetcher(tickers=symbols, api_key=API_KEY)
    fetcher.run()

2025-05-29 13:14:27,780 - __main__ - INFO - Fetching monthly data for AAPL
2025-05-29 13:14:29,372 - __main__ - INFO - Fetching monthly data for AAPL
2025-05-29 13:14:31,802 - __main__ - INFO - Fetching monthly data for AAPL
2025-05-29 13:14:32,192 - __main__ - ERROR - Error fetching AAPL: RetryError[<Future at 0x190b7654090 state=finished raised ValueError>]
2025-05-29 13:14:44,195 - __main__ - INFO - Fetching monthly data for MSFT
2025-05-29 13:14:46,693 - __main__ - INFO - Fetching monthly data for MSFT
2025-05-29 13:14:49,084 - __main__ - INFO - Fetching monthly data for MSFT
2025-05-29 13:14:49,479 - __main__ - ERROR - Error fetching MSFT: RetryError[<Future at 0x190b77c7f10 state=finished raised ValueError>]
2025-05-29 13:15:01,481 - __main__ - INFO - Fetching monthly data for GOOG
2025-05-29 13:15:02,907 - __main__ - INFO - Fetching monthly data for GOOG
2025-05-29 13:15:05,312 - __main__ - INFO - Fetching monthly data for GOOG
2025-05-29 13:15:05,705 - __main__ - ERROR - Error 