In [1]:
import os
import time
import logging
from pathlib import Path
import pandas as pd
import yfinance as yf
from tenacity import retry, stop_after_attempt, wait_exponential

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class YFinanceStockFetcher:
    def __init__(self, tickers, output_dir="yf_monthly"):
        """
        Args:
            tickers (list[str]): List of ticker symbols.
            output_dir (str): Directory to save CSV files.
        """
        self.tickers = tickers
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=1, max=4))
    def fetch_monthly_data(self, ticker):
        """
        Fetch monthly adjusted stock data from yfinance.
        """
        logger.info(f"Fetching monthly data for {ticker}")
        # Use yfinance download with monthly interval
        data = yf.download(ticker, period="max", interval="1mo", auto_adjust=True, progress=False)
        if data.empty:
            raise ValueError(f"No data returned for {ticker}")

        # Reset and rename columns for consistency with Alpha Vantage
        data.reset_index(inplace=True)
        data.rename(columns={
            'Date': 'Date',
            'Open': 'Open',
            'High': 'High',
            'Low': 'Low',
            'Close': 'Close',
            'Volume': 'Volume'
        }, inplace=True)
        # Add 'Adj Close' (already adjusted due to auto_adjust=True)
        data['Adj Close'] = data['Close']

        # Keep relevant columns
        data = data[['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']]
        data['Date'] = pd.to_datetime(data['Date']).dt.strftime('%Y-%m-%d')
        data = data.sort_values('Date')
        data = data.round(4)
        return data

    def run(self):
        failed = []
        for ticker in self.tickers:
            try:
                df = self.fetch_monthly_data(ticker)
                output_file = self.output_dir / f"{ticker.upper()}_monthly.csv"
                df.to_csv(output_file, index=False)
                logger.info(f"Saved data to {output_file}")
            except Exception as e:
                logger.error(f"Error fetching {ticker}: {e}")
                failed.append(ticker)
            time.sleep(1)  # Avoid yfinance rate-limiting
        if failed:
            logger.warning(f"Failed tickers: {failed}")
            return failed
        return []

def get_sp500_tickers(csv_path=None):
    """
    Fetch S&P 500 tickers from CSV or Wikipedia.
    Args:
        csv_path (str): Path to CSV file with 'Symbol' column (optional).
    Returns:
        list: List of ticker symbols.
    """
    if csv_path and os.path.exists(csv_path):
        try:
            df = pd.read_csv(csv_path)
            tickers = df['Symbol'].tolist()
            logger.info(f"Loaded {len(tickers)} tickers from {csv_path}")
            return [ticker.replace('.', '-') for ticker in tickers]
        except Exception as e:
            logger.error(f"Error reading CSV {csv_path}: {e}")
    try:
        url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
        tables = pd.read_html(url)
        sp500_table = tables[0]
        tickers = sp500_table['Symbol'].tolist()
        logger.info(f"Loaded {len(tickers)} S&P 500 tickers from Wikipedia")
        return [ticker.replace('.', '-') for ticker in tickers]
    except Exception as e:
        logger.error(f"Error fetching S&P 500 tickers: {e}")
        return []

if __name__ == "__main__":
    # Path to your CSV file with S&P 500 tickers
    CSV_PATH = "C:/Users/Steel/Desktop/Projects/intel-sweep/intel-sweep/src/data/heavy-stocks.csv"
    TICKERS = get_sp500_tickers(CSV_PATH)
    csv_path = "C:/Users/Steel/Desktop/Projects/intel-sweep/intel-sweep/src/data/heavy-stocks.csv"
    df_symbols = pd.read_csv(csv_path)
    symbols = df_symbols['Symbol'].dropna().unique().tolist()

    # For testing with a smaller set
    # TICKERS = ['AAPL', 'MSFT', 'GOOG']

    if not TICKERS:
        logger.error("No tickers loaded. Exiting.")
        exit()

    fetcher = YFinanceStockFetcher(tickers=symbols)
    failed_tickers = fetcher.run()

2025-05-29 14:35:49,138 - INFO - Loaded 503 tickers from C:/Users/Steel/Desktop/Projects/intel-sweep/intel-sweep/src/data/heavy-stocks.csv
2025-05-29 14:35:49,143 - INFO - Fetching monthly data for NVDA
2025-05-29 14:35:50,189 - INFO - Saved data to yf_monthly\NVDA_monthly.csv
2025-05-29 14:35:51,191 - INFO - Fetching monthly data for MSFT
2025-05-29 14:35:51,405 - INFO - Saved data to yf_monthly\MSFT_monthly.csv
2025-05-29 14:35:52,406 - INFO - Fetching monthly data for AAPL
2025-05-29 14:35:52,604 - INFO - Saved data to yf_monthly\AAPL_monthly.csv
2025-05-29 14:35:53,605 - INFO - Fetching monthly data for AMZN
2025-05-29 14:35:53,748 - INFO - Saved data to yf_monthly\AMZN_monthly.csv
2025-05-29 14:35:54,749 - INFO - Fetching monthly data for GOOG
2025-05-29 14:35:54,886 - INFO - Saved data to yf_monthly\GOOG_monthly.csv
2025-05-29 14:35:55,888 - INFO - Fetching monthly data for GOOGL
2025-05-29 14:35:56,033 - INFO - Saved data to yf_monthly\GOOGL_monthly.csv
2025-05-29 14:35:57,035 -

2025-05-29 14:36:59,191 - INFO - Fetching monthly data for ADBE
2025-05-29 14:36:59,361 - INFO - Saved data to yf_monthly\ADBE_monthly.csv
2025-05-29 14:37:00,362 - INFO - Fetching monthly data for TXN
2025-05-29 14:37:00,577 - INFO - Saved data to yf_monthly\TXN_monthly.csv
2025-05-29 14:37:01,579 - INFO - Fetching monthly data for CAT
2025-05-29 14:37:01,818 - INFO - Saved data to yf_monthly\CAT_monthly.csv
2025-05-29 14:37:02,819 - INFO - Fetching monthly data for QCOM
2025-05-29 14:37:02,991 - INFO - Saved data to yf_monthly\QCOM_monthly.csv
2025-05-29 14:37:03,994 - INFO - Fetching monthly data for PGR
2025-05-29 14:37:04,216 - INFO - Saved data to yf_monthly\PGR_monthly.csv
2025-05-29 14:37:05,217 - INFO - Fetching monthly data for SCHW
2025-05-29 14:37:05,408 - INFO - Saved data to yf_monthly\SCHW_monthly.csv
2025-05-29 14:37:06,410 - INFO - Fetching monthly data for SPGI
2025-05-29 14:37:06,617 - INFO - Saved data to yf_monthly\SPGI_monthly.csv
2025-05-29 14:37:07,618 - INFO - 

2025-05-29 14:38:11,474 - INFO - Fetching monthly data for DUK
2025-05-29 14:38:11,694 - INFO - Saved data to yf_monthly\DUK_monthly.csv
2025-05-29 14:38:12,695 - INFO - Fetching monthly data for SHW
2025-05-29 14:38:12,911 - INFO - Saved data to yf_monthly\SHW_monthly.csv
2025-05-29 14:38:13,913 - INFO - Fetching monthly data for MCK
2025-05-29 14:38:14,078 - INFO - Saved data to yf_monthly\MCK_monthly.csv
2025-05-29 14:38:15,080 - INFO - Fetching monthly data for FI
2025-05-29 14:38:15,502 - INFO - Saved data to yf_monthly\FI_monthly.csv
2025-05-29 14:38:16,503 - INFO - Fetching monthly data for INTC
2025-05-29 14:38:16,706 - INFO - Saved data to yf_monthly\INTC_monthly.csv
2025-05-29 14:38:17,708 - INFO - Fetching monthly data for MDLZ
2025-05-29 14:38:17,882 - INFO - Saved data to yf_monthly\MDLZ_monthly.csv
2025-05-29 14:38:18,883 - INFO - Fetching monthly data for AJG
2025-05-29 14:38:19,105 - INFO - Saved data to yf_monthly\AJG_monthly.csv
2025-05-29 14:38:20,112 - INFO - Fetchi

2025-05-29 14:39:24,885 - INFO - Fetching monthly data for DLR
2025-05-29 14:39:25,043 - INFO - Saved data to yf_monthly\DLR_monthly.csv
2025-05-29 14:39:26,045 - INFO - Fetching monthly data for PAYX
2025-05-29 14:39:26,277 - INFO - Saved data to yf_monthly\PAYX_monthly.csv
2025-05-29 14:39:27,278 - INFO - Fetching monthly data for COR
2025-05-29 14:39:27,453 - INFO - Saved data to yf_monthly\COR_monthly.csv
2025-05-29 14:39:28,455 - INFO - Fetching monthly data for FCX
2025-05-29 14:39:28,613 - INFO - Saved data to yf_monthly\FCX_monthly.csv
2025-05-29 14:39:29,614 - INFO - Fetching monthly data for AFL
2025-05-29 14:39:29,822 - INFO - Saved data to yf_monthly\AFL_monthly.csv
2025-05-29 14:39:30,823 - INFO - Fetching monthly data for NSC
2025-05-29 14:39:31,041 - INFO - Saved data to yf_monthly\NSC_monthly.csv
2025-05-29 14:39:32,043 - INFO - Fetching monthly data for VST
2025-05-29 14:39:32,231 - INFO - Saved data to yf_monthly\VST_monthly.csv
2025-05-29 14:39:33,233 - INFO - Fetchi

2025-05-29 14:40:37,298 - INFO - Fetching monthly data for VLO
2025-05-29 14:40:37,541 - INFO - Saved data to yf_monthly\VLO_monthly.csv
2025-05-29 14:40:38,543 - INFO - Fetching monthly data for XEL
2025-05-29 14:40:38,755 - INFO - Saved data to yf_monthly\XEL_monthly.csv
2025-05-29 14:40:39,756 - INFO - Fetching monthly data for YUM
2025-05-29 14:40:40,102 - INFO - Saved data to yf_monthly\YUM_monthly.csv
2025-05-29 14:40:41,103 - INFO - Fetching monthly data for CTSH
2025-05-29 14:40:41,263 - INFO - Saved data to yf_monthly\CTSH_monthly.csv
2025-05-29 14:40:42,264 - INFO - Fetching monthly data for PEG
2025-05-29 14:40:42,500 - INFO - Saved data to yf_monthly\PEG_monthly.csv
2025-05-29 14:40:43,502 - INFO - Fetching monthly data for GRMN
2025-05-29 14:40:43,658 - INFO - Saved data to yf_monthly\GRMN_monthly.csv
2025-05-29 14:40:44,659 - INFO - Fetching monthly data for PCG
2025-05-29 14:40:44,914 - INFO - Saved data to yf_monthly\PCG_monthly.csv
2025-05-29 14:40:45,916 - INFO - Fetc

2025-05-29 14:41:49,286 - INFO - Fetching monthly data for DTE
2025-05-29 14:41:49,527 - INFO - Saved data to yf_monthly\DTE_monthly.csv
2025-05-29 14:41:50,528 - INFO - Fetching monthly data for BR
2025-05-29 14:41:50,690 - INFO - Saved data to yf_monthly\BR_monthly.csv
2025-05-29 14:41:51,692 - INFO - Fetching monthly data for CNC
2025-05-29 14:41:51,809 - INFO - Saved data to yf_monthly\CNC_monthly.csv
2025-05-29 14:41:52,810 - INFO - Fetching monthly data for WRB
2025-05-29 14:41:53,057 - INFO - Saved data to yf_monthly\WRB_monthly.csv
2025-05-29 14:41:54,059 - INFO - Fetching monthly data for LEN
2025-05-29 14:41:54,369 - INFO - Saved data to yf_monthly\LEN_monthly.csv
2025-05-29 14:41:55,371 - INFO - Fetching monthly data for STT
2025-05-29 14:41:55,578 - INFO - Saved data to yf_monthly\STT_monthly.csv
2025-05-29 14:41:56,579 - INFO - Fetching monthly data for ROL
2025-05-29 14:41:56,779 - INFO - Saved data to yf_monthly\ROL_monthly.csv
2025-05-29 14:41:57,781 - INFO - Fetching m

2025-05-29 14:43:01,489 - INFO - Fetching monthly data for NTRS
2025-05-29 14:43:01,669 - INFO - Saved data to yf_monthly\NTRS_monthly.csv
2025-05-29 14:43:02,670 - INFO - Fetching monthly data for TROW
2025-05-29 14:43:02,873 - INFO - Saved data to yf_monthly\TROW_monthly.csv
2025-05-29 14:43:03,875 - INFO - Fetching monthly data for CMS
2025-05-29 14:43:04,082 - INFO - Saved data to yf_monthly\CMS_monthly.csv
2025-05-29 14:43:05,083 - INFO - Fetching monthly data for AMCR
2025-05-29 14:43:05,233 - INFO - Saved data to yf_monthly\AMCR_monthly.csv
2025-05-29 14:43:06,235 - INFO - Fetching monthly data for NVR
2025-05-29 14:43:06,454 - INFO - Saved data to yf_monthly\NVR_monthly.csv
2025-05-29 14:43:07,455 - INFO - Fetching monthly data for LH
2025-05-29 14:43:07,697 - INFO - Saved data to yf_monthly\LH_monthly.csv
2025-05-29 14:43:08,699 - INFO - Fetching monthly data for NTAP
2025-05-29 14:43:08,939 - INFO - Saved data to yf_monthly\NTAP_monthly.csv
2025-05-29 14:43:09,940 - INFO - Fe

2025-05-29 14:44:14,168 - ERROR - ['BF.B']: YFPricesMissingError('possibly delisted; no price data found  (1mo 1926-06-23 -> 2025-05-29)')
2025-05-29 14:44:15,171 - INFO - Fetching monthly data for BF.B
2025-05-29 14:44:15,257 - ERROR - 
1 Failed download:
2025-05-29 14:44:15,257 - ERROR - ['BF.B']: YFPricesMissingError('possibly delisted; no price data found  (1mo 1926-06-23 -> 2025-05-29)')
2025-05-29 14:44:17,261 - INFO - Fetching monthly data for BF.B
2025-05-29 14:44:17,350 - ERROR - 
1 Failed download:
2025-05-29 14:44:17,350 - ERROR - ['BF.B']: YFPricesMissingError('possibly delisted; no price data found  (1mo 1926-06-23 -> 2025-05-29)')
2025-05-29 14:44:17,352 - ERROR - Error fetching BF.B: RetryError[<Future at 0x1f97afbe690 state=finished raised ValueError>]
2025-05-29 14:44:18,353 - INFO - Fetching monthly data for COO
2025-05-29 14:44:18,748 - INFO - Saved data to yf_monthly\COO_monthly.csv
2025-05-29 14:44:19,749 - INFO - Fetching monthly data for NWSA
2025-05-29 14:44:19,

2025-05-29 14:45:24,178 - INFO - Fetching monthly data for LKQ
2025-05-29 14:45:24,435 - INFO - Saved data to yf_monthly\LKQ_monthly.csv
2025-05-29 14:45:25,436 - INFO - Fetching monthly data for DVA
2025-05-29 14:45:25,705 - INFO - Saved data to yf_monthly\DVA_monthly.csv
2025-05-29 14:45:26,707 - INFO - Fetching monthly data for SWK
2025-05-29 14:45:26,896 - INFO - Saved data to yf_monthly\SWK_monthly.csv
2025-05-29 14:45:27,898 - INFO - Fetching monthly data for VTRS
2025-05-29 14:45:28,084 - INFO - Saved data to yf_monthly\VTRS_monthly.csv
2025-05-29 14:45:29,085 - INFO - Fetching monthly data for CPB
2025-05-29 14:45:29,320 - INFO - Saved data to yf_monthly\CPB_monthly.csv
2025-05-29 14:45:30,321 - INFO - Fetching monthly data for AIZ
2025-05-29 14:45:30,456 - INFO - Saved data to yf_monthly\AIZ_monthly.csv
2025-05-29 14:45:31,457 - INFO - Fetching monthly data for GL
2025-05-29 14:45:31,683 - INFO - Saved data to yf_monthly\GL_monthly.csv
2025-05-29 14:45:32,685 - INFO - Fetching

In [3]:
df['date'] = pd.to_datetime(df['date'])

In [5]:

print(df.dtypes)  # Check that 'Date' is now datetime64[ns]
fred_dfs = []
for fred_file in fred_files:
    df = pd.read_csv(fred_file)
    df['date'] = pd.to_datetime(df['date'])  # <- Line 31
    series_id = fred_file.stem
    df = df[['date', series_id]].set_index('date')
    fred_dfs.append(df)

fred_df = pd.concat(fred_dfs, axis=1)
fred_df.head()
stock_df = pd.read_csv(stock_file)
stock_df['Date'] = pd.to_datetime(stock_df['Date'])
stock_df = stock_df[['Date', 'Close', 'Volume']].set_index('Date')

# Merge with FRED data
data = stock_df.join(fred_df, how='inner').dropna()
data.head()
lags = 3
feature_names = []

for col in data.columns:
    for lag in range(1, lags + 1):
        data[f'{col}_lag{lag}'] = data[col].shift(lag)
        feature_names.append(f'{col}_lag{lag}')

data['target'] = data['Close'].shift(-1)
data = data.dropna()

test_size = 0.2
train_size = int(len(data) * (1 - test_size))
train = data.iloc[:train_size]
test = data.iloc[train_size:]

X_train = train[feature_names]
y_train = train['target']
X_test = test[feature_names]
y_test = test['target']

logger.info(f"Prepared data: {X_train.shape[0]} train samples, {X_test.shape[0]} test samples, {len(feature_names)} features")


2025-05-29 09:40:15,838 - NotebookPrepData - INFO - Prepared data: 0 train samples, 0 test samples, 15 features


Unnamed: 0            int64
date                 object
00XALCATM086NEST    float64
dtype: object


In [2]:
start_date = "2025-04-30"
end = start_date - 2000

TypeError: unsupported operand type(s) for -: 'str' and 'int'

In [3]:
from datetime import datetime, timedelta

start_date = "2025-04-30"
dt = datetime.strptime(start_date, "%Y-%m-%d")
new_date = dt - timedelta(days=2000)
end_date = new_date.strftime("%Y-%m-%d")
print(end_date) 

2019-11-08
