# Retrieving Data

In [40]:
import numpy as np
import pandas as pd
from datetime import time
from ta import trend, volatility, momentum, volume
import yfinance as yf
import time

# Source Data

In [None]:
# Define the function to get the sector for each stock
def get_sector(ticker):
    try:
        stock_info = yf.Ticker(ticker).info
        return stock_info.get('sector', 'N/A')  # Returns 'N/A' if sector information is not available
    except Exception as e:
        #print(f"Could not fetch sector for {ticker}: {e}")
        return 'N/A'
    

# get ticker symbols
tickers = pd.read_csv('data/tickers.csv')
tickers = tickers['Symbol'].tolist()

sectors = []

# Loop through each ticker and fetch the sector
for ticker in tickers:
    print(f"Fetching sector for {ticker}")
    sector = get_sector(ticker)
    sectors.append(sector)
   # time.sleep(0.5)  # Delay to avoid hitting API rate limits

In [43]:
# get tickers
tickers = pd.read_csv('data/tickers.csv')
#tickers['Sector'] = sectors
#tickers.to_csv('data/tickers.csv', index=False)

In [44]:
#get ticker symbols
ticker_symbols = tickers['Symbol'].tolist()

# Split tickers into batches of 100 for we don't hit the API rate limit
ticker_batches = [ticker_symbols[i:i + 100] for i in range(0, len(ticker_symbols), 100)]

# Start and end dates
start_date = '2005-01-01'
end_date = '2024-01-01'

# Create an empty DataFrame to store all data
all_data = pd.DataFrame()

# Loop through each batch of tickers and fetch the historical data
for batch in ticker_batches:
    for ticker in batch:
        print(f"Downloading data for {ticker}")
        data = yf.download(ticker, start=start_date, end=end_date, interval="1mo")
        data.dropna(inplace=True)  # Clean data by dropping NAs
        data['Ticker'] = ticker  # Add a column to identify the stock
        all_data = pd.concat([all_data, data], axis=0)
    time.sleep(10)  # sleep tim to avoid hitting API rate limits

# Save the final result to a CSV file
all_data.to_csv('data/historical_stock_data_final.csv')

print("All data downloaded and saved.")


[*********************100%%**********************]  1 of 1 completed

Downloading data for MSFT
Downloading data for AAPL



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for NVDA
Downloading data for AMZN
Downloading data for GOOGL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for META
Downloading data for GOOG
Downloading data for BRK.B


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['BRK.B']: Exception('%ticker%: No timezone found, symbol may be delisted')
[*********************100%%**********************]  1 of 1 completed


Downloading data for AVGO
Downloading data for LLY


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for JPM
Downloading data for XOM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TSLA
Downloading data for UNH


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for V
Downloading data for MA
Downloading data for PG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for JNJ
Downloading data for HD


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MRK
Downloading data for COST


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CVX
Downloading data for ABBV


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CRM
Downloading data for BAC
Downloading data for WMT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AMD
Downloading data for NFLX
Downloading data for PEP


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for KO
Downloading data for TMO


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ADBE
Downloading data for WFC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for LIN
Downloading data for DIS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MCD
Downloading data for CSCO


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ACN
Downloading data for ORCL
Downloading data for ABT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for QCOM
Downloading data for INTU
Downloading data for GE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CAT
Downloading data for AMAT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for VZ
Downloading data for DHR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TXN
Downloading data for COP


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for IBM
Downloading data for CMCSA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for NOW
Downloading data for UNP


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PM
Downloading data for AMGN
Downloading data for PFE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for UBER
Downloading data for GS
Downloading data for NEE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for INTC
Downloading data for AXP


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for RTX
Downloading data for SPGI


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for LOW
Downloading data for ISRG
Downloading data for ETN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for HON
Downloading data for MU


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ELV
Downloading data for BKNG
Downloading data for PGR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for LRCX
Downloading data for T


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for C
Downloading data for MS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for NKE
Downloading data for SYK


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for SCHW
Downloading data for TJX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for BSX
Downloading data for UPS
Downloading data for BLK


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MDT
Downloading data for CI


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for DE
Downloading data for VRTX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ADI
Downloading data for SBUX
Downloading data for ADP


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CB
Downloading data for LMT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MMC
Downloading data for PLD


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for KLAC
Downloading data for BA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MDLZ
Downloading data for FI
Downloading data for REGN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PANW
Downloading data for BMY


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Downloading data for BX
Downloading data for CMG
Downloading data for TMUS



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CVS
Downloading data for SNPS
Downloading data for GILD


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AMT
Downloading data for SO


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for EOG
Downloading data for WM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CDNS
Downloading data for MO


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TGT
Downloading data for CME


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ICE
Downloading data for DUK
Downloading data for CL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MPC
Downloading data for ZTS
Downloading data for FCX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for SHW
Downloading data for APH


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MCK
Downloading data for ABNB
Downloading data for PH


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PYPL
Downloading data for SLB


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TDG
Downloading data for TT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for EQIX
Downloading data for NOC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ITW
Downloading data for BDX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CSX
Downloading data for ANET


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PSX
Downloading data for GD


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for USB
Downloading data for PXD


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for EMR
Downloading data for NXPI


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Downloading data for PNC
Downloading data for ORLY



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for FDX
Downloading data for HCA
Downloading data for CEG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MAR
Downloading data for MCO


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PCAR
Downloading data for CTAS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MSI
Downloading data for AON


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for VLO
Downloading data for ECL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ROP
Downloading data for COF


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for NSC
Downloading data for GM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for APD
Downloading data for EW


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AIG
Downloading data for WELL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for HLT
Downloading data for AZO
Downloading data for TFC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MMM
Downloading data for CARR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MCHP
Downloading data for AJG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for F
Downloading data for NEM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TRV
Downloading data for CPRT
Downloading data for DXCM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for WMB
Downloading data for OKE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for URI
Downloading data for ADSK
Downloading data for SPG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for KMB
Downloading data for ROST


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for HES
Downloading data for SRE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AEP
Downloading data for OXY


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for O
Downloading data for ALL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AFL
Downloading data for JCI


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for DHI
Downloading data for TEL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for BK
Downloading data for DLR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MET
Downloading data for SMCI
Downloading data for NUE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for IQV
Downloading data for D
Downloading data for STZ


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for LULU
Downloading data for GEV


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['GEV']: Exception("%ticker%: Data doesn't exist for startDate = 1104555600, endDate = 1704085200")
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for IDXX
Downloading data for AMP
Downloading data for FIS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for GWW
Downloading data for AME


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Downloading data for FTNT
Downloading data for PSA



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for LHX
Downloading data for CCI
Downloading data for A


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for GIS
Downloading data for DOW
Downloading data for COR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PRU
Downloading data for YUM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CMI
Downloading data for CNC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MNST
Downloading data for RSG
Downloading data for FAST


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for SYY
Downloading data for CTVA
Downloading data for LEN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PAYX
Downloading data for PWR
Downloading data for IR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MSCI
Downloading data for CSGP
Downloading data for OTIS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for HUM
Downloading data for MLM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for FANG
Downloading data for EXC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Downloading data for GEHC
Downloading data for PCG



[*********************100%%**********************]  1 of 1 completed


Downloading data for KR
Downloading data for MRNA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for KMI
Downloading data for KDP
Downloading data for IT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for VMC
Downloading data for HAL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for EL
Downloading data for DVN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PEG
Downloading data for ACGL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CTSH
Downloading data for RCL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for BKR
Downloading data for ODFL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for KVUE
Downloading data for CDW
Downloading data for GPN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MPWR
Downloading data for ROK


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for DAL
Downloading data for ED


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Downloading data for VRSK
Downloading data for ADM



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for RMD
Downloading data for DFS
Downloading data for DD


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for XYL
Downloading data for EA
Downloading data for DG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PPG
Downloading data for KHC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for BIIB
Downloading data for XEL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TSCO
Downloading data for WAB


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for VICI
Downloading data for ON
Downloading data for ANSS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for HIG
Downloading data for EXR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for HSY
Downloading data for FICO


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for EFX
Downloading data for EBAY


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AVB
Downloading data for EIX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MTD
Downloading data for FTV
Downloading data for WST


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for KEYS
Downloading data for CHD


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for HWM
Downloading data for CBRE
Downloading data for LYB


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for WTW
Downloading data for TRGP
Downloading data for WEC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TROW
Downloading data for CAH


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for DLTR
Downloading data for DOV


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Downloading data for FITB
Downloading data for CHTR



[*********************100%%**********************]  1 of 1 completed

Downloading data for ZBH
Downloading data for PHM



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for NVR
Downloading data for MTB


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for HPQ
Downloading data for GLW


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AWK
Downloading data for BLDR
Downloading data for RJF


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for WDC
Downloading data for TTWO
Downloading data for BR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for NDAQ
Downloading data for WY


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for STT
Downloading data for IRM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for DTE
Downloading data for ETR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for GPC
Downloading data for EQR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ALGN
Downloading data for HPE
Downloading data for GRMN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CPAY
Downloading data for BALL
Downloading data for HUBB


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AXON
Downloading data for IFF


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PTC
Downloading data for DECK
Downloading data for CTRA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for SBAC
Downloading data for NTAP
Downloading data for ES


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for VLTO
Downloading data for STLD
Downloading data for BAX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for APTV
Downloading data for STE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for INVH
Downloading data for ULTA
Downloading data for MOH


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PPL
Downloading data for FE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for HBAN
Downloading data for ILMN
Downloading data for TYL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for BRO
Downloading data for FSLR
Downloading data for AEE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MKC
Downloading data for OMC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CBOE
Downloading data for DRI
Downloading data for SYF


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for WAT
Downloading data for J
Downloading data for RF


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CNP
Downloading data for CLX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Downloading data for ARE
Downloading data for EXPE



[*********************100%%**********************]  1 of 1 completed


Downloading data for WBD
Downloading data for HOLX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for LDOS
Downloading data for AVY


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for VTR
Downloading data for TDY
Downloading data for COO


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TER
Downloading data for CINF


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for DPZ
Downloading data for ATO


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CMS
Downloading data for PFG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TSN
Downloading data for UAL
Downloading data for NTRS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TXT
Downloading data for LH


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for LVS
Downloading data for EQT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for SWKS
Downloading data for STX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for IEX
Downloading data for EXPD


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for NRG
Downloading data for CFG
Downloading data for MRO


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for LUV
Downloading data for FDS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ESS
Downloading data for EG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MAS
Downloading data for WRB


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for VRSN
Downloading data for PKG
Downloading data for CE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AKAM
Downloading data for CF


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Downloading data for ENPH
Downloading data for ZBRA
Downloading data for K



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CCL
Downloading data for JBL
Downloading data for MAA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TRMB
Downloading data for DGX
Downloading data for BG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CAG
Downloading data for BBY


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for POOL
Downloading data for SNA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for NDSN
Downloading data for VTRS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for L
Downloading data for SWK


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for KEY
Downloading data for ALB


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for EPAM
Downloading data for HST
Downloading data for DOC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for JBHT
Downloading data for PNR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AMCR
Downloading data for LYV


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for WBA
Downloading data for ROL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for RVTY
Downloading data for LNT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for KIM
Downloading data for IPG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for LW
Downloading data for JKHY


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for WRK
Downloading data for EVRG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for IP
Downloading data for LKQ


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CRL
Downloading data for SJM
Downloading data for GEN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TAP
Downloading data for PODD
Downloading data for AES


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for UDR
Downloading data for QRVO
Downloading data for EMN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MGM
Downloading data for NI


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for JNPR
Downloading data for KMX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for HII
Downloading data for ALLE
Downloading data for FFIV


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CPT
Downloading data for BBWI


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for HRL
Downloading data for AOS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for UHS
Downloading data for TECH


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CTLT
Downloading data for APA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MOS
Downloading data for TFX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for INCY
Downloading data for REG
Downloading data for HSIC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for DAY
Downloading data for NWSA
Downloading data for PAYC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for WYNN
Downloading data for TPR
Downloading data for AIZ


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AAL
Downloading data for SOLV


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['SOLV']: Exception("%ticker%: Data doesn't exist for startDate = 1104555600, endDate = 1704085200")
[*********************100%%**********************]  1 of 1 completed


Downloading data for CPB
Downloading data for BF.B


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['BF.B']: Exception('%ticker%: No price data found, symbol may be delisted (1mo 2005-01-01 -> 2024-01-01)')
[*********************100%%**********************]  1 of 1 completed


Downloading data for MTCH
Downloading data for BXP


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for GNRC
Downloading data for PNW


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for HAS
Downloading data for CHRW


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ETSY
Downloading data for NCLH
Downloading data for BWA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CZR
Downloading data for FOXA
Downloading data for MKTX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for FRT
Downloading data for FMC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for RHI
Downloading data for DVA
Downloading data for BEN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CMA
Downloading data for IVZ


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for RL
Downloading data for GL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MHK
Downloading data for BIO


[*********************100%%**********************]  1 of 1 completed

Downloading data for PARA
Downloading data for FOX



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for NWS
All data downloaded and saved.


# FEATURE ENGINEERING


### TECHNICAL INDICATORS:

* MACD (Moving Average Convergence Divergence)
* RSI (Relative Strength Index)
* Bollinger Bands High (the low is commented out)
* ADX (Average Directional Index)
* ATR (Average True Range)
* CCI (Commodity Channel Index)
* MFI (Money Flow Index)
* Stochastic Oscillator
* Keltner Channel
* VWAP (Volume Weighted Average Price)
* EOM (Ease Of Movement)
* VPT (Volume Price Trend)

In [45]:
# 1. Load data
data_yf = pd.read_csv('data/historical_stock_data_final.csv')

In [46]:
data_yf

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker
0,2005-01-01,26.799999,27.100000,25.639999,26.280001,18.339539,1.521414e+09,MSFT
1,2005-02-01,26.250000,26.500000,25.129999,25.160000,17.557941,1.361126e+09,MSFT
2,2005-03-01,25.190001,25.790001,23.820000,24.170000,16.919111,1.541411e+09,MSFT
3,2005-04-01,24.240000,25.450001,23.940001,25.299999,17.710112,1.520254e+09,MSFT
4,2005-05-01,25.230000,26.090000,24.639999,25.799999,18.060112,1.270564e+09,MSFT
...,...,...,...,...,...,...,...,...
105109,2023-08-01,20.080000,22.139999,19.730000,22.000000,21.818541,2.056090e+07,NWS
105110,2023-09-01,22.059999,22.070000,20.129999,20.870001,20.697861,2.807930e+07,NWS
105111,2023-10-01,20.850000,23.170000,20.010000,21.440001,21.361782,3.251290e+07,NWS
105112,2023-11-01,21.360001,23.530001,21.190001,23.040001,22.955944,2.530900e+07,NWS


In [47]:
# 2. Add technical indicators

data_yf['macd'] = trend.MACD(data_yf['Close']).macd()
data_yf['macd_signal'] = trend.MACD(data_yf['Close']).macd_signal()
data_yf['rsi'] = momentum.RSIIndicator(data_yf['Close']).rsi()
data_yf['bb_high'] = volatility.BollingerBands(data_yf['Close']).bollinger_hband()
#data_yf['bb_low'] = volatility.BollingerBands(data_yf['Close']).bollinger_lband()
data_yf['adx'] = trend.ADXIndicator(data_yf['High'], data_yf['Low'], data_yf['Close']).adx()
data_yf['atr'] = volatility.AverageTrueRange(data_yf['High'], data_yf['Low'], data_yf['Close']).average_true_range()
data_yf['cci'] = trend.CCIIndicator(data_yf['High'], data_yf['Low'], data_yf['Close']).cci()
data_yf['mfi'] = volume.MFIIndicator(data_yf['High'], data_yf['Low'], data_yf['Close'], data_yf['Volume']).money_flow_index()
data_yf['so'] = momentum.StochasticOscillator(data_yf['High'], data_yf['Low'], data_yf['Close']).stoch()
# Calculate additional indicators
data_yf['kc'] = volatility.KeltnerChannel(data_yf['High'], data_yf['Low'], data_yf['Close']).keltner_channel_mband()
data_yf['vwap'] = volume.VolumeWeightedAveragePrice(data_yf['High'], data_yf['Low'], data_yf['Close'], data_yf['Volume']).volume_weighted_average_price()
data_yf['eom'] = volume.EaseOfMovementIndicator(data_yf['High'], data_yf['Low'], data_yf['Close'], data_yf['Volume']).ease_of_movement()
data_yf['vpt'] = volume.VolumePriceTrendIndicator(data_yf['Close'], data_yf['Volume']).volume_price_trend()

#### MOVING AVERAGES

In [48]:
# indicator columns to include
indicator_cols = [col for col in data_yf.columns if col not in ['win', 'gain_loss_pct', 'Open', 'High', 'Low', 'Close', 'Volume','Adj Close']]
# Only including the moving average of 4 weeks and 12 weeks since this is long term trading
lags_to_include = [4, 12]

for col in indicator_cols:
    for i in lags_to_include:
        data_yf[f'{col}_lag_{i}'] = data_yf[col].shift(i)

# Remove rows with NaN values after shifting
data_yf.dropna(inplace=True)


In [49]:
# 4. Calculate gain/loss percentage and assign -1, 0, 1
data_yf['gain_loss_pct'] = (data_yf['Close'] - data_yf['Open']) / data_yf['Open']
data_yf['win'] = np.where(data_yf['gain_loss_pct'] > 0.005, 1, 
                          np.where(data_yf['gain_loss_pct'] < -0.0025, -1, 0)) # 1 if 0.5% gain, -1 if 0.25% loss, 0 otherwise
# set date to datetime
data_yf['Date'] = pd.to_datetime(data_yf['Date'])


In [50]:
# 5. Remove stocks that have not existed on 01/01/2005
first_appearance_dates = data_yf.groupby('Ticker')['Date'].min().sort_values(ascending=False)
first_appearance_dates

Ticker
VLTO   2023-11-01
KVUE   2023-06-01
GEHC   2023-01-01
CEG    2022-02-01
ABNB   2021-01-01
          ...    
F      2005-01-01
EXR    2005-01-01
EXPD   2005-01-01
EXC    2005-01-01
ISRG   2005-01-01
Name: Date, Length: 499, dtype: datetime64[ns]

In [51]:
#check when the stocks first appeared
first_appearance_dates = pd.to_datetime(first_appearance_dates)

#cutoff date 2005-01-01
cutoff_date = pd.Timestamp('2005-01-01')

#Only keeop stocks that have appeared on/before 2005-01-01
valid_tickers = first_appearance_dates[first_appearance_dates <= cutoff_date].index

data_yf = data_yf[data_yf['Ticker'].isin(valid_tickers)]

In [52]:
# save data to csv before scaling
data_yf.to_csv('data/raw_data_yf.csv', index=False)

In [53]:
from sklearn.preprocessing import StandardScaler

# Define the columns to be standardized
float_columns = data_yf.select_dtypes(include=['float64']).columns

# Columns to exclude from standardization
exclude_columns = ['win', 'gain_loss_pct', 'Open', 'High', 'Low', 'Close', 'Adj Close']

# Remove excluded columns from the list of columns to be standardized
columns_to_scale = [col for col in float_columns if col not in exclude_columns]

columns_to_scale

['Volume',
 'macd',
 'macd_signal',
 'rsi',
 'bb_high',
 'adx',
 'atr',
 'cci',
 'mfi',
 'so',
 'kc',
 'vwap',
 'eom',
 'vpt',
 'macd_lag_4',
 'macd_lag_12',
 'macd_signal_lag_4',
 'macd_signal_lag_12',
 'rsi_lag_4',
 'rsi_lag_12',
 'bb_high_lag_4',
 'bb_high_lag_12',
 'adx_lag_4',
 'adx_lag_12',
 'atr_lag_4',
 'atr_lag_12',
 'cci_lag_4',
 'cci_lag_12',
 'mfi_lag_4',
 'mfi_lag_12',
 'so_lag_4',
 'so_lag_12',
 'kc_lag_4',
 'kc_lag_12',
 'vwap_lag_4',
 'vwap_lag_12',
 'eom_lag_4',
 'eom_lag_12',
 'vpt_lag_4',
 'vpt_lag_12']

In [54]:

# Initialize the StandardScaler
scaler = StandardScaler()

# Standardize only the specified columns
data_yf[columns_to_scale] = scaler.fit_transform(data_yf[columns_to_scale])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_yf[columns_to_scale] = scaler.fit_transform(data_yf[columns_to_scale])


In [55]:
data_yf

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,macd,macd_signal,...,kc_lag_4,kc_lag_12,vwap_lag_4,vwap_lag_12,eom_lag_4,eom_lag_12,vpt_lag_4,vpt_lag_12,gain_loss_pct,win
228,2005-01-01,1.156786,1.390893,1.117857,1.373214,1.162562,38.626341,AAPL,0.019306,0.603729,...,1.168618,1.153541,1.211573,1.167815,0.024431,-0.227915,-5.001520,-5.005480,0.187094,1
229,2005-02-01,1.375893,1.622857,1.367500,1.602143,1.356373,32.189698,AAPL,-0.697760,0.332323,...,1.179566,1.152156,1.235185,1.135996,0.075228,0.496524,-4.999812,-5.005301,0.164439,1
230,2005-03-01,1.606786,1.611071,1.386786,1.488214,1.259921,21.954315,AAPL,-1.251914,-0.004435,...,1.200310,1.150016,1.287250,1.112824,0.581598,0.159055,-4.996778,-5.000109,-0.073795,-1
231,2005-04-01,1.503214,1.587500,1.214286,1.287857,1.090299,29.056338,AAPL,-1.672339,-0.364604,...,1.225616,1.150026,1.348575,1.110430,0.072348,0.406995,-4.996955,-4.998494,-0.143264,-1
232,2005-05-01,1.293214,1.462143,1.182500,1.420000,1.202171,19.207524,AAPL,-1.982337,-0.719664,...,1.145532,1.159878,-0.080819,1.117653,-10.441043,0.410987,-6.138958,-4.996653,0.098039,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104710,2023-08-01,403.059998,419.000000,375.920013,400.200012,400.200012,-0.218175,BIO,-1.313996,-1.032681,...,2.767504,3.250799,2.414665,3.306907,-0.407752,0.128764,1.392980,1.392371,-0.007096,-1
104711,2023-09-01,403.940002,406.440002,349.269989,358.450012,358.450012,-0.218753,BIO,-1.420252,-1.132780,...,2.656697,3.209133,2.320586,3.203550,-1.924982,-2.363448,1.392934,1.392350,-0.112616,-1
104712,2023-10-01,355.540009,366.299988,261.589996,275.279999,275.279999,-0.215411,BIO,-1.703035,-1.273907,...,2.539979,3.140147,2.236475,3.003707,-0.491761,-2.682393,1.392938,1.392308,-0.225741,-1
104713,2023-11-01,274.170013,314.820007,267.859985,304.920013,304.920013,-0.213842,BIO,-1.828312,-1.413855,...,2.434953,3.089418,2.173890,2.825007,0.644527,-0.077603,1.392951,1.392354,0.112157,1


In [56]:
# create csv with features data
data_yf.to_csv('data/all_stock_data.csv', index=False)