# Retrieving Data

In [2]:
import numpy as np
import pandas as pd
from datetime import time
from ta import trend, volatility, momentum, volume
import yfinance as yf
import time

# Source Data

In [6]:
# Define the function to get the sector for each stock
def get_sector(ticker):
    try:
        stock_info = yf.Ticker(ticker).info
        return stock_info.get('sector', 'N/A')  # Returns 'N/A' if sector information is not available
    except Exception as e:
        #print(f"Could not fetch sector for {ticker}: {e}")
        return 'N/A'
    

# get ticker symbols
tickers = pd.read_csv('data/tickers.csv')
tickers = tickers['Symbol'].tolist()

sectors = []

# Loop through each ticker and fetch the sector
for ticker in tickers:
    print(f"Fetching sector for {ticker}")
    sector = get_sector(ticker)
    sectors.append(sector)
   # time.sleep(0.5)  # Delay to avoid hitting API rate limits

Fetching sector for MSFT
Fetching sector for AAPL
Fetching sector for NVDA
Fetching sector for AMZN
Fetching sector for GOOGL
Fetching sector for META
Fetching sector for GOOG
Fetching sector for BRK.B
Fetching sector for AVGO
Fetching sector for LLY
Fetching sector for JPM
Fetching sector for XOM
Fetching sector for TSLA
Fetching sector for UNH
Fetching sector for V
Fetching sector for MA
Fetching sector for PG
Fetching sector for JNJ
Fetching sector for HD
Fetching sector for MRK
Fetching sector for COST
Fetching sector for CVX
Fetching sector for ABBV
Fetching sector for CRM
Fetching sector for BAC
Fetching sector for WMT
Fetching sector for AMD
Fetching sector for NFLX
Fetching sector for PEP
Fetching sector for KO
Fetching sector for TMO
Fetching sector for ADBE
Fetching sector for WFC
Fetching sector for LIN
Fetching sector for DIS
Fetching sector for MCD
Fetching sector for CSCO
Fetching sector for ACN
Fetching sector for ORCL
Fetching sector for ABT
Fetching sector for QCOM
Fet

In [8]:
# get tickers
tickers = pd.read_csv('data/tickers.csv')
tickers['Sector'] = sectors
tickers.to_csv('data/tickers.csv', index=False)

   #                Company Symbol Portfolio%                  Sector
0  1         Microsoft Corp   MSFT      7.05%              Technology
1  2             Apple Inc.   AAPL      5.74%              Technology
2  3            Nvidia Corp   NVDA      5.06%              Technology
3  4         Amazon.com Inc   AMZN      3.83%       Consumer Cyclical
4  5  Alphabet Inc. Class A  GOOGL      2.37%  Communication Services


In [36]:
#get ticker symbols
ticker_symbols = tickers['Symbol'].tolist()

# Split tickers into batches of 100 for we don't hit the API rate limit
ticker_batches = [ticker_symbols[i:i + 100] for i in range(0, len(ticker_symbols), 100)]

# Start and end dates
start_date = '2005-01-01'
end_date = '2024-01-01'

# Create an empty DataFrame to store all data
all_data = pd.DataFrame()

# Loop through each batch of tickers and fetch the historical data
for batch in ticker_batches:
    for ticker in batch:
        print(f"Downloading data for {ticker}")
        data = yf.download(ticker, start=start_date, end=end_date, interval="1wk")
        data.dropna(inplace=True)  # Clean data by dropping NAs
        data['Ticker'] = ticker  # Add a column to identify the stock
        all_data = pd.concat([all_data, data], axis=0)
    time.sleep(10)  # sleep tim to avoid hitting API rate limits

# Save the final result to a CSV file
all_data.to_csv('data/historical_stock_data_final.csv')

print("All data downloaded and saved.")


Downloading data for MSFT


[*********************100%%**********************]  1 of 1 completed


Downloading data for AAPL


[*********************100%%**********************]  1 of 1 completed


Downloading data for NVDA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AMZN
Downloading data for GOOGL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for META
Downloading data for GOOG


[*********************100%%**********************]  1 of 1 completed


Downloading data for BRK.B


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['BRK.B']: Exception('%ticker%: No timezone found, symbol may be delisted')


Downloading data for AVGO


[*********************100%%**********************]  1 of 1 completed


Downloading data for LLY


[*********************100%%**********************]  1 of 1 completed


Downloading data for JPM


[*********************100%%**********************]  1 of 1 completed


Downloading data for XOM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TSLA
Downloading data for UNH


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for V
Downloading data for MA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Downloading data for PG





Downloading data for JNJ


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for HD
Downloading data for MRK


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for COST
Downloading data for CVX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ABBV
Downloading data for CRM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for BAC
Downloading data for WMT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AMD
Downloading data for NFLX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PEP
Downloading data for KO


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TMO
Downloading data for ADBE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for WFC
Downloading data for LIN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for DIS
Downloading data for MCD


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CSCO
Downloading data for ACN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ORCL
Downloading data for ABT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for QCOM
Downloading data for INTU


[*********************100%%**********************]  1 of 1 completed


Downloading data for GE


[*********************100%%**********************]  1 of 1 completed


Downloading data for CAT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AMAT
Downloading data for VZ


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for DHR
Downloading data for TXN


[*********************100%%**********************]  1 of 1 completed


Downloading data for COP


[*********************100%%**********************]  1 of 1 completed


Downloading data for IBM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CMCSA
Downloading data for NOW


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for UNP
Downloading data for PM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AMGN
Downloading data for PFE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for UBER
Downloading data for GS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for NEE
Downloading data for INTC


[*********************100%%**********************]  1 of 1 completed


Downloading data for AXP


[*********************100%%**********************]  1 of 1 completed


Downloading data for RTX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for SPGI
Downloading data for LOW


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ISRG
Downloading data for ETN


[*********************100%%**********************]  1 of 1 completed


Downloading data for HON


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MU
Downloading data for ELV


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for BKNG
Downloading data for PGR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for LRCX
Downloading data for T


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Downloading data for C





Downloading data for MS


[*********************100%%**********************]  1 of 1 completed


Downloading data for NKE


[*********************100%%**********************]  1 of 1 completed


Downloading data for SYK


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for SCHW
Downloading data for TJX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for BSX
Downloading data for UPS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for BLK
Downloading data for MDT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CI
Downloading data for DE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for VRTX
Downloading data for ADI


[*********************100%%**********************]  1 of 1 completed


Downloading data for SBUX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ADP
Downloading data for CB


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for LMT
Downloading data for MMC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PLD
Downloading data for KLAC


[*********************100%%**********************]  1 of 1 completed


Downloading data for BA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MDLZ
Downloading data for FI


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for REGN
Downloading data for PANW


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for BMY


[*********************100%%**********************]  1 of 1 completed

Downloading data for BX



[*********************100%%**********************]  1 of 1 completed


Downloading data for CMG
Downloading data for TMUS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CVS
Downloading data for SNPS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for GILD
Downloading data for AMT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for SO
Downloading data for EOG


[*********************100%%**********************]  1 of 1 completed


Downloading data for WM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CDNS
Downloading data for MO


[*********************100%%**********************]  1 of 1 completed


Downloading data for TGT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CME
Downloading data for ICE


[*********************100%%**********************]  1 of 1 completed


Downloading data for DUK


[*********************100%%**********************]  1 of 1 completed


Downloading data for CL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MPC
Downloading data for ZTS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for FCX
Downloading data for SHW


[*********************100%%**********************]  1 of 1 completed


Downloading data for APH


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MCK
Downloading data for ABNB


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PH
Downloading data for PYPL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for SLB
Downloading data for TDG


[*********************100%%**********************]  1 of 1 completed


Downloading data for TT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for EQIX
Downloading data for NOC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ITW
Downloading data for BDX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CSX
Downloading data for ANET


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PSX
Downloading data for GD


[*********************100%%**********************]  1 of 1 completed


Downloading data for USB


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PXD
Downloading data for EMR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for NXPI
Downloading data for PNC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ORLY
Downloading data for FDX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for HCA
Downloading data for CEG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MAR
Downloading data for MCO


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PCAR
Downloading data for CTAS


[*********************100%%**********************]  1 of 1 completed


Downloading data for MSI


[*********************100%%**********************]  1 of 1 completed


Downloading data for AON


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for VLO
Downloading data for ECL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ROP
Downloading data for COF


[*********************100%%**********************]  1 of 1 completed


Downloading data for NSC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for GM
Downloading data for APD


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for EW
Downloading data for AIG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for WELL
Downloading data for HLT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AZO
Downloading data for TFC


[*********************100%%**********************]  1 of 1 completed


Downloading data for MMM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CARR
Downloading data for MCHP


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AJG
Downloading data for F


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for NEM
Downloading data for TRV


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CPRT
Downloading data for DXCM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Downloading data for WMB





Downloading data for OKE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for URI
Downloading data for ADSK


[*********************100%%**********************]  1 of 1 completed


Downloading data for SPG


[*********************100%%**********************]  1 of 1 completed


Downloading data for KMB


[*********************100%%**********************]  1 of 1 completed


Downloading data for ROST


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for HES
Downloading data for SRE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AEP
Downloading data for OXY


[*********************100%%**********************]  1 of 1 completed


Downloading data for O


[*********************100%%**********************]  1 of 1 completed


Downloading data for ALL


[*********************100%%**********************]  1 of 1 completed


Downloading data for AFL


[*********************100%%**********************]  1 of 1 completed


Downloading data for JCI


[*********************100%%**********************]  1 of 1 completed


Downloading data for DHI


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TEL
Downloading data for BK


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for DLR
Downloading data for MET


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for SMCI
Downloading data for NUE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for IQV
Downloading data for D


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for STZ
Downloading data for LULU


[*********************100%%**********************]  1 of 1 completed


Downloading data for GEV


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['GEV']: Exception("%ticker%: Data doesn't exist for startDate = 1104555600, endDate = 1704085200")


Downloading data for IDXX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AMP
Downloading data for FIS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for GWW
Downloading data for AME


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for FTNT
Downloading data for PSA


[*********************100%%**********************]  1 of 1 completed


Downloading data for LHX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CCI
Downloading data for A


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for GIS
Downloading data for DOW


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for COR
Downloading data for PRU


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for YUM
Downloading data for CMI


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CNC
Downloading data for MNST


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for RSG
Downloading data for FAST


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for SYY
Downloading data for CTVA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for LEN
Downloading data for PAYX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PWR
Downloading data for IR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MSCI
Downloading data for CSGP


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for OTIS
Downloading data for HUM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MLM
Downloading data for FANG


[*********************100%%**********************]  1 of 1 completed


Downloading data for EXC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for GEHC
Downloading data for PCG


[*********************100%%**********************]  1 of 1 completed


Downloading data for KR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MRNA
Downloading data for KMI


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for KDP
Downloading data for IT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for VMC
Downloading data for HAL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for EL
Downloading data for DVN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PEG


[*********************100%%**********************]  1 of 1 completed


Downloading data for ACGL
Downloading data for CTSH


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for RCL
Downloading data for BKR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ODFL
Downloading data for KVUE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CDW
Downloading data for GPN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MPWR
Downloading data for ROK


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for DAL
Downloading data for ED


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for VRSK
Downloading data for ADM


[*********************100%%**********************]  1 of 1 completed


Downloading data for RMD


[*********************100%%**********************]  1 of 1 completed


Downloading data for DFS


[*********************100%%**********************]  1 of 1 completed


Downloading data for DD


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for XYL
Downloading data for EA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for DG
Downloading data for PPG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for KHC
Downloading data for BIIB


[*********************100%%**********************]  1 of 1 completed


Downloading data for XEL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TSCO
Downloading data for WAB


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for VICI
Downloading data for ON


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ANSS
Downloading data for HIG


[*********************100%%**********************]  1 of 1 completed


Downloading data for EXR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for HSY
Downloading data for FICO


[*********************100%%**********************]  1 of 1 completed


Downloading data for EFX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for EBAY
Downloading data for AVB


[*********************100%%**********************]  1 of 1 completed


Downloading data for EIX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MTD
Downloading data for FTV


[*********************100%%**********************]  1 of 1 completed


Downloading data for WST


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for KEYS
Downloading data for CHD


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for HWM
Downloading data for CBRE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Downloading data for LYB
Downloading data for WTW



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TRGP
Downloading data for WEC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TROW
Downloading data for CAH


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for DLTR
Downloading data for DOV


[*********************100%%**********************]  1 of 1 completed


Downloading data for FITB


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CHTR


[*********************100%%**********************]  1 of 1 completed

Downloading data for ZBH
Downloading data for PHM



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for NVR
Downloading data for MTB


[*********************100%%**********************]  1 of 1 completed


Downloading data for HPQ


[*********************100%%**********************]  1 of 1 completed


Downloading data for GLW


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AWK
Downloading data for BLDR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Downloading data for RJF





Downloading data for WDC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TTWO
Downloading data for BR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for NDAQ
Downloading data for WY


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for STT
Downloading data for IRM


[*********************100%%**********************]  1 of 1 completed


Downloading data for DTE


[*********************100%%**********************]  1 of 1 completed


Downloading data for ETR


[*********************100%%**********************]  1 of 1 completed


Downloading data for GPC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for EQR
Downloading data for ALGN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for HPE
Downloading data for GRMN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CPAY
Downloading data for BALL


[*********************100%%**********************]  1 of 1 completed


Downloading data for HUBB


[*********************100%%**********************]  1 of 1 completed


Downloading data for AXON


[*********************100%%**********************]  1 of 1 completed


Downloading data for IFF


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PTC
Downloading data for DECK


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CTRA
Downloading data for SBAC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for NTAP
Downloading data for ES


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for VLTO
Downloading data for STLD


[*********************100%%**********************]  1 of 1 completed


Downloading data for BAX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for APTV
Downloading data for STE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for INVH
Downloading data for ULTA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MOH
Downloading data for PPL


[*********************100%%**********************]  1 of 1 completed


Downloading data for FE


[*********************100%%**********************]  1 of 1 completed


Downloading data for HBAN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ILMN
Downloading data for TYL


[*********************100%%**********************]  1 of 1 completed


Downloading data for BRO


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for FSLR
Downloading data for AEE


[*********************100%%**********************]  1 of 1 completed


Downloading data for MKC


[*********************100%%**********************]  1 of 1 completed


Downloading data for OMC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CBOE
Downloading data for DRI


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for SYF
Downloading data for WAT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for J
Downloading data for RF


[*********************100%%**********************]  1 of 1 completed


Downloading data for CNP


[*********************100%%**********************]  1 of 1 completed


Downloading data for CLX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for ARE
Downloading data for EXPE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for WBD
Downloading data for HOLX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for LDOS
Downloading data for AVY


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for VTR
Downloading data for TDY


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for COO
Downloading data for TER


[*********************100%%**********************]  1 of 1 completed


Downloading data for CINF


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Downloading data for DPZ



[*********************100%%**********************]  1 of 1 completed


Downloading data for ATO
Downloading data for CMS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PFG
Downloading data for TSN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for UAL
Downloading data for NTRS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TXT
Downloading data for LH


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for LVS
Downloading data for EQT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for SWKS
Downloading data for STX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for IEX
Downloading data for EXPD


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for NRG
Downloading data for CFG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MRO
Downloading data for LUV


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for FDS
Downloading data for ESS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for EG
Downloading data for MAS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for WRB
Downloading data for VRSN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PKG
Downloading data for CE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AKAM
Downloading data for CF


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Downloading data for ENPH
Downloading data for ZBRA



[*********************100%%**********************]  1 of 1 completed


Downloading data for K


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CCL
Downloading data for JBL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MAA
Downloading data for TRMB


[*********************100%%**********************]  1 of 1 completed


Downloading data for DGX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for BG
Downloading data for CAG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for BBY
Downloading data for POOL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for SNA
Downloading data for NDSN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for VTRS
Downloading data for L


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for SWK
Downloading data for KEY


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Downloading data for ALB



[*********************100%%**********************]  1 of 1 completed


Downloading data for EPAM
Downloading data for HST


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for DOC
Downloading data for JBHT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PNR
Downloading data for AMCR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for LYV
Downloading data for WBA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Downloading data for ROL
Downloading data for RVTY



[*********************100%%**********************]  1 of 1 completed


Downloading data for LNT


[*********************100%%**********************]  1 of 1 completed


Downloading data for KIM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for IPG
Downloading data for LW


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for JKHY
Downloading data for WRK


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for EVRG
Downloading data for IP


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for LKQ
Downloading data for CRL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for SJM
Downloading data for GEN


[*********************100%%**********************]  1 of 1 completed


Downloading data for TAP


[*********************100%%**********************]  1 of 1 completed


Downloading data for PODD


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for AES
Downloading data for UDR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for QRVO
Downloading data for EMN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MGM
Downloading data for NI


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for JNPR
Downloading data for KMX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for HII
Downloading data for ALLE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for FFIV
Downloading data for CPT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for BBWI
Downloading data for HRL


[*********************100%%**********************]  1 of 1 completed


Downloading data for AOS


[*********************100%%**********************]  1 of 1 completed


Downloading data for UHS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TECH
Downloading data for CTLT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for APA
Downloading data for MOS


[*********************100%%**********************]  1 of 1 completed


Downloading data for TFX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for INCY
Downloading data for REG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for HSIC
Downloading data for DAY


[*********************100%%**********************]  1 of 1 completed


Downloading data for NWSA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for PAYC
Downloading data for WYNN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for TPR
Downloading data for AIZ


[*********************100%%**********************]  1 of 1 completed


Downloading data for AAL


[*********************100%%**********************]  1 of 1 completed


Downloading data for SOLV


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['SOLV']: Exception("%ticker%: Data doesn't exist for startDate = 1104555600, endDate = 1704085200")


Downloading data for CPB


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['BF.B']: Exception('%ticker%: No price data found, symbol may be delisted (1wk 2005-01-01 -> 2024-01-01)')


Downloading data for BF.B
Downloading data for MTCH


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for BXP


[*********************100%%**********************]  1 of 1 completed


Downloading data for GNRC
Downloading data for PNW


[*********************100%%**********************]  1 of 1 completed


Downloading data for HAS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CHRW
Downloading data for ETSY


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for NCLH
Downloading data for BWA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CZR
Downloading data for FOXA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MKTX
Downloading data for FRT


[*********************100%%**********************]  1 of 1 completed


Downloading data for FMC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for RHI
Downloading data for DVA


[*********************100%%**********************]  1 of 1 completed


Downloading data for BEN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for CMA
Downloading data for IVZ


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for RL
Downloading data for GL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for MHK
Downloading data for BIO


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Downloading data for PARA
Downloading data for FOX



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Downloading data for NWS
All data downloaded and saved.


# FEATURE ENGINEERING


### TECHNICAL INDICATORS:

* MACD (Moving Average Convergence Divergence)
* RSI (Relative Strength Index)
* Bollinger Bands High (the low is commented out)
* ADX (Average Directional Index)
* ATR (Average True Range)
* CCI (Commodity Channel Index)
* MFI (Money Flow Index)
* Stochastic Oscillator
* Keltner Channel
* VWAP (Volume Weighted Average Price)
* EOM (Ease Of Movement)
* VPT (Volume Price Trend)

In [4]:
# 1. Load data
data_yf = pd.read_csv('data/historical_stock_data_final.csv')

In [5]:
data_yf

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker
0,2005-01-01,26.799999,27.100000,26.620001,26.670000,18.611694,392522300.0,MSFT
1,2005-01-08,26.600000,26.860001,26.040001,26.120001,18.227879,390071600.0,MSFT
2,2005-01-15,26.030001,26.350000,25.639999,25.650000,17.899891,262141600.0,MSFT
3,2005-01-22,25.760000,26.650000,25.639999,26.180000,18.269747,405236700.0,MSFT
4,2005-01-29,26.350000,26.520000,26.100000,26.320000,18.367455,332545200.0,MSFT
...,...,...,...,...,...,...,...,...
457157,2023-11-27,22.910000,23.530001,22.780001,23.150000,23.065542,5837000.0,NWS
457158,2023-12-04,23.059999,23.469999,22.209999,22.940001,22.856310,3872900.0,NWS
457159,2023-12-11,22.950001,24.600000,22.850000,24.530001,24.440508,6877000.0,NWS
457160,2023-12-18,24.570000,25.020000,24.320000,24.940001,24.849012,3202000.0,NWS


In [6]:
# 2. Clean data_yf: DROP NAs
data_yf.dropna(inplace=True)

# 3. Add technical indicators

data_yf['macd'] = trend.MACD(data_yf['Close']).macd()
data_yf['macd_signal'] = trend.MACD(data_yf['Close']).macd_signal()
data_yf['rsi'] = momentum.RSIIndicator(data_yf['Close']).rsi()
data_yf['bb_high'] = volatility.BollingerBands(data_yf['Close']).bollinger_hband()
#data_yf['bb_low'] = volatility.BollingerBands(data_yf['Close']).bollinger_lband()
data_yf['adx'] = trend.ADXIndicator(data_yf['High'], data_yf['Low'], data_yf['Close']).adx()
data_yf['atr'] = volatility.AverageTrueRange(data_yf['High'], data_yf['Low'], data_yf['Close']).average_true_range()
data_yf['cci'] = trend.CCIIndicator(data_yf['High'], data_yf['Low'], data_yf['Close']).cci()
data_yf['mfi'] = volume.MFIIndicator(data_yf['High'], data_yf['Low'], data_yf['Close'], data_yf['Volume']).money_flow_index()
data_yf['so'] = momentum.StochasticOscillator(data_yf['High'], data_yf['Low'], data_yf['Close']).stoch()
# Calculate additional indicators
data_yf['kc'] = volatility.KeltnerChannel(data_yf['High'], data_yf['Low'], data_yf['Close']).keltner_channel_mband()
data_yf['vwap'] = volume.VolumeWeightedAveragePrice(data_yf['High'], data_yf['Low'], data_yf['Close'], data_yf['Volume']).volume_weighted_average_price()
data_yf['eom'] = volume.EaseOfMovementIndicator(data_yf['High'], data_yf['Low'], data_yf['Close'], data_yf['Volume']).ease_of_movement()
data_yf['vpt'] = volume.VolumePriceTrendIndicator(data_yf['Close'], data_yf['Volume']).volume_price_trend()

In [7]:
data_yf.columns


Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Ticker',
       'macd', 'macd_signal', 'rsi', 'bb_high', 'adx', 'atr', 'cci', 'mfi',
       'so', 'kc', 'vwap', 'eom', 'vpt'],
      dtype='object')

In [8]:
data_yf.shape

(457162, 21)

#### MOVING AVERAGES

In [9]:
# Create lagged features for each indicator for the previous 5 days
indicator_cols = [col for col in data_yf.columns if col not in ['win', 'gain_loss_pct', 'Open', 'High', 'Low', 'Close', 'Volume','Adj Close']]
# Only including the moving average of 4 weeks and 12 weeks since this is long term trading
lags_to_include = [4, 12]

for col in indicator_cols:
    for i in lags_to_include:
        data_yf[f'{col}_lag_{i}'] = data_yf[col].shift(i)

# Remove rows with NaN values after shifting
data_yf.dropna(inplace=True)


In [10]:
# 4. Calculate gain/loss percentage and assign -1, 0, 1
data_yf['gain_loss_pct'] = (data_yf['Close'] - data_yf['Open']) / data_yf['Open']
data_yf['win'] = np.where(data_yf['gain_loss_pct'] > 0.005, 1, 
                          np.where(data_yf['gain_loss_pct'] < -0.0025, -1, 0)) # 1 if 0.5% gain, -1 if 0.25% loss, 0 otherwise
data_yf['Date'] = pd.to_datetime(data_yf['Date'])


In [11]:
# 5. Remove stocks that have not existed before 01/01/2005
first_appearance_dates = data_yf.groupby('Ticker')['Date'].min().sort_values(ascending=False)
first_appearance_dates

Ticker
VLTO   2023-10-02
KVUE   2023-05-01
GEHC   2022-12-12
CEG    2022-01-17
ABNB   2020-12-07
          ...    
F      2005-01-01
EXR    2005-01-01
EXPD   2005-01-01
EXC    2005-01-01
ISRG   2005-01-01
Name: Date, Length: 499, dtype: datetime64[ns]

In [12]:
#check when the stocks first appeared
first_appearance_dates = pd.to_datetime(first_appearance_dates)

#cutoff date 2005-01-01
cutoff_date = pd.Timestamp('2005-01-01')

#Only keeop stocks that have appeared on/before 2005-01-01
valid_tickers = first_appearance_dates[first_appearance_dates <= cutoff_date].index

data_yf = data_yf[data_yf['Ticker'].isin(valid_tickers)]

In [13]:
data_yf.to_csv('data/raw_data_yf.csv', index=False)

In [14]:
from sklearn.preprocessing import StandardScaler

float_columns = data_yf.select_dtypes(include=['float64']).columns

# Initialize the StandardScaler
scaler = StandardScaler()

# Standardize only the float columns
data_yf[float_columns] = scaler.fit_transform(data_yf[float_columns])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_yf[float_columns] = scaler.fit_transform(data_yf[float_columns])


In [15]:
# create csv with features data
data_yf.to_csv('data/all_stock_data.csv', index=False)