In [38]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
import requests
from io import StringIO
from datetime import timedelta

# 1. 獲取 S&P 500 成分股列表
def get_sp500_tickers():
    url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'id': 'constituents'})
    tickers = pd.read_html(StringIO(str(table)))[0]['Symbol'].tolist()
    return tickers


# 2. 下載數據，並處理無法下載的股票
def get_stock_data(tickers, start_date, end_date):
    df = yf.download(tickers, start=start_date, end=end_date)['Adj Close']
    return df
    
# 獲取 S&P 500 成分股列表
sp500_tickers = get_sp500_tickers()
# 設定參數
bt_start_date = '2000-01-01'
bt_end_date = '2024-12-19'

# 下載前 50 支股票的數據（避免 API 限制）
top_tickers = sp500_tickers
data = get_stock_data(top_tickers, bt_start_date, bt_end_date)
#data.index = pd.to_datetime(data.index).strftime('%Y-%m-%d')
#data.index = data.index.strftime('%Y-%m-%d')


[*********************100%***********************]  503 of 503 completed

2 Failed downloads:
['BF.B']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (1d 2000-01-01 -> 2024-12-19)')
['BRK.B']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')


In [39]:
data.index = data.index.tz_localize(None)
data.to_csv('S&P_stock_data.csv')

In [40]:
test_data = pd.read_csv('S&P_stock_data.csv', index_col=0, parse_dates=True)

In [41]:
print(test_data.index)

DatetimeIndex(['2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06',
               '2000-01-07', '2000-01-10', '2000-01-11', '2000-01-12',
               '2000-01-13', '2000-01-14',
               ...
               '2024-12-05', '2024-12-06', '2024-12-09', '2024-12-10',
               '2024-12-11', '2024-12-12', '2024-12-13', '2024-12-16',
               '2024-12-17', '2024-12-18'],
              dtype='datetime64[ns]', name='Date', length=6281, freq=None)


In [42]:
test_data.loc['2014-01']

Unnamed: 0_level_0,A,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WTW,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-02,36.747051,17.234301,33.291313,,30.904148,19.123333,67.04966,59.290001,38.885151,31.97467,...,97.533386,20.373344,166.445068,19.504488,63.254978,29.677324,43.759407,82.166473,53.18,29.8307
2014-01-03,37.211197,16.855732,33.496254,,31.2356,18.756666,67.272789,59.16,39.145531,32.123425,...,96.650314,20.418949,164.813705,19.497398,63.102787,29.946627,44.033295,82.522797,53.580002,29.544918
2014-01-06,37.028172,16.947643,32.272961,,31.647863,18.58,66.56205,58.119999,38.924599,32.190361,...,96.672409,20.210453,164.69603,19.391045,63.197922,29.894524,43.998333,83.057266,53.400002,29.480389
2014-01-07,37.557682,16.826448,32.337017,,31.405334,18.586666,67.371964,58.970001,39.129742,31.870535,...,96.959381,20.269102,169.446991,19.724283,64.092056,29.981388,44.616062,84.714142,53.950001,29.591009
2014-01-08,38.172211,16.933002,32.253757,,31.688272,18.476667,67.892624,58.900002,39.224442,31.5061,...,97.113892,20.080153,172.625549,19.646292,63.882786,29.964006,44.598591,86.78965,53.91,29.259146
2014-01-09,38.18528,16.716764,32.804558,,31.744865,18.643333,68.553795,59.09,39.074512,31.357334,...,97.798286,20.197432,172.188354,19.724283,63.261345,30.085632,43.736099,85.934525,54.16,29.46195
2014-01-10,38.525234,16.605221,32.599606,,31.987364,18.536667,68.760414,59.529999,39.082409,31.223463,...,97.599579,20.236523,173.945755,20.029142,63.743271,30.328899,43.718613,86.121574,54.279999,29.996626
2014-01-13,38.525234,16.69215,32.167088,,31.792282,18.379999,66.991814,58.599998,38.561626,30.888765,...,97.290543,19.923794,171.095215,19.880262,62.494019,30.424473,42.780373,85.204079,53.66,29.296021
2014-01-14,39.146297,17.024298,32.664162,,32.166203,18.49,67.727325,60.369999,39.224442,31.483778,...,97.908669,20.15834,174.568024,19.936975,62.855473,30.91099,42.675472,86.228455,54.599998,29.360554
2014-01-15,39.44701,17.366093,32.347843,,32.133694,18.299999,69.603401,61.68,39.469048,31.573036,...,98.306053,20.230005,176.670242,19.866083,62.639881,31.597303,42.857063,87.039101,54.619999,29.323681


In [43]:
test_data.index = test_data.index.tz_localize(None)

In [44]:
test_data.loc['2014-01']

Unnamed: 0_level_0,A,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WTW,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-02,36.747051,17.234301,33.291313,,30.904148,19.123333,67.04966,59.290001,38.885151,31.97467,...,97.533386,20.373344,166.445068,19.504488,63.254978,29.677324,43.759407,82.166473,53.18,29.8307
2014-01-03,37.211197,16.855732,33.496254,,31.2356,18.756666,67.272789,59.16,39.145531,32.123425,...,96.650314,20.418949,164.813705,19.497398,63.102787,29.946627,44.033295,82.522797,53.580002,29.544918
2014-01-06,37.028172,16.947643,32.272961,,31.647863,18.58,66.56205,58.119999,38.924599,32.190361,...,96.672409,20.210453,164.69603,19.391045,63.197922,29.894524,43.998333,83.057266,53.400002,29.480389
2014-01-07,37.557682,16.826448,32.337017,,31.405334,18.586666,67.371964,58.970001,39.129742,31.870535,...,96.959381,20.269102,169.446991,19.724283,64.092056,29.981388,44.616062,84.714142,53.950001,29.591009
2014-01-08,38.172211,16.933002,32.253757,,31.688272,18.476667,67.892624,58.900002,39.224442,31.5061,...,97.113892,20.080153,172.625549,19.646292,63.882786,29.964006,44.598591,86.78965,53.91,29.259146
2014-01-09,38.18528,16.716764,32.804558,,31.744865,18.643333,68.553795,59.09,39.074512,31.357334,...,97.798286,20.197432,172.188354,19.724283,63.261345,30.085632,43.736099,85.934525,54.16,29.46195
2014-01-10,38.525234,16.605221,32.599606,,31.987364,18.536667,68.760414,59.529999,39.082409,31.223463,...,97.599579,20.236523,173.945755,20.029142,63.743271,30.328899,43.718613,86.121574,54.279999,29.996626
2014-01-13,38.525234,16.69215,32.167088,,31.792282,18.379999,66.991814,58.599998,38.561626,30.888765,...,97.290543,19.923794,171.095215,19.880262,62.494019,30.424473,42.780373,85.204079,53.66,29.296021
2014-01-14,39.146297,17.024298,32.664162,,32.166203,18.49,67.727325,60.369999,39.224442,31.483778,...,97.908669,20.15834,174.568024,19.936975,62.855473,30.91099,42.675472,86.228455,54.599998,29.360554
2014-01-15,39.44701,17.366093,32.347843,,32.133694,18.299999,69.603401,61.68,39.469048,31.573036,...,98.306053,20.230005,176.670242,19.866083,62.639881,31.597303,42.857063,87.039101,54.619999,29.323681


In [45]:
test_data.loc['2014-01-02':'2014-01-09']

Unnamed: 0_level_0,A,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WTW,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-02,36.747051,17.234301,33.291313,,30.904148,19.123333,67.04966,59.290001,38.885151,31.97467,...,97.533386,20.373344,166.445068,19.504488,63.254978,29.677324,43.759407,82.166473,53.18,29.8307
2014-01-03,37.211197,16.855732,33.496254,,31.2356,18.756666,67.272789,59.16,39.145531,32.123425,...,96.650314,20.418949,164.813705,19.497398,63.102787,29.946627,44.033295,82.522797,53.580002,29.544918
2014-01-06,37.028172,16.947643,32.272961,,31.647863,18.58,66.56205,58.119999,38.924599,32.190361,...,96.672409,20.210453,164.69603,19.391045,63.197922,29.894524,43.998333,83.057266,53.400002,29.480389
2014-01-07,37.557682,16.826448,32.337017,,31.405334,18.586666,67.371964,58.970001,39.129742,31.870535,...,96.959381,20.269102,169.446991,19.724283,64.092056,29.981388,44.616062,84.714142,53.950001,29.591009
2014-01-08,38.172211,16.933002,32.253757,,31.688272,18.476667,67.892624,58.900002,39.224442,31.5061,...,97.113892,20.080153,172.625549,19.646292,63.882786,29.964006,44.598591,86.78965,53.91,29.259146
2014-01-09,38.18528,16.716764,32.804558,,31.744865,18.643333,68.553795,59.09,39.074512,31.357334,...,97.798286,20.197432,172.188354,19.724283,63.261345,30.085632,43.736099,85.934525,54.16,29.46195


In [48]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# 定義目標 URL（以 Wikipedia 羅素 1000 頁面為例）
url = "https://en.wikipedia.org/wiki/Russell_1000_Index"

# 發送 HTTP GET 請求
response = requests.get(url)
if response.status_code == 200:
    # 解析網頁內容
    soup = BeautifulSoup(response.content, "html.parser")
    
    # 找到成分股表格
    table = soup.find("table", {"class": "wikitable sortable"})
    
    # 確認表格是否存在
    if table:
        # 提取表格內容
        rows = table.find_all("tr")
        data = []
        for row in rows[1:]:  # 跳過表頭
            cells = row.find_all("td")
            if len(cells) > 1:  # 確保行有足夠的列
                company = cells[0].text.strip()
                symbol = cells[1].text.strip()
                sector = cells[2].text.strip() if len(cells) > 2 else None
                data.append({"Company": company, "Symbol": symbol, "Sector": sector})
        
        # 將資料轉為 DataFrame 並保存為 CSV
        df = pd.DataFrame(data)
        df.to_csv("russell_1000_components.csv", index=False)
        print("已成功提取羅素 1000 指數成分股並儲存為 CSV！")
    else:
        print("無法找到目標表格，請檢查 URL 或網頁結構是否變更。")
else:
    print(f"無法訪問網頁，狀態碼：{response.status_code}")


已成功提取羅素 1000 指數成分股並儲存為 CSV！


In [53]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
import requests
from io import StringIO
from datetime import timedelta



# 2. 下載數據，並處理無法下載的股票
def get_stock_data(tickers, start_date, end_date):
    df = yf.download(tickers, start=start_date, end=end_date)['Adj Close']
    return df
    
# 獲取 S&P 500 成分股列表
russel_1000_tickers = list(pd.read_csv('russell_1000_components.csv')['Symbol'])
#print(russel_1000_tickers)
# 設定參數
#bt_start_date = '2000-01-01'
bt_start_date = '2000-01-01'
bt_end_date = '2024-12-19'

# 下載前 50 支股票的數據（避免 API 限制）
data = get_stock_data(russel_1000_tickers , bt_start_date, bt_end_date)


#data.index = pd.to_datetime(data.index).strftime('%Y-%m-%d')
#data.index = data.index.strftime('%Y-%m-%d')


[*********************100%***********************]  1006 of 1006 completed

7 Failed downloads:
['BF.A', 'CWEN.A', 'HEI.A', 'BRK.B']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')
['BF.B', 'LEN.B', 'UHAL.B']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (1d 2000-01-01 -> 2024-12-19)')


In [54]:
data.index = data.index.tz_localize(None)
data.to_csv('russell_1000_stock_data.csv')