# import library

In [1]:
import datetime as dt
import pandas as pd
import requests

# 1. Wiki Pedia 데이터 추출

In [2]:
# 봇 방지용 헤더
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
}
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'

wiki_response = requests.get(url, headers=headers)

if wiki_response.status_code == 200:
    wiki_data = pd.read_html(wiki_response.text)
    print(f"We have a {type(wiki_data)} with length {len(wiki_data)}.")
    # print(wiki_data)
else:
    print(f"Error: {wiki_response.status_code}")

  wiki_data = pd.read_html(wiki_response.text)


We have a <class 'list'> with length 2.


# 2. 현재 S&P 500 종목

- Symbol : ticker
- Security : 종목명
- GICS Sector : 섹터
- GICS Sub-Industry : 하위 섹터
- Headquarters Location
- Date added
- CIK : 고유 식별 번호
- Founded

GICS는 11개의 주요 섹터로 구성되어 있습니다.

에너지 (Energy)

소재 (Materials)

산업재 (Industrials)

경기소비재 (Consumer Discretionary)

필수소비재 (Consumer Staples)

헬스케어 (Health Care)

금융 (Financials)

정보기술 (Information Technology)

통신서비스 (Communication Services)

유틸리티 (Utilities)

부동산 (Real Estate)

In [3]:
old_cols = list(wiki_data[0].columns)
new_cols = ['ticker', 'security', 'gics_sector', 'gics_sub_industry', 'headquarters_location', 'date_added', 'cik', 'founded']

wiki_data[0].columns = new_cols
# wiki_data[0].head()

sp500_current = wiki_data[0].copy()
sp500_current.loc[:, 'cik'] = sp500_current['cik'].apply(str).str.zfill(10)
# sp500_current.head()

 '0000796343' '0000002488' '0000874761' '0000004977' '0001090872'
 '0000002969' '0001559720' '0001086222' '0000915913' '0001035443'
 '0001097149' '0001579241' '0000352541' '0000899051' '0001652044'
 '0001652044' '0000764180' '0001018724' '0001748790' '0001002910'
 '0000004904' '0000004962' '0000005272' '0001053507' '0001410636'
 '0000820027' '0001037868' '0000318154' '0000820313' '0000006281'
 '0000315293' '0001841666' '0001858681' '0000320193' '0000006951'
 '0001521332' '0000947484' '0000007084' '0001596532' '0000354190'
 '0001267238' '0000732717' '0000731802' '0000769397' '0000008670'
 '0000866787' '0000915912' '0000008818' '0001069183' '0001701605'
 '0000009389' '0000070858' '0000010456' '0000010795' '0001067983'
 '0000764478' '0000842023' '0000875045' '0002012383' '0001393818'
 '0001512673' '0001390777' '0000012927' '0001075531' '0000885725'
 '0000014272' '0001730168' '0001383312' '0000079282' '0000014693'
 '0001316835' '0001996862' '0001037540' '0001043277' '0000813672'
 '00015908

# 3. 조정 기록
- 일부 S&P500 편입/퇴출 기록

In [4]:
# deep copy
sp500_inout = wiki_data[1].copy()
# edit column
sp500_inout.columns = ['date_modified', 'ticker_added', 'security_added', 'ticker_removed', 'security_removed', 'reason']
# edit date column
sp500_inout.date_modified = pd.to_datetime(sp500_inout.date_modified)
sp500_inout.date_modified = sp500_inout.date_modified.dt.strftime('%Y-%m-%d')

# sp500_added
sp500_added = sp500_inout[~sp500_inout['ticker_added'].isnull()][['date_modified','ticker_added','security_added','reason']]
sp500_added.columns = ['date_modified', 'ticker', 'security', 'reason']
sp500_added['action'] = 'added'

# sp500_removed
sp500_removed = sp500_inout[~sp500_inout['ticker_removed'].isnull()][['date_modified','ticker_removed','security_removed','reason']]
sp500_removed.columns = ['date_modified', 'ticker', 'security', 'reason']
sp500_removed['action'] = 'removed'

# sp500_history
sp500_history = pd.concat([sp500_added, sp500_removed])

# unique ticker


In [14]:
len(sp500_current.ticker)

503

In [13]:
len(sp500_history.ticker)

724

In [26]:
ticker_unique = set(sp500_history.ticker) | set(sp500_current.ticker)
len(ticker_unique)

858

# 외부 자료
- Trading Evolved 자료, 96년부터 히스토리 저장됨

In [20]:
ticker_start_end = pd.read_csv('sp500_ticker_start_end.csv')
len(ticker_start_end)

1230

In [27]:
trading_evolved_ticker = ticker_start_end.ticker.unique()
len(trading_evolved_ticker)

1181

# YFinance 활용
- ticker 정보 받아오기
- ticker history 받아오기

In [30]:
ticker_list = trading_evolved_ticker.tolist()

In [32]:
import yfinance as yf

tickers = yf.Tickers(ticker_list)

In [33]:
tickers

yfinance.Tickers object <A,AABA,AAL,AAMRQ,AAP,AAPL,ABBV,ABC,ABI,ABKFQ,ABMD,ABNB,ABS,ABT,ABX,ACAS,ACGL,ACKH,ACN,ACS,ACV,ADBE,ADCT,ADI,ADM,ADP,ADS,ADSK,ADT,AEE,AEP,AES,AET,AFL,AFS.A,AGC,AGN,AHM,AIG,AIT,AIV,AIZ,AJG,AKAM,AKS,AL,ALB,ALGN,ALK,ALL,ALLE,ALTR,ALXN,AM,AMAT,AMCC,AMCR,AMD,AME,AMG,AMGN,AMH,AMP,AMT,AMTM,AMZN,AN,ANDV,ANDW,ANET,ANF,ANRZQ,ANSS,ANTM,ANV,AON,AOS,APA,APC,APCC,APD,APH,APO,APOL,APTV,AR,ARC,ARE,ARG,ARNC,AS,ASC,ASH,ASN,ASND,ASO,AT,ATGE,ATI,ATO,ATVI,AV,AVB,AVGO,AVP,AVY,AW,AWE,AWK,AXON,AXP,AYE,AYI,AZA.A,AZO,BA,BAC,BALL,BAX,BAY,BBBY,BBI,BBT,BBWI,BBY,BC,BCO,BCR,BDK,BDX,BEAM,BEN,BEV,BF.B,BFI,BFO,BG,BGEN,BGG,BHF,BHGE,BHMSQ,BIG,BIIB,BIO,BJS,BK,BKB,BKNG,BKR,BLDR,BLK,BLL,BLS,BLY,BMC,BMET,BMGCA,BMS,BMY,BNI,BNL,BOAT,BOL,BR,BRCM,BRK.B,BRL,BRO,BSC,BSX,BT,BTUUQ,BUD,BVSN,BWA,BX,BXLT,BXP,C,CA,CAG,CAH,CAL,CAM,CAR,CARR,CAT,CB,CBB,CBE,CBH,CBOE,CBRE,CBS,CBSS,CCB,CCE,CCEP,CCI,CCK,CCL,CCTYQ,CCU,CDAY,CDNS,CDW,CE,CEG,CELG,CEN,CEPH,CERN,CF,CFC,CFG,CFL,CFN,CG,CGP,CHA,CHD,CHIR,CHK,CHRS,CHRW,CHTR,CI,CIE

In [37]:
tickers.tickers['A'].info

{'address1': '5301 Stevens Creek Boulevard',
 'city': 'Santa Clara',
 'state': 'CA',
 'zip': '95051',
 'country': 'United States',
 'phone': '800 227 9770',
 'fax': '866 497 1134',
 'website': 'https://www.agilent.com',
 'industry': 'Diagnostics & Research',
 'industryKey': 'diagnostics-research',
 'industryDisp': 'Diagnostics & Research',
 'sector': 'Healthcare',
 'sectorKey': 'healthcare',
 'sectorDisp': 'Healthcare',
 'longBusinessSummary': "Agilent Technologies, Inc. provides application focused solutions to the life sciences, diagnostics, and applied chemical markets worldwide. The company operates in three segments: Life Sciences and Applied Markets, Diagnostics and Genomics, and Agilent CrossLab. The Life Sciences and Applied Markets segment offers liquid chromatography systems and components; liquid chromatography mass spectrometry systems; gas chromatography systems and components; gas chromatography mass spectrometry systems; inductively coupled plasma mass spectrometry instr

In [40]:
tickers.tickers['A'].news

[{'id': 'fc8efa1f-3a92-3819-814a-d2d3d985975a',
  'content': {'id': 'fc8efa1f-3a92-3819-814a-d2d3d985975a',
   'contentType': 'STORY',
   'title': '3 Reasons to Sell A and 1 Stock to Buy Instead',
   'description': '',
   'summary': 'While the S&P 500 is up 15.7% since March 2025, Agilent (currently trading at $128.85 per share) has lagged behind, posting a return of 5.7%. This might have investors contemplating their next move.',
   'pubDate': '2025-09-09T04:02:59Z',
   'displayTime': '2025-09-09T04:02:59Z',
   'isHosted': True,
   'bypassModal': False,
   'previewUrl': None,
   'thumbnail': {'originalUrl': 'https://media.zenfs.com/en/stockstory_922/275775ceb6d8493aad634ebe586a4254',
    'originalWidth': 1400,
    'originalHeight': 700,
    'caption': 'A Cover Image',
    'resolutions': [{'url': 'https://s.yimg.com/uu/api/res/1.2/CdcKJkL9Cc_qGPuujKaBfw--~B/aD03MDA7dz0xNDAwO2FwcGlkPXl0YWNoeW9u/https://media.zenfs.com/en/stockstory_922/275775ceb6d8493aad634ebe586a4254',
      'width': 1

## Alpha Advantage API

In [23]:
import requests

# replace the "demo" apikey below with your own key from https://www.alphavantage.co/support/#api-key
url = 'https://www.alphavantage.co/query?function=OVERVIEW&symbol=AN&apikey=undefined'
r = requests.get(url)
data = r.json()

print(data)

{'Symbol': 'AN', 'AssetType': 'Common Stock', 'Name': 'AutoNation Inc', 'Description': 'AutoNation, Inc. is an automobile retailer in the United States. The company is headquartered in Fort Lauderdale, Florida.', 'CIK': '350698', 'Exchange': 'NYSE', 'Currency': 'USD', 'Country': 'USA', 'Sector': 'CONSUMER CYCLICAL', 'Industry': 'AUTO & TRUCK DEALERSHIPS', 'Address': '200 SW 1ST AVENUE, FORT LAUDERDALE, FL, UNITED STATES, 33301', 'OfficialSite': 'https://www.autonation.com', 'FiscalYearEnd': 'December', 'LatestQuarter': '2025-06-30', 'MarketCapitalization': '8566049000', 'EBITDA': '1641600000', 'PERatio': '14.26', 'PEGRatio': '0.955', 'BookValue': '65.49', 'DividendPerShare': 'None', 'DividendYield': 'None', 'EPS': '15.93', 'RevenuePerShareTTM': '702.41', 'ProfitMargin': '0.0231', 'OperatingMarginTTM': '0.0506', 'ReturnOnAssetsTTM': '0.066', 'ReturnOnEquityTTM': '0.272', 'RevenueTTM': '27464100000', 'GrossProfitTTM': '4919700000', 'DilutedEPSTTM': '15.93', 'QuarterlyEarningsGrowthYOY': 

In [25]:
data["Symbol"]

'AN'