**Importing SET top 20 stocks**

In [2]:
import sklearn
import numpy
# Record the library versions this notebook was run with.
# NOTE(review): a cell only displays the value of its last expression, so the
# scikit-learn version below is evaluated but never shown; only numpy's is.
sklearn.__version__
numpy.__version__

'1.26.0'

In [3]:
import yfinance as yf
import pandas as pd

# Scrape the SET index constituents listed on Wikipedia
def get_SET_tickers():
    """Return the SET index constituents table scraped from Wikipedia.

    The page is parsed with ``pandas.read_html`` and the first table that
    contains the text "Symbol" is used, so an unrelated table added ahead of
    the constituents list does not break the lookup.

    Returns:
        pandas.DataFrame: an independent copy holding the columns
        ``Symbol``, ``Securities Name`` and ``Sector``.

    Raises:
        KeyError: if the selected table lacks one of those three columns.
        Errors raised by ``pandas.read_html`` itself (network, parser, no
        matching table) propagate unchanged.
    """
    link = "https://en.wikipedia.org/wiki/SET50_Index_and_SET100_Index"
    SET_table = pd.read_html(link, match="Symbol", header=0)[0]
    # Hand back an explicit copy: later cells overwrite and add columns on the
    # result, and a copy keeps those edits clear of pandas' view/copy tracking.
    return SET_table[['Symbol', 'Securities Name', 'Sector']].copy()
# Fetch the constituents table; later cells rewrite its Symbol column, add a
# MarketCap column and filter it.
SET_data = get_SET_tickers()

In [4]:
# Yahoo Finance lists Thai stocks with a ".bk" suffix (e.g. "AOT" -> "AOT.bk"),
# so append it to every scraped symbol. Symbols that already carry the suffix
# are left alone, which keeps this cell safe to re-run.
SET_data['Symbol'] = [
    symbol if symbol.endswith(".bk") else f"{symbol}.bk"
    for symbol in SET_data['Symbol']
]

In [5]:
# Preview the first rows only instead of dumping the whole table.
SET_data.head(10)

Unnamed: 0,Symbol,Securities Name,Sector
0,ADVANC.bk,Advanced Info Service,Information & Communication Technology
1,AOT.bk,Airports of Thailand,Transportation & Logistics
2,AWC.bk,Asset World Corp,Property Development
3,BANPU.bk,Banpu,Energy & Utilities
4,BBL.bk,Bangkok Bank,Banking
5,BDMS.bk,Bangkok Dusit Medical Service,Health Care Services
6,BEM.bk,Bangkok Expressway and Metro,Transportation & Logistics
7,BGRIM.bk,B.Grimm Power,Energy & Utilities
8,BH.bk,Bumrungrad International Hospital,Health Care Services
9,BTS.bk,BTS Group Holdings,Transportation & Logistics


In [6]:
# function to get historical stock prices
def download_stock_data(tickers, start_date, end_date):
    """Download historical prices from Yahoo Finance via ``yf.download``.

    Args:
        tickers: ticker symbol or list of symbols, e.g. ``["AOT.bk"]``.
        start_date: first date to download, passed through as ``start``.
        end_date: end of the date range, passed through as ``end``.

    Returns:
        Whatever ``yf.download`` returns for the request (a price DataFrame).
    """
    # auto_adjust is pinned to False because later cells read the
    # 'Adj Close' column, which yfinance only provides when prices are not
    # auto-adjusted; newer yfinance releases turn auto-adjustment on by default.
    return yf.download(
        tickers,
        start=start_date,
        end=end_date,
        progress=False,
        auto_adjust=False,
    )

In [7]:
# Look up a company's market capitalisation on Yahoo Finance
def get_market_cap(ticker):
    """Return the ``marketCap`` entry of a ticker's Yahoo Finance info.

    Args:
        ticker: Yahoo Finance symbol, e.g. ``"AOT.bk"``.

    Returns:
        The value stored under ``'marketCap'`` in ``yf.Ticker(ticker).info``,
        or ``None`` when that key is absent.

    NOTE(review): the previous comment labelled this value "USD". Nothing in
    this function converts currencies, so confirm which currency Yahoo
    reports for .bk listings before comparing against other markets.
    """
    company_info = yf.Ticker(ticker).info
    return company_info.get('marketCap')
# Attach a market-cap column: get_market_cap is called once per symbol.
SET_data['MarketCap'] = SET_data['Symbol'].apply(get_market_cap)

# Shortlist the largest companies. 24 rows are kept although the goal is a
# top 20, because some of the largest names only recently had their IPO.
top20_stocks = (
    SET_data
    .sort_values(by='MarketCap', ascending=False)
    .head(24)
)

# Fetch the price history of every shortlisted ticker in a single call.
data = download_stock_data(
    top20_stocks['Symbol'].tolist(),
    start_date="2014-01-01",
    end_date="2024-12-12",
)

# Write the downloaded prices to CSV.
data.to_csv("../Data/top20_set_prices.csv")

In [8]:
# List the ticker labels of the downloaded 'Adj Close' price columns.
data['Adj Close'].columns

Index(['ADVANC.BK', 'AOT.BK', 'BBL.BK', 'BDMS.BK', 'BH.BK', 'CPALL.BK',
       'CPF.BK', 'CPN.BK', 'CRC.BK', 'DELTA.BK', 'GULF.BK', 'INTUCH.BK',
       'IVL.BK', 'KBANK.BK', 'KTB.BK', 'MINT.BK', 'OR.BK', 'PTT.BK',
       'PTTEP.BK', 'SCB.BK', 'SCC.BK', 'TLI.BK', 'TRUE.BK', 'TTB.BK'],
      dtype='object', name='Ticker')

In [19]:
# Tickers whose prices were downloaded, read back from `data` rather than from
# a pasted copy of an earlier output, so the list cannot go stale on re-run.
symbols = data['Adj Close'].columns.tolist()
# The downloaded columns use an upper-case ".BK"; SET_data stores ".bk".
updated_symbols = [symbol.replace('.BK', '.bk') for symbol in symbols]
# Keep only the constituents whose prices were downloaded.
df = SET_data[SET_data['Symbol'].isin(updated_symbols)]

In [29]:
# Keep the 20 rows with the largest market cap, ordered largest first.
df = (
    df
    .sort_values(by="MarketCap", ascending=False)
    .head(20)
)

In [35]:
# Export the final selection to an Excel workbook. No `index` argument is
# passed, so pandas' default applies and the row index is written as well.
df.to_excel("../Data/sector_visual_set.xlsx")