**Importing SET top 20 stocks**

In [5]:
import sklearn
import numpy
# A notebook cell only echoes its final expression, so the sklearn version on
# the next line is evaluated but never displayed; the output of this cell is
# numpy's version string.
sklearn.__version__
numpy.__version__

'1.26.0'

In [5]:
import yfinance as yf
import pandas as pd

# Fetch the SET index constituents (symbol, company name, sector) from Wikipedia
def get_SET_tickers():
    """Return the constituent table scraped from the Wikipedia SET50/SET100 page.

    The first HTML table on the page is parsed with its first row as the
    header, and only the 'Symbol', 'Securities Name' and 'Sector' columns are
    returned.

    Returns:
        pandas.DataFrame with the three columns listed above.

    Raises:
        KeyError: if the first table does not contain all three columns.
    """
    wanted_columns = ['Symbol', 'Securities Name', 'Sector']
    page_tables = pd.read_html(
        "https://en.wikipedia.org/wiki/SET50_Index_and_SET100_Index",
        header=0,
    )
    # The constituent list is taken to be the first table on the page.
    first_table = page_tables[0]
    return first_table[wanted_columns]
# Runs the Wikipedia fetch; later cells modify SET_data in place (they rewrite
# 'Symbol' and add a 'MarketCap' column).
SET_data = get_SET_tickers()

In [12]:
# Thai stock symbols in yfinance carry a ".bk" suffix, so every symbol fetched
# above is rewritten as "<symbol>.bk".
# Any ".bk" already present is stripped first, so re-running this cell leaves
# the symbols unchanged instead of producing "<symbol>.bk.bk".
# astype(str) mirrors the string formatting the previous f-string applied to
# each value.
SET_data['Symbol'] = (
    SET_data['Symbol'].astype(str).str.replace(r"\.bk$", "", regex=True) + ".bk"
)

In [14]:
SET_data

Unnamed: 0,Symbol,Securities Name,Sector
0,ADVANC.bk,Advanced Info Service,Information & Communication Technology
1,AOT.bk,Airports of Thailand,Transportation & Logistics
2,AWC.bk,Asset World Corp,Property Development
3,BANPU.bk,Banpu,Energy & Utilities
4,BBL.bk,Bangkok Bank,Banking
5,BDMS.bk,Bangkok Dusit Medical Service,Health Care Services
6,BEM.bk,Bangkok Expressway and Metro,Transportation & Logistics
7,BGRIM.bk,B.Grimm Power,Energy & Utilities
8,BH.bk,Bumrungrad International Hospital,Health Care Services
9,BTS.bk,BTS Group Holdings,Transportation & Logistics


In [18]:
# function to get historical stock prices
def download_stock_data(tickers, start_date, end_date):
    """Download historical price data from Yahoo Finance via ``yf.download``.

    Args:
        tickers: ticker symbol(s), passed through unchanged to ``yf.download``.
        start_date: start of the requested range, passed as ``start``.
        end_date: end of the requested range, passed as ``end``.

    Returns:
        The object returned by ``yf.download`` (a DataFrame in this notebook's
        saved output), with the progress bar disabled.
    """
    # auto_adjust is pinned to False so the result keeps the separate
    # 'Adj Close' column shown in this notebook's saved output. Newer yfinance
    # releases changed the default of this flag (see the yfinance changelog),
    # which would otherwise silently alter the columns and the 'Close' values.
    return yf.download(
        tickers,
        start=start_date,
        end=end_date,
        auto_adjust=False,
        progress=False,
    )

In [34]:
# Look up a ticker's market capitalisation through yfinance
def get_market_cap(ticker):
    """Return the 'marketCap' entry of the ticker's yfinance ``info`` mapping.

    Args:
        ticker: symbol passed to ``yf.Ticker``.

    Returns:
        The value stored under 'marketCap', or ``None`` when the key is absent.
    """
    info = yf.Ticker(ticker).info
    # NOTE(review): the previous comment described this value as USD; nothing
    # here converts currencies, so confirm the unit before comparing tickers.
    return info.get('marketCap')
# Attach a market-cap column: one get_market_cap lookup per symbol.
market_caps = SET_data['Symbol'].apply(get_market_cap)
SET_data['MarketCap'] = market_caps

# Rank by market cap, largest first. The target is 20 stocks, but 24 rows are
# kept because some of the largest names are recent IPOs.
N_CANDIDATES = 24
ranked_by_cap = SET_data.sort_values(by='MarketCap', ascending=False)
top20_stocks = ranked_by_cap.head(N_CANDIDATES)

# Download the price history for the selected tickers in a single request.
selected_symbols = top20_stocks['Symbol'].tolist()
data = download_stock_data(
    selected_symbols,
    start_date="2014-01-01",
    end_date="2024-12-12",
)

# Persist the downloaded prices to a CSV file.
data.to_csv("../Data/top20_set_prices.csv")

In [35]:
data

Price,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Ticker,ADVANC.BK,AOT.BK,BBL.BK,BDMS.BK,BH.BK,CPALL.BK,CPF.BK,CPN.BK,CRC.BK,DELTA.BK,...,KBANK.BK,KTB.BK,MINT.BK,OR.BK,PTT.BK,PTTEP.BK,SCB.BK,SCC.BK,TRUE.BK,TTB.BK
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-01-02,115.707695,13.004045,112.167664,8.865041,68.680244,33.136929,22.704105,31.580967,,4.015339,...,17348400,84143600,11274079,,34484000,5548200,,1199400,194202011,258678302
2014-01-03,121.276527,12.870900,113.856918,8.824374,69.914032,33.136929,22.704105,31.159885,,3.846470,...,14007700,57976000,9399961,,46203000,5289100,,2618000,296473487,287311397
2014-01-06,124.370323,12.648986,117.235466,9.678345,69.091530,33.344040,21.773605,31.580967,,3.696364,...,17816000,68818900,19638150,,62204000,7342300,,1589200,192726833,189481489
2014-01-07,129.939133,13.447869,120.276161,9.556350,68.474625,33.965351,22.704105,32.844200,,3.771417,...,11880600,76791600,11183958,,76936000,11419700,,2154100,149445951,257526618
2014-01-08,127.464096,12.870900,119.938301,9.312360,67.446480,33.551147,22.145803,32.633663,,3.790180,...,16142600,72450600,10281072,,60965000,6399400,,1190200,161748897,245327978
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-03,296.000000,61.209999,150.500000,26.000000,212.000000,63.750000,24.200001,60.500000,34.50,151.000000,...,17862300,20105200,16294100,5921900.0,17691100,4910000,11866600.0,3259900,37514700,94356000
2024-12-04,289.000000,61.500000,151.500000,25.750000,211.000000,64.000000,24.100000,60.500000,33.75,151.000000,...,16038900,37336500,25590400,4270200.0,30338600,4505000,12380400.0,3534900,52679100,136620600
2024-12-06,295.000000,61.000000,152.000000,25.500000,210.000000,63.000000,24.100000,60.250000,34.00,150.500000,...,7544400,22619700,9642900,6112100.0,24310500,2427800,5165800.0,2436600,52769200,286700300
2024-12-09,295.000000,60.500000,153.000000,25.250000,202.000000,62.750000,23.900000,59.750000,34.00,152.500000,...,17093200,86469800,10810400,6942300.0,12600200,5833300,6849700.0,2925600,22723500,152718400
