**Importing SET top 20 stocks**

In [1]:
import sklearn
import numpy
# Only the last expression of a cell is echoed, so the output below is
# numpy's version; sklearn's version is evaluated here but not displayed.
sklearn.__version__
numpy.__version__

'1.26.0'

In [2]:
import yfinance as yf
import pandas as pd

# Scrape the constituent table from the SET50/SET100 Wikipedia page
def get_SET_tickers():
    """Return the symbol, name and sector columns of the page's first table.

    Every HTML table on the Wikipedia page is parsed with ``pd.read_html``
    (row 0 is used as the column header) and only the first table is kept.

    Returns:
        A DataFrame restricted to the 'Symbol', 'Securities Name' and
        'Sector' columns, in that order.
    """
    url = "https://en.wikipedia.org/wiki/SET50_Index_and_SET100_Index"
    wanted_columns = ['Symbol', 'Securities Name', 'Sector']

    tables = pd.read_html(url, header=0)
    first_table = tables[0]
    return first_table[wanted_columns]
# Fetch the table once; later cells rewrite 'Symbol' and add 'MarketCap' on this frame.
SET_data = get_SET_tickers()

In [3]:
# Thai tickers are looked up in yfinance with a ".bk" suffix (e.g. "AOT" -> "AOT.bk"),
# so every symbol scraped above needs that suffix appended.
# The suffix is only added when it is missing, which keeps this cell idempotent:
# re-running it no longer turns "AOT.bk" into "AOT.bk.bk".
SET_data['Symbol'] = [
    symbol if str(symbol).lower().endswith(".bk") else f"{symbol}.bk"
    for symbol in SET_data['Symbol']
]

In [4]:
# Display the ticker table to check that the symbols now carry the ".bk" suffix.
SET_data

Unnamed: 0,Symbol,Securities Name,Sector
0,ADVANC.bk,Advanced Info Service,Information & Communication Technology
1,AOT.bk,Airports of Thailand,Transportation & Logistics
2,AWC.bk,Asset World Corp,Property Development
3,BANPU.bk,Banpu,Energy & Utilities
4,BBL.bk,Bangkok Bank,Banking
5,BDMS.bk,Bangkok Dusit Medical Service,Health Care Services
6,BEM.bk,Bangkok Expressway and Metro,Transportation & Logistics
7,BGRIM.bk,B.Grimm Power,Energy & Utilities
8,BH.bk,Bumrungrad International Hospital,Health Care Services
9,BTS.bk,BTS Group Holdings,Transportation & Logistics


In [5]:
# Thin wrapper around yfinance for fetching historical prices
def download_stock_data(tickers, start_date, end_date):
    """Download historical price data for ``tickers`` via ``yf.download``.

    Args:
        tickers: Ticker symbol(s), passed to ``yf.download`` unchanged.
        start_date: Forwarded as the ``start`` argument.
        end_date: Forwarded as the ``end`` argument.

    Returns:
        Whatever ``yf.download`` returns for the request; the progress bar
        is turned off.
    """
    prices = yf.download(
        tickers,
        start=start_date,
        end=end_date,
        progress=False,
    )
    return prices

In [16]:
# Look up a single ticker's market capitalisation through yfinance
def get_market_cap(ticker):
    """Return the 'marketCap' entry of ``yf.Ticker(ticker).info``.

    Args:
        ticker: Symbol to look up.

    Returns:
        The value stored under 'marketCap', or None when that key is absent.

    NOTE(review): the earlier comment stated the value is in USD; Yahoo
    presumably reports it in the listing currency -- confirm before
    comparing across markets.
    """
    info = yf.Ticker(ticker).info
    return info.get('marketCap', None)
# Attach a market-cap figure to every symbol (one lookup per ticker)
SET_data['MarketCap'] = SET_data['Symbol'].apply(get_market_cap)

# Rank by market cap, largest first, and keep the leading 24 rows.
# 24 rather than 20 is intentional: some of the largest names listed only
# recently, so a few spare tickers are kept alongside the intended top 20.
ranked_by_cap = SET_data.sort_values(by='MarketCap', ascending=False)
top20_stocks = ranked_by_cap.head(24)

# Download the price history for the selected tickers
selected_symbols = top20_stocks['Symbol'].tolist()
data = download_stock_data(
    selected_symbols,
    start_date="2014-01-01",
    end_date="2023-12-12",
)

# Write the downloaded prices to a CSV file
data.to_csv("../Data/top20_set_prices_2023.csv")

In [17]:
# Display the downloaded price table; tickers without early history show NaN in the first rows.
data

Price,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Ticker,ADVANC.BK,AOT.BK,BBL.BK,BDMS.BK,BH.BK,CPALL.BK,CPF.BK,CPN.BK,CRC.BK,DELTA.BK,...,KTB.BK,MINT.BK,OR.BK,PTT.BK,PTTEP.BK,SCB.BK,SCC.BK,TLI.BK,TRUE.BK,TTB.BK
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-01-02,115.707710,13.004046,112.167671,8.865039,68.680229,33.136932,22.704105,31.580961,,4.015339,...,84143600,11274079,,34484000,5548200,,1199400,,194202011,258678302
2014-01-03,121.276581,12.870898,113.856934,8.824374,69.914032,33.136932,22.704105,31.159883,,3.846470,...,57976000,9399961,,46203000,5289100,,2618000,,296473487,287311397
2014-01-06,124.370300,12.648988,117.235466,9.678346,69.091507,33.344040,21.773603,31.580961,,3.696363,...,68818900,19638150,,62204000,7342300,,1589200,,192726833,189481489
2014-01-07,129.939148,13.447870,120.276161,9.556350,68.474632,33.965359,22.704105,32.844204,,3.771417,...,76791600,11183958,,76936000,11419700,,2154100,,149445951,257526618
2014-01-08,127.464096,12.870898,119.938316,9.312359,67.446480,33.551136,22.145803,32.633663,,3.790180,...,72450600,10281072,,60965000,6399400,,1190200,,161748897,245327978
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-01,209.930481,57.408073,142.665573,25.124046,216.196136,52.335838,18.756201,66.116920,37.843410,76.990875,...,33355600,8198400,9524100.0,62257800,12384100,9681100.0,515600,6649800.0,55371400,120103500
2023-12-04,211.847656,58.880070,142.665573,25.611895,215.217865,52.335838,18.952599,65.630768,38.089146,75.997444,...,23830400,11264400,10004400.0,77737900,6361100,5770800.0,1527200,4693700.0,38289700,132025100
2023-12-06,211.847656,58.880070,142.665573,25.124046,212.283081,52.335838,18.854401,65.144615,37.843410,77.735947,...,43358100,6197300,8599200.0,55977900,16395500,7223200.0,1052600,2771800.0,54433500,195694800
2023-12-07,210.889069,58.880070,141.714462,25.124046,212.283081,52.335838,18.559799,63.929230,37.843410,75.500732,...,32145700,9858300,4867300.0,66960800,29602800,7261300.0,1306800,12274700.0,104695000,102350200
