##### **Install Packages and Import Libraries**

In [None]:
# Install packages
!pip install selenium
!pip install webdriver-manager

# Import libraries
import datetime as dt
import datetime
import numpy as np
import os
import pandas as pd
from selenium import webdriver

# Install wget
!apt-get install wget

# Download chromedriver.exe from GitHub.
# The '/raw/' URL serves the file itself; the '/blob/' URL serves GitHub's
# HTML viewer page, so wget would save a web page under the binary's name.
# NOTE(review): web_driver() in this file never points Chrome at this file,
# and a .exe is a Windows binary -- confirm the download is still needed.
os.system('wget https://github.com/SamapanThongmee/Scrap_SET50_Index_Options_Data/raw/main/chromedriver.exe')

def web_driver():
    """Create and return a Chrome WebDriver configured for headless use.

    The command-line switches listed in ``chrome_flags`` are applied, in
    order, to a fresh ``ChromeOptions`` object before the browser starts.

    Returns:
        The ``webdriver.Chrome`` instance built from those options.
    """
    chrome_flags = (
        "--verbose",
        '--no-sandbox',
        '--headless',
        '--disable-gpu',
        "--window-size=1920, 1200",
        '--disable-dev-shm-usage',
    )

    chrome_options = webdriver.ChromeOptions()
    for flag in chrome_flags:
        chrome_options.add_argument(flag)

    return webdriver.Chrome(options=chrome_options)

# Module-level browser session; getCheckOptions() and getOptions() read this global.
driver = web_driver()

##### **Check Options Symbols Year by Year**

In [None]:
def getCheckOptions(symbol='S50Z23P900'):
    """Probe the TFEX historical-trading page for ``symbol``.

    Loads the first page (``page=0``) with the module-level ``driver``,
    parses the first HTML table on it, and keeps the rows that precede the
    ``'Grand Total'`` row.

    Args:
        symbol: Option series symbol, e.g. ``'S50Z23P900'``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Date`` (``YYYY-MM-DD``
        strings), ``Symbol``, ``Open``, ``High``, ``Low``, ``Close``, ``SP``,
        ``Vol`` and ``OI``, sorted by date with one row per date. If the
        page cannot be loaded or parsed, an empty list is returned instead,
        so callers should test the result with ``len(...)``.
    """
    # Function-scope import so the block does not depend on the import cell.
    from io import StringIO

    columns = ['Date', 'Open', 'High', 'Low', 'Close', 'SP', 'Vol', 'OI']
    page = 0  # a single page is enough to tell whether any rows exist
    url = f'https://www.tfex.co.th/tfex/historicalTrading.html?symbol={symbol}&series=&page={page}&periodView=A&locale=en_US'

    try:
        driver.get(url)
        # Passing literal HTML to read_html is deprecated; hand it a
        # file-like object instead.
        scrap = pd.read_html(StringIO(driver.page_source))[0]

        # Everything above the first 'Grand Total' row is daily data.
        total_row = int(scrap.index[scrap['Date'] == 'Grand Total'][0])
        options_df = scrap.iloc[:total_row, :][columns].replace('-', 0)

        options_df['Date'] = pd.to_datetime(options_df['Date'], format='%d/%m/%Y').dt.strftime('%Y-%m-%d')
        for column in ('SP', 'Vol', 'OI'):
            options_df[column] = pd.to_numeric(options_df[column])

        options_df['Symbol'] = symbol
        options_df = options_df[['Date', 'Symbol', 'Open', 'High', 'Low', 'Close', 'SP', 'Vol', 'OI']]
        options_df = options_df.sort_values(by='Date').reset_index(drop=True)
        return options_df.drop_duplicates(subset=['Date'], keep='last')
    except Exception:
        # Best-effort probe: any load/parse failure means "no data".
        # Catching Exception (not a bare except) keeps Ctrl-C and SystemExit
        # working during long symbol sweeps.
        return []

In [None]:
def generate_option_symbols(year, letters, start_value=250, end_value=1250, interval=25):
    """List the call and put symbols of one year whose call series has data.

    For every letter in ``letters`` and every strike from ``start_value`` to
    ``end_value`` (inclusive) in steps of ``interval``, the call symbol
    ``S50{letter}{year}C{strike}`` is probed with ``getCheckOptions``.

    Args:
        year: Two-character year string embedded in the symbol, e.g. ``'23'``.
        letters: Iterable of series letters, e.g. ``['M', 'H', 'U', 'Z']``.
        start_value: First strike to try (integer).
        end_value: Last strike to try, inclusive (integer).
        interval: Step between consecutive strikes (integer).

    Returns:
        A list holding every call symbol that returned data, followed by the
        matching put symbols in the same order. Put symbols are derived from
        the calls and are not probed themselves.
    """
    call_symbols = []
    put_symbols = []

    for letter in letters:
        series = f'S50{letter}{year}'
        for strike in range(start_value, end_value + 1, interval):
            call_symbol = f'{series}C{strike}'
            # getCheckOptions returns a DataFrame or an empty list, so use
            # len() rather than truthiness (ambiguous for DataFrames).
            if len(getCheckOptions(call_symbol)) > 0:
                call_symbols.append(call_symbol)
                # Build the put explicitly instead of str.replace('C', 'P'),
                # which would also rewrite a 'C' elsewhere in the symbol.
                put_symbols.append(f'{series}P{strike}')

    return call_symbols + put_symbols

# Probe every series from 2007 up to the current year and save the symbols
# that returned data, one space-separated text file per year.
letters = ['M', 'H', 'U', 'Z']
current_year = datetime.datetime.now().year
last_two_digits = current_year % 100

for year in range(7, last_two_digits + 1):
    year_str = f'{year:02d}'  # two-digit year as used inside the symbols
    symbols = generate_option_symbols(year_str, letters)

    with open(f'{2000 + year}_Options_Symbols.txt', 'w') as file:
        file.write(' '.join(str(symbol) for symbol in symbols))

In [None]:
# Write the full grid of candidate call symbols for each year from 2021 to
# the current year, one symbol per line, without probing the website.
current_year = datetime.datetime.now().year
last_two_digits = current_year % 100

letters = ['M', 'H', 'U', 'Z']
years = [f'{two_digit:02d}' for two_digit in range(21, last_two_digits + 1)]

# Strike grid shared by every series.
start_value = 250
end_value = 1250
interval = 25

for year in years:
    # Candidate call symbols keyed by series, e.g. 'S50M21_Call'.
    symbols_to_check = {
        f'S50{letter}{year}_Call': [
            f'S50{letter}{year}C{strike}'
            for strike in range(start_value, end_value + 1, interval)
        ]
        for letter in letters
    }

    # Flatten in the order of `letters`.
    year_options_symbols = [
        candidate
        for letter in letters
        for candidate in symbols_to_check[f'S50{letter}{year}_Call']
    ]

    filename = f'{year}_OptionsSymbols.txt'
    with open(filename, 'w') as file:
        file.writelines(f'{symbol}\n' for symbol in year_options_symbols)


##### **Pull Options Data**

In [None]:
def getOptions(symbol='S50U23P900', max_pages=15):
    """Download the TFEX trading history of ``symbol``.

    Pages ``0`` to ``max_pages - 1`` of the historical-trading view are
    loaded with the module-level ``driver``. On each page the first HTML
    table is parsed and the rows preceding the ``'Grand Total'`` row are
    kept.

    Args:
        symbol: Option series symbol, e.g. ``'S50U23P900'``.
        max_pages: Number of result pages to request (default 15).

    Returns:
        A ``pandas.DataFrame`` with the columns ``Date`` (``YYYY-MM-DD``
        strings), ``Symbol``, ``Open``, ``High``, ``Low``, ``Close``, ``SP``,
        ``Vol`` and ``OI``, sorted by date with one row per date. If any
        page fails to load or parse, the error is printed, pages already
        collected are discarded, and an empty list is returned.
    """
    # Function-scope import so the block does not depend on the import cell.
    from io import StringIO

    columns = ['Date', 'Open', 'High', 'Low', 'Close', 'SP', 'Vol', 'OI']
    options = []
    try:
        page_frames = []

        for page in range(max_pages):
            url = f'https://www.tfex.co.th/tfex/historicalTrading.html?symbol={symbol}&series=&page={page}&periodView=A&locale=en_US'
            driver.get(url)
            # Passing literal HTML to read_html is deprecated; hand it a
            # file-like object instead.
            scrap = pd.read_html(StringIO(driver.page_source))[0]

            # Everything above the first 'Grand Total' row is daily data.
            total_row = int(scrap.index[scrap['Date'] == 'Grand Total'][0])
            page_data = scrap.iloc[:total_row, :][columns].replace('-', 0)

            page_data['Date'] = pd.to_datetime(page_data['Date'], format='%d/%m/%Y').dt.strftime('%Y-%m-%d')
            for column in ('SP', 'Vol', 'OI'):
                page_data[column] = pd.to_numeric(page_data[column])

            page_frames.append(page_data)

        # One concat at the end avoids re-copying the growing frame per page.
        options_df = pd.concat(page_frames, axis=0)

        options_df['Symbol'] = symbol
        options_df = options_df[['Date', 'Symbol', 'Open', 'High', 'Low', 'Close', 'SP', 'Vol', 'OI']]
        options_df = options_df.sort_values(by='Date').reset_index(drop=True)
        # Pages may repeat a date; keep the last occurrence after sorting.
        options_df = options_df.drop_duplicates(subset=['Date'], keep='last')
        options = options_df

    except Exception as e:
        print(f"An error occurred: {e}")

    return options

def get_options_data(options_list):
  options_data = []
  for symbol in options_list:
    data = getOptions(symbol=symbol)  # Replace with your actual getOptions function
    options_data.append(data)
  options_data = pd.concat(options_data, ignore_index=True)