In [None]:
import requests
import json
from bs4 import BeautifulSoup

In [8]:
def scrape_tcas_calendar(url, timeout=10):
    """
    Fetches the TCAS calendar data from the MyTCAS website
    and returns it as a JSON-serialisable dictionary.

    Args:
        url: Address of the page that contains the calendar section.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout `requests.get` can block forever on a stalled
            connection.

    Returns:
        A dict with three keys:
            'title'   - text of the calendar heading ('' if absent),
            'rounds'  - list of dicts mapping each table column header to
                        the text of the matching cell, one dict per row,
            'remarks' - list of remark strings.
        Returns None (after printing a message) when the page cannot be
        fetched, when the main calendar container is missing, or when an
        unexpected error occurs while parsing. A missing rounds table or
        remarks list is not an error: the corresponding list is left empty.
    """
    try:
        # 1. Fetch the HTML content with a browser-like User-Agent
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        response = requests.get(url, headers=request_headers, timeout=timeout)
        response.raise_for_status()  # Raise an error for bad responses (4xx or 5xx)
        response.encoding = 'utf-8'  # Ensure Thai characters are handled correctly

        # 2. Parse the downloaded document
        return _parse_tcas_calendar(response.text)

    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL: {e}")
        return None
    except Exception as e:
        # Best-effort contract: callers only ever see a dict or None.
        print(f"An error occurred during parsing: {e}")
        return None


def _parse_tcas_calendar(html):
    """Builds the calendar dict from raw page HTML; returns None if the container is missing."""
    soup = BeautifulSoup(html, 'html.parser')

    # A CSS selector matches both classes regardless of their order or of
    # extra classes; class_='a b' would only match that exact attribute string.
    main_container = soup.select_one('div.s-container.t-sec-cal')
    if main_container is None:
        print("Error: Could not find the main calendar container (div.s-container.t-sec-cal).")
        return None

    tcas_data = {
        'title': '',
        'rounds': [],
        'remarks': []
    }

    title_element = main_container.find('h2', class_='t-title')
    if title_element is not None:
        tcas_data['title'] = title_element.get_text(strip=True)

    tcas_data['rounds'] = _extract_rounds(main_container)
    tcas_data['remarks'] = _extract_remarks(main_container)
    return tcas_data


def _extract_rounds(container):
    """Reads the desktop rounds table into a list of {column header: cell text} dicts."""
    # Each lookup is checked separately so that one missing element yields an
    # empty list instead of an AttributeError that discards the whole result.
    desktop_wrapper = container.find('div', class_='t-rounds-d')
    table = desktop_wrapper.find('table') if desktop_wrapper is not None else None
    if table is None:
        return []

    table_head = table.find('thead')
    table_body = table.find('tbody')
    if table_head is None or table_body is None:
        return []

    column_names = [th.get_text(strip=True) for th in table_head.find_all('th')]

    # NOTE(review): the saved notebook output shows footnote digits merged into
    # header names and cell values (e.g. a trailing "1" on a column name) -
    # presumably superscript markers in the page; confirm against the HTML
    # before stripping them.
    rounds = []
    for row in table_body.find_all('tr'):
        cells = row.find_all('td')
        if not cells:
            # Rows without data cells carry no round information.
            continue
        # zip stops at the shorter sequence, so short rows simply omit the
        # trailing columns. The separator keeps text around <br> tags apart.
        rounds.append({
            name: cell.get_text(separator=' ', strip=True)
            for name, cell in zip(column_names, cells)
        })
    return rounds


def _extract_remarks(container):
    """Returns the text of every <li> in the first <ul> of the remarks section."""
    remarks_container = container.find('div', class_='t-remark')
    if remarks_container is None:
        return []

    remark_list = remarks_container.find('ul')
    if remark_list is None:
        return []

    return [li.get_text(strip=True) for li in remark_list.find_all('li')]

In [9]:
# =====================
#   Run the scraper
# =====================

# Landing page of MyTCAS, which hosts the calendar section
TARGET_URL = 'https://www.mytcas.com'

print(f"Scraping data from {TARGET_URL}...")
calendar_data = scrape_tcas_calendar(TARGET_URL)

if calendar_data:
    # Serialise the dictionary; ensure_ascii=False keeps Thai text readable
    # instead of emitting \uXXXX escapes
    json_output = json.dumps(calendar_data, ensure_ascii=False, indent=4)

    # Show the banner and the JSON document, one per line
    print("\n--- JSON Output ---", json_output, sep="\n")

    # Persist the exact same text to disk as UTF-8
    with open('tcas_calendar.json', mode='w', encoding='utf-8') as json_file:
        json_file.write(json_output)
    print("\nSuccessfully saved data to tcas_calendar.json")

Scraping data from https://www.mytcas.com...

--- JSON Output ---
{
    "title": "ปฏิทิน TCAS69",
    "rounds": [
        {
            "รอบ": "1",
            "รูปแบบ": "Portfolio",
            "รับสมัคร": "มหาวิทยาลัยกำหนด",
            "ประกาศผลในระบบ": "6 ก.พ. 69",
            "ยืนยันสิทธิ์": "6-7 ก.พ. 69",
            "สละสิทธิ์ในระบบ1": "1 8 ก.พ. 69 2 4 พ.ค. 69"
        },
        {
            "รอบ": "2",
            "รูปแบบ": "Quota",
            "รับสมัคร": "มหาวิทยาลัยกำหนด",
            "ประกาศผลในระบบ": "2 พ.ค. 69",
            "ยืนยันสิทธิ์": "2-3 พ.ค. 69",
            "สละสิทธิ์ในระบบ1": "4 พ.ค. 69"
        },
        {
            "รอบ": "3",
            "รูปแบบ": "Admission",
            "รับสมัคร": "ระบบ mytcas.com 6-12 พ.ค. 69",
            "ประกาศผลในระบบ": "1 20 พ.ค. 69 2 26 พ.ค. 69",
            "ยืนยันสิทธิ์": "20-21 พ.ค. 69",
            "สละสิทธิ์ในระบบ1": "27 พ.ค. 69 2"
        },
        {
            "รอบ": "4",
            "รูปแบบ": "Direct Admission 3",
   