In [2]:
import selenium
import pandas as pd
import random
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
from datetime import datetime
import csv
import re
import logging
import os
import keyboard
import urllib.parse

In [34]:
# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Initialize browser
firefox_options = Options()
# firefox_options.add_argument("--headless")
driver = webdriver.Firefox(options=firefox_options)
wait = WebDriverWait(driver, 10)

All_data = pd.DataFrame(columns=[
                "ปี",
                "เทอม",
                "รหัสวิชา",
                "ชื่อไทย",
                "รหัสกลุ่ม",
                "ชื่อภาษาอังกฤษ",
                "ภาควิชา",
                "หน่วยกิต",
                "รายละเอียดวัน-เวลา-ห้องเรียน-อาคาร-ประเภทการสอน",
                "รายละเอียดวิชา",
                "เงื่อนไขรายวิชา",
                "อาจารย์",
                "สอบกลางภาค",
                "สอบปลายภาค",
                "หมายเหตุ",
                'อ้างอิง'
                ]
)

def select_filters(year, semester):

    print('processing')
    logger.info(f"Filtering for year {year} semester {semester}")
    
    try:
        # Level 1: ID Status
        lv1_id_status = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#page > table:nth-child(4) > tbody > tr > td:nth-child(3) > table:nth-child(2) > tbody > tr:nth-child(2) > td.normaldetail > select'))
        )
        lv1_id_status.click()
        lv1_available_id = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#page > table:nth-child(4) > tbody > tr > td:nth-child(3) > table:nth-child(2) > tbody > tr:nth-child(2) > td.normaldetail > select > option:nth-child(1)'))
        )
        lv1_available_id.click()
        logger.info(f"Selected {lv1_available_id.text}")

        # Level 2: Faculty
        lv2_faculty = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#page > table:nth-child(4) > tbody > tr > td:nth-child(3) > table:nth-child(2) > tbody > tr:nth-child(3) > td.normaldetail > select'))
        )
        lv2_faculty.click()
        lv2_science_option = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'option[value="007วิทยาศาสตร์"]'))
        )
        lv2_science_option.click()
        logger.info(f"Selected {lv2_science_option.text}")

        # Level 3: Max item
        lv3 = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#page > table:nth-child(4) > tbody > tr > td:nth-child(3) > table:nth-child(2) > tbody > tr:nth-child(4) > td.normaldetail > select'))
        )
        lv3.click()
        lv3_maxitem = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'option[value="250"]'))
        )
        lv3_maxitem.click()
        logger.info(f"Selected {lv3_maxitem.text}")

        # Level 4.1: Year
        lv4_1 = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#page > table:nth-child(4) > tbody > tr > td:nth-child(3) > table:nth-child(2) > tbody > tr:nth-child(5) > td:nth-child(2) > table > tbody > tr:nth-child(1) > td:nth-child(3) > select:nth-child(1)'))
        )
        lv4_1.click()
        lv4_1_year = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, f'option[value="{year}"]'))
        )
        lv4_1_year.click()
        logger.info(f"Selected {lv4_1_year.text}")

        # Level 4.2: Semester
        lv4_2 = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#page > table:nth-child(4) > tbody > tr > td:nth-child(3) > table:nth-child(2) > tbody > tr:nth-child(5) > td:nth-child(2) > table > tbody > tr:nth-child(1) > td:nth-child(3) > select:nth-child(2)'))
        )
        lv4_2.click()
        lv4_2_semester = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, f'option[value="{semester}"]'))
        )
        lv4_2_semester.click()
        logger.info(f"Selected {lv4_2_semester.text}")

        # Level 4.3: Campus
        lv4_3 = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#page > table:nth-child(4) > tbody > tr > td:nth-child(3) > table:nth-child(2) > tbody > tr:nth-child(5) > td:nth-child(2) > table > tbody > tr:nth-child(2) > td:nth-child(2) > select'))
        )
        lv4_3.click()
        lv4_3_campus = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#page > table:nth-child(4) > tbody > tr > td:nth-child(3) > table:nth-child(2) > tbody > tr:nth-child(5) > td:nth-child(2) > table > tbody > tr:nth-child(2) > td:nth-child(2) > select > option:nth-child(3)'))
        )
        lv4_3_campus.click()
        logger.info(f"Selected {lv4_3_campus.text}")

        # Level 4.4: Level of Education
        lv4_4_lvl_of_ed = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#page > table:nth-child(4) > tbody > tr > td:nth-child(3) > table:nth-child(2) > tbody > tr:nth-child(5) > td:nth-child(2) > table > tbody > tr:nth-child(3) > td:nth-child(2) > select'))
        )
        lv4_4_lvl_of_ed.click()
        lv4_4_bachelor = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#page > table:nth-child(4) > tbody > tr > td:nth-child(3) > table:nth-child(2) > tbody > tr:nth-child(5) > td:nth-child(2) > table > tbody > tr:nth-child(3) > td:nth-child(2) > select > option:nth-child(3)'))
        )
        lv4_4_bachelor.click()
        logger.info(f"Selected {lv4_4_bachelor.text}")

        search_button = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'input[type="submit"][value="ค้นหา"]'))
        )
        search_button.click()
        logger.info("Searching")

        # Wait for the new page to load
        return True

    except Exception as e:
        logger.error(f"Error during filter selection: {e}")
        return False

def split_groups_with_embedded_label(data):
    groups = []
    current_group = []
    counter = 1 

    for item in data:
        if 'อาจารย์:' in item[0] and current_group:  # เจอ "อาจารย์:" ใหม่ และมีข้อมูลในกลุ่ม
            # ฝัง Label ในรายการย่อยของกลุ่ม
            for sub_item in current_group:
                sub_item.insert(0, f'{counter:02}')
            groups.append(current_group)  # เพิ่มกลุ่ม
            current_group = []  # เริ่มกลุ่มใหม่
            counter += 1  # เพิ่มค่า Label
        
        current_group.append(item)  # เพิ่มข้อมูลในกลุ่มปัจจุบัน
    
    if current_group:  # เพิ่มกลุ่มสุดท้ายถ้ามีข้อมูล
        for sub_item in current_group:
            sub_item.insert(0, f'{counter:02}')
        groups.append(current_group)
    
    return groups

def transform_clead_cd(cdcd):
    clead_cd_dict = {}
    for group_data in cdcd:
        group_id = group_data[0][0] 
        details = {item[1]: item[2] for item in group_data}
        clead_cd_dict[group_id] = details
    return clead_cd_dict

def thai(text):
    cleaned = re.sub(r'(หมายเหตุ.*|หมวด.*|CON:.*|CO:.*)', '', text)
    cleaned = re.sub(r'[A-Za-z0-9]+', '', cleaned)
    cleaned = cleaned.strip()
    
    return cleaned


def details(course_link, year, semester):
    global All_data
    try:
        link_web = f'https://reg3.su.ac.th/registrar/{course_link}'
        driver.get(link_web)
        time.sleep(3)
        page_source = driver.page_source
        soup = BeautifulSoup(page_source, 'html.parser')
        data1 = []
        tr = soup.find_all('font',class_='normaldetail')
        for u in tr:
            text = u.get_text()
            data1.append(text)
        #print('-----------------------data1-------------------')
        #print(len(data1))
        #print(data1)
        data_chai = [0, 1, 2, 4, 5, 6, 8]
        new_list = [data1[i] for i in data_chai if i < len(data1)]
        data = []
        final_list = [] 
        cd = []
        rows = soup.find_all('tr', class_='normalDetail')
        for row in rows:
            cells = row.find_all('td')
            data = [cell.get_text(strip=True) for cell in cells]
            if len(data) == 2 :
                data_chai2 = [data[1]]
                #print(data_chai2)
            elif len(data) == 14 :
                #print(len(data),data) 
                new_list2 = [data[1],data[3],data[4],data[5],data[6],data[7],data[8],data[9],data[10],data[11],data[12],data[13]]
                #print(new_list2)
                final_list.append(new_list2)
                cleaned_data = []
                current_group = None
                for row in final_list:
                    if row[0]:
                        current_group = row[0]
                    if current_group:
                        cleaned_data.append([current_group, row[1], row[2],row[3],row[4],row[5]])
            elif len(data) == 5 :
                data_chai11 = [data[3],data[4]]
                cd.append(data_chai11)
            else:
                data_chai6 = [data[3],data[4],data[5]]
        descriptions = soup.find_all('td',class_='normalDetail')
        text_yaw = [i.get_text() for i in descriptions]
        cleaned_data1 = [thai(text) for text in text_yaw]
        #print(cleaned_data1)
        #print(new_list)
        #print(data_chai2)
        #print(cleaned_data)
        #print(s)
        #print(cd)
        clead_cd = split_groups_with_embedded_label(cd)
        clead_cd_dict = transform_clead_cd(clead_cd)
        #print(clead_cd_dict)
        grouped_schedule = {}
        for entry in cleaned_data:
            group = entry[0]
            detail = f"วัน : {entry[1]} _ เวลา : {entry[2]} _ ห้องเรียน : {entry[3]} _ อาคาร : {entry[4]} _ ประเภทการสอน : {entry[5]}"
            if group not in grouped_schedule:
                grouped_schedule[group] = []
            grouped_schedule[group].append(detail)

        subjects = []  # เก็บข้อมูลเป็น list
        condition = soup.find('font', color="#800000", string="เงื่อนไขรายวิชา:")
        if condition:
            table = condition.find_next('table', class_='normalDetail')
            if table:
                for row in table.find_all('tr'):
                    tds = row.find_all('td')  # กรองเฉพาะ td ที่มี bgcolor
                    for td in tds:
                        # ดึงข้อความจาก td ทั้งหมด
                        text = td.get_text(strip=True)  # ใช้ separator เพื่อเก็บข้อความทั้งหมด
                        if text:
                            subjects.append(text)  # เพิ่มข้อความลงใน list
                    # ดึงข้อมูลจาก <a>
                    links = row.find_all('a')
                    for a in links:
                        href = a.get_text(strip=True)
                        if href:  # ตรวจสอบว่ามีข้อความ
                            subjects.append(href)

        # ลบค่าซ้ำ แต่ยังคงลำดับเดิม
        subjects = list(dict.fromkeys(subjects))
        print("รายวิชา:", subjects)
                    
        data0 = []
        for group, details in grouped_schedule.items():
            group_info = clead_cd_dict.get(group, {})
            data0.append({
                "ปี": year,
                "เทอม": semester,
                "รหัสวิชา": new_list[0],
                "ชื่อภาษาอังกฤษ" : new_list[1],
                "ชื่อไทย": new_list[2],
                "รหัสกลุ่ม": group,
                "ภาควิชา":new_list[3],
                "หน่วยกิต" : new_list[5],
                "รายละเอียดวัน-เวลา-ห้องเรียน-อาคาร-ประเภทการสอน": f"เรียนทั้งหมด {len(details)} คาบดังนี้ " + ", ".join(details),
                "รายละเอียดวิชา":cleaned_data1,
                "เงื่อนไขรายวิชา":subjects,
                "อาจารย์": group_info.get("อาจารย์:", ""),
                "สอบกลางภาค": group_info.get("สอบกลางภาค:", ""),
                "สอบปลายภาค": group_info.get("สอบปลายภาค:", ""),
                "หมายเหตุ": group_info.get("หมายเหตุ:", ""),
                'อ้างอิง':link_web
            })
        data = pd.DataFrame(data0)
        All_data = pd.concat([All_data,data])
        #print(All_data)

        return {}
    except Exception as e:
        #logger.error(f"Error scraping course details: {e}")
        return {}

def scrape_course_data(year, semester):
    global All_data
    try:
        base_url = "https://reg3.su.ac.th/registrar/"
        # Get the page source after search results load
        page_source = driver.page_source
        soup = BeautifulSoup(page_source, 'html.parser')
        #print(s)
        course = soup.find_all('tr', class_='normalDetail')
        #keyboard.wait('enter')
        data_list = []
        for i in range(len(course)):
            td_tags = course[i].find_all('td', string=lambda text: text and text.strip() == "W")
            if td_tags:
                print(f"พบ W ในแถวที่ {i}:", [td.get_text(strip=True) for td in td_tags])

                course_info = course[i].find('a')
                if course_info:
                    course_link = course_info.get('href')
                    course_id = course_info.text
                    #print(f'Course Link: {course_link}')
                    #print(f'Course ID: {course_id}')
                    if course_id in All_data['รหัสวิชา'].unique():
                        print(f"Found Duplicated Course ID: {course_id}, Skipped..")
                        pass
                    else:
                        details(course_link, year, semester)
                        print(f'Course ID: {course_id}')
                        All_data.to_csv(f'Data_{year}.csv',index=False, encoding='utf-8-sig')
                    data_list.append({'Id': course_id, 'Link': course_link, 'Status': td_tags})

                    Data = pd.DataFrame(data_list)
                    Data.to_csv('Status.csv',index = False,encoding = 'utf-8-sig')
                    #details(course_link)
                else:
                    print(f"Link มีสถานะเป็น C ในช่อง : {i}")
        
            if course_id in All_data['รหัสวิชา'].unique():
                    print(f"Found Duplicated Course ID: {course_id}, Skipped..")
                    pass
            else:
                details(course_link, year, semester)
                print(f'Course ID: {course_id}')
                All_data.to_csv(f'Data_{year}.csv',index=False, encoding='utf-8-sig')

        next_page = soup.find('a', string='[หน้าต่อไป]')
        if next_page:
            link_tadpai = next_page.get('href')
            next_link = urllib.parse.urljoin(base_url, link_tadpai)
            print(next_link)
            driver.get(next_link)
            scrape_course_data(year, semester)
        else:
            print("ไม่พบหน้าต่อไป, ข้าม..")
    except Exception as e:
        logger.error(f"Error during scraping: {e}")
        return []

def filter_item(main_url, year, semester):
    # current_year = datetime.now().year + 543
    # years = [2566,2567,2568]
    # years2 = [year for year in range(2561, current_year + 1)]
    # print(years2)
    # sems = [1, 2, 3]
    # for y in years:
    #     for s in sems:
    if select_filters(year, semester):
        retry_limit = 20
        for retry in range(retry_limit):
            time.sleep(10)
            if driver.current_url != main_url:
                logger.info(f'Forwarding to {driver.current_url}')
                scrape_course_data(year, semester)
                #driver.get('https://reg3.su.ac.th/registrar/class_info.asp?avs924956177=1')
                #driver.quit()
                #time.sleep(10)
                break
            
            elif retry == 19:
                print('timed out')
                return []
            
            else:
                print(f'{retry}/{retry_limit} Access denied, retrying')


In [20]:
'''if course_id in All_data['รหัสวิชา'].unique():
                # ดึงปีทั้งหมดที่เกี่ยวข้องกับ course_id
                existing_years = All_data.loc[All_data['รหัสวิชา'] == course_id, 'ปี']
                if year > existing_years.max():
                    # ลบข้อมูลเก่า
                    All_data = All_data[~((All_data['รหัสวิชา'] == course_id) & (All_data['ปี'] == existing_years.max()))]
                    # เพิ่มข้อมูลใหม่
                    details(course_link, year, semester)
                    print(f"Updated Course ID: {course_id} with Year: {year}")
                else:
                    print(f"Found Duplicated Course ID: {course_id} with Year {year}, Skipped..")
            else:
                # เพิ่มข้อมูลใหม่หากไม่มีใน All_data
                details(course_link, year, semester)
                print(f"Added New Course ID: {course_id} with Year: {year}")'''

'if course_id in All_data[\'รหัสวิชา\'].unique():\n                # ดึงปีทั้งหมดที่เกี่ยวข้องกับ course_id\n                existing_years = All_data.loc[All_data[\'รหัสวิชา\'] == course_id, \'ปี\']\n                if year > existing_years.max():\n                    # ลบข้อมูลเก่า\n                    All_data = All_data[~((All_data[\'รหัสวิชา\'] == course_id) & (All_data[\'ปี\'] == existing_years.max()))]\n                    # เพิ่มข้อมูลใหม่\n                    details(course_link, year, semester)\n                    print(f"Updated Course ID: {course_id} with Year: {year}")\n                else:\n                    print(f"Found Duplicated Course ID: {course_id} with Year {year}, Skipped..")\n            else:\n                # เพิ่มข้อมูลใหม่หากไม่มีใน All_data\n                details(course_link, year, semester)\n                print(f"Added New Course ID: {course_id} with Year: {year}")'

In [36]:
url_registration = 'https://reg3.su.ac.th/registrar/class_info.asp?avs924956177=1'

# Parameters
current_year = [datetime.now().year + 543, datetime.now().year + 542]
sems = [1,2,3]

for year in current_year:
    for semester in sems:
        
        if len(All_data) == 0 and semester == 2:
            print(f'Data not found on year {year}, Running previous year instead.')
            break

        driver.get(url_registration)
        selected_categories = filter_item(url_registration, year, semester)

2025-01-30 01:16:35,663 - INFO - Filtering for year 2568 semester 1


processing


2025-01-30 01:16:36,136 - INFO - Selected รหัสวิชา ที่ยังใช้อยู่
2025-01-30 01:16:36,602 - INFO - Selected วิทยาศาสตร์
2025-01-30 01:16:37,066 - INFO - Selected 250
2025-01-30 01:16:37,530 - INFO - Selected 2568
2025-01-30 01:16:37,995 - INFO - Selected 1
2025-01-30 01:16:38,461 - INFO - Selected 2 : พระราชวังสนามจันทร์
2025-01-30 01:16:38,926 - INFO - Selected 1 : ปริญญาตรี
2025-01-30 01:16:39,264 - INFO - Searching
2025-01-30 01:16:49,271 - INFO - Forwarding to https://reg3.su.ac.th/registrar/class_info_1.asp?backto=home&avs279655917=524
2025-01-30 01:16:49,290 - ERROR - Error during scraping: cannot access local variable 'course_id' where it is not associated with a value
2025-01-30 01:16:49,570 - INFO - Filtering for year 2568 semester 2


processing


2025-01-30 01:16:50,054 - INFO - Selected รหัสวิชา ที่ยังใช้อยู่
2025-01-30 01:16:50,520 - INFO - Selected วิทยาศาสตร์
2025-01-30 01:16:50,969 - INFO - Selected 250
2025-01-30 01:16:51,419 - INFO - Selected 2568
2025-01-30 01:16:51,870 - INFO - Selected 2
2025-01-30 01:16:52,336 - INFO - Selected 2 : พระราชวังสนามจันทร์
2025-01-30 01:16:52,802 - INFO - Selected 1 : ปริญญาตรี
2025-01-30 01:16:53,141 - INFO - Searching
2025-01-30 01:17:03,145 - INFO - Forwarding to https://reg3.su.ac.th/registrar/class_info_1.asp?backto=home&avs279655917=527
2025-01-30 01:17:03,153 - ERROR - Error during scraping: cannot access local variable 'course_id' where it is not associated with a value
2025-01-30 01:17:03,365 - INFO - Filtering for year 2568 semester 3


processing


2025-01-30 01:17:03,842 - INFO - Selected รหัสวิชา ที่ยังใช้อยู่
2025-01-30 01:17:04,306 - INFO - Selected วิทยาศาสตร์
2025-01-30 01:17:04,755 - INFO - Selected 250
2025-01-30 01:17:05,219 - INFO - Selected 2568
2025-01-30 01:17:05,685 - INFO - Selected 3
2025-01-30 01:17:06,146 - INFO - Selected 2 : พระราชวังสนามจันทร์
2025-01-30 01:17:06,611 - INFO - Selected 1 : ปริญญาตรี
2025-01-30 01:17:06,942 - INFO - Searching
2025-01-30 01:17:16,946 - INFO - Forwarding to https://reg3.su.ac.th/registrar/class_info_1.asp?backto=home&avs279655917=530
2025-01-30 01:17:16,955 - ERROR - Error during scraping: cannot access local variable 'course_id' where it is not associated with a value
2025-01-30 01:17:17,172 - INFO - Filtering for year 2567 semester 1


processing


2025-01-30 01:17:17,671 - INFO - Selected รหัสวิชา ที่ยังใช้อยู่
2025-01-30 01:17:18,134 - INFO - Selected วิทยาศาสตร์
2025-01-30 01:17:18,589 - INFO - Selected 250
2025-01-30 01:17:19,051 - INFO - Selected 2567
2025-01-30 01:17:19,517 - INFO - Selected 1
2025-01-30 01:17:19,977 - INFO - Selected 2 : พระราชวังสนามจันทร์
2025-01-30 01:17:20,441 - INFO - Selected 1 : ปริญญาตรี
2025-01-30 01:17:25,679 - INFO - Searching


0/20 Access denied, retrying
1/20 Access denied, retrying
2/20 Access denied, retrying


2025-01-30 01:18:05,690 - INFO - Forwarding to https://reg3.su.ac.th/registrar/class_info_1.asp?backto=home&avs279655917=533


พบ W ในแถวที่ 0: ['W']
Found Duplicated Course ID: 511100-165, Skipped..
Found Duplicated Course ID: 511100-165, Skipped..
พบ W ในแถวที่ 1: ['W']
Found Duplicated Course ID: 511100-165, Skipped..
Found Duplicated Course ID: 511100-165, Skipped..
Found Duplicated Course ID: 511100-165, Skipped..
Found Duplicated Course ID: 511100-165, Skipped..
พบ W ในแถวที่ 4: ['W']
Found Duplicated Course ID: 511101-165, Skipped..
Found Duplicated Course ID: 511101-165, Skipped..
Found Duplicated Course ID: 511101-165, Skipped..
พบ W ในแถวที่ 6: ['W']
รายวิชา: []
Course ID: 511103-165
Found Duplicated Course ID: 511103-165, Skipped..
พบ W ในแถวที่ 7: ['W']
Found Duplicated Course ID: 511103-165, Skipped..
Found Duplicated Course ID: 511103-165, Skipped..
พบ W ในแถวที่ 8: ['W']
รายวิชา: []
Course ID: 511104-165
Found Duplicated Course ID: 511104-165, Skipped..
พบ W ในแถวที่ 9: ['W']
รายวิชา: []
Course ID: 511104-2560
Found Duplicated Course ID: 511104-2560, Skipped..
พบ W ในแถวที่ 10: ['W']
รายวิชา: []

2025-01-30 01:27:34,939 - ERROR - Error during scraping: cannot access local variable 'course_id' where it is not associated with a value
2025-01-30 01:27:35,191 - INFO - Filtering for year 2567 semester 2


processing


2025-01-30 01:27:35,686 - INFO - Selected รหัสวิชา ที่ยังใช้อยู่
2025-01-30 01:27:36,151 - INFO - Selected วิทยาศาสตร์
2025-01-30 01:27:36,615 - INFO - Selected 250
2025-01-30 01:27:37,080 - INFO - Selected 2567
2025-01-30 01:27:37,544 - INFO - Selected 2
2025-01-30 01:27:38,010 - INFO - Selected 2 : พระราชวังสนามจันทร์
2025-01-30 01:27:38,501 - INFO - Selected 1 : ปริญญาตรี
2025-01-30 01:27:44,161 - INFO - Searching


0/20 Access denied, retrying
1/20 Access denied, retrying


2025-01-30 01:28:14,169 - INFO - Forwarding to https://reg3.su.ac.th/registrar/class_info_1.asp?backto=home&avs279655917=1550


พบ W ในแถวที่ 0: ['W']
Found Duplicated Course ID: 511100-165, Skipped..
Found Duplicated Course ID: 511100-165, Skipped..
Found Duplicated Course ID: 511100-165, Skipped..
Found Duplicated Course ID: 511100-165, Skipped..
พบ W ในแถวที่ 3: ['W']
Found Duplicated Course ID: 511101-165, Skipped..
Found Duplicated Course ID: 511101-165, Skipped..
Found Duplicated Course ID: 511101-165, Skipped..
Found Duplicated Course ID: 511101-165, Skipped..
พบ W ในแถวที่ 6: ['W']
รายวิชา: ['511101-165']
Course ID: 511102-165
Found Duplicated Course ID: 511102-165, Skipped..
Found Duplicated Course ID: 511102-165, Skipped..
พบ W ในแถวที่ 8: ['W']
Found Duplicated Course ID: 511103-165, Skipped..
Found Duplicated Course ID: 511103-165, Skipped..
พบ W ในแถวที่ 9: ['W']
Found Duplicated Course ID: 511104-165, Skipped..
Found Duplicated Course ID: 511104-165, Skipped..
Found Duplicated Course ID: 511104-165, Skipped..
พบ W ในแถวที่ 11: ['W']
Found Duplicated Course ID: 511105-165, Skipped..
Found Duplicate

2025-01-30 01:57:46,058 - INFO - Filtering for year 2567 semester 3


processing


2025-01-30 01:57:46,544 - INFO - Selected รหัสวิชา ที่ยังใช้อยู่
2025-01-30 01:57:47,023 - INFO - Selected วิทยาศาสตร์
2025-01-30 01:57:47,472 - INFO - Selected 250
2025-01-30 01:57:47,923 - INFO - Selected 2567
2025-01-30 01:57:48,373 - INFO - Selected 3
2025-01-30 01:57:48,823 - INFO - Selected 2 : พระราชวังสนามจันทร์
2025-01-30 01:57:49,273 - INFO - Selected 1 : ปริญญาตรี
2025-01-30 01:57:49,636 - INFO - Searching
2025-01-30 01:57:59,639 - INFO - Forwarding to https://reg3.su.ac.th/registrar/class_info_1.asp?backto=home&avs279655917=3846
2025-01-30 01:57:59,649 - ERROR - Error during scraping: cannot access local variable 'course_id' where it is not associated with a value


In [1]:
"รหัสวิชา 5123 วิชา pafmkds มีกลุ่มทั้งหมด 3 กลุ่ม กลุ่มที่ 1 เรียนวันจันทร์ เวลา "

'รหัสวิชา 5123 วิชา pafmkds มีกลุ่มทั้งหมด 3 กลุ่ม กลุ่มที่ 1 เรียนวันจันทร์ เวลา '

In [2]:
t = 'https://reg3.su.ac.th/registrar/class_info_2.asp?backto=home&option=0&courseid=131921&acadyear=2567&semester=1&avs920327394=4'

In [3]:
## ALL Data

# main_url = 'https://reg3.su.ac.th/registrar/class_info_2.asp?backto=home&option=0&courseid=131921&acadyear=2567&semester=1&avs920327394=4'
# url_registration = 'https://reg3.su.ac.th/registrar/class_info.asp?avs924956177=1'
# time.sleep(1)

# # Parameters
# current_year = datetime.now().year + 543
# #years = [2561,2562]
# years = [year for year in range(2561, current_year + 1)]
# #print(years2)
# sems = [1,2,3]


# for y in years:
#     for s in sems:
#         driver.get(url_registration)
#         selected_categories = filter_item(url_registration, y, s)


# time.sleep(3)
# # driver.back()
# # driver.quit()


In [5]:
 if course_id in All_data['รหัสวิชา'].unique():
                # ดึงปีทั้งหมดที่เกี่ยวข้องกับ course_id
                existing_years = All_data.loc[All_data['รหัสวิชา'] == course_id, 'ปี']
                if year > existing_years.max():
                    # ลบข้อมูลเก่า
                    All_data = All_data[~((All_data['รหัสวิชา'] == course_id) & (All_data['ปี'] == existing_years.max()))]
                    # เพิ่มข้อมูลใหม่
                    details(course_link, year, semester)
                    print(f"Updated Course ID: {course_id} with Year: {year}")
                else:
                    print(f"Found Duplicated Course ID: {course_id} with Year {year}, Skipped..")
            else:
                # เพิ่มข้อมูลใหม่หากไม่มีใน All_data
                details(course_link, year, semester)
                print(f"Added New Course ID: {course_id} with Year: {year}")

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 12)

In [21]:
#page = 'https://reg6.su.ac.th/registrar/class_info.asp?avs1049896730=62'
url_registration = 'https://reg3.su.ac.th/registrar/class_info.asp?'
soup = BeautifulSoup(url_registration, 'html.parser')

  soup = BeautifulSoup(url_registration, 'html.parser')


In [22]:
s = soup.find_all('td')
print(s)
'https://reg3.su.ac.th/registrar/class_info.asp'

[]


'https://reg3.su.ac.th/registrar/class_info.asp'

แยก beutifulsoup4