# Collect Data by Scraping using Script

In [1]:
# Import packages
import bs4
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

ModuleNotFoundError: No module named 'webdriver_manager'

## Selenium setup

In [2]:
driver_path = r'C:\Program Files (x86)\chromedriver-win64\chromedriver.exe'

def create_driver():
    options = Options()
    options.add_argument("--start-maximized")  # Start the browser maximized
    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
    return driver


# Scraping Data from Jobsdb Data

In [7]:
# Define the base URL and the page URL
base_url = 'https://th.jobsdb.com'
search_data = '/data-jobs'
sec_page_url = '?page=2'

# Fetch the webpage content
page1 = requests.get(urljoin(base_url, search_data))

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(page1.content, 'html.parser')

In [8]:
box = soup.find('div',{'class':'_1decxdv0 _110qf3s4y _110qf3s4w'})

In [9]:
company_name = soup.find('a', {'data-automation': 'jobCompany'})
position = soup.find('a', {'data-automation':'jobTitle'})
location = soup.find('a', {'data-automation': 'jobLocation'})
industry = soup.find('a',{'data-automation':'jobClassification'})
date = soup.find('span',{'data-automation':'jobListingDate'})

In [10]:
company_name.text

'Universal Food Public Company Limited'

In [11]:
position.text

'Data Analyst'

In [12]:
location.text

'Bangkok'

In [13]:
industry.text

'(Accounting)'

In [14]:
date.text

'4h ago'

In [15]:
print(company_name.text)
print(position.text)
print(location.text)
print(industry.text)
print(date.text)

Universal Food Public Company Limited
Data Analyst
Bangkok
(Accounting)
4h ago


In [16]:
company_name_list = []
position_list = []
location_list = []

## Scrap one page to test

In [17]:
# Extract job titles
job_title_elements = soup.find_all('a', {'data-automation':'jobTitle'})
job_titles = [div.get_text(strip=True) for div in job_title_elements]

# Extract company names
company_name_elements = soup.find_all('a', {'data-automation': 'jobCompany'})
company_names = [a.get_text(strip=True) for a in company_name_elements]

# Extract Locations
location_elements = soup.find_all('a', {'data-automation': 'jobLocation'})
location = [a.get_text(strip=True) for a in location_elements]

# Extract industry
industry_elements = soup.find_all('a',{'data-automation':'jobClassification'})
industry = [a.get_text(strip=True) for a in industry_elements]

# Extract time
time_elements = soup.find_all('span',{'data-automation':'jobListingDate'})
time = [a.get_text(strip=True) for a in time_elements]

# Combine job titles and company names into a list of tuples
data = list(zip(job_titles, company_names, location, industry,time))

# Create DataFrame
data_jobs = pd.DataFrame(data, columns=['job_title', 'company_name','location','industry','time'])

In [18]:
data_jobs.head(3)

Unnamed: 0,job_title,company_name,location,industry,time
0,Data Analyst,Universal Food Public Company Limited,Bangkok,(Accounting),4h ago
1,Data Analyst,THAI UNION GROUP PCL.,Phaya Thai,(Information & Communication Technology),22h ago
2,Data Analytics,PTG Energy Public Company Limited,Bangkok,(Information & Communication Technology),1d ago


In [19]:
all_jobs = []

page_number = 1
previous_content = ""

while True:
    url = f"https://th.jobsdb.com/data-jobs?page={page_number}"
    response = requests.get(url)

    if response.status_code != 200:
        print("Failed to retrieve page")
        break

    soup = BeautifulSoup(response.content, 'html.parser')

    # Extract job titles
    job_title_elements = soup.find_all('a', {'data-automation':'jobTitle'})
    job_titles = [div.get_text(strip=True) for div in job_title_elements]

    # Extract company names
    company_name_elements = soup.find_all('a', {'data-automation': 'jobCompany'})
    company_names = [a.get_text(strip=True) for a in company_name_elements]

    # Extract locations
    location_elements = soup.find_all('a', {'data-automation': 'jobLocation'})
    locations = [a.get_text(strip=True) for a in location_elements]

    # Extract industry
    industry_elements = soup.find_all('a',{'data-automation':'jobClassification'})
    industries = [a.get_text(strip=True) for a in industry_elements]

    # Extract time (job posting date)
    time_elements = soup.find_all('span',{'data-automation':'jobListingDate'})
    times = [a.get_text(strip=True) for a in time_elements]

    # Check if extraction was successful
    if len(job_titles) == 0 or len(company_names) == 0 or len(locations) == 0 or len(industries) == 0 or len(times) == 0:
        print(f"No new jobs found on page {page_number}, stopping.")
        break
        
    page_data = list(zip(job_titles, company_names, locations, industries, times))
    all_jobs.extend(page_data)

    # Compare content to determine if the page is the same
    current_content = "".join(job_titles)
    if current_content == previous_content:
        print(f"No new content found on page {page_number}, stopping the loop.")
        break

    # Success Message
    print(f"Successfully scraped page {page_number}")

    previous_content = current_content
    page_number += 1

# Scraped dataframe name 'jobsdb_data'
jobsdb_data = pd.DataFrame(all_jobs, columns=['job_title', 'company_name', 'location', 'industry', 'time'])

Successfully scraped page 1
Successfully scraped page 2
Successfully scraped page 3
Successfully scraped page 4
Successfully scraped page 5
Successfully scraped page 6
Successfully scraped page 7
Successfully scraped page 8
Successfully scraped page 9
Successfully scraped page 10


KeyboardInterrupt: 

In [None]:
jobsdb_data.shape

NameError: name 'jobsdb_data' is not defined

In [None]:
jobsdb_data.isnull().sum()

NameError: name 'jobsdb_data' is not defined

In [None]:
jobsdb_data['company_name'].value_counts()

company_name
Agoda Services Co., Ltd.                                    326
PERSOLKELLY HR Services Recruitment (Thailand) Co., Ltd.    158
Company Confidential                                        117
Western Digital (Thailand) Co., Ltd.                         89
TikTok Pte. Ltd.                                             80
                                                           ... 
NISSAN MOTOR ASIA PACIFIC (THAILAND) CO., LTD.                1
Syaqua Siam Co., Ltd.                                         1
SPIE Global Services Energy (Thailand) Co., Ltd.              1
Herba Bangkok Sociedad Limitada                               1
Thai Christian School (TCS)                                   1
Name: count, Length: 1893, dtype: int64

In [None]:
jobsdb_data['location'].value_counts()

location
Bangkok                       3327
Chon Buri                      201
Pathum Wan                     170
Samut Prakan                   168
Sathon                         144
                              ... 
Mueang Nakhon Si Thammarat       1
Chai Prakan                      1
Mueang Nakhon Ratchasima         1
Nakhon Luang                     1
Phetchabun                       1
Name: count, Length: 158, dtype: int64

In [None]:
jobsdb_data['location'].nunique()

158

In [None]:
jobsdb_data['location'].unique().tolist()

['Bang Rak',
 'Bangkok',
 'Samut Prakan',
 'Chatuchak',
 'Phra Khanong',
 'Phaya Thai',
 'Bang Kho Laem',
 'Ratchathewi',
 'Pathum Wan',
 'Lak Si',
 'Yan Nawa',
 'Bang Na',
 'Rayong',
 'Mueang Samut Prakan',
 'Khlong Toei',
 'Mueang Nonthaburi',
 'Nonthaburi',
 'Bang Kapi',
 'Lat Phrao',
 'Sathon',
 'Huai Khwang',
 'Bangkok Metropolitan Region',
 'Chon Buri',
 'Din Daeng',
 'Vadhana',
 'Thung Khru',
 'Bang Yai',
 'Bang Sao Thong',
 'Don Mueang',
 'Pathum Thani',
 'Saphan Sung',
 'Ayutthaya',
 'Dusit',
 'Wang Noi',
 'Chachoengsao',
 'Pak Kret',
 'Bangkok Noi',
 'Bang Phlat',
 'Suan Luang',
 'Sung Noen',
 'Nakhon Ratchasima',
 'Khon Kaen',
 'Bueng Kum',
 'Prawet',
 'Sattahip',
 'Phasi Charoen',
 'Thanyaburi',
 'Khlong San',
 'Others',
 'Surin',
 'Si Maha Phot',
 'Prachin Buri',
 'Min Buri',
 'Khlong Luang',
 'Mueang Pathum Thani',
 'Sam Phran',
 'Nakhon Pathom',
 'Phuket',
 'Mueang Lamphun',
 'Lamphun',
 'Lat Krabang',
 'Bang Phli',
 'Ban Chang',
 'Si Racha',
 'Pluak Daeng',
 'Samut Song

## Jobtopgun

In [3]:
jobtopgun_data_1st = 'https://www.jobtopgun.com/en/jobs?keywords=data'
jobtopgun_data_morepages = '&page=2'
jobtopgun_data_secondp = jobtopgun_data_1st + jobtopgun_data_morepages

In [3]:
driver_jobtopgun = webdriver.Chrome()  
jobtopgun_jobs_data = []

# Starting page number
page_number = 1

while True:
    # Construct URL with the current page number
    url = f'https://www.jobtopgun.com/en/jobs?keywords=data&page={page_number}'
    print(f"Navigating to jobtopgun 'data' search page number: {page_number}")
    
    try:
        # Navigate to the webpage
        driver_jobtopgun.get(url)
        wait = WebDriverWait(driver_jobtopgun, 10)
        original_window = driver_jobtopgun.current_window_handle  # Hold Current windows

        jobtopgun_data_firstpage = []
        i = 1  # Start with the first job listing

        while True:
            xpath = f'//*[@id="scrollable-job-cards-container"]/a[{i}]/div[1]'
            try:
                # Wait for the element to be clickable
                element = wait.until(EC.element_to_be_clickable((By.XPATH, xpath)))
                element.click()

                # Wait for the new window/tab to open and switch to it
                wait.until(EC.number_of_windows_to_be(2))
                new_window = [window for window in driver_jobtopgun.window_handles if window != original_window][0]
                driver_jobtopgun.switch_to.window(new_window)

                # Scrape data from the job detail page
                try:
                    # Details to scrape data
                    job_title = driver_jobtopgun.find_element(By.XPATH, '//h1[@class="font-medium text-sub-primary text-lg"]').text
                    company_name = driver_jobtopgun.find_element(By.XPATH, '//span[@class="flex-1 font-medium text-lg"]').text
                    industry = driver_jobtopgun.find_element(By.XPATH, '//*[@id="job-details"]/div[1]/div/div[1]/div[1]/section/div[1]/span').text
                    posted_time = driver_jobtopgun.find_element(By.XPATH, '//*[@id="job-details"]/div[1]/div/div[1]/div[1]/section/div/span').text
                    experience = driver_jobtopgun.find_element(By.XPATH, '//*[@id="job-details"]/div[2]/section[1]/div/div[2]/span').text
                    salary = driver_jobtopgun.find_element(By.XPATH, '//*[@id="job-details"]/div[2]/section[1]/div/div[2]/span').text
                    education = driver_jobtopgun.find_element(By.XPATH, '//*[@id="job-details"]/div[2]/section[1]/div/div[4]/span').text
                    location = driver_jobtopgun.find_element(By.XPATH, '//*[@id="job-details"]/div[2]/section[1]/div/div[5]/span').text
                    responsibility = driver_jobtopgun.find_element(By.XPATH, '//*[@id="job-details"]/div[2]/section[2]/div').text
                    requirements = driver_jobtopgun.find_element(By.XPATH, '//*[@id="job-details"]/div[2]/section[3]').text
                    welfare_and_benefits = driver_jobtopgun.find_element(By.XPATH, '//*[@id="job-details"]/div[2]/section[4]').text
                    
                    job_info = {
                        "job_title": job_title,
                        "company_name": company_name,
                        "industry": industry,
                        "job_url": driver_jobtopgun.current_url,
                        "posted_time": posted_time,
                        "experience": experience,
                        "salary": salary,
                        "education": education,
                        "location": location,
                        "responsibility": responsibility,
                        "requirements": requirements,
                        "welfare_and_benefits": welfare_and_benefits
                    }
                    jobtopgun_data_firstpage.append(job_info)
                    print(f"Scraped data for job [{i}]: {job_info}")
                except Exception as e:
                    print(f"Failed to scrape data for job [{i}]: {e}")
                
                driver_jobtopgun.close()
                driver_jobtopgun.switch_to.window(original_window)

            except Exception as e:
                print(f"Failed to handle element a[{i}]: {e}")
                break  # Exit the loop if element is not found or not clickable

            i += 1  # Move to the next job listing

        # If no jobs are found, assume there are no more pages to scrape
        if not jobtopgun_data_firstpage:
            print(f"No data found on page {page_number}. Stopping.")
            break

        # Append the current page's data to the overall data list
        jobtopgun_jobs_data.extend(jobtopgun_data_firstpage)

        # Increment page number to move to the next page
        page_number += 1

    except Exception as main_exception:
        print(f"Error while navigating to page {page_number}: {main_exception}")
        break

driver_jobtopgun.quit()

print(f"Scraping complete. Total jobs scraped: {len(jobtopgun_jobs_data)}")


Navigating to jobtopgun 'data' search page number: 1
Scraped data for job [1]: {'job_title': 'A Data Protection Officer (DPO)', 'company_name': 'Sri Trang Agro-Industry Public Co., Ltd.', 'industry': 'Today', 'job_url': 'https://www.jobtopgun.com/en/job/4084/505', 'posted_time': 'Today', 'experience': '1 - 3 Year', 'salary': '1 - 3 Year', 'education': "Master's Degree", 'location': 'Negotiable', 'responsibility': "1. Ensure Compliance: Monitor and ensure the organization's compliance with data protection laws, regulations, and policies. 2. Advisory Role: Act as an advisor to the organization and its employees on matters related to data protection and privacy. 3. Risk Assessment: Conduct risk assessments to identify and evaluate data protection risks associated with the processing of personal data. 4. Policy Development: Develop and implement data protection policies, procedures, and guidelines to ensure adherence to legal requirements and best practices. 5. Training and Awareness: Prov

KeyboardInterrupt: 

In [13]:
jobtopgun_jobs_data = pd.DataFrame(jobtopgun_jobs_data)

In [14]:
jobtopgun_jobs_data

Unnamed: 0,job_title,company_name,industry,job_url,posted_time,experience,salary,education,location,responsibility,requirements,welfare_and_benefits
0,A Data Protection Officer (DPO),"Sri Trang Agro-Industry Public Co., Ltd.","Sri Trang Agro-Industry Public Co., Ltd.",https://www.jobtopgun.com/en/job/4084/505,Today,1 - 3 Year,1 - 3 Year,Master's Degree,Negotiable,1. Ensure Compliance: Monitor and ensure the o...,Requirements\nBachelor's or Master's degree in...,Welfare and Benefits\nMedical insurance\nProvi...
1,Data Governance and Data Analytic Specialist,"Isuzu Motors (Thailand) Co.,Ltd.","Isuzu Motors (Thailand) Co.,Ltd.",https://www.jobtopgun.com/en/job/1309/274,Today,10 - 20 Year,10 - 20 Year,Bachelor's Degree or Higher,Negotiable,Key Responsibilities\n• Management and promote...,Requirements\nProfile & Qualifications Require...,Welfare and Benefits\nAnnual leave maximum 14 ...
2,Data analysis officer,"Isuzu Motors (Thailand) Co.,Ltd.","Isuzu Motors (Thailand) Co.,Ltd.",https://www.jobtopgun.com/en/job/1309/273,Today,0 - 3 Year,0 - 3 Year,Bachelor's Degree,Negotiable,Key Responsibilities\n• Data analysis\n- Analy...,Requirements\nProfile & Qualifications Require...,Welfare and Benefits\nAnnual leave maximum 14 ...
3,เจ้าหน้าที่ Data Support / Data Mining,Advice IT Infinite PCL,Advice IT Infinite PCL,https://www.jobtopgun.com/en/job/20196/92,Today,1 Year,1 Year,Bachelor's Degree,"18,000 - 20,000 baht/month + Commission",- Maintenance ดูแลฐานข้อมูล\n- Validate ตรวจสอ...,Requirements\n- ไม่จำกัดเพศ อายุ 22-30 ปี\n- ว...,Welfare and Benefits\nProvident Fund\nStaff tr...
4,Data Engineer,Advice IT Infinite PCL,Advice IT Infinite PCL,https://www.jobtopgun.com/en/job/20196/94,Today,1 - 3 Year,1 - 3 Year,Bachelor's Degree,"18,000 - 20,000 baht/month + Commission",- ออกแบบ สร้าง ดูแลระบบการจัดเก็บข้อมูล การประ...,Requirements\n- ไม่จำกัดเพศ อายุ 23 - 30 ปี\n-...,Welfare and Benefits\nProvident Fund\nStaff tr...
...,...,...,...,...,...,...,...,...,...,...,...,...
460,พนักงานคีย์ข้อมูล,"Honda Trading Asia Co., Ltd.","Honda Trading Asia Co., Ltd.",https://www.jobtopgun.com/en/job/5351/258,Trading/Import/Export,0 - 3 Year,0 - 3 Year,Diploma of Vocational Education or higher,"10,000 - 12,000 baht/month",บริษัท ฮอนด้า เทรดดิ้ง เอเชีย จำกัด ดำเนินธุรก...,Requirements\nเพศชายหรือหญิง อายุระหว่าง 21-35...,Welfare and Benefits\nFlexible Working Hour (0...
461,O&M Engineer Solar farm (Lopburi),Sermsang Power Corporation PLC.,Sermsang Power Corporation PLC.,https://www.jobtopgun.com/en/job/34210/89,Petroleum/Energy/Mining,0 - 2 Year,0 - 2 Year,Bachelor's Degree or Higher,Negotiable,• Maintaining site performances in terms of Pe...,Requirements\n• Bachelor Bachelor’s Degree in ...,Welfare and Benefits\nLife insurance\nProviden...
462,Sales Engineer (chachoengsao),"Bangkok Cable Co., Ltd.","Bangkok Cable Co., Ltd.",https://www.jobtopgun.com/en/job/17794/43,02/09/2024,2 - 5 Year,2 - 5 Year,Bachelor's Degree,Negotiable,• Searching for new clients who could benefit ...,Requirements\n• BA/BE Degree in Business Admin...,Welfare and Benefits\nAnnual salary adjustment...
463,เจ้าหน้าที่พัฒนาธุรกิจ(CSR) เจ้าหน้าที่ธนบดีธน...,Bank of Ayudhya PLC (BAY),Bank of Ayudhya PLC (BAY),https://www.jobtopgun.com/en/job/3349/2125,Financial/Banking/Securities,2 - 20 Year,2 - 20 Year,Bachelor's Degree,Negotiable + Commission,นำเสนอ ให้คำแนะนำและปิดการขายในผลิตภัณฑ์และบริ...,Requirements\n1.จบวุฒิการศึกษาระดับปริญญาตรีขึ...,Welfare and Benefits\nProvident Fund\nStaff tr...


## Job Bkk

In [6]:
search_word = 'data'

In [7]:
driver_jobbkk = webdriver.Chrome()  
jobbkk_jobs_data = []

In [18]:
# Initialize WebDriver and WebDriverWait
driver_jobbkk = webdriver.Chrome()
wait = WebDriverWait(driver_jobbkk, 10)  # Adjust the timeout as needed

# Open the webpage
driver_jobbkk.get("https://www.jobbkk.com/jobs/lists/1/%E0%B8%AB%E0%B8%B2%E0%B8%87%E0%B8%B2%E0%B8%99,data,%E0%B8%97%E0%B8%B8%E0%B8%81%E0%B8%88%E0%B8%B1%E0%B8%87%E0%B8%AB%E0%B8%A7%E0%B8%B1%E0%B8%94,%E0%B8%97%E0%B8%B1%E0%B9%89%E0%B8%87%E0%B8%AB%E0%B8%A1%E0%B8%94.html?keyword_type=3&member_user_id=1")

# Store the original window handle
original_window = driver_jobbkk.current_window_handle

# Wait for the element to be present and then scroll into view
element = wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/section[7]/article/section/div[1]/div[6]/div/div[2]/div[2]/div/div[1]')))
driver_jobbkk.execute_script("arguments[0].scrollIntoView();", element)

# Wait for the link that opens in a new tab to be clickable and click it
wait = WebDriverWait(driver_jobbkk, 10)
link = wait.until(EC.element_to_be_clickable((By.XPATH, '/html/body/section[7]/article/section/div[1]/div[6]/div/div[3]/div/ul/li[3]/a')))
link.click()

# test to scrape data
company_name = driver_jobtopgun.find_element(By.XPATH,'/html/body/section[5]/article/div/article/section[2]/article[1]/div/div[2]/p').text
jobbkk_info = {'company_name':company_name}

driver_jobbkk.back()

TimeoutException: Message: 


In [21]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# Initialize WebDriver and WebDriverWait
driver_jobbkk = webdriver.Chrome()
wait = WebDriverWait(driver_jobbkk, 10)  # Adjust the timeout as needed

try:
    # Open the webpage
    driver_jobbkk.get("https://www.jobbkk.com/jobs/lists/1/%E0%B8%AB%E0%B8%B2%E0%B8%87%E0%B8%B2%E0%B8%99,data,%E0%B8%97%E0%B8%B8%E0%B8%81%E0%B8%88%E0%B8%B1%E0%B8%87%E0%B8%AB%E0%B8%A7%E0%B8%B1%E0%B8%94,%E0%B8%97%E0%B8%B1%E0%B8%87%E0%B8%AB%E0%B8%A1%E0%B8%94.html?keyword_type=3&member_user_id=1")

    # Store the original window handle
    original_window = driver_jobbkk.current_window_handle

    # Wait for the element to be present and then scroll into view
    element = wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/section[7]/article/section/div[1]/div[6]/div/div[2]/div[2]/div/div[1]')))
    driver_jobbkk.execute_script("arguments[0].scrollIntoView();", element)

    # Wait for the link to be present and then use JavaScript to open it in a new tab
    link = wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/section[7]/article/section/div[1]/div[6]/div/div[3]/div/ul/li[3]/a')))
    driver_jobbkk.execute_script("window.open(arguments[0].href);", link)

    # Wait for new window/tab to open
    time.sleep(5)  # Ensure the new tab has time to open
    windows = driver_jobbkk.window_handles
    if len(windows) == 2:
        new_window = [window for window in windows if window != original_window][0]
        driver_jobbkk.switch_to.window(new_window)

        # Test to scrape data
        try:
            company_name = driver_jobbkk.find_element(By.XPATH, '/html/body/section[5]/article/div/article/section[2]/article[1]/div/div[2]/p').text
            jobbkk_info = {'company_name': company_name}
            print(jobbkk_info)
        except Exception as e:
            print(f"Error scraping data: {e}")

    else:
        print("Expected a new window to open, but it did not.")

finally:
    # Close the browser
    driver_jobbkk.quit()


{'company_name': 'Data Engineer รับสมัครด่วน'}


In [22]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# Initialize WebDriver and WebDriverWait
driver_jobbkk = webdriver.Chrome()
wait = WebDriverWait(driver_jobbkk, 10)  # Adjust the timeout as needed

try:
    # Step 1: Open the main URL
    driver_jobbkk.get("https://www.jobbkk.com/jobs/lists/1/%E0%B8%AB%E0%B8%B2%E0%B8%87%E0%B8%B2%E0%B8%99,data,%E0%B8%97%E0%B8%B8%E0%B8%81%E0%B8%88%E0%B8%B1%E0%B8%87%E0%B8%AB%E0%B8%A7%E0%B8%B1%E0%B8%94,%E0%B8%97%E0%B8%B1%E0%B8%87%E0%B8%AB%E0%B8%A1%E0%B8%94.html?keyword_type=3&member_user_id=1")

    # Step 2: Click the element to open a new tab
    element_to_open_new_tab = wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/section[7]/article/section/div[1]/div[6]/div/div[3]/div/ul/li[3]/a')))
    driver_jobbkk.execute_script("window.open(arguments[0].href);", element_to_open_new_tab)

    # Step 3: Switch to the new tab
    WebDriverWait(driver_jobbkk, 10).until(lambda d: len(d.window_handles) == 2)  # Wait for the new tab to open
    original_window = driver_jobbkk.current_window_handle
    new_window = [window for window in driver_jobbkk.window_handles if window != original_window][0]
    driver_jobbkk.switch_to.window(new_window)

    # Step 4: Scrape data from the new tab
    try:
        company_name = driver_jobbkk.find_element(By.XPATH, '/html/body/section[5]/article/div/article/section[2]/article[1]/div/div[2]/p').text
        jobbkk_info = {'company_name': company_name}
        print(jobbkk_info)
    except Exception as e:
        print(f"Error scraping data: {e}")

    # Step 5: Close the new tab and switch back to the main tab
    driver_jobbkk.close()
    driver_jobbkk.switch_to.window(original_window)

finally:
    # Close the browser
    driver_jobbkk.quit()


{'company_name': 'Data Engineer รับสมัครด่วน'}


In [None]:
/html/body/section[7]/article/section/div[1]/div[6]/div/div[3]/div/ul/li[3]/a

In [None]:
/html/body/section[7]/article/section/div[1]/div[8]/div/div[3]/div/ul/li[3]/a

In [None]:
/html/body/section[7]/article/section/div[1]/div[10]/div/div[3]/div/ul/li[3]/a

In [None]:
/html/body/section[7]/article/section/div[1]/div[24]/div/div[3]/div/ul/li[3]/a

In [14]:
company_name = driver_jobtopgun.find_element(by.XPATH,'/html/body/section[5]/article/div/article/section[2]/article[1]/div/div[2]/p')
jobbkk_info = {'company_name':company_name}

SyntaxError: invalid syntax (2421741417.py, line 2)

In [None]:
job_title = driver_jobtopgun.find_element(By.XPATH, '//h1[@class="font-medium text-sub-primary text-lg"]').text
                    company_name = driver_jobtopgun.find_element(By.XPATH, '//span[@class="flex-1 font-medium text-lg"]').text
                    industry = driver_jobtopgun.find_element(By.XPATH, '//*[@id="job-details"]/div[1]/div/div[1]/div[1]/section/div[1]/span').text
                    posted_time = driver_jobtopgun.find_element(By.XPATH, '//*[@id="job-details"]/div[1]/div/div[1]/div[1]/section/div/span').text
                    experience = driver_jobtopgun.find_element(By.XPATH, '//*[@id="job-details"]/div[2]/section[1]/div/div[2]/span').text
                    salary = driver_jobtopgun.find_element(By.XPATH, '//*[@id="job-details"]/div[2]/section[1]/div/div[2]/span').text
                    education = driver_jobtopgun.find_element(By.XPATH, '//*[@id="job-details"]/div[2]/section[1]/div/div[4]/span').text
                    location = driver_jobtopgun.find_element(By.XPATH, '//*[@id="job-details"]/div[2]/section[1]/div/div[5]/span').text
                    responsibility = driver_jobtopgun.find_element(By.XPATH, '//*[@id="job-details"]/div[2]/section[2]/div').text
                    requirements = driver_jobtopgun.find_element(By.XPATH, '//*[@id="job-details"]/div[2]/section[3]').text
                    welfare_and_benefits = driver_jobtopgun.find_element(By.XPATH, '//*[@id="job-details"]/div[2]/section[4]').text
                    
                    job_info = {
                        "job_title": job_title,
                        "company_name": company_name,
                        "industry": industry,
                        "job_url": driver_jobtopgun.current_url,
                        "posted_time": posted_time,
                        "experience": experience,
                        "salary": salary,
                        "education": education,
                        "location": location,
                        "responsibility": responsibility,
                        "requirements": requirements,
                        "welfare_and_benefits": welfare_and_benefits
                    }