In [2]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementClickInterceptedException
from time import sleep

# Define .csv

In [3]:
import pandas as pd

# Define columns for the DataFrame
columns = [
    'company_name', 
    'recruiter_name',
    'job_title', 
    'location', 
    'employment_type',
    'job_level', 
    'job_function', 
    'salary_lower', 
    'salary_upper', 
    'salary_period',
    'num_applications', 
    'posted_date',
    'address',
    'experience',
    'closing_date',
    'job_description'
]

# Create an empty DataFrame with the defined columns
jobs = pd.DataFrame(columns=columns)

# Functions

In [4]:
# Define dictionaries/lists for categories
locations = ['Central', 'East', 'North', 'South', 'West', 'Islandwide']
employment_types = ['Permanent', 'Full Time', 'Part Time', 'Contract', 'Flexi-work', 'Temporary', 'Freelance', 'Internship/Attachment']
job_levels = ['Senior Management', 'Middle Management', 'Manager', 'Professional', 'Senior Executive', 'Executive', 'Junior Executive', 'Non-executive', 'Fresh/Entry Level']

# Example strings
example_strings = [
    "CentralContractJunior ExecutiveEngineering ...",
    "IslandwideContract ...Fresh/Entry LevelEducation And Training ...",
    "SouthInternship/AttachmentFresh/Entry LevelHealthcare / Pharmaceutical",
    "IslandwideContract ...ProfessionalBanking And Finance ...",
    "Contract ...ProfessionalBanking And Finance ..."
]

# Function to split the string and determine values for each category
def parse_contract_type(string):
    clean_str = string.replace(" ...", "")
    res_location, res_employment_type, res_job_level, res_job_function = None, None, None, None
    for location in locations:
        if location in clean_str:
            res_location = location
            clean_str = clean_str.replace(location, '')
            break
    for employment_type in employment_types:
        if employment_type in clean_str:
            res_employment_type = employment_type
            clean_str = clean_str.replace(employment_type, '')
            break
    for job_level in job_levels:
        if job_level in clean_str:
            res_job_level = job_level
            clean_str = clean_str.replace(job_level, '')
            break
    res_job_function = clean_str.strip()

    return res_location, res_employment_type, res_job_level, res_job_function  

# Print test
for i in example_strings:
    print(list(parse_contract_type(i)))

['Central', 'Contract', 'Executive', 'Junior Engineering']
['Islandwide', 'Contract', 'Fresh/Entry Level', 'Education And Training']
['South', 'Internship/Attachment', 'Fresh/Entry Level', 'Healthcare / Pharmaceutical']
['Islandwide', 'Contract', 'Professional', 'Banking And Finance']
[None, 'Contract', 'Professional', 'Banking And Finance']


In [5]:
def parse_salary_range(salary_range):
    # Extract upper and lower salary values
    salary_values = re.findall(r'(\d+(?:,\d+)?)', salary_range)
    salary_lower = int(salary_values[0].replace(',', '')) if salary_values else None
    salary_upper = int(salary_values[1].replace(',', '')) if len(salary_values) > 1 else None
    
    return salary_lower, salary_upper

In [6]:
import re

def parse_applications(applications):
    if applications:
        # Extract only the integer number
        match = re.search(r'\d+', applications)
        num_applications = int(match.group()) if match else None
    else:
        num_applications = None
    return num_applications

In [7]:
'''
GMP RECRUITMENT SERVICES (S) PTE LTD
Junior Data Scientist
CentralContractJunior ExecutiveEngineering ...
0% skills matched
$3,500to$7,000
Monthly
0 application
Posted yesterday
'''

def store_card_info(card, row_no):
    # Split the card information into lines
    lines = card.split('\n')
    
    # Extract relevant information
    company_name, recruiter_name, job_title, contract_type, salary_range, salary_period, applications, posted_date = (None, None, None, None, None, None, None, None)
    if(len(lines) > 8): 
        '''
        for cases such as
        PETROS-CONSULTING PTE. LTD.RECRUITER
        for ST ENGINEERING IHQ PTE. LTD.
        '''
        recruiter_name = lines[0].replace("RECRUITER", "")
        company_name = lines[1].replace("for ", "")
        job_title = lines[2]
        contract_type = lines[3]
        # skills_match = lines[4]
        salary_range = lines[5]
        salary_period = lines[6]
        applications = None #lines[7]
        posted_date = None #lines[8]
    else:
        company_name = lines[0]
        recruiter_name = None
        job_title = lines[1]
        contract_type = lines[2]
        # skills_match = lines[3]
        salary_range = lines[4]
        salary_period = lines[5]
        applications = None #lines[6]
        posted_date = None #lines[7]
    
    # Parse contract_type
    location, employment_type, job_level, job_function = parse_contract_type(contract_type)
    
    # Parse salary_range
    salary_lower, salary_upper = parse_salary_range(salary_range)
    
    # Parse applications
    num_applications = applications #parse_applications(applications)

    # Initialize additional columns
    address = None
    experience = None
    closing_date = None
    job_description = None
    
    # Write
    jobs.loc[row_no] = [
        company_name, 
        recruiter_name,
        job_title, 
        location, 
        employment_type,
        job_level, 
        job_function, 
        salary_lower, 
        salary_upper, 
        salary_period,
        num_applications, 
        posted_date,
        address,
        experience,
        closing_date,
        job_description
    ]

In [8]:
def parse_posting_info(posting_info):
    # Define the pattern to match "applicationsPosted" and "Closing on" as separators
    pattern = r'( applicationPosted | applicationsPosted |Closing on )'

    applications, filler_1, posted_date, filler_2, closing_date = re.split(pattern, posting_info)

    return applications, posted_date, closing_date

posting_info_eg = [
    "4 applicationsPosted 30 Mar 2024Closing on 29 Apr 2024",
    "30 applicationsPosted 29 Mar 2024Closing on 05 Apr 2024",
    "240 applicationsPosted 29 Mar 2024Closing on 28 Apr 2024",
    "0 applicationPosted 31 Mar 2024Closing on 30 Apr 2024",
    "1 applicationPosted 31 Mar 2024Closing on 30 Apr 2024"
]

# Split each string and print the results
for string in posting_info_eg:
    print(parse_posting_info(string))

('4', '30 Mar 2024', '29 Apr 2024')
('30', '29 Mar 2024', '05 Apr 2024')
('240', '29 Mar 2024', '28 Apr 2024')
('0', '31 Mar 2024', '30 Apr 2024')
('1', '31 Mar 2024', '30 Apr 2024')


## Web Scrape function

In [9]:
from random import uniform
# uniform(0.1, 1)

In [10]:
from datetime import datetime
# datetime.today() > datetime.strptime('01 Jan 2024', '%d %b %Y')

In [11]:
def scrape_mycareersfuture(search_term, furthest_date='01 Jan 2023'):
    '''
    furthest_date format: DD Mmm YYYY
    sleep to avoid kick out as bot
    '''
    driver = webdriver.Chrome()
    wait = WebDriverWait(driver, 1)

    page_no = 0
    driver.get(f"https://www.mycareersfuture.gov.sg/search?search={search_term}&sortBy=new_posting_date&page={page_no}")
    row_no = -1
    proceed_to_next_page = True
    furthest_date = datetime.strptime(furthest_date, '%d %b %Y')
    cur_date = datetime.today()
    while proceed_to_next_page and cur_date > furthest_date:
        sleep(uniform(1, 5))
        for card_id in range(22):
            sleep(uniform(1, 2))
            try:
                print(f"ROW NO: {row_no}, CARD NO: {card_id}, PAGE NO: {page_no}")
                card_element = wait.until(EC.element_to_be_clickable((By.ID, f"job-card-{card_id}")))
                card = card_element.text
                # print(card)
                row_no += 1
                store_card_info(card, row_no)
                card_element.click() # wait.until(EC.element_to_be_clickable((By.ID, f"job-card-{card_id}"))).click() # driver.find_element(By.ID, f"job-card-{card_id}").click()
                sleep(uniform(1, 3))
                try:
                    job_description = wait.until(EC.presence_of_element_located((By.ID, "job_description"))).text
                    jobs.at[row_no, 'job_description'] = job_description
                    # print(job_description)
                except:
                    print("no job description")

                try:
                    posting_info = wait.until(EC.presence_of_element_located((By.XPATH, "//section[@id='job-details']/div[@class='w-70-l w-60-ms w-100 pr2-l pr2-ms relative']/div[@class='bg-white pa4'][1]/div[@class='jobInfo w-100 dib v-top relative']/section[2]"))).text
                    # print(posting_info)
                    applications, posted_date, closing_date = parse_posting_info(posting_info)

                    cur_date = datetime.strptime(posted_date, '%d %b %Y')

                    jobs.at[row_no, 'num_applications'] = applications
                    jobs.at[row_no, 'posted_date'] = posted_date
                    jobs.at[row_no, 'closing_date'] = closing_date
                    # print(f"{posted_date}, {closing_date}, {applications}")
                except:
                    print("no posted info")

                try: # <p data-testid="job-details-info-min-experience" class="black-80 f6 fw4 mv1 dib pr3 mr1 icon-bw-period">2 years exp</p>
                    experience = wait.until(EC.presence_of_element_located((By.XPATH, "//p[@data-testid='job-details-info-min-experience']"))).text.replace(" years exp", "").replace(" year exp", "")
                    jobs.at[row_no, 'experience'] = experience
                    # print(experience)
                except:
                    print("no experience")

                try:            
                    address = wait.until(EC.presence_of_element_located((By.XPATH, "//div[@class='db mt3 mb4']/section[@class='w-100 pb3 pb0-ns lh-copy']/p[@class='black-80 f6 fw4 mv1 dib pr3 mr1 icon-bw-location']/a[@class='link brand-sec underline-hover']"))).text
                    jobs.at[row_no, 'address'] = address
                    # print(address)
                except:
                    print("no address")
                
                driver.back()
            except TimeoutException as err:
                print(f"reached card no: {card_id}, error: {err.msg}")
                break            
                
        try:
            print('...next page')
            wait.until(EC.element_to_be_clickable((By.XPATH, "//section[@id='search-results']/div[@class='tc pv3']/button[@class='f5-5 pv2 ph3 mh1 dib black-80 hover-bg-white pointer'][3]"))).click()
            page_no += 1
        except TimeoutException as err:
            '''no more pages left'''
            proceed_to_next_page = False
    print("no more jobs to scrape")
    driver.quit()
    

In [12]:
# driver = webdriver.Chrome()
# wait = WebDriverWait(driver, 3)
# search_term = "hrwoerowhefo" 
# page_no = 0
# driver.get(f"https://www.mycareersfuture.gov.sg/search?search={search_term}&sortBy=new_posting_date&page={page_no}")
# try:
#     wait.until(EC.element_to_be_clickable((By.XPATH, "//section[@id='search-results']/div[@class='tc pv3']/button[@class='f5-5 pv2 ph3 mh1 dib black-80 hover-bg-white pointer'][3]"))).click()
#     sleep(5)
# except TimeoutException as err:
#     print(err.msg)
# driver.quit()

# Execute scraping

In [57]:
jobs = pd.DataFrame(columns=columns)

## AI

In [58]:
scrape_mycareersfuture("ai")

ROW NO: -1, CARD NO: 0, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 0, CARD NO: 1, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 1, CARD NO: 2, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 2, CARD NO: 3, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


no experience
ROW NO: 3, CARD NO: 4, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 4, CARD NO: 5, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


no experience
ROW NO: 5, CARD NO: 6, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 6, CARD NO: 7, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 7, CARD NO: 8, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 8, CARD NO: 9, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 9, CARD NO: 10, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


no experience
ROW NO: 10, CARD NO: 11, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 11, CARD NO: 12, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


no experience
ROW NO: 12, CARD NO: 13, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 13, CARD NO: 14, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


no address
ROW NO: 14, CARD NO: 15, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


no address
ROW NO: 15, CARD NO: 16, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 16, CARD NO: 17, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 17, CARD NO: 18, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 18, CARD NO: 19, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 19, CARD NO: 20, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


no address
ROW NO: 20, CARD NO: 21, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


no address
...next page
ROW NO: 21, CARD NO: 0, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 22, CARD NO: 1, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 23, CARD NO: 2, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


no address
ROW NO: 24, CARD NO: 3, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 25, CARD NO: 4, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


no experience
ROW NO: 26, CARD NO: 5, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


no experience
ROW NO: 27, CARD NO: 6, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


no experience
ROW NO: 28, CARD NO: 7, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 29, CARD NO: 8, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 30, CARD NO: 9, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 31, CARD NO: 10, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 32, CARD NO: 11, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


no experience
no address
ROW NO: 33, CARD NO: 12, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 34, CARD NO: 13, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 35, CARD NO: 14, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 36, CARD NO: 15, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


no address
ROW NO: 37, CARD NO: 16, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


no address
ROW NO: 38, CARD NO: 17, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 39, CARD NO: 18, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


no experience
ROW NO: 40, CARD NO: 19, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


no experience
ROW NO: 41, CARD NO: 20, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 42, CARD NO: 21, PAGE NO: 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


no experience
...next page
ROW NO: 43, CARD NO: 0, PAGE NO: 2


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 44, CARD NO: 1, PAGE NO: 2


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 45, CARD NO: 2, PAGE NO: 2
reached card no: 2, error: 
...next page
no more jobs to scrape


In [59]:
jobs

Unnamed: 0,company_name,recruiter_name,job_title,location,employment_type,job_level,job_function,salary_lower,salary_upper,salary_period,num_applications,posted_date,address,experience,closing_date,job_description
0,SGMP PTE. LTD.,,Creative AI Website Designer (AI Technology Fo...,East,Contract,Professional,Customer Service,2000,3000,Monthly,0,31 Mar 2024,"ESR BIZPARK @ CHAI CHEE, 750C CHAI CHEE ROAD 4...",2.0,30 Apr 2024,Roles & Responsibilities\nJob Overview:\nWe ar...
1,ONEBYTE PTE. LTD.,,AI Engineer,East,Permanent,Professional,Engineering,6000,8000,Monthly,12,28 Mar 2024,"UB POINT, 61 UBI AVENUE 1 408941",3.0,18 Apr 2024,Roles & Responsibilities\nResponsibilities:\n\...
2,INTEL TECHNOLOGY ASIA PTE LTD,Recommended based on your skills & job applica...,Technical Sales Specialist (TSS): AI for Edge,Central,Permanent,Senior Management,Information Technology10 Years Exp,15000,30000,Monthly,23,13 Mar 2024,"Aperia Tower, 10 Kallang Avenue 339510",10.0,12 Apr 2024,Roles & Responsibilities\nProvides technical e...
3,KNOVEL ENGINEERING PTE. LTD.,,Software Developer (AI Solutions),Central,Permanent,Executive,Engineering,5500,9000,Monthly,54,28 Mar 2024,"TAI SENG CENTRE, 3 IRVING ROAD 369522",,27 Apr 2024,Roles & Responsibilities\nDescription:\nYou wi...
4,GOOGLE ASIA PACIFIC PTE. LTD.,,"Software Engineer III, AI / Machine Learning, ...",South,Full Time,Professional,Information Technology,7000,14000,Monthly,19,28 Mar 2024,"Google Singapore, 70 Pasir Panjang Rd 117371",2.0,11 Apr 2024,Roles & Responsibilities\nPRODUCT AREA\nWhethe...
5,SAP ASIA PTE. LTD.,,Associate AI Scientist / AI Scientist – Artifi...,South,Permanent,Executive,Information Technology,6000,9000,Monthly,16,28 Mar 2024,"MAPLETREE BUSINESS CITY, 30 PASIR PANJANG ROAD...",,27 Apr 2024,Roles & Responsibilities\n*SAP will be priorit...
6,PENTAS VISION PTE. LTD.,,AI Researcher,East,Full Time,Professional,Sciences / Laboratory / R&D,7500,15000,Monthly,9,28 Mar 2024,"THE PLAZA, 7500A BEACH ROAD 199591",3.0,27 Apr 2024,Roles & Responsibilities\nJob Responsibilities...
7,A*STAR RESEARCH ENTITIES,,"Scientist / Senior Scientist, AI, ARTC",West,Contract,Professional,Sciences / Laboratory / R&D,5900,11800,Monthly,2,27 Mar 2024,"Cleantech Two, 3 Cleantech Loop, #01/01 637143",5.0,26 Apr 2024,Roles & Responsibilities\nAbout the role\nThe ...
8,A*STAR RESEARCH ENTITIES,,"Scientist / Senior Scientist, AI, ARTC",West,Contract,Professional,Sciences / Laboratory / R&D,4750,9500,Monthly,1,27 Mar 2024,"Cleantech Two, 3 Cleantech Loop, #01/01 637143",2.0,26 Apr 2024,Roles & Responsibilities\nAbout the role\nThe ...
9,RANDSTAD PTE. LIMITED,,Sales Manager- AI Solutions -Enterprise Clients,Central,Full Time,Manager,Information Technology,8000,10000,Monthly,9,27 Mar 2024,"ONE RAFFLES PLACE, 1 RAFFLES PLACE 048616",8.0,26 Apr 2024,Roles & Responsibilities\nAbout my client\nI a...


In [60]:
jobs.to_csv("jobs_ai.csv", encoding='utf-8', index=False)
jobs = pd.DataFrame(columns=columns)

## Data analytics

In [13]:
scrape_mycareersfuture("data%20analytics")

ROW NO: -1, CARD NO: 0, PAGE NO: 0
ROW NO: 0, CARD NO: 1, PAGE NO: 0
no experience
ROW NO: 1, CARD NO: 2, PAGE NO: 0
no experience
no address
ROW NO: 2, CARD NO: 3, PAGE NO: 0
ROW NO: 3, CARD NO: 4, PAGE NO: 0
no address
ROW NO: 4, CARD NO: 5, PAGE NO: 0
no address
ROW NO: 5, CARD NO: 6, PAGE NO: 0
ROW NO: 6, CARD NO: 7, PAGE NO: 0
no address
ROW NO: 7, CARD NO: 8, PAGE NO: 0
no address
ROW NO: 8, CARD NO: 9, PAGE NO: 0
ROW NO: 9, CARD NO: 10, PAGE NO: 0
ROW NO: 10, CARD NO: 11, PAGE NO: 0
ROW NO: 11, CARD NO: 12, PAGE NO: 0
ROW NO: 12, CARD NO: 13, PAGE NO: 0
ROW NO: 13, CARD NO: 14, PAGE NO: 0
no address
ROW NO: 14, CARD NO: 15, PAGE NO: 0
no address
ROW NO: 15, CARD NO: 16, PAGE NO: 0
ROW NO: 16, CARD NO: 17, PAGE NO: 0
no address
ROW NO: 17, CARD NO: 18, PAGE NO: 0
ROW NO: 18, CARD NO: 19, PAGE NO: 0
ROW NO: 19, CARD NO: 20, PAGE NO: 0
reached card no: 20, error: 
...next page
ROW NO: 19, CARD NO: 0, PAGE NO: 1
ROW NO: 20, CARD NO: 1, PAGE NO: 1
no address
ROW NO: 21, CARD NO: 2, P

In [14]:
jobs

Unnamed: 0,company_name,recruiter_name,job_title,location,employment_type,job_level,job_function,salary_lower,salary_upper,salary_period,num_applications,posted_date,address,experience,closing_date,job_description
0,FORTE EMPLOYMENT SERVICES PTE. LTD.,,Senior Finance Executive / Share Service (Data...,Central,Contract,Manager,Accounting / Auditing / Taxation,3000,5000,Monthly,0,31 Mar 2024,"CT HUB, 2 KALLANG AVENUE 339407",2.0,30 Apr 2024,Roles & Responsibilities\nLocation - Irrawaddy...
1,GMP RECRUITMENT SERVICES (S) PTE LTD,,Junior Data Scientist,Central,Contract,Executive,Junior Engineering,3500,7000,Monthly,7,30 Mar 2024,"ONE FINLAYSON GREEN, 1 FINLAYSON GREEN 049246",,29 Apr 2024,Roles & Responsibilities\n⭐Junior Data Scienti...
2,CORNERSTONE GLOBAL PARTNERS PTE. LTD.,,[Healthy Dining Programme] Data Analytic| Up t...,Islandwide,Contract,Fresh/Entry Level,Education And Training,2500,3000,Monthly,3,29 Mar 2024,,,05 Apr 2024,Roles & Responsibilities\nWorking Location: Ce...
3,BIOLOGIC TECHNIK PRIVATE LIMITED,,Data Analyst Intern,South,Internship/Attachment,Fresh/Entry Level,Healthcare / Pharmaceutical,1000,1200,Monthly,240,29 Mar 2024,81 AYER RAJAH CRESCENT 139967,2.0,28 Apr 2024,Roles & Responsibilities\nWe are seeking talen...
4,A-IT SOFTWARE SERVICES PTE LTD,,Fraud Risk Data analyst (Bank) (JT),Islandwide,Contract,Professional,Banking And Finance,4000,5500,Monthly,11,29 Mar 2024,,1.0,28 Apr 2024,Roles & Responsibilities\n1. Job Overview\nWe ...
5,A-IT SOFTWARE SERVICES PTE LTD,,Data Analyst (Bank) (JT),Islandwide,Contract,Professional,Banking And Finance,4000,5500,Monthly,15,29 Mar 2024,,1.0,28 Apr 2024,Roles & Responsibilities\nJob Description\n1. ...
6,YUSEN LOGISTICS (SINGAPORE) PTE. LTD.,,Regional Data Analyst & CRM Senior Executive t...,East,Permanent,Senior Executive,Information Technology,4400,6000,Monthly,5,28 Mar 2024,"YAS LOGISTICS CENTRE, 2 CHANGI SOUTH AVENUE 2 ...",2.0,27 Apr 2024,Roles & Responsibilities\nPosition Overview\nT...
7,SCIENTEC CONSULTING PTE. LTD.,,Data Engineer | Oracle | AWS,Islandwide,Permanent,Executive,Information Technology,5000,7200,Monthly,1,28 Mar 2024,,3.0,27 Apr 2024,Roles & Responsibilities\nData Engineer | Orac...
8,PERSOLKELLY SINGAPORE PTE. LTD.,,Data Engineer,Islandwide,Contract,Professional,Information Technology,6500,9750,Monthly,7,28 Mar 2024,,5.0,11 Apr 2024,Roles & Responsibilities\nAbout the Client\nRe...
9,ACCENTURE PTE LTD,,Data and Analytics Architect (Senior Manager),East,Permanent,Middle Management,Information Technology,17000,20000,Monthly,20,28 Mar 2024,"RAFFLES CITY TOWER, 250 NORTH BRIDGE ROAD 179101",8.0,27 Apr 2024,Roles & Responsibilities\nAbout Accenture:\nAc...


In [15]:
jobs.to_csv("jobs_data-analytics.csv", encoding='utf-8', index=False)
jobs = pd.DataFrame(columns=columns)

## Data science

In [None]:
scrape_mycareersfuture("data%20science")

ROW NO: -1, CARD NO: 0, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


no address
ROW NO: 0, CARD NO: 1, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


ROW NO: 1, CARD NO: 2, PAGE NO: 0
reached card no: 2, error: 
...next page
no more jobs to scrape


In [None]:
jobs

Unnamed: 0,company_name,recruiter_name,job_title,location,employment_type,job_level,job_function,salary_lower,salary_upper,salary_period,num_applications,posted_date,address,experience,closing_date,job_description
0,HYUNDAI MOTOR GROUP INNOVATION CENTER IN SINGA...,Recommended based on your skills & job applica...,Artificial Intelligence (AI) Engineer,West,Permanent,Professional,Sciences / Laboratory / R&D2 Years Exp,6000,12000,Monthly,112,15 Mar 2024,,2,14 Apr 2024,Roles & Responsibilities\nOverview\nThis posit...
1,MERQURI PTE. LTD.,Recommended based on your skills & job applica...,Head of Data Science,Central,Permanent,Manager,Information Technology8 Years Exp,13000,17000,Monthly,66,05 Mar 2024,78 SHENTON WAY 079120,8,04 Apr 2024,Roles & Responsibilities\nOverview\nAs the Hea...


In [None]:
jobs.to_csv("jobs_data-science.csv", encoding='utf-8', index=False)
jobs = pd.DataFrame(columns=columns)

## Machine learning

In [None]:
scrape_mycareersfuture("machine%20learning")

ROW NO: -1, CARD NO: 0, PAGE NO: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs.loc[row_no] = [


no address
ROW NO: 0, CARD NO: 1, PAGE NO: 0
ROW NO: 1, CARD NO: 2, PAGE NO: 0
reached card no: 2, error: 
...next page
no more jobs to scrape


In [None]:
jobs

Unnamed: 0,company_name,recruiter_name,job_title,location,employment_type,job_level,job_function,salary_lower,salary_upper,salary_period,num_applications,posted_date,address,experience,closing_date,job_description
0,DYSON OPERATIONS PTE. LTD.,Recommended based on your skills & job applica...,Senior Machine Learning Research Manager,Islandwide,Permanent,Manager,Information Technology8 Years Exp,17000,23000,Monthly,23,06 Mar 2024,,8,05 Apr 2024,Roles & Responsibilities\nAbout us\nThe Machin...
1,DKATALIS PRIVATE LIMITED,Recommended based on your skills & job applica...,Machine Learning Engineer,Central,Full Time,Professional,Banking And Finance3 Years Exp,8000,16000,Monthly,27,18 Mar 2024,"ABI PLAZA, 11 KEPPEL ROAD 089057",3,17 Apr 2024,Roles & Responsibilities\nAbout the Role\n\nAs...


In [None]:
jobs.to_csv("jobs_ml.csv", encoding='utf-8', index=False)
jobs = pd.DataFrame(columns=columns)