In [8]:
# import libraries
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy

In [9]:
# Define the function that scrapes InternSG job postings
def _fetch_soup(url, timeout):
    """Download ``url`` and return its body parsed with ``html.parser``."""
    # A timeout keeps one stalled connection from blocking the whole scrape.
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.content, 'html.parser')


def _collect_job_urls(listing_url, timeout):
    """Return the job links found on one listing page, in page order."""
    soup = _fetch_soup(listing_url, timeout)
    job_urls = []
    for card in soup.find_all('div', class_='ast-col-lg-3'):
        link = card.find('a')
        # Cards without an anchor, or whose anchor has no href, are skipped.
        if link and 'href' in link.attrs:
            job_urls.append(link['href'])
    return job_urls


def _scrape_job_page(job_url, field_names, timeout):
    """Return one row for a job page: every field in ``field_names`` plus 'url'.

    Fields whose label is not present on the page keep the value ''.
    """
    soup = _fetch_soup(job_url, timeout)
    row = dict.fromkeys(field_names, '')
    row['url'] = job_url
    for field in field_names:
        # The label is a bold div whose text is exactly the field name.
        # ``string=`` is the current name of the deprecated ``text=`` argument.
        label = soup.find('div', string=field, class_='font-weight-bold')
        if not label:
            continue
        # The value lives in the element that directly follows the label.
        value = label.find_next_sibling()
        if not value:
            continue
        # Drop nested <span> elements so their text is not part of the value.
        for span in value.find_all('span'):
            span.decompose()
        # NOTE: text fragments are joined without a separator.
        row[field] = value.get_text(strip=True)
    return row


def internSgScrapper(pages=39, output_file_path='internSG_jobs.csv', timeout=30):
    """Scrape InternSG job postings and write their details to a CSV file.

    The scrape runs in two passes: each listing page is fetched to collect
    the job links it contains, then every job page is fetched and its
    labelled fields are read into one row.

    Parameters
    ----------
    pages : int, default 39
        Exclusive upper bound on the listing page number: pages ``1`` to
        ``pages - 1`` are fetched, so ``pages=2`` reads only the first page
        and ``pages=1`` reads none.
    output_file_path : str, default 'internSG_jobs.csv'
        Path of the CSV file to write (the index is not written).
    timeout : float or None, default 30
        Seconds to wait for each HTTP response, passed to ``requests.get``.
        ``None`` waits indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per job link collected, with the columns listed in
        ``col_names`` below followed by ``url``. When no link is collected
        the frame is empty but still has these columns.

    Raises
    ------
    requests.exceptions.RequestException
        If a request fails or times out; HTTP error statuses are not checked.
    """
    col_names = ['Company', 'Designation', 'Date Listed', 'Job Type', 'Job Period',
                 'Profession', 'Industry', 'Location Name', 'Allowance / Remuneration',
                 'Company Profile', 'Job Description']

    # Pass 1: collect the job links from every listing page.
    job_urls = []
    for page_number in range(1, pages):
        listing_url = f'https://www.internsg.com/jobs/{page_number}/?f_p=107&f_i&filter_s#isg-top'
        job_urls.extend(_collect_job_urls(listing_url, timeout))

    # Pass 2: visit each job page and read its labelled fields.
    jobs_info = [_scrape_job_page(job_url, col_names, timeout) for job_url in job_urls]

    # Explicit columns keep the schema stable even when nothing was scraped.
    internSG_jobs = pd.DataFrame(jobs_info, columns=col_names + ['url'])
    internSG_jobs.to_csv(output_file_path, index=False)
    return internSG_jobs

In [10]:
# Quick run: pages=2 fetches listing page 1 only (the upper bound is exclusive)
# and writes the result to the default 'internSG_jobs.csv'.
internSgScrapper(pages=2)

                                                  URL  \
0   https://www.internsg.com/job/holmusk-kkt-techn...   
1   https://www.internsg.com/job/ncs-group-it-supp...   
2   https://www.internsg.com/job/univers-security-...   
3   https://www.internsg.com/job/youapp-pte-ltd-ai...   
4   https://www.internsg.com/job/youapp-pte-ltd-it...   
5   https://www.internsg.com/job/youapp-pte-ltd-ai...   
6   https://www.internsg.com/job/strides-digital-d...   
7   https://www.internsg.com/job/clearsk-healthcar...   
8   https://www.internsg.com/job/hummingbird-biosc...   
9   https://www.internsg.com/job/youapp-pte-ltd-ai...   
10  https://www.internsg.com/job/youapp-pte-ltd-it...   
11  https://www.internsg.com/job/seagate-technolog...   
12  https://www.internsg.com/job/youapp-pte-ltd-ai...   
13  https://www.internsg.com/job/curvegrid-student...   
14  https://www.internsg.com/job/snaphunt-pte-ltd-...   
15  https://www.internsg.com/job/dstnct-pte-ltd-so...   
16  https://www.internsg.com/jo

Unnamed: 0,Company,Designation,Date Listed,Job Type,Job Period,Profession,Industry,Location Name,Allowance / Remuneration,Company Profile,Job Description,url
0,Holmusk (KKT Technology Pte Ltd),Data Science Intern,24 Apr 2024,Entry Level / Junior Executive,"From May 2024, For At Least 3 Months",IT / Information Technology,Healthcare / Fitness / Sports,"71 Ayer Rajah Crescent, Singapore","$1,500 monthly",At Holmusk we leverage technology & data scien...,ResponsibilitiesReview and test Holmusk specif...,https://www.internsg.com/job/holmusk-kkt-techn...
1,NCS Group,IT Support Engineer,24 Apr 2024,Entry Level / Junior Executive,Immediate Start - 31 Mar 2026,IT / Information Technology,Computer and IT,Singapore,"$1,900 - 2,400 monthly","NCS is a leading technology services firm, ope...",This position provides the desktop support for...,https://www.internsg.com/job/ncs-group-it-supp...
2,Univers,Security Analyst Intern,24 Apr 2024,Entry Level / Junior Executive,From May 2024 - May 2025,IT / Information Technology,Computer and IT,"1 HarbourFront Avenue, Keppel Bay Tower, Singa...","$2,300 monthly",Univers provides the world’s most comprehensiv...,SOC Intern Security Analyst InternResponsibili...,https://www.internsg.com/job/univers-security-...
3,YouApp Pte Ltd,AI / Ml Research Intern,23 Apr 2024,Entry Level / Junior Executive,"Immediate Start, For At Least 3 Months",IT / Information Technology,Computer and IT,Singapore,"$800 - 1,500 monthly","YouApp integrates Western, Indian, and Eastern...",Our team is made up of people from diverse bac...,https://www.internsg.com/job/youapp-pte-ltd-ai...
4,YouApp Pte Ltd,IT Intern,23 Apr 2024,Entry Level / Junior Executive,"Immediate Start, For At Least 3 Months",IT / Information Technology,Computer and IT,Singapore,"$800 - 1,500 monthly","YouApp integrates Western, Indian, and Eastern...",We are seeking a talented and motivated IT int...,https://www.internsg.com/job/youapp-pte-ltd-it...
5,YouApp Pte Ltd,AI Intern,23 Apr 2024,Entry Level / Junior Executive,"Immediate Start, For At Least 3 Months",IT / Information Technology,Computer and IT,Singapore,"$800 - 1,500 monthly","YouApp integrates Western, Indian, and Eastern...",Our team is made up of people from diverse bac...,https://www.internsg.com/job/youapp-pte-ltd-ai...
6,Strides Digital,Data Science Intern,22 Apr 2024,Entry Level / Junior Executive,"From Jul 2024, For At Least 6 Months",IT / Information Technology,Computer and IT,"Paya Lebar Road, Paya-lebar Quarter Mall, Sing...","$1,500 monthly",Strides Digital is a company that aims to acce...,Job descriptionWe are looking for data analyti...,https://www.internsg.com/job/strides-digital-d...
7,ClearSK Healthcare Pte Ltd,No Code Developer Intern with AI & Chatbot Exp...,21 Apr 2024,Entry Level / Junior Executive,"Flexible Start, For At Least 6 Months",IT / Information Technology,Healthcare / Fitness / Sports,"Novena, Singapore","$1,400 - 2,200 monthly",Join Our Team and Shape the Future of the Medi...,Job ResponsibilitiesCollaborate with the team ...,https://www.internsg.com/job/clearsk-healthcar...
8,Hummingbird Bioscience,"Intern, Automation",18 Apr 2024,Entry Level / Junior Executive,"Flexible Start, For At Least 3 Months",IT / Information Technology,Manufacturing General / Biomedical / Pharmaceu...,Singapore,"$800 - 1,000 monthly",ABOUT HUMMINGBIRD BIOSCIENCEHummingbird Biosci...,ABOUT THE ROLEWe are looking for a detail-orie...,https://www.internsg.com/job/hummingbird-biosc...
9,YouApp Pte Ltd,AI / ML Research Intern,18 Apr 2024,Entry Level / Junior Executive,"Immediate Start, For At Least 3 Months",IT / Information Technology,Computer and IT,Singapore,"$800 - 1,500 monthly","YouApp integrates Western, Indian, and Eastern...",Our team is made up of people from diverse bac...,https://www.internsg.com/job/youapp-pte-ltd-ai...
