# ***Importing Libraries:***

In [1]:
from bs4 import BeautifulSoup
import requests
from urllib.parse import urljoin

#***Website URL Setup:***
url: This is the website you want to read/scrape.

headers: We pretend to be a browser (to avoid getting blocked). This is like saying:

"Hi! I'm Chrome. Can I see the page?"

This is a job finding site named "dice".

In [2]:
url = "https://www.dice.com/jobs?q=software+developer&location=United+States"
headers = {
    "User-Agent": "Mozilla/5.0"
}

#***HTML Parser:***
The HTML parser is the tool that reads the raw HTML code (all the tags and text) and converts it into a structured format (called a "parse tree" or "DOM tree").

This makes it easy to find elements like h1, p, img, etc., by their tags, classes, or attributes.

In [3]:
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')

In [4]:
from bs4 import BeautifulSoup as bs

In [5]:
soup = bs(response.text)
type(soup) #here soup object act as parser like it takes the raw html tags and convert it into parsed tree.
soup.find('title').text


'Search Jobs | Dice.com'

#***Web Scraping Using BeautifulSoup:***

In [6]:
import pandas as pd
from bs4 import BeautifulSoup

# Assuming `soup` is already created using BeautifulSoup and contains the HTML
job_cards = soup.find_all("div", class_="flex flex-col gap-6 overflow-hidden rounded-lg border bg-surface-primary p-6 relative mx-auto h-full w-full border-transparent shadow-none transition duration-300 ease-in-out sm:border-zinc-100 sm:shadow")

# List to store job info
job_list = []

for job in job_cards:
    try:
        title = job.find("a", class_="outline-offset-2 outline-stroke-focus forced-colors:outline-[Highlight] items-center gap-1 visited:text-interaction-visited outline-0 line-clamp-1 shrink grow basis-0 text-xl font-semibold text-zinc-800 no-underline hover:underline").text.strip()
    except:
        title = "N/A"

    try:
        company = job.find("p", class_="mb-0 line-clamp-2 text-sm sm:line-clamp-1").text.strip()
    except:
        company = "N/A"

    try:
        location = job.find("p", class_="text-sm font-normal text-zinc-600").text.strip()
    except:
        location = "N/A"

    try:
        description = job.find("p", class_="line-clamp-2 h-10 shrink grow basis-0 text-sm font-normal text-zinc-900").text.strip()
    except:
        description = "N/A"

    try:
        job_url = job.find("a", {"data-testid": "job-search-job-detail-link"})["href"]
    except:
        job_url = "N/A"

    # Append to the list
    job_list.append([title, company, location, description, job_url])

# Create DataFrame
df = pd.DataFrame(job_list, columns=["Title", "Company", "Location", "Description", "URL"])

# Print DataFrame
print(df)

# Optional: Save to CSV
# df.to_csv("dice_jobs.csv", index=False)


                                                Title  \
0           Senior Software Developer (cleared)-61525   
1                           Senior Software Developer   
2                           Senior Software Developer   
3                       Appian Software Developer III   
4     Principal Software Developer - Lunar Permanence   
5                               Software Developer IV   
6                    Software Developer - Entry level   
7                    Software Developer - Entry level   
8                                  Software Developer   
9               Embedded DevSecOps Software Developer   
10                         LabVIEW Software Developer   
11                       Sr Software Developer (.Net)   
12                      Full Stack Software Developer   
13  Senior Frontend Software Developer - New Glenn...   
14                                 Software Developer   
15                                 Software Developer   
16                             

#***Saving Data into SQLite Database:***

In [7]:
import sqlite3

#***Creating Table and Connecting Databse:***

In [10]:
# Connect to (or create) SQLite database
conn = sqlite3.connect('data.db')
cursor = conn.cursor()

# Create table
cursor.execute('''CREATE TABLE job_cards (id INTEGER PRIMARY KEY AUTOINCREMENT,title TEXT, company TEXT, location TEXT,description TEXT,url TEXT)''')
conn.commit()


#***Inserting Data into Database:***

In [20]:
for job in job_list:
    title, company, location, description, url = job
    cursor.execute('''
        INSERT INTO job_cards (title, company, location, description, url)
        VALUES (?, ?, ?, ?, ?)
    ''', (title, company, location, description, url))

# Step 5: Commit and close the connection
conn.commit()


# STEP 4: Commit changes and close connection
conn.commit()

In [21]:
conn.close()