In [2]:
from bs4 import BeautifulSoup

# Pull the whole page into memory; the file handle is released right after the read.
with open('home.html', 'r') as html_file:
    content = html_file.read()

# First argument is the markup text, second is the parser backend to use.
soup = BeautifulSoup(content, 'lxml')

# find() gives back only the first matching element,
# find_all() gives back every matching element.
tags = soup.find('h5')
courses_html_tags = soup.find_all('h5')

# Show every course: print only the text of each tag returned by find_all().
for course in courses_html_tags:
    print(course.text)


Python for beginners
Python Web Development
Python Machine Learning


In [51]:
# Grab the prices

from bs4 import BeautifulSoup

# Read the page text, then parse it once the file has been closed.
with open('home.html', 'r') as html_file:
    content = html_file.read()

soup = BeautifulSoup(content, 'lxml')

# `class` is a reserved keyword in Python, which is why the filter
# argument is spelled `class_`.
course_cards = soup.find_all('div', class_='card')

for course in course_cards:
    # Dotted access (course.h5, course.a) reaches a tag nested inside the div
    # and returns that tag.
    course_name = course.h5.text

    # The price is the last whitespace-separated token of the link text.
    price_tokens = course.a.text.split()
    course_price = price_tokens[-1]

    print(f"The course {course_name} costs {course_price}")
    
    

The course Python for beginners costs 20$
The course Python Web Development costs 50$
The course Python Machine Learning costs 100$


In [31]:
# help() writes the documentation itself and returns None, so wrapping it in
# print() only adds a stray "None" line after the help text.
help(str.split)

Help on method_descriptor:

split(self, /, sep=None, maxsplit=-1) unbound builtins.str method
    Return a list of the substrings in the string, using sep as the separator string.

      sep
        The separator used to split the string.

        When set to None (the default value), will split on any whitespace
        character (including \n \r \t \f and spaces) and will discard
        empty strings from the result.
      maxsplit
        Maximum number of splits.
        -1 (the default value) means no limit.

    Splitting starts at the front of the string and works to the end.

    Note, str.split() is mainly useful for data that has been intentionally
    delimited.  With natural text that includes punctuation, consider using
    the regular expression module.

None


-----BeautifulSoup constructor-----


Parameters:
markup (required):
The string or file-like object containing the HTML or XML content you want to parse. This can be:

A string of HTML or XML

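An open file handle (passed directly, or its contents via .read()). A URL is not fetched for you — download the page first (e.g. with requests) and pass the response text.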

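parser (the `features` argument; optional but recommended):
The parser you want BeautifulSoup to use. Common choices:

"html.parser" — built-in Python HTML parser (no extra install needed; if no parser is given, BeautifulSoup picks the best one installed and emits a warning)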

"lxml" — faster, requires the lxml library

"xml" — for parsing XML with lxml

"html5lib" — parses like a web browser, very lenient, requires html5lib

listings is a ResultSet (a special list-like object from BeautifulSoup).

Each item in listings (e.g., listing) is a Tag.

Tag objects have the .find(), .find_all(), .text, etc. methods.

Use .get() to read any attribute of a Tag element (for example, tag.get('href')); it returns None if the attribute is absent.

In [None]:
from bs4 import BeautifulSoup
import requests
from datetime import datetime
import pandas as pd

def fetch_page(url, timeout=10):
    """Download a page and parse it into a BeautifulSoup tree.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests.get() can block indefinitely on an
            unresponsive host.

    Returns:
        A BeautifulSoup object built from the response text with the
        'lxml' parser.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by raise_for_status()).
        requests.Timeout: If the server does not respond within `timeout`.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    return BeautifulSoup(response.text, 'lxml')

def parse_job_links(soup, limit=5):
    """Collect the job-page URLs found in the search-result listings.

    Args:
        soup: Parsed search-result page; must expose find_all().
        limit: How many listings to inspect, counted from the top of the
            page. Pass -1 (or None) to inspect every listing.

    Returns:
        A list of href strings in page order. Listings that contain no
        <a> tag, or whose first <a> has no href, are skipped rather than
        raising or producing a None entry.
    """
    # Extract the links to the job pages from the main page
    listings = soup.find_all('div', class_='srp-listing')
    if limit is not None and limit != -1:
        listings = listings[:limit]

    links = []
    for listing in listings:
        anchor = listing.find('a')
        # find() returns None when the listing has no link at all.
        href = anchor.get('href') if anchor is not None else None
        if href:
            links.append(href)
    return links

def parse_job_details(job_url):
    """Fetch one job page and pull out its headline fields.

    Args:
        job_url: URL of the job page; handed to fetch_page().

    Returns:
        A dict with the keys 'Title', 'Company', 'Posted on', 'Location'
        and 'Experience', or None when the outer page container is not
        found.

    Raises:
        Anything fetch_page() raises. AttributeError / IndexError when an
        expected element or token is missing, and ValueError when the
        posting date does not match the '%d %b, %Y' format.
    """
    page = fetch_page(job_url)

    # NOTE(review): a list filter should match a div carrying any one of
    # these classes — confirm that is the intended selection.
    page_classes = ['jd-page', 'ui-page', 'ui-page-theme-a',
                    'ui-page-header-fixed', 'ui-page-footer-fixed', 'ui-page-active']
    container = page.find('div', class_=page_classes)
    if not container:
        return None

    main_section = container.find('div', class_='jdpage-main')

    # Characters removed from the location tokens.
    strip_punct = str.maketrans('', '', '()/,')

    # Header block: title, company and posting date.
    header = main_section.find('div', id='jobTitle')
    title = header.h1.text.strip()
    company = header.h2.span.text.strip()
    posted_raw = header.find('span', class_='posting-time').text.strip()
    posted_on = datetime.strptime(posted_raw, '%d %b, %Y').date()

    # Location and experience live in the same sub-block.
    exp_loc = main_section.find('div', class_='clearfix exp-loc')
    loc_tokens = exp_loc.find('div', class_='srp-loc jd-loc').text.strip().split()
    # Tokens 1 and 2 are used (token 0 is ignored) — presumably a label
    # followed by two place names; verify against the page markup.
    first_part = loc_tokens[1].translate(strip_punct)
    second_part = loc_tokens[2].translate(strip_punct)
    location = f"{first_part} - {second_part}"

    # Only the first two whitespace-separated tokens of the experience text are kept.
    exp_tokens = exp_loc.find('div', class_='srp-exp').text.split()
    experience = ' '.join((exp_tokens[0], exp_tokens[1]))

    details = {
        'Title': title,
        'Company': company,
        'Posted on': str(posted_on),
        'Location': location,
        'Experience': experience,
    }
    return details

def find_jobs():
    """Scrape the Python job search results and save them to a dated CSV.

    Fetches the search-result page, follows every job link, prints one line
    per parsed job, and writes all parsed jobs to 'jobs_<YYYY-MM-DD>.csv'
    in the current directory. A job whose page fails to parse is reported
    and skipped. Prints "No jobs found." when nothing was collected.
    """
    search_url = 'https://m.timesjobs.com/mobile/jobs-search-result.html?txtKeywords=python&cboWorkExp1=-1&txtLocation='
    results_page = fetch_page(search_url)

    jobs = []
    # limit=-1 asks for the links of all listings on the page.
    for link in parse_job_links(results_page, limit=-1):
        try:
            job = parse_job_details(link)
            if not job:
                continue
            print(f"Found job: {job['Title']} at {job['Company']} posted on {job['Posted on']}")
            jobs.append(job)
        except Exception as e:
            # Best effort: one broken page must not stop the whole run.
            print(f"Failed to parse job at {link}: {e}")

    if not jobs:
        print("No jobs found.")
        return

    stamp = datetime.today().strftime('%Y-%m-%d')
    csv_file = f"jobs_{stamp}.csv"
    pd.DataFrame(jobs).to_csv(csv_file, index=False)
    print(f"\nSaved {len(jobs)} jobs to {csv_file}")

# Start the scrape only when this code is executed directly (not imported).
if __name__ == "__main__":
    find_jobs()


Found job: Python Developer at zenga tv posted on 2025-08-02
Found job: Python Developer at LAKSH HUMAN RESOURCE posted on 2025-08-02
Found job: Developer - Python at Wipro Technologies Ltd posted on 2025-07-31
Found job: Python Developer at Infocom Software posted on 2025-07-27
Found job: Python Developer at Hotelogix posted on 2025-07-26
Found job: Python Developer at Infocom Software posted on 2025-07-27
Found job: Python Developer at SYNECHRON posted on 2025-07-25
Found job: Python Developer at SEVEN CONSULTANCY posted on 2025-07-25
Found job: Python Developer at SYNECHRON posted on 2025-07-24
Found job: Python Developer at IQVIA posted on 2025-07-24
Found job: Python Developer at SEVEN CONSULTANCY posted on 2025-07-25
Found job: Python Developer at ORACLE posted on 2025-07-23
Found job: Python Developer at SYNECHRON posted on 2025-07-24
Found job: Python Developer at Techasoft Pvt Ltd posted on 2025-07-20
Found job: Python Developer at SEVEN CONSULTANCY posted on 2025-07-20
Found 