# Automated Job Search Using Web Scraping

### Import all required libraries

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import os

### Now we will scrape data from the timesjobs.com website and save it to a CSV file

In [2]:
# Job-search results page on timesjobs.com: keyword "python", empty location parameter.
urls = "https://www.timesjobs.com/candidate/job-search.html?searchType=personalizedSearch&from=submit&txtKeywords=python&txtLocation="

def findJobs(url=None, output_path='posts/Jobs.csv', timeout=30):
    """Scrape the job-search results page and save every listing to a CSV file.

    Args:
        url: Results page to fetch; defaults to the module-level ``urls``.
        output_path: CSV file to write. Its parent directory is created
            when it does not exist.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        int: Number of listings written to ``output_path``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout a stalled server blocks the call forever, and without
    # raise_for_status an error page would be parsed as "no jobs found".
    response = requests.get(urls if url is None else url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    jobs_data = []  # One dict per successfully parsed listing
    for job in soup.find_all('li', class_="clearfix job-bx wht-shd-bx"):
        try:
            location = job.find('ul', class_="top-jd-dtl clearfix").span.text
            skills = job.find('ul', class_="list-job-dtl clearfix").span.text
            company = job.find('h3', class_="joblist-comp-name").text
            link = job.find('header', class_="clearfix").h2.a['href']
        except (AttributeError, KeyError, TypeError):
            # A card missing any expected element is skipped so that one
            # malformed listing does not abort the whole scrape.
            continue

        jobs_data.append({
            # Remove the "(More Jobs)" suffix before stripping; stripping
            # first leaves the whitespace that preceded the suffix.
            'Company': company.replace('(More Jobs)', '').strip(),
            'Location': location.strip(),
            'Skills': skills.strip(),
            'Link': link,
        })

    # Save job data to a CSV file
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # Explicit columns keep the header row even when no listing was found.
    columns = ['Company', 'Location', 'Skills', 'Link']
    pd.DataFrame(jobs_data, columns=columns).to_csv(output_path, index=False)

    return len(jobs_data)

# Announce the source URL, run the scrape, and report how many jobs were saved.
print(f'Fetching Data from\n{urls}\n.')
x = findJobs()  # findJobs returns the number of jobs it saved
print(f'{x} new jobs updated in Jobs.csv.')

Fetching Data from
https://www.timesjobs.com/candidate/job-search.html?searchType=personalizedSearch&from=submit&txtKeywords=python&txtLocation=
.
25 new jobs updated in Jobs.csv.


#### Next, we filter the scraped jobs, excluding those that require an unfamiliar skill, and save the result to a text file

In [3]:
def findJobs(unfamiliar_skill='Django', url=None,
             output_path='posts/Pythonjobs_filter.txt', timeout=30):
    """Save the listings that do NOT mention an unfamiliar skill to a text file.

    Args:
        unfamiliar_skill: Skill to avoid; a listing is dropped when this text
            appears (case-insensitively) in its skills line.
        url: Results page to fetch; defaults to the module-level ``urls``.
        output_path: Text file to write. Its parent directory is created
            when it does not exist.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        int: Number of listings written to ``output_path``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout a stalled server blocks the call forever, and without
    # raise_for_status an error page would be parsed as "no jobs found".
    response = requests.get(urls if url is None else url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    unwanted = unfamiliar_skill.lower()
    entries = []  # Formatted text for each listing that passes the filter
    for job in soup.find_all('li', class_="clearfix job-bx wht-shd-bx"):
        try:
            skills = job.find('ul', class_="list-job-dtl clearfix").span.text
            if unwanted in skills.lower():
                continue  # Listing asks for the unfamiliar skill
            location = job.find('ul', class_="top-jd-dtl clearfix").span.text
            company = job.find('h3', class_="joblist-comp-name").text
            link = job.find('header', class_="clearfix").h2.a['href']
        except (AttributeError, KeyError, TypeError):
            # A card missing any expected element is skipped so that one
            # malformed listing does not abort the whole scrape.
            continue

        # Remove the "(More Jobs)" suffix before stripping; stripping first
        # leaves the whitespace that preceded the suffix.
        company = company.replace('(More Jobs)', '').strip()
        entries.append(
            f"Company: {company}\n"
            f"Location: {location.strip()}\n"
            f"Skills: {skills.strip()}\n"
            f"Link: {link}\n\n"
        )

    # Saving the filtered jobs in a text file. The directory is created here
    # so this cell also works when no earlier cell has made it.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # Explicit UTF-8 so non-ASCII company names do not depend on the
    # platform's default encoding.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.writelines(entries)

    return len(entries)

# Announce the source URL, run the skill filter, and report how many jobs were kept.
print(f'Fetching Data from\n{urls}\n.')
x = findJobs()  # findJobs returns the number of jobs it wrote
print(f'{x} new jobs updated in the Pythonjobs_filter.txt.')


Fetching Data from
https://www.timesjobs.com/candidate/job-search.html?searchType=personalizedSearch&from=submit&txtKeywords=python&txtLocation=
.
3 new jobs updated in the Pythonjobs_filter.txt.


#### Find jobs in a desired location and save them to a new CSV file named filterjobs.csv

In [4]:
def findJobs(desired_location='Noida', url=None,
             output_path='posts/filterjobs.csv', timeout=30):
    """Save the listings located in a desired place to a CSV file.

    Args:
        desired_location: Location to keep; a listing matches when this text
            appears (case-insensitively) in its location field.
        url: Results page to fetch; defaults to the module-level ``urls``.
        output_path: CSV file to write. Its parent directory is created
            when it does not exist.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        int: Number of matching listings written to ``output_path``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout a stalled server blocks the call forever, and without
    # raise_for_status an error page would be parsed as "no jobs found".
    response = requests.get(urls if url is None else url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    wanted = desired_location.lower()
    jobs_data = []  # One dict per matching listing
    for job in soup.find_all('li', class_="clearfix job-bx wht-shd-bx"):
        try:
            location = job.find('ul', class_="top-jd-dtl clearfix").span.text
            if wanted not in location.lower():
                continue  # Listing is for a different location
            skills = job.find('ul', class_="list-job-dtl clearfix").span.text
            company = job.find('h3', class_="joblist-comp-name").text
            link = job.find('header', class_="clearfix").h2.a['href']
        except (AttributeError, KeyError, TypeError):
            # A card missing any expected element is skipped so that one
            # malformed listing does not abort the whole scrape.
            continue

        jobs_data.append({
            # Remove the "(More Jobs)" suffix before stripping; stripping
            # first leaves the whitespace that preceded the suffix.
            'Company': company.replace('(More Jobs)', '').strip(),
            'Location': location.strip(),
            'Skills': skills.strip(),
            'Link': link,
        })

    # Save job data to a new CSV file
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # Explicit columns keep the header row even when nothing matched.
    columns = ['Company', 'Location', 'Skills', 'Link']
    pd.DataFrame(jobs_data, columns=columns).to_csv(output_path, index=False)

    return len(jobs_data)

# Announce the source URL, run the location filter, and report how many jobs matched.
print(f'Fetching Data from\n{urls}\n.')
x = findJobs()  # findJobs returns the number of matching jobs it saved
print(f'{x} new jobs found and updated in the filterjobs.csv file.')


Fetching Data from
https://www.timesjobs.com/candidate/job-search.html?searchType=personalizedSearch&from=submit&txtKeywords=python&txtLocation=
.
3 new jobs found and updated in the filterjobs.csv file.


##### Further filtering can be applied to the data extracted from the website, for example by job type:

In [5]:
def findJobs(desired_job_type='Full Time', url=None,
             output_path='posts/Desiredjobs.csv', timeout=30,
             job_type_class='type'):
    """Save the listings of a desired job type to a CSV file.

    Args:
        desired_job_type: Job type to keep; a listing matches when this text
            appears (case-insensitively) in its job-type element.
        url: Results page to fetch; defaults to the module-level ``urls``.
        output_path: CSV file to write. Its parent directory is created
            when it does not exist.
        timeout: Seconds to wait for the server before giving up.
        job_type_class: CSS class of the ``<span>`` holding the job type.
            NOTE(review): the default ``'type'`` is a placeholder carried
            over from the original code -- confirm it against the page markup.

    Returns:
        int: Number of matching listings written to ``output_path``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout a stalled server blocks the call forever, and without
    # raise_for_status an error page would be parsed as "no jobs found".
    response = requests.get(urls if url is None else url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    wanted = desired_job_type.lower()
    jobs_data = []  # One dict per matching listing
    for job in soup.find_all('li', class_="clearfix job-bx wht-shd-bx"):
        job_type_elem = job.find('span', class_=job_type_class)
        # Listings without a job-type element can never match the filter.
        if job_type_elem is None or wanted not in job_type_elem.text.lower():
            continue

        try:
            location = job.find('ul', class_="top-jd-dtl clearfix").span.text
            skills = job.find('ul', class_="list-job-dtl clearfix").span.text
            company = job.find('h3', class_="joblist-comp-name").text
            link = job.find('header', class_="clearfix").h2.a['href']
        except (AttributeError, KeyError, TypeError):
            # A card missing any expected element is skipped so that one
            # malformed listing does not abort the whole scrape.
            continue

        jobs_data.append({
            # Remove the "(More Jobs)" suffix before stripping; stripping
            # first leaves the whitespace that preceded the suffix.
            'Company': company.replace('(More Jobs)', '').strip(),
            'Location': location.strip(),
            'Skills': skills.strip(),
            'Link': link,
        })

    # Save job data to another CSV file
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # Explicit columns keep the header row even when nothing matched.
    columns = ['Company', 'Location', 'Skills', 'Link']
    pd.DataFrame(jobs_data, columns=columns).to_csv(output_path, index=False)

    return len(jobs_data)

# Announce the source URL, run the job-type filter, and report how many jobs matched.
print(f'Fetching Data from\n{urls}\n.')
x = findJobs()  # findJobs returns the number of matching jobs it saved
print(f'{x} new jobs found as per desired filter.')


Fetching Data from
https://www.timesjobs.com/candidate/job-search.html?searchType=personalizedSearch&from=submit&txtKeywords=python&txtLocation=
.
0 new jobs found as per desired filter.
