In [7]:
def job_titles():
    """Prompt for a job title until the user enters and confirms a usable one.

    A title is rejected when it is blank (empty or whitespace-only) or when it
    is made up solely of digit characters. Once a title passes those checks the
    user is asked to confirm it; answering "n" restarts the whole prompt.

    Returns:
        str: The confirmed job title with surrounding whitespace removed.
    """
    while True:
        # Strip once so the validation, the confirmation prompt and the
        # returned value all refer to exactly the same text.
        job_title = input("What is your dream job?").strip()

        # An empty string is falsy, so this catches blank and whitespace-only input.
        if not job_title:
            print("Job Title cannot be blank. Please enter a valid job title\n")
            # go back to asking for the job title
            continue

        # Only purely numeric input is rejected; text that merely contains a
        # digit (e.g. "3D artist") is accepted because isdigit() is False for it.
        if job_title.isdigit():
            print("Job Title shouldn't contain solely numbers or a number. Please enter a valid job title\n")
            continue

        while True:
            # Normalise the answer so "Y", " y " and "N" are understood, matching
            # the case-insensitive handling used by the "new search" prompt.
            validation = input(f"This is the Job title entered: {job_title}\n\n Should we proceed with this result? (y/n): ").strip().lower()
            if validation == "y":
                return job_title
            elif validation == "n":
                # Leave the confirmation loop and ask for a new title.
                break
            else:
                print("Please enter 'y' or 'n': ")

def locations():
    """Prompt for a US location until the user enters and confirms one.

    Only blank (empty or whitespace-only) input is rejected; the text is not
    checked against any list of real places. After a non-blank entry the user
    is asked to confirm it; answering "n" restarts the prompt.

    Returns:
        str: The confirmed location with surrounding whitespace removed.
    """
    # Adjacent literals are concatenated into the single prompt string.
    prompt = (
        "Please input a valid US location. This can be a ZIP code, city, state, city and state, or city and state abbreviation.\n"
        "Please note that there is no built-in verification system to ensure the accuracy of the location you provide.\n"
        "The validity of your results relies on the authenticity of the US location you enter."
    )
    while True:
        # Strip once so the confirmation prompt and the returned value match
        # the text that was validated.
        location = input(prompt).strip()

        # An empty string is falsy, so this catches blank and whitespace-only input.
        if not location:
            print("Location cannot be blank. Please enter a valid US location.\n")
            # go back to asking for the location
            continue

        while True:
            # Normalise the answer so "Y", " y " and "N" are understood, matching
            # the case-insensitive handling used by the "new search" prompt.
            validation = input(f"This is the location entered: {location}\n\n Should we proceed with this result? (y/n): ").strip().lower()
            if validation == "y":
                return location
            elif validation == "n":
                # Leave the confirmation loop and ask for a new location.
                break
            else:
                print("Please enter 'y' or 'n': ")
    
def time_frames():
    """Prompt for the "date posted" window until a valid option is entered.

    The user may type either the option number or its label; matching ignores
    letter case and surrounding whitespace.

    Returns:
        int: 1 for last 24 hours, 2 for last 3 days, 3 for last 7 days,
        4 for last 14 days.
    """
    # Every accepted spelling mapped to its option code.
    options = {
        "1": 1, "last 24 hours": 1,
        "2": 2, "last 3 days": 2,
        "3": 3, "last 7 days": 3,
        "4": 4, "last 14 days": 4,
    }
    while True:
        time_frame = input("Enter the time frame of jobs last posted. Options are:\n1 or last 24 hours\n2 or last 3 days\n3 or last 7 days\n4 or last 14 days")
        # Strip as well as lowercase: the prompt ends without a separator, so a
        # stray leading/trailing space is easy to type and should not be an error.
        time_frame = time_frame.strip().lower()
        if time_frame in options:
            return options[time_frame]
        print("Please enter a valid option!")
            
# Lookup from time-frame option code (1-4) to its human-readable label;
# used later when reporting a search that found nothing.
timeframes = dict(
    enumerate(
        ("last 24 hours", "last 3 days", "last 7 days", "last 14 days"),
        start=1,
    )
)
 

In [8]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, JavascriptException


# Collect the parameters for the first search. Each call blocks on input()
# until the user supplies a value (and, for title and location, confirms it).
job_title = job_titles()  
location = locations()
time_frame = time_frames()

def IndeedScraper(job_title:str, location:str, time_frame:int):
    """Search Indeed in a Chrome WebDriver session and collect the listed jobs.

    Opens indeed.com, submits the location and then the job title, clicks the
    entry at position ``time_frame`` in the "date posted" filter menu, and then
    walks the result pages through the "next page" link, recording the title,
    company and location of every job card found.

    The browser session is always shut down before this function returns or
    raises.

    Args:
        job_title: Text typed into the "what" search box.
        location: Text typed into the "where" search box.
        time_frame: 1-based position of the entry to click in the "date posted"
            menu; also used as the key into the module-level ``timeframes``
            dict when building the "no jobs" message.

    Returns:
        A ``pandas.DataFrame`` with one row per job card (keys "Title",
        "Company", "Location") when the search produced results, otherwise a
        ``str`` stating that no jobs were found for the given parameters.

    Raises:
        Any Selenium exception raised outside the two handled spots (the date
        filter / job-count probe and the "next page" lookup) propagates to the
        caller, e.g. when a search box or a field inside a job card is missing.
    """
    #setting the website
    url = "https://www.indeed.com/"
    driver = webdriver.Chrome()

    # try/finally guarantees the browser is shut down on every exit path: the
    # normal return, the early "no jobs" return, and any unexpected exception.
    try:
        driver.get(url)

        #location or remote #if remote, can't use radius
        keywords_loc = driver.find_element(By.XPATH, '//*[@id="text-input-where"]')
        keywords_loc.clear() #remove words already in the location box
        keywords_loc.send_keys(location)
        keywords_loc.submit()

        # The job-title box is looked up after the location form was submitted.
        # NOTE(review): no explicit wait is performed here; this relies on the
        # element being present as soon as find_element runs.
        keywords = driver.find_element(By.XPATH, '//*[@id="text-input-what"]')
        keywords.send_keys(job_title)
        keywords.submit()

        #try selecting the date frame. If not selectable, there is probably no job left...
        try:
            ##filter-dateposted-menu is the id for "date posted",
            ##li:nth-child(n) gets the n-th element within the #filter-dateposted-menu
            ## [a] selects the anchor inside that element, .click() clicks it
            driver.execute_script(f"document.querySelector('#filter-dateposted-menu li:nth-child({time_frame}) a').click()")

            # Existence probe only: the job-count element is absent when the
            # search has no results, which raises NoSuchElementException.
            driver.find_element(By.CSS_SELECTOR,".jobsearch-JobCountAndSortPane-jobCount.css-13jafh6.eu4oa1w0")
        #if the filter can't be clicked or there's no job count there are probably no jobs
        except (JavascriptException, NoSuchElementException):
            print("Search Results:")
            return f"No jobs found for {job_title}, located in {location}, posted within the {timeframes[time_frame]}"

        jobs_data = []  # one dict per job card, across all result pages

        #keep going until there is no "next page" link on the current page
        while True:
            #for every job card on the current page
            for job in driver.find_elements(By.CSS_SELECTOR, ".job_seen_beacon"):
                jobs_data.append({
                    "Title": job.find_element(By.CSS_SELECTOR, "a > span").get_attribute('title'),
                    "Company": job.find_element(By.CSS_SELECTOR, ".css-92r8pb.eu4oa1w0").text,
                    "Location": job.find_element(By.CSS_SELECTOR, ".css-1p0sjhy.eu4oa1w0").text,
                })

            try:
                #if there's a "next page" link, click it and scrape the following page
                driver.find_element(By.CSS_SELECTOR, "a[data-testid='pagination-page-next']").click()
            # No next page (also the case for single-page results): stop paging.
            except NoSuchElementException:
                break
    finally:
        # quit() ends the whole WebDriver session, not just the current window.
        driver.quit()

    print("Search Results:")
    return pd.DataFrame(jobs_data)

# The user may want to run several searches in one session.
# `true` is the keep-running flag; it is switched off when the user declines
# another search.
true = True
while true:
    jobs_data = IndeedScraper(job_title, location, time_frame)

    if not isinstance(jobs_data, pd.DataFrame):
        # A non-DataFrame result is the plain-text "no jobs" message.
        print(jobs_data)
    else:
        # DataFrames get Jupyter's rich table rendering.
        display(jobs_data) 

    # Keep asking until the answer is exactly y or n (case-insensitive).
    while True:
        end = input("Do you want to try a new search? (y/n): ").lower()
        if end not in ("y", "n"):
            print("Please enter 'y' or 'n'")
            continue
        break

    if end == "n":
        print("Thanks for using this!")
        # Clearing the flag ends the outer loop on its next check.
        true = False
    else:
        # Gather fresh parameters; the outer loop then runs the new search.
        job_title = job_titles()  
        location = locations()
        time_frame = time_frames()


Please enter 'y' or 'n': 
Please enter a valid option!
Please enter a valid option!
Search Results:


Unnamed: 0,Title,Company,Location
0,Project Manager,"Main Enterprises, INC.","Stratford, CT 06615"


Thanks for using this!


In [11]:
import pandas as pd
# NOTE: jobs_num is assigned only inside IndeedScraper, so in the cells shown
# here it is not defined at notebook level; the print below stays disabled.
#print(f"There is are {jobs_num} which match the description")
# jobs_data holds whatever the last IndeedScraper call in the search loop returned.
pd.DataFrame(jobs_data)

Unnamed: 0,Title,Company,Location
0,Project Manager,"Main Enterprises, INC.","Stratford, CT 06615"


In [8]:
# Rebuild the option-code -> label lookup and spot-check its first entry.
timeframes = dict(
    zip(
        range(1, 5),
        ["last 24 hours", "last 3 days", "last 7 days", "last 14 days"],
    )
)

timeframes[1]

'last 24 hours'

In [9]:
# Display the result stored by the most recent iteration of the search loop.
jobs_data

Unnamed: 0,Title,Company,Location
0,Project Manager,"Main Enterprises, INC.","Stratford, CT 06615"
