# University Of Pennsylvania

## Campus Jobs Retriever and Organizer

#### Why did the need arise?


I am attending the University of Pennsylvania in Fall 2021 for the Masters in Data Science program.
I wanted to see all the job opportunities I could pursue alongside my program at a glance, and to sort or filter them by criteria such as job description, hourly pay, number of hours per week, or weekly pay.
This script was created to provide a solution to this need.

The script neatly condenses all the listed jobs on University of Pennsylvania's official jobs portal into one Excel sheet sorted in descending order of Weekly Pay.

## Import Libraries

In [None]:
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
import requests
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
import time
import re
import imaplib
import email

from datetime import datetime
from dateutil.relativedelta import relativedelta
from datetime import timezone
import os
import ssl
import smtplib
from email import encoders
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from selenium.webdriver.common.action_chains import ActionChains


## Open Selenium Controlled Chrome Browser

In [None]:
def open_browser():
    """Start a Selenium-controlled Chrome session and return its driver.

    The browser is configured with ``headless`` set to False. The value
    returned by ``ChromeDriverManager().install()`` is handed to
    ``webdriver.Chrome`` as its first positional argument.
    """
    chrome_options = Options()
    chrome_options.headless = False
    chromedriver_location = ChromeDriverManager().install()
    return webdriver.Chrome(chromedriver_location, options=chrome_options)

In [None]:
# Browser session shared by the cells that follow.
driver = open_browser()

## Open UPenn Jobs Portal

In [None]:
# Point the controlled browser at the job search page.
search_page_url = "https://www.sfs.upenn.edu/seo/job_search/job_search.php"
driver.get(search_page_url)

In [None]:
# Click the first <input> whose value contains "Search >>", echoing its
# value first; every other input is left untouched.
input_elements = driver.find_elements_by_tag_name("input")
for candidate in input_elements:
    if "Search >>" not in candidate.get_attribute("value"):
        continue
    print(candidate.get_attribute("value"))
    candidate.click()
    break

## Fetching All Job IDs

In [None]:
# Collect the detail-page URL of every job linked from the first
# <table class="search"> on the page currently loaded in the browser.
# Result: `job_ids`, a list of absolute URLs (one per table row with a link).
job_ids = []
try:
    # Capture the rendered HTML while the browser session is still alive.
    results_html = driver.page_source
finally:
    # Always release the browser, even if reading the page fails;
    # everything after this point works on the captured HTML only.
    driver.quit()

soup = BeautifulSoup(results_html, "html5lib")
search_tables = soup.find_all("table", {"class": "search"})
if not search_tables:
    # Same exception type the bare [0] lookup produced, with a usable message.
    raise IndexError('no <table class="search"> found on the results page')

for tr in search_tables[0].find_all("tr"):
    job_anchor = tr.find("a")
    # Rows without a link, or whose first link has no href, are skipped.
    if job_anchor is None or not job_anchor.has_attr("href"):
        continue
    job_ids.append(
        "https://www.sfs.upenn.edu/seo/job_search/job_search.php" + job_anchor["href"]
    )

## Fetching Details for Each Job ID

In [None]:
# Download each job page listed in `job_ids` and extract its fields.
# Result: `jobs_list`, one dict per job with the keys JOB_ID, Title,
# Posted On, Summary, Details, Contact Information, Link, PayPerHour,
# HoursPerWeek and WeeklyPay. Any field that cannot be found is "-".

# Compiled once so the patterns are not looked up again for every job.
_pay_per_hour_re = re.compile(r".*?([$].*?) .*?")
_hours_per_week_re = re.compile(r".*?([0-9]{1,2}) hours per week")

jobs_list = []
for i, job in enumerate(job_ids):
    print(i)
    # The timeout keeps a single unresponsive page from hanging the whole run.
    data = requests.get(job, timeout=30).content
    soup = BeautifulSoup(data, "html5lib")

    job_title = "-"
    job_id = "-"
    posted_on = "-"
    # The first "section" div is read for title, job id and posting date.
    # It may be absent, in which case all three keep their "-" placeholder.
    header_section = soup.find("div", {"class": "section"})
    header_divs = header_section.find_all("div") if header_section is not None else []
    if header_divs:
        job_title = header_divs[0].text.strip()
    for div in header_divs:
        if "Job" in div.text:
            job_id = str(div.text.strip()).replace("Job", "").strip()
        if "Posted on" in div.text:
            posted_on = div.text.strip().replace("Posted on", "").strip()

    summary = "-"
    details = "-"
    contact = "-"
    # Each "section" div is matched by the heading word its text contains.
    for div in soup.find_all("div", {"class": "section"}):
        if "Summary" in div.text:
            summary = div.text.replace("Summary", "").strip().replace("\t", "")
        if "Details" in div.text:
            details = div.text.replace("Details", "").strip().replace("\t", "")
        if "Contact Information" in div.text:
            contact = div.text.replace("Contact Information", "").strip().replace("\t", "")

    # First "$..." token (up to the next space) found in the details text.
    pay_matches = _pay_per_hour_re.findall(details)
    pay_per_hour = pay_matches[0] if pay_matches else "-"

    # One or two digits immediately before "hours per week".
    hours_matches = _hours_per_week_re.findall(details)
    hours_per_week = hours_matches[0] if hours_matches else "-"

    try:
        pay_per_week = "$" + str(float(pay_per_hour.replace("$", "")) * float(hours_per_week))
    except ValueError:
        # Either value is missing ("-") or is not a plain number.
        pay_per_week = "-"

    job_dict = {
        "JOB_ID": job_id,
        "Title": job_title,
        "Posted On": posted_on,
        "Summary": summary,
        "Details": details,
        "Contact Information": contact,
        "Link": job,
        "PayPerHour": pay_per_hour,
        "HoursPerWeek": hours_per_week,
        "WeeklyPay": pay_per_week,
    }
    jobs_list.append(job_dict)
    for key, value in job_dict.items():
        print(key, ":", value)
    print("_____________________________________________________________")

## Condense information into Dataframe

In [None]:
# Build one row per scraped job and turn WeeklyPay ("$240.0" or "-") into a
# numeric column. Values that are not numbers (the "-" placeholder) become
# NaN instead of raising, and the column is assigned as a whole rather than
# through element-wise chained assignment.
df = pd.DataFrame(jobs_list)
if "WeeklyPay" in df.columns:  # column is absent when jobs_list is empty
    weekly_pay_text = df["WeeklyPay"].astype(str).str.replace("$", "", regex=False)
    df["WeeklyPay"] = pd.to_numeric(weekly_pay_text, errors="coerce")

### Sorting by Weekly Pay (Descending Order)

In [None]:
# sort_values returns a new DataFrame; bind it back to `df` so the ordering
# is kept for the Excel export instead of being discarded after display.
df = df.sort_values(by=["WeeklyPay"], ascending=False)
df  # last expression of the cell, so the notebook still shows the sorted table

### Saving Information to Excel File

In [None]:
# Write the jobs table to an Excel workbook.
excel_path = "PennJobs.xlsx"
df.to_excel(excel_path)

In [None]:
# Preview the first five rows of the table.
df.head(n=5)

# Contact Information:
(For Collaboration on Projects or building web automation bots)

## https://www.fiverr.com/rick137codes