In [14]:
from selenium import webdriver
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import time
import csv
import pandas as pd
import os

class Scraper:
    """Log in to Upwork through Google and collect job posts from the feed.

    All browser interaction goes through the ``driver`` passed to the
    constructor, and the credentials typed into the Google dialog are the
    ones stored on the instance, so the class does not rely on module-level
    ``driver``/``email``/``password`` names being defined.
    """

    def __init__(self, email, password, driver):
        """Store the credentials, the webdriver and the page locators.

        Args:
            email: Address typed into the Google sign-in dialog.
            password: Password typed into the Google sign-in dialog.
            driver: Webdriver instance used for every page action.
        """
        self.driver = driver
        self.new_sections = []
        self.old_sections = []
        self.email = email
        self.password = password
        self.web_path = 'https://www.upwork.com/ab/account-security/login'
        self.google_sign_in_button_path = '//*[@id="login_google_submit"]/span'
        self.email_path = '//*[@id="identifierId"]'
        self.password_path = '//*[@id="password"]/div[1]/div/div[1]/input'
        self.most_recent_path = "//button[@class = 'air3-tab-btn']"
        self.section_path = "//section[@class = 'air3-card-section air3-card-hover p-4 px-2x px-md-4x']"
        self.load_more_path = "//button[@data-test='load-more-button']"
        self.creds_path = 'C:/Users/191204026/SCRAPING_PROJECTS/credentials.json'
        self.exit_path = '/html/body/div[11]/div/div[2]/div/div[2]/div[2]/div[2]/button/div/svg'
        self.description_path = '//span[@data-test="job-description-text"]'
        # Title tags of the job posts already parsed; used to skip duplicates.
        self.names = []
        self.base_path = '//p[@class ="up-footer-copyright"]'

    def _wait_for(self, xpath, timeout):
        """Wait up to *timeout* seconds for *xpath* to be present and return the element."""
        return WebDriverWait(self.driver, timeout).until(
            EC.presence_of_element_located((By.XPATH, xpath))
        )

    @staticmethod
    def _text_or_dash(tag):
        """Return ``tag.text``, or ``"-"`` when the tag was not found."""
        return "-" if tag is None else tag.text

    def page_getter(self):
        """Open the login url with the webdriver."""
        self.driver.get(self.web_path)

    def google_button_clicker(self):
        """Click the Google sign-in button on the landing page.

        Raises:
            Exception: Whatever the wait or the click raised; the driver is
                quit first so no browser is left running.
        """
        try:
            self._wait_for(self.google_sign_in_button_path, 50).click()
        except Exception:
            self.driver.quit()
            raise

    def window_switcher(self):
        """Switch the webdriver to the newly opened popup window."""
        # Fixed pause before reading the handles so the popup has time to open.
        time.sleep(10)
        new_window = self.driver.window_handles[1]
        self.driver.switch_to.window(new_window)

    def google_email_sign_in(self):
        """Type the user's email into the Google dialog and submit it.

        Raises:
            Exception: Whatever the wait or the typing raised; the driver is
                quit first.
        """
        try:
            email_box = self._wait_for(self.email_path, 50)
            email_box.clear()
            email_box.send_keys(self.email)
            time.sleep(20)
            email_box.send_keys(Keys.RETURN)
        except Exception:
            self.driver.quit()
            raise

    def google_password_sign_in(self):
        """Type the user's password into the Google dialog and submit it.

        Raises:
            Exception: Whatever the wait or the typing raised; the driver is
                quit first.
        """
        try:
            password_box = self._wait_for(self.password_path, 20)
            password_box.clear()
            password_box.send_keys(self.password)
            password_box.send_keys(Keys.RETURN)
        except Exception:
            self.driver.quit()
            raise

    def window_resetter(self):
        """Switch the webdriver back to the original (first) window."""
        window = self.driver.window_handles[0]
        self.driver.switch_to.window(window)

    def exiter(self):
        """Close the popup that comes up after logging in to Upwork.

        This step is best-effort: if the popup cannot be found or clicked
        the failure is ignored and the session continues.
        """
        try:
            self._wait_for(self.exit_path, 50).click()
        except Exception:
            pass

    def most_recent(self):
        """Open the "most recent" tab, where the data is collected from.

        Raises:
            Exception: Whatever the wait or the click raised; the driver is
                quit first.
        """
        try:
            self._wait_for(self.most_recent_path, 50).click()
        except Exception:
            self.driver.quit()
            raise

    def section_getter(self):
        """Return the inner HTML of every job section not parsed yet.

        Each job post is contained in its own ``<section>``. A section is
        skipped when its title tag is already recorded in ``self.names``.

        Returns:
            list[str]: Inner HTML of the new sections. If anything fails
            part-way, the sections collected so far are returned.
        """
        new_sections = []
        try:
            self._wait_for(self.section_path, 50)
            for section in self.driver.find_elements(By.XPATH, self.section_path):
                inner_html = section.get_attribute('innerHTML')
                soup_1 = BeautifulSoup(inner_html, 'html.parser')
                new_name = soup_1.find('h3', class_='my-0 p-sm-right job-tile-title h5')

                if new_name not in self.names:
                    new_sections.append(inner_html)
        except Exception:
            # Best-effort: a timeout or stale element ends collection early.
            pass
        return new_sections

    def parser(self, new_sections):
        """Parse each section's HTML into one row of job data.

        Args:
            new_sections: Iterable of HTML strings, one per job section.

        Returns:
            list[list]: One row per section in the column order written by
            ``csv_header``. A field whose tag is missing becomes ``"-"``;
            a missing skill list becomes an empty list.
        """
        outer_list = []

        for section in new_sections:
            soup = BeautifulSoup(section, 'html.parser')

            new_name = soup.find('h3', class_='my-0 p-sm-right job-tile-title h5')
            new_skill = soup.find('ul', class_='air3-token-wrap')
            budget = soup.find('span', {'data-test': 'budget'})
            experience_level = soup.find('span', {'data-test': 'contractor-tier'})
            rate_type = soup.find('strong', {'data-test': 'job-type'})
            duration = soup.find('span', {'data-test': 'duration'})
            description = soup.find('span', {'data-test': 'job-description-text'})

            # Iterating a tag walks its direct children; a post without a
            # skill list yields no skills instead of raising.
            new_skills = [] if new_skill is None else [elem.text for elem in new_skill]

            # Remember the title tag so section_getter can skip this post later.
            self.names.append(new_name)
            outer_list.append([
                self._text_or_dash(new_name),
                new_skills,
                self._text_or_dash(budget),
                self._text_or_dash(experience_level),
                self._text_or_dash(rate_type),
                self._text_or_dash(duration),
                self._text_or_dash(description),
            ])
        return outer_list

    def load_more_clicker(self):
        """Click the "load more" button with the driver."""
        load_more_button = self.driver.find_element(By.XPATH, self.load_more_path)
        load_more_button.click()

    def base_scroller(self):
        """Scroll to the bottom of the page."""
        bottom_element = self.driver.find_element(By.XPATH, self.base_path)
        # Reading this property is what triggers the scroll; the value is unused.
        bottom_element.location_once_scrolled_into_view

    def csv_header(self, filename):
        """Append the column-title row to the csv file *filename*.

        The file is opened in append mode, so calling this twice on the
        same file writes the header twice.
        """
        headers = [['Title', 'Skills', 'budget', 'experience_level', 'rate_type', 'duration', 'description']]
        # Same encoding as csv_appender so the whole file decodes as UTF-8.
        with open(filename, 'a', newline='', encoding="utf-8") as csvfile:
            csvwriter_1 = csv.writer(csvfile, dialect='excel')
            csvwriter_1.writerows(headers)

    def csv_appender(self, outer_list, filename):
        """Append the collected rows to the same csv file used by csv_header."""
        with open(filename, 'a', newline='', encoding="utf-8") as csvfile:
            csvwriter_1 = csv.writer(csvfile, dialect='excel')
            csvwriter_1.writerows(outer_list)

In [None]:
# Credentials handed to the Scraper for the Google sign-in dialog. They are
# empty strings here; presumably filled in before the cell is run.
email = ''
password = ''
# XPath of the "load more" button, polled by the scraping loop in a later cell.
load_more_path = "//button[@data-test='load-more-button']"
# Start Chrome through undetected_chromedriver with a local chromedriver binary.
driver = uc.Chrome(executable_path ='chromedriver.exe')

# Log in step by step. The order matters: each call acts on the page or
# window left behind by the previous one.
us = Scraper(email, password, driver)
us.page_getter()              # open the Upwork login page
us.google_button_clicker()    # click the Google sign-in button
us.window_switcher()          # move to the popup window (handle index 1)
us.google_email_sign_in()     # type and submit the email
# NOTE(review): fixed 40 s pause before the password step; presumably leaves
# time for the Google dialog to advance or for manual verification - confirm.
time.sleep(40)
us.google_password_sign_in()  # type and submit the password
us.window_resetter()          # return to the original window (handle index 0)
us.exiter()                   # dismiss the post-login popup if it is found
us.most_recent()              # open the "most recent" tab
time.sleep(10)

In [31]:
# Name of the output CSV, relative to the current working directory.
new_file = 'Upwork_data.csv'
# Write the column-title row. csv_header opens the file in append mode, so
# re-running this cell adds another header row to an existing file.
us.csv_header(new_file)

In [None]:
# Scrape the visible job posts, save them, then load the next batch until the
# "load more" button is no longer on the page.
while True:
    sections_1 = us.section_getter()
    data_list = us.parser(sections_1)
    # Save before clicking so a failed click cannot lose the parsed rows.
    us.csv_appender(data_list, new_file)
    # find_elements returns an empty list when nothing matches, whereas
    # find_element raises; this lets the loop end cleanly. Checking after the
    # scrape also means the batch loaded by the last click is still collected.
    if not driver.find_elements(By.XPATH, load_more_path):
        break
    us.base_scroller()
    us.load_more_clicker()

In [15]:
# Shut down the browser and end the webdriver session.
driver.quit()

In [None]:
# Read the scraped CSV back into a DataFrame for inspection.
# NOTE(review): the scraping cells write to the relative name
# 'Upwork_data.csv', while this cell reads from the absolute project folder;
# confirm the notebook's working directory is that folder.
new_file = 'Upwork_data.csv'
new_path = os.path.join('C:/Users/191204026/SCRAPING_PROJECTS', new_file)
new_data = pd.read_csv(new_path, encoding='utf-8')
# A bare expression on the last line makes the notebook display the DataFrame.
new_data