# Freelancer Scraper - search by keyword

In [None]:
#Import the packages needed to build the freelancer scraper and navigate the pages
import re
import pandas as pd
from time import sleep
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

In [None]:
class FreelancerScraper:
    """Scrape freelancer.com profiles from the United States listing by keyword.

    Typical use: construct with a search term, call ``run()``, then read the
    scraped rows from ``users_info``.

    Attributes:
        search: Search term typed into the listing's search box.
        url: Listing page the scrape starts from.
        max_rows: Upper bound on the number of usernames collected.
        user_profiles: Set of profile URLs gathered by ``get_all_usernames``.
        users_info: List of dicts, one per successfully scraped profile.
        browser: Most recently created Chrome driver (``None`` until a
            scrape starts).
    """

    def __init__(self, search=None, max_rows=float("inf")):
        """Store the search term and row limit and reset the result containers.

        Args:
            search: Keyword to search for. Required.
            max_rows: Maximum number of usernames to collect (default: no limit).

        Raises:
            ValueError: If ``search`` is ``None``.
        """
        # Check if a search term was inputted
        if search is None:
            raise ValueError('Please provide a search term for scraping')

        self.search = search
        self.url = 'https://www.freelancer.com/freelancers/united-states/'

        # Sets a hard limit on the amount of rows the scraper should fetch
        self.max_rows = max_rows

        # Set to store the scraped user profile links (deduplicated)
        self.user_profiles = set()

        # List to store scraped user profile info
        self.users_info = []

        # Populated with a Chrome driver once scraping starts
        self.browser = None

    @staticmethod
    def _new_browser():
        """Create a headless Chrome driver with the sandbox disabled."""
        options = Options()
        options.add_argument("--headless")
        options.add_argument('--no-sandbox')
        return webdriver.Chrome(options=options)

    @staticmethod
    def _quit_browser(browser):
        """Shut down ``browser`` (if any) without letting cleanup errors escape."""
        if browser is None:
            return
        try:
            # quit() ends the whole driver session; close() would only close
            # the window and leave the driver process running.
            browser.quit()
        except Exception as e:
            print("Failed to shut down browser:", e)

    @staticmethod
    def _click_if_present(root, by, value):
        """Click the first element under ``root`` matching the locator.

        Returns:
            True if the element was found and clicked, False if Selenium
            reported an error (element missing, not clickable, ...).
        """
        # Imported here so the block does not depend on a new file-level import
        from selenium.common.exceptions import WebDriverException

        try:
            root.find_element(by, value).click()
        except WebDriverException:
            return False
        return True

    @staticmethod
    def _parse_result_amount(text):
        """Return the result count held in the second space-separated token.

        Thousands separators are stripped, so a token of "1,234" yields 1234.
        """
        return int(text.split(" ")[1].replace(",", ""))

    def get_all_usernames(self):
        """Collect profile links for the search term into ``user_profiles``.

        Opens the listing page, deselects the "online" filter when present,
        submits the search term, then walks the result pages until the
        reported result count or ``max_rows`` is reached, a page yields no
        usernames, or there is no pagination link left to click.
        The browser is always shut down, even if scraping fails part-way.
        """
        print("Fetching usernames...")

        # Create browser instance
        browser = self._new_browser()
        self.browser = browser

        try:
            # Get the starting webpage and wait sometime for elements to load
            browser.get(self.url)
            sleep(3)

            # See if online selected, and deselect; wait only if we clicked
            if self._click_if_present(browser, By.ID, "selected-online"):
                sleep(3)

            search_input = browser.find_element(By.ID, "freeSearchInput")
            search_input.clear()
            search_input.send_keys(self.search)
            search_input.send_keys(Keys.RETURN)

            sleep(3)

            result_amount_text = browser.find_element(By.CLASS_NAME, "result-amount").text
            result_amount = self._parse_result_amount(result_amount_text)

            print("Found", result_amount, "total results.")

            usernames_scraped = 0

            while usernames_scraped < result_amount and usernames_scraped < self.max_rows:

                # Get the profile links for everyone on the page
                usernames = browser.find_elements(By.CLASS_NAME, "find-freelancer-username")

                # An empty page can never advance the counter: stop instead
                # of clicking "next" forever.
                if not usernames:
                    break

                for user in usernames:
                    # Enforce max_rows as a hard limit, even mid-page
                    if usernames_scraped >= self.max_rows:
                        break
                    usernames_scraped += 1

                    # Add username to the set of profiles to fetch later
                    href = user.get_attribute('href')
                    if href:
                        self.user_profiles.add(href)

                print("Usernames fetched:", usernames_scraped)

                # No need to load another page once a limit has been reached
                if usernames_scraped >= result_amount or usernames_scraped >= self.max_rows:
                    break

                # NAVIGATION TO NEXT PAGE

                # Find all the page navigation buttons
                pg_nav_btns = browser.find_elements(By.CSS_SELECTOR, "a[data-target='pagination']")
                if not pg_nav_btns:
                    break

                # Click on the last one (next page button)
                pg_nav_btns[-1].click()
                sleep(3)
        finally:
            # Close the browser when done, whether or not scraping succeeded
            self._quit_browser(browser)

    def get_user_profile_info(self, user_profile_link):
        """Scrape one user profile page and append the result to ``users_info``.

        Any failure is reported on stdout and the profile is skipped; the
        browser opened for the profile is always shut down.

        Args:
            user_profile_link: URL of the profile page to scrape.
        """
        print("Getting profile information from", user_profile_link, "...")

        browser = None
        try:
            browser = self._new_browser()
            self.browser = browser
            browser.get(user_profile_link)
            sleep(2)

            # Dictionary to store the user's info
            user_info = {}

            # Store the search term this profile was found with
            user_info['search_query'] = self.search

            # Get name
            user_info['name'] = browser.find_element(By.CSS_SELECTOR, "h3[data-color='dark']").text

            # Store the profile link
            user_info['profile_link'] = user_profile_link

            # Get tagline
            user_info['tagline'] = browser.find_element(By.CSS_SELECTOR, "h2[data-color='mid']").text

            # Get description: expand it via the read-more button when there
            # is one, then read the text either way
            description_box = browser.find_element(By.CSS_SELECTOR, "fl-text[data-max-lines='15']")
            self._click_if_present(description_box, By.CLASS_NAME, 'ReadMoreButton')
            user_info['user_description'] = description_box.text

            # Get location
            user_info['location'] = browser.find_element(By.CSS_SELECTOR, "fl-col[class='SupplementaryInfo']").text

            # Get join date
            joined_text = browser.find_element(By.XPATH, "//*[contains(text(),'Joined')]").text
            user_info['join_date'] = joined_text.replace('Joined ', '')

            # Hourly rate
            rate_text = browser.find_element(By.XPATH, "//*[contains(text(),'USD')]").text
            user_info['hourly_rate'] = re.findall(r'\$\d+', rate_text)[0]

            # Get pay grade
            user_info['pay_grade'] = browser.find_element(By.CSS_SELECTOR, "div[data-size='xxsmall']").text

            # Get rating container (the second match on the page)
            rating_container = browser.find_elements(By.CSS_SELECTOR, "fl-bit[class ='RatingContainer']")[1]

            # Get average rating and number of reviews
            user_info['avg_rating'], reviews = rating_container.text.split('\n')
            user_info['num_reviews'] = re.findall(r'\d+', reviews)[0]

            # Get number of recommendations
            recommendations_text = browser.find_element(By.CSS_SELECTOR, "fl-col[class='RecommendationsText']").text
            user_info['num_recommendations'] = re.findall(r'\d+', recommendations_text)[0]

            # Get stats on performance; exactly four values are expected, in
            # this order
            reputation_items = browser.find_elements(By.CSS_SELECTOR, "fl-text[class='ReputationItemAmount']")
            pct_jobs_completed, pct_on_budget, pct_on_time, repeat_hire_rate = [
                item.text.replace('%', '') for item in reputation_items
            ]

            user_info['pct_jobs_completed'] = pct_jobs_completed
            user_info['pct_on_budget'] = pct_on_budget
            user_info['pct_on_time'] = pct_on_time
            user_info['repeat_hire_rate'] = repeat_hire_rate

            # Add the extracted info to the list for all users
            self.users_info.append(user_info)

            print("Fetched user info from", user_profile_link)

        except Exception as e:
            # Best effort: report the failure and move on to the next profile
            print("Failed to fetch user profile:", user_profile_link)
            print(e)

        finally:
            # Always release the browser so failed profiles do not leave
            # driver processes behind
            self._quit_browser(browser)

    def run(self):
        """Collect the profile links, then scrape every collected profile."""
        # Extract all the usernames for the search term
        self.get_all_usernames()

        # Get all the info for each of the usernames
        for users_fetched, url in enumerate(self.user_profiles, start=1):
            self.get_user_profile_info(url)
            print("Users fetched:", users_fetched)
            sleep(2.5)

        print("All done!")
  
        

In [None]:
# Scrape up to 5000 profiles matching the "designer" search term.
scraper = FreelancerScraper(
    search="designer",
    max_rows=5000,
)
scraper.run()

Users fetched: 1161
Getting profile information from https://www.freelancer.com/u/amirrzp78 ...
Fetched user info from https://www.freelancer.com/u/amirrzp78
Users fetched: 1162
Getting profile information from https://www.freelancer.com/u/lilbit800 ...
Fetched user info from https://www.freelancer.com/u/lilbit800
Users fetched: 1163
Getting profile information from https://www.freelancer.com/u/Ridafarhat5 ...
Fetched user info from https://www.freelancer.com/u/Ridafarhat5
Users fetched: 1164
Getting profile information from https://www.freelancer.com/u/barrett797 ...
Fetched user info from https://www.freelancer.com/u/barrett797
Users fetched: 1165
Getting profile information from https://www.freelancer.com/u/A7med3aw ...
Fetched user info from https://www.freelancer.com/u/A7med3aw
Users fetched: 1166
Getting profile information from https://www.freelancer.com/u/TaTaArt ...
Fetched user info from https://www.freelancer.com/u/TaTaArt
Users fetched: 1167
Getting profile information from

#0 0x5592e1ff17f9 <unknown>
#1 0x5592e1f913b3 <unknown>
#2 0x5592e1cd9016 <unknown>
#3 0x5592e1d01833 <unknown>
#4 0x5592e1d2e93c <unknown>
#5 0x5592e1d2e0ae <unknown>
#6 0x5592e1caf687 <unknown>
#7 0x5592e1cb0763 <unknown>
#8 0x5592e1fbd542 <unknown>
#9 0x5592e1fccce7 <unknown>
#10 0x5592e1fcc9e4 <unknown>
#11 0x5592e1fd113a <unknown>
#12 0x5592e1fcd5b9 <unknown>
#13 0x5592e1fb2e00 <unknown>
#14 0x5592e1caf4e5 <unknown>
#15 0x7fc89dc4609b __libc_start_main
#16 0x5592e1cae02a _start

Users fetched: 4416
Getting profile information from https://www.freelancer.com/u/kramercanfield ...
Failed to fetch user profile: https://www.freelancer.com/u/kramercanfield
Message: unknown error: failed to start a thread for the new session
Stacktrace:
#0 0x5644bcccc7f9 <unknown>
#1 0x5644bcc6c3b3 <unknown>
#2 0x5644bc9b4016 <unknown>
#3 0x5644bc9dc833 <unknown>
#4 0x5644bca0993c <unknown>
#5 0x5644bca090ae <unknown>
#6 0x5644bc98a687 <unknown>
#7 0x5644bc98b763 <unknown>
#8 0x5644bcc98542 <unknown>
#9 

In [None]:
# Build a DataFrame with one row per scraped profile and preview the first five rows.
scraped_pd_df = pd.DataFrame(data=scraper.users_info)
scraped_pd_df.head(n=5)

Unnamed: 0,search_query,name,profile_link,tagline,user_description,location,join_date,hourly_rate,pay_grade,avg_rating,num_reviews,num_recommendations,pct_jobs_completed,pct_on_budget,pct_on_time,repeat_hire_rate
0,software engineer,Sanjay P.,https://www.freelancer.com/u/cupertinovw,Your Ideas Implemented,"Lambodar Inc,\nCupertino, California\n\nSummar...","Cupertino, United States","August 18, 2004",$46,8.5,4.9,81,10,97.0,100.0,96.0,18.0
1,software engineer,Ken L.,https://www.freelancer.com/u/nvmnet,Computer Engineer,I develop computer softwares\ncustom operating...,"N/A, United States","March 27, 2011",$200,0.0,0.0,0,0,,,,
2,software engineer,Chris C.,https://www.freelancer.com/u/TeamJump,Always looking for extra talent in the areas o...,24 years of software development and informati...,"Kansas City, United States","October 1, 2010",$50,0.0,0.0,0,0,,,,
3,software engineer,Mena E.,https://www.freelancer.com/u/meskandar317,Engineer and Excel Expert,I am an engineer with experience in manufactur...,"Newark, United States","June 19, 2019",$22,0.0,0.0,0,0,,,,
4,software engineer,Tomer H.,https://www.freelancer.com/u/hebtom,Software Engineer,"I'm located in Austin, Texas.\n\nI have worked...","Austin, United States","April 7, 2019",$50,0.0,0.0,0,0,,,,


In [None]:
# Name the CSV after the search term, with spaces swapped for underscores.
csv_filename = "_".join(scraper.search.split(" ")) + ".csv"
scraped_pd_df.to_csv(csv_filename, index=False)

In [None]:
# Show summary statistics for each column of the scraped data.
scraped_pd_df.describe()

Unnamed: 0,search_query,name,profile_link,tagline,user_description,location,join_date,hourly_rate,pay_grade,avg_rating,num_reviews,num_recommendations,pct_jobs_completed,pct_on_budget,pct_on_time,repeat_hire_rate
count,4595,4595,4595,4595,4595,4595,4595,4595,4595.0,4595.0,4595,4595,4595.0,4595.0,4595.0,4595.0
unique,1,3988,4595,3738,4592,1931,2402,114,89.0,20.0,110,25,29.0,55.0,55.0,52.0
top,software engineer,Michael S.,https://www.freelancer.com/u/cupertinovw,Software Engineer,I am a Full-Stack Software Engineer with a con...,United States,"January 10, 2022",$20,0.0,0.0,0,0,,,,
freq,4595,10,1,181,2,86,20,562,3605.0,3611.0,3600,4206,3565.0,3626.0,3625.0,4103.0


In [None]:
# Count how often each distinct job-completion value occurs across profiles.
completion_pct = scraped_pd_df["pct_jobs_completed"]
completion_pct.value_counts()

N/A    3565
100     848
98       21
50       20
96       16
97       14
99       13
95       12
67       11
75        9
92        8
94        7
80        7
89        6
83        5
93        5
90        4
86        4
88        4
91        4
78        3
71        2
29        1
84        1
63        1
82        1
33        1
77        1
79        1
Name: pct_jobs_completed, dtype: int64

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=acc27b92-84be-4130-8026-204943f38189' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>