In [8]:
from selenium.webdriver import Firefox
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from IPython.display import clear_output

from bs4 import BeautifulSoup
import time

## 1. Requesting data

### 1.1 Get seller URLs

In [None]:
# Start a Firefox WebDriver session; `executable_path` points at the geckodriver binary.
driver = Firefox(executable_path=r"geckodriver.exe")

In [None]:
# Load the Fiverr landing page in the browser session started above.
url = "https://www.fiverr.com/?source=top_nav"
driver.get(url)

In [None]:
# search for key words
search_term = 'data science' 
# Type the term into the second <input type="search"> matched on the page and submit with ENTER.
# NOTE(review): the [1] index assumes at least two search inputs exist — confirm; otherwise IndexError.
driver.find_elements_by_xpath(".//input[@type='search']")[1].send_keys(search_term, Keys.ENTER)

In [None]:
# check for denied access
# The cell's displayed value is True when the page title is the block notice.
soup = BeautifulSoup(driver.page_source, "lxml")
# `soup.title` is None when the page has no <title> tag; guard it so the check
# evaluates to False instead of raising AttributeError.
soup.title is not None and soup.title.text == 'Your Access To This Website Has Been Blocked'

In [None]:
# test for sign-up pop up
pop_up = driver.find_elements_by_class_name('sign-up-form')
action = ActionChains(driver)
if pop_up:
    # Move the pointer to an offset of (700, 0) relative to the form and click there
    # (presumably a spot outside the form that closes the overlay — TODO confirm).
    action.move_to_element_with_offset(pop_up[0], 700, 0).click().perform()

In [None]:
# no. of search results: text of the element carrying the 'number-of-results' class
no_of_services = driver.find_element_by_class_name('number-of-results').text
# bare expression so the notebook displays the value
no_of_services

In [None]:
# scroll to bottom of page
# The script scrolls to the current bottom and returns the document height.
js_script = "window.scrollTo(0, document.body.scrollHeight);var lenOfPage=document.body.scrollHeight;return lenOfPage;"
lenOfPage = driver.execute_script(js_script)
match = False
# Keep scrolling until two consecutive calls report the same height.
while not match:
    lastCount, lenOfPage = lenOfPage, driver.execute_script(js_script)
    match = lastCount == lenOfPage

In [None]:
# click next page
# The last element with the 'pagination-arrows' class is treated as the "next" arrow.
# NOTE(review): [-1] raises IndexError when no pagination arrows are present — confirm the page always has them.
right_arrow = driver.find_elements_by_class_name('pagination-arrows')[-1]
right_arrow.click()

In [None]:
# get seller information: parse the rendered page and collect every gig card
soup = BeautifulSoup(driver.page_source, "lxml")
gigs = soup.findAll('div', {'class': 'gig-card-layout'})

# first seller name and gig url (nothing is printed when the page has no gig cards)
if gigs:
    gig = gigs[0]
    # the card's <h3><a href> is a site-relative link, so prefix the host
    seller_url = "https://www.fiverr.com" + gig.h3.a['href']
    seller_name = gig.find('div', {'class': 'seller-name'}).text
    print(seller_name, seller_url)

In [None]:
# iterate over pages
# For every result page: scroll to the bottom, read the gigs on the page that is
# currently shown, then move on to the next page. Scraping happens BEFORE the
# click so the first page is included and the last page is not read twice.

# The script scrolls to the current bottom and returns the document height.
js_script = "window.scrollTo(0, document.body.scrollHeight);var lenOfPage=document.body.scrollHeight;return lenOfPage;"
scroll = True

while scroll:
    # scroll down until two consecutive calls report the same page height
    lenOfPage = driver.execute_script(js_script)
    match = False
    while not match:
        lastCount = lenOfPage
        lenOfPage = driver.execute_script(js_script)
        match = lastCount == lenOfPage

    time.sleep(1)

    # get seller information from the page currently displayed
    soup = BeautifulSoup(driver.page_source, "lxml")
    gigs = soup.findAll('div', {'class': 'gig-card-layout'})

    # print first gig info
    for gig in gigs[:1]:
        seller_url = "https://www.fiverr.com" + gig.h3.a['href']
        seller_name = gig.find('div', {'class': 'seller-name'}).text
        print(seller_name, seller_url)

    # click next page; stop when the two pagination arrows are not both present
    # or the "next" arrow cannot be clicked
    arrows = driver.find_elements_by_class_name('pagination-arrows')
    if len(arrows) == 2:
        try:
            arrows[-1].click()
        except Exception:
            # Best-effort: a failed click is treated as "no further page".
            # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupts working.
            scroll = False
    else:
        scroll = False

    # give the next page a moment to load before the next iteration
    time.sleep(1)

In [None]:
# End the WebDriver session used in section 1.
driver.quit()

## 2. Testing the `FiverrScraper` class

In [1]:
from DS_Fivver_Gigs import *
import pandas as pd

In [None]:
# initiate web driver (FiverrScraper comes from the DS_Fivver_Gigs star import)
driver = FiverrScraper()
# keyword to search for (hard-coded here, not prompted)
search_key_word = 'python tutor'

# check for detection
driver.check_status()

# search for keyword
# NOTE(review): `time` is not imported in this section's import cell; presumably it is
# provided by the star import or by the first cell of the notebook — confirm.
time.sleep(5)
driver.apply_search(search_key_word)

# check for pop-ups and detection (after a fixed 5-second pause)
time.sleep(5)
driver.check_status()
driver.check_popup()

In [3]:
# accumulator for the records returned by gig_names()
data = []

# walk the result pages for as long as the scraper's `scroll` flag is set
while driver.scroll:
    # parse the page currently shown and keep its gig records
    data.extend(gig_names(BeautifulSoup(driver.driver.page_source, "lxml")))
    # then scroll down, go to the next page and pause before re-checking status
    driver.scroll_down()
    driver.click_r_arrow()
    time.sleep(2)
    driver.check_status()

In [4]:
# Tabulate the collected records as two columns: seller name and gig URL.
df_gigs = pd.DataFrame(data, columns=['Seller_name', "Gig_URL"])
df_gigs.head()

Unnamed: 0,Seller_name,Gig_URL
0,pygeek,https://www.fiverr.com/pygeek/fix-any-linux-se...
1,dogaozgon,https://www.fiverr.com/dogaozgon/tutor-you-in-...
2,minhalzafarsw06,https://www.fiverr.com/minhalzafarsw06/tutor-h...
3,araboy24,https://www.fiverr.com/araboy24/be-your-python...
4,hammad_jafar,https://www.fiverr.com/hammad_jafar/teach-pyth...


In [5]:
# (rows, columns) of the gig table — one row per collected gig record
df_gigs.shape

(191, 2)

In [7]:
def get_txt(element):
    """Return the `.text` of the first item in `element`, or '' when it is empty.

    `element` is a sized, indexable collection (for example the result of a
    `findAll` call); only its first item is inspected.
    """
    return element[0].text if len(element) >= 1 else ''
    

def seller_info(soup):
    """Extract the details of one gig page as a fixed-length list of 14 strings.

    Parameters
    ----------
    soup : parsed gig page exposing `.h1`, `.find(...)` and `.findAll(...)`
        (a BeautifulSoup document in this notebook).

    Returns
    -------
    list of str
        In order: gig name, seller level, one-liner, review score, review count,
        three package prices, four user stats, two category names.
        Any piece that is missing from the page is returned as ''. The length is
        always 14 so every row lines up with the same set of columns.
    """
    def _pad(values, size):
        # Truncate/pad with '' so the caller always receives exactly `size` items.
        values = list(values)[:size]
        return values + [''] * (size - len(values))

    heading = soup.h1
    gig_name = heading.text if heading is not None else ''
    seller_lvl = soup.findAll('div', {'class': 'seller-level'})
    one_liner = soup.findAll('p', {'class': 'one-liner'})
    review_score = soup.findAll('b', {'class': 'rating-score'})
    review_count = soup.findAll('span', {'class': 'ratings-count'})

    # categories: text of the first two links inside the gig overview
    overview = soup.find('div', {'class': 'gig-overview'})
    links = overview.findAll('a', href=True) if overview is not None else []
    cat = _pad([link.text for link in links], 2)

    # package prices: up to three labels from the packages table when it exists,
    # otherwise the single price of the lone package followed by two blanks
    packages = soup.findAll('div', {'class': 'gig-page-packages-table'})
    if len(packages) > 0:
        labels = packages[0].findAll('p', {'class': 'price-label'})
        prices = _pad([label.text for label in labels], 3)
    else:
        package = soup.find('div', {'class': 'package-content'})
        spans = package.findAll('span', {'class': 'price'}) if package is not None else []
        prices = [get_txt(spans), "", ""]

    # user stats: only used when exactly four <li> entries are present
    stats_list = soup.find('ul', {'class': 'user-stats'})
    stats_items = stats_list.findAll('li') if stats_list is not None else []
    if len(stats_items) == 4:
        # Drop a fixed-length leading label from each entry (4, 12, 18 and 13
        # characters) — presumably "From", "Member since", "Avg. response time"
        # and "Last delivery"; verify against the live markup.
        user_stats = [item.text[skip:] for item, skip in zip(stats_items, (4, 12, 18, 13))]
    else:
        user_stats = ['', '', '', '']

    return [gig_name, get_txt(seller_lvl), get_txt(one_liner), get_txt(review_score),
            get_txt(review_count)] + prices + user_stats + cat

In [11]:
# one combined row per gig: the df_gigs columns followed by the seller_info() fields
info = []

for idx in df_gigs.index:
    gig_url = df_gigs.loc[idx, 'Gig_URL']
    # open the gig page, pause for a fixed 5 seconds, then run the scraper's status check
    driver.driver.get(gig_url)
    time.sleep(5)
    driver.check_status()
    soup = BeautifulSoup(driver.driver.page_source, "lxml")
    info.append([*df_gigs.loc[idx].values.tolist(), *seller_info(soup)])
    # replace the previous progress line instead of stacking a new one per gig
    clear_output(wait=True)
    print(f"{idx+1}/{df_gigs.shape[0]} scraped.")

191/191 scraped.


In [12]:
# Build the final table: the two df_gigs columns followed by the 14 fields that
# seller_info() returns, named in the same order as they appear in each row.
df = pd.DataFrame(info, columns=['Seller_name', "Gig_URL", 'Gig_name', 'Seller_lvl', 'One_liner',
                                 'Review_score', 'Review_count', 'Price_basic', 'Price_Standard', 'Price_premium',
                                 'Origin', 'Member_since', 'Response_time', 'Last_delivery', 'Cat', 'SubCat'])

df.head()

Unnamed: 0,Seller_name,Gig_URL,Gig_name,Seller_lvl,One_liner,Review_score,Review_count,Price_basic,Price_Standard,Price_premium,Origin,Member_since,Response_time,Last_delivery,Cat,SubCat
0,pygeek,https://www.fiverr.com/pygeek/fix-any-linux-se...,"I will tutor, help or teach you to code python...",Level 2 Seller,Your very own Python Geek,5,(385),€8.84,€61.87,€106.06,Pakistan,Dec 2019,1 hour,1 day,Programming & Tech,Online Coding Lessons
1,dogaozgon,https://www.fiverr.com/dogaozgon/tutor-you-in-...,"I will tutor you in python, machine learning a...",,,5,(4),€26.52,,,Canada,Jul 2021,3 hours,1 week,Programming & Tech,Online Coding Lessons
2,minhalzafarsw06,https://www.fiverr.com/minhalzafarsw06/tutor-h...,"I will tutor, help or teach you to code in pyt...",,,5,(6),€8.84,€17.68,€44.19,Pakistan,Jun 2021,1 hour,1 day,Programming & Tech,Online Coding Lessons
3,araboy24,https://www.fiverr.com/araboy24/be-your-python...,I will be your python tutor from beginner to e...,,"Programmer by Day, Programmer by Night",5,(24),€8.84,€13.26,€35.35,United States,Jun 2019,1 hour,1 day,Programming & Tech,Online Coding Lessons
4,hammad_jafar,https://www.fiverr.com/hammad_jafar/teach-pyth...,"I will teach python, programming in python,pyt...",,Your limitations are what you impose on yourself,5,(7),€4.42,€13.26,€22.10,Pakistan,Jun 2021,1 hour,about 6 hours,Programming & Tech,Online Coding Lessons


In [13]:
# (rows, columns) of the final table — one row per scraped gig page
df.shape

(191, 16)

In [14]:
# df.to_csv('python_tutor_2.csv')

In [15]:
# Close the browser held by the scraper object (`driver.driver` is its WebDriver).
driver.driver.quit()