In [3]:
import os
import traceback
from urllib.parse import quote

import numpy as np
import requests
from bs4 import BeautifulSoup
from tabulate import tabulate

In [4]:
def generate_url(search_string):
    """Build the Fiverr gig-search URL for a free-text query.

    Args:
        search_string: The query as typed by a user. Leading and trailing
            whitespace is ignored.

    Returns:
        The search URL with the query percent-encoded in both the ``query``
        and ``search-autocomplete-original-term`` parameters.
    """
    # Percent-encode every reserved character (not only spaces) so that
    # queries containing '&', '+', '#', '?' or non-ASCII text cannot break
    # out of the query parameter. Spaces still become '%20' as before.
    encoded = quote(search_string.strip(), safe='')
    return (
        'https://www.fiverr.com/search/gigs?query=' + encoded
        + '&source=top-bar&search_in=everywhere'
        + '&search-autocomplete-original-term=' + encoded
    )

In [5]:
def get_random_ua():
    """Pick one user-agent string at random from ``UserAgents.txt``.

    The file is read from the current working directory and is expected to
    hold one user-agent string per line. Blank lines are ignored.

    Returns:
        A randomly chosen line with surrounding whitespace removed, or ``''``
        when the file cannot be read or contains no usable line.
    """
    ua_file = 'UserAgents.txt'
    try:
        with open(ua_file) as f:
            # Skip blank lines so an empty string is never chosen.
            candidates = [line.strip() for line in f if line.strip()]
    except Exception as ex:
        # Best effort: a missing or unreadable file degrades to an empty
        # user agent instead of aborting the scrape.
        print('Exception in random_ua')
        print(str(ex))
        return ''

    if not candidates:
        return ''

    # randint's upper bound is exclusive, so every line (including the last
    # one, and the only one in a single-line file) can be selected.
    prng = np.random.RandomState()
    return candidates[int(prng.randint(len(candidates)))]

In [14]:
# url = r'https://www.fiverr.com/search/gigs?query=machine%20learning&source=top-bar&search_in=everywhere&search-autocomplete-original-term=machine%20learning'

def get_soup(search_string, print_soup=False, timeout=30):
    """Fetch the Fiverr search page for ``search_string`` and parse it.

    Args:
        search_string: Free-text query, passed to ``generate_url``.
        print_soup: When truthy, print the raw HTML and also save the response
            body to ``test.html`` in the current working directory.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``lxml`` parser.

    Raises:
        SystemExit: On any ``requests`` failure (HTTP error status, connection
            error, timeout, or other request exception), after printing a
            short message.
    """
    headers = {
        'user-agent': get_random_ua(),
        # The HTTP header is spelled "Referer"; a 'referrer' key is sent as an
        # unrelated header and is not treated as the referring page.
        'referer': r'https://google.com',
        'accept': r'*/*',
        # 'accept-encoding' is deliberately not overridden: requests advertises
        # only the encodings it can decode in this environment. Forcing 'br'
        # without a brotli decoder installed leaves r.text undecodable.
        #'accept-language': r'en-US,en;q=0.9,bn;q=0.8',
        'cache-control': r'no-cache',
        #'origin': r'https://www.fiverr.com',
        'pragma': r'no-cache'
    }

    url = generate_url(search_string)
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        r.raise_for_status()
    except requests.exceptions.HTTPError as err:
        print ("Http Error:",err)
        raise SystemExit(err)
    except requests.exceptions.ConnectionError as err:
        print ("Error Connecting:",err)
        raise SystemExit(err)
    except requests.exceptions.Timeout as err:
        print ("Timeout Error:",err)
        raise SystemExit(err)
    except requests.exceptions.RequestException as err:
        print ("OOps: Something Else",err)
        raise SystemExit(err)

    source = r.text
    soup = BeautifulSoup(source, 'lxml')  # pip install lxml

    if print_soup:
        print(source)
        # Keep a copy of the exact bytes received for offline inspection.
        with open(os.path.join(os.getcwd(), "test.html"), "wb") as f:
            f.write(r.content)

    return soup

In [None]:
# Run the search once; the cells below reuse `search_string` and `soup`.
search_string = 'Video editing'
soup = get_soup(search_string=search_string, print_soup=True)

In [8]:
# Test block for debug purposes only.
# Walks the listing container of `soup` and prints seller id, level, rating
# and price for every gig card it finds.

# find() returns None (it does not raise) when nothing matches, so the
# "blocked" case has to be detected explicitly rather than via try/except.
gigs = soup.find('div', class_='listing-container')
if gigs is None:
    print(f'\n\nPROCESS BLOCKED BY FIVER\n\n')
    # Same exception type the original failure surfaced as; stops the cell.
    raise AttributeError("no 'div.listing-container' element found in soup")

data = [["ID", "Votes", "Price", "Description", "Level", "Stars"]]

# Extract individual gigs
for i, gig in enumerate(gigs.find_all('div', class_='gig-wrapper card'), start=1):
    seller_information = gig.find('span', class_='seller-name')
    try:
        seller_text = seller_information.a.text.strip()
        # Drop a leading "by" word only. str.lstrip('by') would strip any
        # leading 'b'/'y' characters, mangling names such as "bybob".
        words = seller_text.split(None, 1)
        user_id = words[1].strip() if len(words) == 2 and words[0] == 'by' else seller_text
        print(str(i) + ' # user id = ' + user_id)
    except Exception as e:
        print('\n'*2 + f'User ID NOT found. Details: {e}')
        raise

    # Level stays 0 when the seller element has no second child.
    level = 0
    try:
        if len(seller_information.contents) >= 2:
            badge = seller_information.contents[1].text.strip()
            # Remove the literal prefix/suffix; lstrip/rstrip with a string
            # argument strip character sets, not whole words.
            if badge.startswith('Level '):
                badge = badge[len('Level '):]
            if badge.endswith(' Seller'):
                badge = badge[:-len(' Seller')]
            level = badge
            print('level = ' + level)
    except Exception as e:
        print('\n'*2 + f'Level not found. Details: {e}')
        raise

    print('ratings : ' + gig.find('span', class_='gig-rating text-body-2').text)
    print('price : ' + gig.find('a', class_='price').text)
    print('-'*60)

In [65]:
def _strip_prefix(text, prefix):
    """Return ``text`` without a leading ``prefix`` (exact match, not a character set)."""
    return text[len(prefix):] if text.startswith(prefix) else text


def _strip_suffix(text, suffix):
    """Return ``text`` without a trailing ``suffix`` (exact match, not a character set)."""
    return text[:-len(suffix)] if suffix and text.endswith(suffix) else text


def extract_info_from_gig(gig):
    """Collect URL, seller, description, rating and price from one gig card.

    Args:
        gig: A parsed gig-card element exposing ``find()`` the way a
            BeautifulSoup tag does.

    Returns:
        A dict with the keys ``url``, ``user_info`` (``user_id``, ``level``),
        ``description``, ``rating`` (``stars`` as float, ``votes`` as int) and
        ``price`` (float).

    Raises:
        Exception: Any error raised while reading the seller id, description,
            rating or price is re-raised after a short message is printed, so
            the caller can decide to skip the card.
    """
    # Gig url
    gig_url = 'https://www.fiverr.com' + gig.find('a')['href']

    # User info
    seller_information = gig.find(class_='seller-name')
    try:
        user_id = seller_information.contents[0].contents[-1]
    except Exception as e:
        print('\n'*2 + f'User ID NOT found. Details: {e}')
        raise

    # Level stays 0 when the seller element has no second child.
    level = 0
    if len(seller_information.contents) >= 2:
        level_text = seller_information.contents[1].text.strip()
        level = _strip_suffix(_strip_prefix(level_text, 'Level '), ' Seller')
    user_info = {'user_id' : user_id,
                 'level' : level }

    # Description
    try:
        title = gig.find('h3', class_='text-display-7').text.strip()
        # str.lstrip('I will ') strips any of the characters "I wil", which
        # turned "I will write ..." into "rite ...". Remove the literal
        # prefix instead.
        description = _strip_prefix(title, 'I will ')
    except Exception as e:
        print('\n'*2 + f'Description NOT found. Details: {e}')
        raise

    # Rating, expected as "<stars>(<votes>)"
    try:
        user_rating = gig.find('span', class_='gig-rating text-body-2').text
        stars, votes = user_rating.split('(')
        stars = float(stars)
        votes = int(votes.strip(")"))
    except Exception as e:
        print('\n'*2 + f'Error in user rating detection. Details: {e}')
        raise
    rating = {'stars' : stars,
              'votes' : votes}

    # Price starts from
    try:
        price_text = gig.find('a', class_='price').text
        # Take everything after the first '$' and tolerate a thousands
        # separator such as "1,200", which float() would otherwise reject.
        price = float(price_text.split('$', 1)[1].replace(',', ''))
    except Exception as e:
        print('\n'*2 + f'Error in starting price detection. Details: {e}')
        raise

    gig_info = {'url' : gig_url,
                'user_info' : user_info,
                'description' : description,
                'rating' : rating,
                'price' : price}

    return gig_info

In [66]:
def get_container_from_soup(soup):
    """Return every ``<div class="content">`` element found in ``soup``."""
    containers = soup.find_all('div', class_='content')
    return containers

In [67]:
def get_gig_from_container(container):
    """Return every ``<div class="gig-card-layout">`` element inside ``container``."""
    gig_cards = container.find_all('div', class_='gig-card-layout')
    return gig_cards

In [68]:
# Echo the query that produced `soup`.
print(f'Search String = "{search_string}"')

# Report the total hit count. When the counter element is missing, find()
# yields None and the `.text` access raises AttributeError, which is treated
# here as the request having been blocked.
try:
    result_counter = soup.find('div', class_='number-of-results')
    print(result_counter.text)
except AttributeError:
    print(f'\n\nPROCESS BLOCKED BY FIVER\n\n')
    raise  # Stops the cell so nothing below runs

# First row is the table header; one row per successfully parsed gig follows.
data = [["ID", "Votes", "Price", "Description", "Level", "Stars"]]
separator = '\n' + '='*80 + '\n'

for container in get_container_from_soup(soup):
    for single_gig in get_gig_from_container(container):
        try:
            gig_info = extract_info_from_gig(single_gig)
            seller = gig_info['user_info']
            score = gig_info['rating']
            row = [
                seller['user_id'],
                score['votes'],
                gig_info['price'],
                gig_info['description'],
                seller['level'],
                score['stars'],
            ]
            data.append(row)
        except Exception as e:
            # A card that cannot be parsed is dumped for inspection and
            # skipped; the remaining cards are still processed.
            print(separator)
            print(f'ERROR MESSAGE: {e} \n{traceback.format_exc()}')
            print(separator)
            print(single_gig.prettify())
            print(separator)

print(tabulate(data, headers="firstrow", showindex=True, tablefmt="pretty"))

Search String = "Video editing"
32,254 services available
user_id = 'robin_pk'
user_id = 'shivamsuthar'
user_id = 'samiularafat'
user_id = 'auroracatera_'
user_id = 'accessmediapro'
user_id = 'luiseeditor'
user_id = 'Loveridge Designs'
user_id = 'moinspo'
+---+-------------------+-------+-------+------------------------------------------------------------------------+-------+-------+
|   |        ID         | Votes | Price |                              Description                               | Level | Stars |
+---+-------------------+-------+-------+------------------------------------------------------------------------+-------+-------+
| 0 |     robin_pk      |  14   | 20.0  |                     edit your video like you want                      |   0   |  5.0  |
| 1 |   shivamsuthar    |  488  | 30.0  |                 do professional and fast video editing                 |   0   |  5.0  |
| 2 |   samiularafat    |  13   | 10.0  | edit your zoom interview recordings, webinar vi