In [191]:
from bs4 import BeautifulSoup
import requests
import numpy as np
from tabulate import tabulate
import os
import traceback

In [192]:
def generate_url(search_string):
    search_string = search_string.strip().replace(" ", "%20")
    url = r'https://www.fiverr.com/search/gigs?query='+ search_string + r'&source=top-bar&search_in=everywhere&search-autocomplete-original-term='+ search_string
    return url

In [193]:
def get_random_ua():
    random_ua = ''
    ua_file = 'UserAgents.txt'
    try:
        with open(ua_file) as f:
            lines = f.readlines()
        if len(lines) > 0:
            prng = np.random.RandomState()
            index = prng.permutation(len(lines) - 1)
            idx = np.asarray(index, dtype=np.integer)[0]
            random_ua = lines[int(idx)]
    except Exception as ex:
        print('Exception in random_ua')
        print(str(ex))
    finally:
        return random_ua.strip('\n')

In [194]:
# url = r'https://www.fiverr.com/search/gigs?query=machine%20learning&source=top-bar&search_in=everywhere&search-autocomplete-original-term=machine%20learning'

def write_test_file(content, file_name = "test.html"):
    print(content)
    with open(os.path.join(os.getcwd(), file_name), "wb") as f:
        f.write(content)

def get_soup(url, print_soup=False):
    headers = {
        'user-agent': get_random_ua(),
        'referrer': r'https://google.com',
        'accept': r'*/*',
        'accept-encoding': r'gzip, deflate, br',
        #'accept-language': r'en-US,en;q=0.9,bn;q=0.8',
        'cache-control': r'no-cache',
        #'origin': r'https://www.fiverr.com',
        'pragma': r'no-cache'
    }
    
    soup = ''
    try:
        r = requests.get(url, headers=headers)
        r.raise_for_status()
    except requests.exceptions.HTTPError as err:
        print ("Http Error:",err)
        raise 
    except requests.exceptions.ConnectionError as err:
        print ("Error Connecting:",err)
        write_test_file(r.content)
        raise 
    except requests.exceptions.Timeout as err:
        print ("Timeout Error:",err)
        write_test_file(r.content)
        raise 
    except requests.exceptions.RequestException as err:
        print ("OOps: Something Else",err)
        write_test_file(r.content)
        raise
    else:
        source = r.text
        soup = BeautifulSoup(source, 'lxml')  # pip install lxml

        if(print_soup is True):
            write_test_file(r.content)

    return soup

In [195]:
search_string = r'Video editing'
soup = get_soup(generate_url(search_string), print_soup=False)

In [196]:
'''
Test block for debug purposes only
'''

try:    
    # Get all the gigs
    gigs = soup.find('div', class_='listing-container')
except AttributeError:
    print(f'\n\nPROCESS BLOCKED BY FIVER\n\n')
    raise  # This will not let the code below to be executed

data = [["ID", "Votes", "Price", "Description", "Level", "Stars"]]

# Extract individual gigs
i = 1 
for gig in gigs.find_all('div', class_='gig-wrapper card'):
    seller_information = gig.find('span', class_='seller-name')
    try:
        user_id = seller_information.a.text.lstrip('by').strip()
        print(str(i) + ' # user id = ' + user_id)
        i += 1
    except Exception as e:
        print('\n'*2 + f'User ID NOT found. Details: {e}')
        raise

    level = 0
    try:
        level = len(list(seller_information.contents))
    except Exception as e:
        print('\n'*2 + f'Level not found. Details: {e}')
        raise
    else:
        if level >= 2:
            try:
                level = seller_information.contents[1].text.lstrip('Level ').rstrip(' Seller')
            except Exception as e:
                print('\n'*2 + f'Level not found. Details: {e}')
                raise
            else:
                print('level = ' + level)
    
    print('ratings : ' + gig.find('span', class_='gig-rating text-body-2').text)
    print('price : ' + gig.find('a', class_='price').text)
    print('-'*60)

In [197]:
def extract_info_from_gig(gig):
    # Gig url
    gig_url = 'https://www.fiverr.com' + gig.find('a')['href']

    # User info
    seller_information = gig.find(class_='seller-name')
    try:
        user_id = seller_information.contents[0].contents[-1]
    except Exception as e:
        print('\n'*2 + f'User ID NOT found. Details: {e}')
        raise

    level = 0
    if len(list(seller_information.contents)) >= 2:
        level = seller_information.contents[1].text.lstrip('Level ').rstrip(' Seller')
    user_info = {'user_id' : user_id,
                 'level' : level }

    # Description
    try:
        description = gig.find('h3', class_='text-display-7').text.lstrip('I will ')
    except Exception as e:
        print('\n'*2 + f'Description NOT found. Details: {e}')
        raise

    # Rating
    try:
        user_rating = gig.find('span', class_='gig-rating text-body-2').text
        stars, votes = user_rating.split('(')
        stars = float(stars)
        votes = int(votes.strip(")"))
    except Exception as e:
        print('\n'*2 + f'Error in user rating detection. Details: {e}')
        raise
    else:
        rating = {'stars' : stars,
                  'votes' : votes}

    # Price starts from
    try:
        price = gig.find('a', class_='price').text
        price = float(price.split('$', 1)[1])
    except Exception as e:
        print('\n'*2 + f'Error in starting price detection. Details: {e}')
        raise

    gig_info = {'url' : gig_url,
                'user_info' : user_info,
                'description' : description,
                'rating' : rating,
                'price' : price}

    return gig_info

In [198]:
def get_container_from_soup(soup):
    ''' Extract listing container from soup '''
    return soup.find_all('div', class_='content')

In [199]:
def get_gig_from_container(container):
    ''' Extract individual gigs from a listing container '''
    return container.find_all('div', class_='gig-card-layout')

In [200]:
# Show the search string
print(f'Search String = "{search_string}"')

# Get number of results
try:
    print(soup.find('div', class_='number-of-results').text)
except AttributeError:
    print(f'\n\nPROCESS BLOCKED BY FIVER\n\n')
    raise  # This will not let the code below to be executed

gig_urls = []
data = [["ID", "Votes", "Price", "Description", "Level", "Stars"]]
for container in get_container_from_soup(soup):
    for single_gig in get_gig_from_container(container):
        try:
            gig_info = extract_info_from_gig(single_gig)

            data.append([gig_info['user_info']['user_id'],
                         gig_info['rating']['votes'],
                         gig_info['price'],
                         gig_info['description'],
                         gig_info['user_info']['level'],
                         gig_info['rating']['stars']
                         ])
            gig_urls.append(gig_info['url'])
            
            

        except Exception as e:
            print('\n' + '='*80 + '\n')
            print(f'ERROR MESSAGE: {e} \n{traceback.format_exc()}')
            print('\n' + '='*80 + '\n')
            print(single_gig.prettify())
            print('\n' + '='*80 + '\n')


print(tabulate(data, headers="firstrow", showindex=True, tablefmt="pretty"))

Search String = "Video editing"
32,136 services available
+---+-------------------+-------+-------+------------------------------------------------------------------------+-------+-------+
|   |        ID         | Votes | Price |                              Description                               | Level | Stars |
+---+-------------------+-------+-------+------------------------------------------------------------------------+-------+-------+
| 0 |     robin_pk      |  14   | 20.0  |                     edit your video like you want                      |   0   |  5.0  |
| 1 |   samiularafat    |  13   | 10.0  | edit your zoom interview recordings, webinar video, and remote podcast |   0   |  5.0  |
| 2 |   auroracatera_   |  17   | 100.0 |       do professional, unique video editing and post production        |   0   |  4.8  |
| 3 |   shivamsuthar    |  488  | 30.0  |                 do professional and fast video editing                 |   0   |  5.0  |
| 4 | Loveridge Designs |

In [201]:
def get_no_of_queued_orders(soup_gig_inside):
    try:
        queued_orders = soup_gig_inside.find("span", class_="orders-in-queue").text
    except Exception:
        write_test_file(soup_gig_inside, file_name = "soup_gig_inside.html")
        raise
    queued_orders = list(filter(lambda x: x.isdigit(), queued_orders))
    queued_orders = int(''.join(queued_orders))
    return queued_orders

In [202]:
# got to inside of a git
import random
import time

for i, gig_url in enumerate(gig_urls):
    for tries in range(10):
        try:
            soup_gig_inside = get_soup(gig_url)
            break

        except requests.exceptions.HTTPError:
            countdown = random.randint(10, 30)

            for i in reversed(range(countdown)):
                print(str(i), end="-")
                time.sleep(1)
            continue
    else:
        raise("*"*30,"FATAL","*"*30)

    qued_orders = get_no_of_queued_orders(soup_gig_inside)
    print(f"{qued_orders = }")

    data[i + 1].append(qued_orders)

print(tabulate(data, headers="firstrow", showindex=True, tablefmt="pretty"))


Http Error: 403 Client Error: Forbidden for url: https://block.fiverr.com/?url=aHR0cDovL3d3dy5maXZlcnIuY29tL3JvYmluX3BrL2VkaXQteW91ci12bG9nLWxpa2UteW91LXdhbnQ/cG9zPTEmc291cmNlPXRvcC1iYXImcGNrZ19pZD0xJmZ1bm5lbD0zYWJiMTIwMzAxNzMwMzMyNGIzYjE0MGI5MjE3YWJiZCZjb250ZXh0X3R5cGU9YXV0byZjb250ZXh0X3JlZmVycmVyPXNlYXJjaF9naWdzX3dpdGhfbW9kYWxpdGllcyZyZWZfY3R4X2lkPTY2ZDIzMDE1YjUzNDY4NDc5YmNhMTU1OTg4MmUxY2Uw&uuid=fbeb177b-fd9c-11eb-b248-6b474d52536e&vid=
0-qued_orders = 2
qued_orders = 2
Http Error: 403 Client Error: Forbidden for url: https://block.fiverr.com/?url=aHR0cDovL3d3dy5maXZlcnIuY29tL2F1cm9yYWNhdGVyYV8vcHJvZmVzc2lvbmFsLXZpZGVvLWVkaXRpbmctYW5kLXBvc3QtcHJvZHVjdGlvbj9wb3M9MyZzb3VyY2U9dG9wLWJhciZwY2tnX2lkPTEmZnVubmVsPTNhYmIxMjAzMDE3MzAzMzI0YjNiMTQwYjkyMTdhYmJkJmNvbnRleHRfdHlwZT1hdXRvJmNvbnRleHRfcmVmZXJyZXI9c2VhcmNoX2dpZ3Nfd2l0aF9tb2RhbGl0aWVzJnJlZl9jdHhfaWQ9NjZkMjMwMTViNTM0Njg0NzliY2ExNTU5ODgyZTFjZTA=&uuid=fee0b529-fd9c-11eb-a9d1-48724b445053&vid=
0-Http Error: 403 Client Error: Forbidden for ur

TypeError: exceptions must derive from BaseException