In [16]:
import requests
from bs4 import BeautifulSoup
import time
import random
import json
import csv
import logging
import os
from dotenv import load_dotenv

load_dotenv()

# --- Configuration ---
BASE_URL = "https://www.fiverr.com/categories/programming-tech/ai-services?source=category_tree"  # Specific category URL
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Accept-Language": "en-US,en;q=0.9",
    "Accept-Encoding": "gzip, deflate, br",
    "Referer": "https://www.fiverr.com/",  # Add a referer
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Cache-Control": "max-age=0"
}
OUTPUT_FORMAT = os.getenv("OUTPUT_FORMAT", "json")  # Default output format is JSON
MIN_DELAY = int(os.getenv("MIN_DELAY", 3))  # Minimum delay in seconds
MAX_DELAY = int(os.getenv("MAX_DELAY", 7))  # Maximum delay in seconds

# --- Logging Setup ---
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)

file_handler = logging.FileHandler("fiverr_scraper_simplified.log")  # Updated log file name
file_handler.setFormatter(formatter)

logger.addHandler(console_handler)
logger.addHandler(file_handler)

def log_error(message):
    logger.error(message)

In [17]:
class Scraper:
    def __init__(self, headers, min_delay, max_delay):
        self.headers = headers
        self.min_delay = min_delay
        self.max_delay = max_delay

    def fetch_page(self, url):
        """Fetches a webpage with error handling."""
        try:
            time.sleep(random.uniform(self.min_delay, self.max_delay))
            response = requests.get(url, headers=self.headers)
            response.raise_for_status()
            return response.content
        except requests.exceptions.RequestException as e:
            log_error(f"Error fetching {url}: {e}")
            return None

    def parse_project_listings(self, html_content):
        """Parses project listings from the HTML content."""
        soup = BeautifulSoup(html_content, "html.parser")
        project_listings = []

        # Find project listing elements on Fiverr (adjust selectors as needed)
        gigs = soup.find_all("li", class_="gig-card-layout")

        for gig in gigs:
            try:
                title = gig.find("h3", class_="text-display-7 font-semibold").text.strip()
                description = gig.find("div", class_="expanded-gig-description").text.strip()

                # Extract the project link from the <a> tag
                project_link = BASE_URL + gig.find("a", class_="stretched-link-overlay")["href"]

                # Fetch and parse the project details page
                project_details_html = self.fetch_page(project_link)

                if project_details_html:
                    project_details_soup = BeautifulSoup(project_details_html, "html.parser")
                    
                    # Budget: Look for a package table or starting price
                    budget_element = project_details_soup.find("span", class_="price-item")
                    budget = budget_element.text.strip() if budget_element else "N/A"

                    # Skills: Look for tags or keywords
                    skills_elements = project_details_soup.find_all("span", class_="tag-name")
                    skills = [skill.text.strip() for skill in skills_elements]

                    # Posted Date: Might be in the seller's profile
                    date_posted = "N/A"  # Can try to extract from seller profile if available

                    # Client Country: Usually associated with the seller
                    client_country_element = project_details_soup.find("div", class_="seller-location")
                    client_country = client_country_element.text.strip() if client_country_element else "N/A"

                else:
                    budget = "N/A"
                    skills = []
                    date_posted = "N/A"
                    client_country = "N/A"

                project_listings.append(
                    {
                        "title": title,
                        "description": description,
                        "budget": budget,
                        "skills": skills,
                        "date_posted": date_posted,
                        "client_country": client_country
                    }
                )
            except Exception as e:
                log_error(f"Error parsing project: {e}")

        return project_listings

In [18]:
class DataHandler:
    def __init__(self, output_format="json", output_filename="fiverr_ai_projects"):
        self.output_format = output_format
        self.output_filename = output_filename

    def save_data(self, data):
        """Saves the scraped data to a file (JSON or CSV)."""
        try:
            if self.output_format == "json":
                with open(f"{self.output_filename}.json", "w", encoding="utf-8") as f:
                    json.dump(data, f, indent=4, ensure_ascii=False)
                logger.info(f"Data successfully saved to {self.output_filename}.json")
            elif self.output_format == "csv":
                with open(f"{self.output_filename}.csv", "w", newline="", encoding="utf-8") as f:
                    # Provide an empty list [] as the default for fieldnames if data is empty
                    writer = csv.DictWriter(f, fieldnames=data[0].keys() if data else [])
                    writer.writeheader()
                    writer.writerows(data)
                logger.info(f"Data successfully saved to {self.output_filename}.csv")
            else:
                raise ValueError(f"Unsupported output format: {self.output_format}")
        except (IOError, TypeError, ValueError) as e:
            log_error(f"Error saving data: {e}")

In [19]:
class DataHandler:
    def __init__(self, output_format="json", output_filename="fiverr_ai_projects"):
        self.output_format = output_format
        self.output_filename = output_filename

    def save_data(self, data):
        """Saves the scraped data to a file (JSON or CSV)."""
        try:
            if self.output_format == "json":
                with open(f"{self.output_filename}.json", "w", encoding="utf-8") as f:
                    json.dump(data, f, indent=4, ensure_ascii=False)
                logger.info(f"Data successfully saved to {self.output_filename}.json")
            elif self.output_format == "csv":
                with open(f"{self.output_filename}.csv", "w", newline="", encoding="utf-8") as f:
                    writer = csv.DictWriter(f, fieldnames=data[0].keys() if data else [])
                    writer.writeheader()
                    writer.writerows(data)
                logger.info(f"Data successfully saved to {self.output_filename}.csv")
            else:
                raise ValueError(f"Unsupported output format: {self.output_format}")
        except (IOError, TypeError, ValueError) as e:
            log_error(f"Error saving data: {e}")

In [20]:
def main():
    """Main function to run the scraper."""
    logger.info("Starting the simplified Fiverr AI project scraper...")

    scraper = Scraper(HEADERS, MIN_DELAY, MAX_DELAY)
    data_handler = DataHandler(output_format=OUTPUT_FORMAT)

    html_content = scraper.fetch_page(BASE_URL)

    if html_content:
        projects = scraper.parse_project_listings(html_content)
        if projects:
            data_handler.save_data(projects)
        else:
            logger.info("No projects found.")
    else:
        logger.error("Failed to fetch the category page.")

    logger.info("Scraping finished.")

In [21]:
if __name__ == "__main__":
    main()

2025-01-09 23:41:26,550 - __main__ - INFO - Starting the simplified Fiverr AI project scraper...
2025-01-09 23:41:26,550 - __main__ - INFO - Starting the simplified Fiverr AI project scraper...
2025-01-09 23:41:26,550 - __main__ - INFO - Starting the simplified Fiverr AI project scraper...
2025-01-09 23:41:31,696 - __main__ - ERROR - Error fetching https://www.fiverr.com/categories/programming-tech/ai-services?source=category_tree: 403 Client Error: Forbidden for url: https://www.fiverr.com/categories/programming-tech/ai-services?source=category_tree
2025-01-09 23:41:31,696 - __main__ - ERROR - Error fetching https://www.fiverr.com/categories/programming-tech/ai-services?source=category_tree: 403 Client Error: Forbidden for url: https://www.fiverr.com/categories/programming-tech/ai-services?source=category_tree
2025-01-09 23:41:31,696 - __main__ - ERROR - Error fetching https://www.fiverr.com/categories/programming-tech/ai-services?source=category_tree: 403 Client Error: Forbidden for 