In [1]:
import scrapy
from scrapy.crawler import CrawlerProcess

class OlxSpider(scrapy.Spider):
    """Crawl OLX listing pages and yield one dict per product page.

    ``parse`` handles listing pages: it schedules every product link it finds
    and then the next listing page, if any. ``parse_product`` handles product
    pages and yields ``{'title', 'price', 'description'}``.

    The CSS selectors are placeholders that must be adapted to the current
    site markup; a selector that matches nothing yields ``None`` for that field.
    """

    name = 'olx_spider'
    allowed_domains = ['olx.com.br']
    start_urls = ['https://www.olx.com.br/']  # Adjust starting URL based on category

    custom_settings = {
        'REQUEST_FINGERPRINTER_IMPLEMENTATION': '2.7'
    }

    def parse(self, response):
        """Schedule product pages and the next listing page.

        Args:
            response: the downloaded listing page.

        Yields:
            scrapy.Request: one per product link (handled by
            ``parse_product``), then one for the next listing page
            (handled by ``parse``) when a pagination link is present.
        """
        # Extract product listings (adjust selectors as needed)
        product_links = response.css('.item__image-container a::attr(href)').getall()

        for product_link in product_links:
            # response.follow resolves relative hrefs against the page URL;
            # scrapy.Request would raise "Missing scheme" on a relative link.
            yield response.follow(product_link, callback=self.parse_product)

        # Handle pagination (if applicable):
        next_page_url = response.css('.pager__next a::attr(href)').get()
        if next_page_url:
            yield response.follow(next_page_url, callback=self.parse)

    def parse_product(self, response):
        """Extract title, price and description from a product page.

        Args:
            response: the downloaded product page.

        Yields:
            dict: ``{'title': ..., 'price': ..., 'description': ...}``; a
            value is ``None`` when its selector matched nothing.
        """
        # Extract desired data from product page (adjust selectors)
        title = response.css('.sc-ifAKWi.hHhEKF::text').get()
        price = response.css('.sc-7UhVSK.hztVJb::text').get()
        description = response.css('.sc-7UhVSK.hztVJb ~ p::text').get()

        yield {
            'title': title,
            'price': price,
            'description': description,
        }

# Create the CrawlerProcess. No settings are passed here; the spider's own
# custom_settings are what the log reports under "Overridden settings".
process = CrawlerProcess()

# Register the spider class and start crawling.
# NOTE(review): presumably start() blocks until the crawl finishes and cannot
# be called a second time in the same kernel (Twisted reactor) — confirm
# before re-running this cell without a kernel restart.
process.crawl(OlxSpider)
process.start()

2024-08-17 16:48:32 [scrapy.utils.log] INFO: Scrapy 2.7.1 started (bot: scrapybot)
2024-08-17 16:48:32 [scrapy.utils.log] INFO: Versions: lxml 5.3.0.0, libxml2 2.11.7, cssselect 1.2.0, parsel 1.9.1, w3lib 2.2.1, Twisted 22.10.0, Python 3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)], pyOpenSSL 24.2.1 (OpenSSL 3.2.1 30 Jan 2024), cryptography 42.0.5, Platform Windows-10-10.0.22631-SP0
2024-08-17 16:48:32 [scrapy.crawler] INFO: Overridden settings:
{'REQUEST_FINGERPRINTER_IMPLEMENTATION': '2.7'}
2024-08-17 16:48:32 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.selectreactor.SelectReactor
2024-08-17 16:48:32 [scrapy.extensions.telnet] INFO: Telnet Password: 6d2ce669a19c380e
2024-08-17 16:48:32 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.logstats.LogStats']
2024-08-17 16:48:32 [scrapy.middleware] INFO: Enabled downloader middlewares:
['scra

## Armazenar dados no CSV

In [2]:
import csv
import os
from datetime import datetime

import scrapy

class OlxSpider(scrapy.Spider):
    """Spider that appends one CSV row per parsed page.

    Each row holds the page title, price, URL and the collection timestamp,
    in the column order given by ``csv_header``. The file is opened in append
    mode so successive runs accumulate rows; the header is written only when
    the file is new or empty.
    """

    name = 'olx_spider'
    allowed_domains = ['olx.com.br']
    start_urls = ['https://www.olx.com.br/']

    # Output location and column order; rows written in parse() follow it.
    csv_path = 'dados/coleta_olx.csv'
    csv_header = ['Título', 'Preço', 'Link', 'Data']

    def __init__(self, *args, **kwargs):
        """Initialise the spider and open the CSV output file.

        Args:
            *args, **kwargs: forwarded to ``scrapy.Spider.__init__`` so that
                spider arguments keep working.

        Raises:
            OSError: if the output directory or file cannot be created/opened.
        """
        super().__init__(*args, **kwargs)

        # Create the target directory on first use instead of failing in open().
        output_dir = os.path.dirname(self.csv_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Append mode would otherwise repeat the header on every run.
        needs_header = (
            not os.path.exists(self.csv_path)
            or os.path.getsize(self.csv_path) == 0
        )

        # Explicit UTF-8: the platform default encoding may not be able to
        # represent scraped text. NOTE(review): rows appended to a file created
        # by an earlier run without an explicit encoding may be mixed-encoding.
        self.csv_file = open(self.csv_path, 'a', newline='', encoding='utf-8')
        self.csv_writer = csv.writer(self.csv_file)
        if needs_header:
            self.csv_writer.writerow(self.csv_header)

    def parse(self, response):
        """Extract the fields from ``response`` and append them to the CSV.

        Args:
            response: the downloaded page.
        """
        # ... your parsing code ...

        # Extract the data
        title = response.css('h2::text').get()
        price = response.css('.sc-1j499a-2::text').get()
        link = response.url
        # Collection timestamp, filling the 'Data' column declared in the header.
        collected_at = datetime.now().isoformat(timespec='seconds')
        # ... other data ...

        # Write one row; the column order matches csv_header.
        self.csv_writer.writerow([title, price, link, collected_at])

    def closed(self, reason):
        """Close the CSV file when the spider finishes.

        Args:
            reason: why the spider was closed (unused).
        """
        # The attribute is missing if __init__ failed before opening the file.
        csv_file = getattr(self, 'csv_file', None)
        if csv_file is not None and not csv_file.closed:
            csv_file.close()