In [1]:
import scrapy
import logging
from scrapy.crawler import CrawlerProcess

In [2]:
class ProductsSpider(scrapy.Spider):
    """Scrape the perfume category listing of rossmann.pl.

    The spider walks the paginated category listing (``parse``), follows the
    link of every product tile and emits one item per product page
    (``parse_product``).  Items are exported to ``res1.csv`` through the
    ``FEEDS`` setting below.
    """

    name = 'products'
    start_urls = ['https://www.rossmann.pl/kategoria/Perfumy,8512?Page=1&PageSize=96']

    custom_settings = {
        'LOG_LEVEL': logging.WARNING,
        # Export every yielded item as a CSV row.
        'FEEDS': {'res1.csv': {'format': 'csv'}},
        # Throttle requests; with RANDOMIZE_DOWNLOAD_DELAY enabled Scrapy
        # varies the actual wait around DOWNLOAD_DELAY instead of using a
        # fixed interval.
        'DOWNLOAD_DELAY': 3,
        'RANDOMIZE_DOWNLOAD_DELAY': True,
    }

    @staticmethod
    def _to_int(text):
        """Return ``text`` converted to ``int``, or ``None`` if it is missing or not numeric."""
        try:
            return int(text)
        except (TypeError, ValueError):
            return None

    def parse_product(self, response):
        """Extract a single item from a product page.

        Args:
            response: Scrapy response of the product page.

        Yields:
            dict: with the keys ``Name``, ``EAN``, ``price``, ``price promo``
            and ``url``.  ``EAN``, ``price`` and ``price promo`` are ``None``
            when the corresponding node is not present on the page; ``Name``
            is an empty string in that case.
        """
        xpath_name = '//h1[@class="h1"]/text()'
        # The EAN value is the first text node following the "Kod EAN" label.
        xpath_EAN = "//*[contains(text(), 'Kod EAN')]/following-sibling::text()[1]"
        xpath_price = '//meta[@property="product:price:amount"]/@content'
        xpath_price_promo = '//meta[@property="product:sale_price:amount"]/@content'

        # The heading may be split into several text nodes, so join them all.
        name = ''.join(response.xpath(xpath_name).getall())
        EAN = response.xpath(xpath_EAN).get()
        price = response.xpath(xpath_price).get()
        price_promo = response.xpath(xpath_price_promo).get()

        yield {
            'Name': name,
            'EAN': EAN,
            'price': price,
            'price promo': price_promo,
            'url': response.url,
        }

    def parse(self, response):
        """Handle one listing page: schedule its products, then the next page.

        Args:
            response: Scrapy response of a category listing page.

        Yields:
            scrapy.Request: one request per product tile (handled by
            ``parse_product``) and, while the current page number is lower
            than the last page number, one request for the following listing
            page (handled by ``parse``).
        """
        xpath_url = '//a[@class = "tile-product__name"]/@href'  # link of a product tile
        # The page has no direct "next page" link, so the next URL is built
        # from the current and last page numbers shown in the pager.
        xpath_last_page = '//a[@aria-label = "Ostatnia strona"]/text()'
        xpath_current_page = '//input[@type = "number"][@class = "form-control"]/@value'

        # Schedule the products first so that a missing or changed pager
        # cannot prevent the products of this page from being scraped.
        for url in response.xpath(xpath_url).getall():
            yield scrapy.Request(response.urljoin(url), self.parse_product)

        last_page = self._to_int(response.xpath(xpath_last_page).get())
        # Several inputs may match; the last one is used as the pager field.
        current_values = response.xpath(xpath_current_page).getall()
        current_page = self._to_int(current_values[-1] if current_values else None)

        if last_page is None or current_page is None:
            # No usable pager (e.g. single-page listing or changed markup):
            # stop paginating instead of raising inside the callback.
            self.logger.warning('Pagination not found on %s; stopping here.', response.url)
            return

        if current_page < last_page:
            next_page = f'https://www.rossmann.pl/kategoria/Perfumy,8512?Page={current_page+1}&PageSize=96'
            yield response.follow(next_page, callback=self.parse)

In [1]:
# Browser-like User-Agent header applied to every request of this crawl.
browser_user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.34 Safari/537.36'
process = CrawlerProcess({'USER_AGENT': browser_user_agent})

# Register the spider, then run the crawl; start() returns once crawling is done.
# NOTE(review): the underlying Twisted reactor presumably cannot be started
# twice in one kernel, so re-running this cell likely needs a kernel restart.
process.crawl(ProductsSpider)
process.start()