In [1]:
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from scrapy.exporters import CsvItemExporter
from scrapy.crawler import CrawlerProcess

# Fixed browser-like User-Agent sent with the spider's requests.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.3'
    ),
}

class AllPagesSpider(CrawlSpider):
    """Crawl the banki.ru investment review listing and scrape every review.

    Link discovery is delegated entirely to ``rules``:

    * links inside ``responses__item__message`` blocks are fetched and handed
      to :meth:`parse_item`;
    * the pagination "next" link is followed so later listing pages are
      crawled as well.

    ``parse`` is intentionally NOT overridden and no explicit callback is set
    on the start requests: routing responses to a custom ``parse`` bypasses
    the CrawlSpider rule engine, which means the rules (and therefore the
    pagination) would never be applied.
    """

    name = "all_pages"
    allowed_domains = ["www.banki.ru"]
    start_urls = ["https://www.banki.ru/investment/responses/list"]

    # Requests generated by the rules carry no per-request headers, so the
    # User-Agent is configured spider-wide to reach every request.
    custom_settings = {'USER_AGENT': headers['User-Agent']}

    rules = (
        Rule(LinkExtractor(restrict_xpaths="//div[@class='responses__item__message']/a"), callback='parse_item', follow=True),
        Rule(LinkExtractor(restrict_xpaths="//li[@class='ui-pagination__item ui-pagination__next']/a"))
    )

    def start_requests(self):
        """Yield the initial listing requests.

        No callback is given, so each response is processed by CrawlSpider's
        built-in handler, which applies ``rules`` to it.
        """
        for url in self.start_urls:
            # NOTE(review): the 'splash' meta key presumably only has an effect
            # when the scrapy-splash middlewares are enabled -- confirm.
            yield scrapy.Request(url, headers=headers, meta={'splash': {'args': {'wait': 2}}})

    def parse_item(self, response):
        """Extract the title and review body from a single review page.

        Yields a dict with the keys ``title`` and ``review``. A field whose
        XPath matches nothing is emitted as an empty string instead of
        raising ``AttributeError`` on ``None.strip()``.
        """
        title = response.xpath("//h1[contains(@class, 'response-page__title')]/text()").get(default='')
        # NOTE(review): this selects the <div> element itself, so the value is
        # the serialized markup rather than plain text -- confirm that is intended.
        review = response.xpath("//div[contains(@class, 'article-text')]").get(default='')
        yield {'title': title.strip(), 'review': review.strip()}

class CSVPipeline:
    """Item pipeline that writes every scraped item to ``output.csv``.

    The file handle is kept on ``self.file`` and the CSV exporter wrapping it
    on ``self.exporter``; both are created in :meth:`open_spider` and released
    in :meth:`close_spider`.
    """

    def open_spider(self, spider):
        """Open ``output.csv`` for binary writing and begin the export."""
        self.file = open('output.csv', 'wb')
        csv_exporter = CsvItemExporter(self.file)
        self.exporter = csv_exporter
        csv_exporter.start_exporting()

    def process_item(self, item, spider):
        """Export ``item`` through the exporter and return it unchanged."""
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        """Finish the export, then close the underlying file."""
        csv_exporter, out_file = self.exporter, self.file
        csv_exporter.finish_exporting()
        out_file.close()

# Register the CSV pipeline in the Scrapy settings and run the spider.
# The pipeline is addressed as '__main__.CSVPipeline', so this code has to be
# executed as the main module for that path to resolve.
process = CrawlerProcess(
    settings=dict(ITEM_PIPELINES={'__main__.CSVPipeline': 300}),
)
process.crawl(AllPagesSpider)
process.start()

2024-04-02 10:16:38 [scrapy.utils.log] INFO: Scrapy 2.11.1 started (bot: scrapybot)
2024-04-02 10:16:38 [scrapy.utils.log] INFO: Versions: lxml 4.9.3.0, libxml2 2.10.4, cssselect 1.2.0, parsel 1.8.1, w3lib 2.1.2, Twisted 23.10.0, Python 3.10.14 | packaged by Anaconda, Inc. | (main, Mar 21 2024, 16:20:14) [MSC v.1916 64 bit (AMD64)], pyOpenSSL 24.0.0 (OpenSSL 3.0.13 30 Jan 2024), cryptography 42.0.5, Platform Windows-10-10.0.17763-SP0
2024-04-02 10:16:38 [scrapy.addons] INFO: Enabled addons:
[]


See the documentation of the 'REQUEST_FINGERPRINTER_IMPLEMENTATION' setting for information on how to handle this deprecation.
  return cls(crawler)

2024-04-02 10:16:38 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.selectreactor.SelectReactor
2024-04-02 10:16:38 [scrapy.extensions.telnet] INFO: Telnet Password: 423e36ce5d3c4c5d
2024-04-02 10:16:38 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scr