In [1]:
from scrapy.item import Field
from scrapy.item import Item
from scrapy.spiders import CrawlSpider, Rule
from scrapy.selector import Selector
from scrapy.loader.processors import MapCompose
from scrapy.linkextractors import LinkExtractor
from scrapy.loader import ItemLoader
from scrapy.crawler import CrawlerProcess
import os

In [2]:
class ProSetup(Item):
    """Container for the values scraped from a single player page.

    Every field is filled by ``ProSettings.parse_player`` through an
    ``ItemLoader``; the fields are listed here in the same order as the
    exported columns.
    """

    # Text under the ``div.name`` heading of the page.
    name = Field()
    # Text of the ``span.name`` elements on the page.
    country = Field()
    # Heading text inside the mouse settings section.
    mouse = Field()
    # Cell text of the table row tagged ``data-field='dpi'``.
    dpi = Field()
    # Cell text of the table row tagged ``data-field='sensitivity'``.
    sens = Field()

In [3]:
class ProSettings(CrawlSpider):
    """Crawl the prosettings.net player listing and scrape each player page.

    The crawl starts at the ``/players/`` listing. Pagination links
    (``/page/<n>/``) are followed without a callback, while ``/players/``
    links found in ``a.js-link-target`` anchors are followed and handed to
    ``parse_player``, which yields one ``ProSetup`` item per response.
    """

    name = 'ProSetups'
    allowed_domains = ['prosettings.net']
    start_urls = ['https://prosettings.net/players/']

    # Delay between requests, picked up by Scrapy from the spider attribute.
    download_delay = 1

    custom_settings = dict(
        USER_AGENT=(
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 13_1) '
            'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 '
            'Safari/537.36'
        ),
        # Stop the spider once this many pages have been crawled.
        CLOSESPIDER_PAGECOUNT=500,
        FEED_EXPORT_ENCODING='utf-8',
        # Column order of the exported feed.
        FEED_EXPORT_FIELDS=['name', 'country', 'mouse', 'dpi', 'sens'],
    )

    rules = (
        # Listing pagination: only followed, never parsed.
        Rule(
            LinkExtractor(allow=r'/page/\d+/'),
            follow=True,
        ),
        # Player links: parsed by ``parse_player`` and followed as well.
        Rule(
            LinkExtractor(
                allow=r'/players/',
                restrict_xpaths=["//a[@class='js-link-target']"],
            ),
            callback='parse_player',
            follow=True,
        ),
    )

    def parse_player(self, response):
        """Yield a ``ProSetup`` item populated from one player page.

        Args:
            response: the response for a URL matched by the player rule.

        Yields:
            The loaded ``ProSetup`` item. No output processors are
            configured, so values are exported as the raw text nodes
            matched by each XPath.
        """
        # (field, XPath) pairs, applied in this order.
        field_xpaths = (
            ('name', "//div[@class='name']//h1//text()"),
            ('mouse', "//section[@class='settings-group section--mouse']//h4//text()"),
            ('sens', "//tr[@data-field='sensitivity']//td//text()"),
            ('dpi', "//tr[@data-field='dpi']//td//text()"),
            ('country', "//span[@class='name']//text()"),
        )

        loader = ItemLoader(item=ProSetup(), selector=Selector(response))
        for field_name, xpath in field_xpaths:
            loader.add_xpath(field_name, xpath)
        yield loader.load_item()

In [4]:
# Destination of the scraped rows, relative to the notebook's working directory.
output_path = os.path.join('data', 'raw', 'prosettings.csv')

# FEED_FORMAT / FEED_URI have been deprecated since Scrapy 2.1; the FEEDS
# setting is the supported way to declare an export target. Encoding and
# column order still come from the spider's custom_settings.
process = CrawlerProcess({
    'FEEDS': {
        output_path: {
            'format': 'csv',
            # Replace the file on every run instead of appending, so that
            # re-running the notebook does not accumulate duplicate rows.
            'overwrite': True,
        },
    },
})
process.crawl(ProSettings)
# Runs the crawl to completion. NOTE(review): the Twisted reactor cannot be
# restarted, so this cell is expected to run only once per kernel session.
process.start()

2023-05-22 14:55:34 [scrapy.utils.log] INFO: Scrapy 2.9.0 started (bot: scrapybot)
2023-05-22 14:55:34 [scrapy.utils.log] INFO: Versions: lxml 4.9.2.0, libxml2 2.10.4, cssselect 1.2.0, parsel 1.8.1, w3lib 2.1.1, Twisted 22.10.0, Python 3.11.3 | packaged by conda-forge | (main, Apr  6 2023, 08:57:19) [GCC 11.3.0], pyOpenSSL 23.1.1 (OpenSSL 3.1.0 14 Mar 2023), cryptography 40.0.2, Platform Linux-5.10.16.3-microsoft-standard-WSL2-x86_64-with-glibc2.35
2023-05-22 14:55:34 [scrapy.crawler] INFO: Overridden settings:
{'CLOSESPIDER_PAGECOUNT': 500,
 'FEED_EXPORT_ENCODING': 'utf-8',
 'FEED_EXPORT_FIELDS': ['name', 'country', 'mouse', 'dpi', 'sens'],
 'USER_AGENT': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 13_1) '
               'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 '
               'Safari/537.36'}


See the documentation of the 'REQUEST_FINGERPRINTER_IMPLEMENTATION' setting for information on how to handle this deprecation.
  return cls(crawler)

2023-05-22 14:55:34 [scrapy.u