In [1]:
import scrapy
from scrapy.crawler import CrawlerProcess
import json

class QuotesSpider(scrapy.Spider):
    """Spider that scrapes quote text and author from quotes.toscrape.com.

    Only the two pages listed in ``start_urls`` are requested; this spider
    does not follow pagination links. Items are exported through the
    ``FEEDS`` setting to ``quotes.jl`` in JSON Lines format, with
    ``overwrite`` enabled for that feed.
    """

    name = "quotes"
    start_urls = [
        'https://quotes.toscrape.com/page/1/',
        'https://quotes.toscrape.com/page/2/',
    ]

    # Per-spider settings: one JSON Lines feed named quotes.jl.
    custom_settings = {
        'FEEDS': {
            'quotes.jl': {
                'format': 'jsonlines',
                'overwrite': True,
            },
        },
    }

    def parse(self, response):
        """Yield one ``{'text': ..., 'author': ...}`` dict per quote block.

        Args:
            response: The downloaded page; every ``div.quote`` element in
                it is treated as one quote.

        Yields:
            dict: ``text`` is the quote with surrounding curly quotation
            marks stripped (an empty string when the text node is
            missing); ``author`` is the author name, or ``None`` when the
            author node is missing.
        """
        for quote in response.css('div.quote'):
            # default='' avoids AttributeError on None.strip(...) when a
            # quote block has no span.text node, which would otherwise
            # abort the callback and drop the rest of the page's items.
            text = quote.css('span.text::text').get(default='')
            yield {
                'text': text.strip('“”'),
                'author': quote.css('small.author::text').get(),
            }

def run_spider(spider):
    """Run *spider* in a freshly created ``CrawlerProcess``.

    Args:
        spider: The spider (class) to schedule on the process.

    NOTE(review): ``start()`` is expected to block until the crawl is
    finished, and the underlying reactor presumably cannot be started a
    second time in the same process/kernel - confirm against the Scrapy
    documentation before calling this more than once.
    """
    crawler_process = CrawlerProcess()
    crawler_process.crawl(spider)
    crawler_process.start()

# Run the crawl first so that quotes.jl exists before it is read back.
run_spider(QuotesSpider)

# Load the JSON Lines feed: one JSON object per line. The encoding is given
# explicitly so the read does not depend on the platform default (e.g.
# cp1252 on Windows), and blank lines are skipped because json.loads would
# raise on them.
with open('quotes.jl', 'r', encoding='utf-8') as f:
    quotes = [json.loads(line) for line in f if line.strip()]

for quote in quotes:
    print(quote)

# Re-export every quote as a single pretty-printed JSON array;
# ensure_ascii=False keeps non-ASCII characters readable in the file.
with open('quotes.json', 'w', encoding='utf-8') as f:
    json.dump(quotes, f, ensure_ascii=False, indent=4)
    



2024-07-15 19:36:36 [scrapy.utils.log] INFO: Scrapy 2.8.0 started (bot: scrapybot)
2024-07-15 19:36:36 [scrapy.utils.log] INFO: Versions: lxml 4.9.2.0, libxml2 2.10.3, cssselect 1.1.0, parsel 1.6.0, w3lib 1.21.0, Twisted 22.10.0, Python 3.11.4 | packaged by Anaconda, Inc. | (main, Jul  5 2023, 13:38:37) [MSC v.1916 64 bit (AMD64)], pyOpenSSL 23.2.0 (OpenSSL 1.1.1u  30 May 2023), cryptography 41.0.2, Platform Windows-10-10.0.22631-SP0
2024-07-15 19:36:36 [scrapy.crawler] INFO: Overridden settings:
{}


See the documentation of the 'REQUEST_FINGERPRINTER_IMPLEMENTATION' setting for information on how to handle this deprecation.
  return cls(crawler)

2024-07-15 19:36:36 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.selectreactor.SelectReactor
2024-07-15 19:36:36 [scrapy.extensions.telnet] INFO: Telnet Password: ced1dbab9fed37bb
2024-07-15 19:36:36 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsole',

2024-07-15 19:36:37 [scrapy.core.scraper] DEBUG: Scraped from <200 https://quotes.toscrape.com/page/1/>
{'text': 'Try not to become a man of success. Rather become a man of value.', 'author': 'Albert Einstein'}
2024-07-15 19:36:37 [scrapy.core.scraper] DEBUG: Scraped from <200 https://quotes.toscrape.com/page/1/>
{'text': 'It is better to be hated for what you are than to be loved for what you are not.', 'author': 'André Gide'}
2024-07-15 19:36:37 [scrapy.core.scraper] DEBUG: Scraped from <200 https://quotes.toscrape.com/page/1/>
{'text': "I have not failed. I've just found 10,000 ways that won't work.", 'author': 'Thomas A. Edison'}
2024-07-15 19:36:37 [scrapy.core.scraper] DEBUG: Scraped from <200 https://quotes.toscrape.com/page/1/>
{'text': "A woman is like a tea bag; you never know how strong it is until it's in hot water.", 'author': 'Eleanor Roosevelt'}
2024-07-15 19:36:37 [scrapy.core.scraper] DEBUG: Scraped from <200 https://quotes.toscrape.com/page/1/>
{'text': 'A day without

{'text': 'The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.', 'author': 'Albert Einstein'}
{'text': 'It is our choices, Harry, that show what we truly are, far more than our abilities.', 'author': 'J.K. Rowling'}
{'text': 'There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.', 'author': 'Albert Einstein'}
{'text': 'The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.', 'author': 'Jane Austen'}
{'text': "Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.", 'author': 'Marilyn Monroe'}
{'text': "This life is what you make it. No matter what, you're going to mess up sometimes, it's a universal truth. But the good part is you get to decide how you're going to mess it up. Girls will be your friends - they'll act like it anyway. But just remember,