# Eurostast scrapping

## Imports

In [1]:
# to install
import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.crawler import Crawler
from scrapy.settings import Settings

from twisted.internet import reactor

from bs4 import BeautifulSoup

# buit in
import os 
import re
import logging
import sys
import pyodbc
import hashlib
import pandas as pd
from datetime import datetime

## Database Connection

In [2]:
# UID and PWD to personnalize
c = pyodbc.connect('DSN=VirtuosoKapcode;DBA=ESTAT;UID=XXXX;PWD=XXXXXXXXXXXX')
cursor = c.cursor()

## Items definition

Concept Item Definition

In [3]:
class Concept(scrapy.Item):
    id = scrapy.Field()
    url = scrapy.Field()
    title = scrapy.Field()
    definition = scrapy.Field()
    further_info = scrapy.Field()
    related_concepts = scrapy.Field()
    statistical_data = scrapy.Field()
    sources = scrapy.Field()
    categories = scrapy.Field()
    redirection = scrapy.Field()
    original_title = scrapy.Field()
    last_update = scrapy.Field()


Article Item Definition

In [None]:
class Article(scrapy.Item):
    url = scrapy.Field()
    title = scrapy.Field()
    abstract = scrapy.Field()
    full_article = scrapy.Field()
    alerts = scrapy.Field()
    categories = scrapy.Field()
    context = scrapy.Field()
    data_sources = scrapy.Field()
    excel = scrapy.Field()
    last_update = scrapy.Field()
    other_articles = scrapy.Field()
    tables = scrapy.Field()
    database = scrapy.Field()
    dedicated_section = scrapy.Field()
    publications = scrapy.Field()
    methodology = scrapy.Field()
    legislation = scrapy.Field()
    visualisations = scrapy.Field()
    external_links = scrapy.Field()


Paragraph Item Definition

In [None]:
class Paragraph(scrapy.Item):
    title = scrapy.Field()
    content = scrapy.Field()
    figures = scrapy.Field()


Basic sub category info

In [4]:
class LinkInfo(scrapy.Item):
    title = scrapy.Field()
    url = scrapy.Field()


## Pandas dataframe definition

In [5]:
# concepts
conceptsDf = pd.DataFrame(columns=['id', 'url', 'title', 'definition',
                                   'further_info', 'related_concepts',
                                   'statistical_data', 'sources',
                                   'redirection', 'original_title',
                                   'categories', 'last_update'])

# articles
articlesDf = pd.DataFrame(columns=['url', 'title', 'abstract', 'alerts',
                                   'full_article', 'excel', 'data_sources',
                                   'context', 'categories', 'last_update',
                                   'other_articles', 'tables', 'database',
                                   'dedicated_section', 'publications',
                                   'methodology', 'legislation', 'visualisations',
                                   'external_links'])


## Function definition

Text normalization (\n \t \r)

In [6]:
def normalize(txt):
    txt = txt.replace('\r', '')
    txt = txt.replace('\n', '')
    txt = txt.replace('\t', '')
    txt = txt.replace('\xa0', ' ')
    txt = txt.replace('  ', ' ')
    return txt


## SQL request

In [7]:
# select eurostat doc id
estatLinkSelectId = "SELECT id FROM dat_link_info WHERE url = ? AND eurostat = 1"

# select foreign doc id
foreignLinkSelectId = "SELECT id FROM dat_link_info WHERE url = ? AND eurostat = 0"

# insert eurostat doc
estatLinkInsert = "INSERT INTO dat_link_info(title, url, eurostat) VALUES (?, ?, 1)"

# insert doc from outside eurostat
foreignLinkInsert = "INSERT INTO dat_link_info(title, url, eurostat) VALUES (?, ?, 0)"

# select glossary element
conceptSelect = "SELECT * FROM dat_concept WHERE id = ?"

# insert glossary element
conceptFullInsert = "INSERT INTO dat_concept(id, definition, last_update, homepage, redirection) VALUES (?, ?, ?, 0, 0)"

# insert glossary element
conceptInsert = "INSERT INTO dat_concept(id, definition, homepage, redirection) VALUES (?, ?, 0, 0)"

# check if couple exists in dat_further_info
furtherInfoCheck = "SELECT id FROM dat_further_info WHERE concept_id = ? AND link_id =?"

# insert a further_info link
furtherInfoInsert = "INSERT INTO dat_further_info(concept_id, link_id) VALUES (?, ?)"

# check if couple exists in dat_related_concepts
relCptCheck = "SELECT id FROM dat_related_concepts WHERE concept_id = ? AND link_id =?"

# insert a concept link
relCptInsert = "INSERT INTO dat_related_concepts(concept_id, link_id) VALUES (?, ?)"

# check if couple exists in dat_statistical_data
statDataCheck = "SELECT id FROM dat_statistical_data WHERE concept_id = ? AND link_id =?"

# insert a statistical data link
statDataInsert = "INSERT INTO dat_statistical_data(concept_id, link_id) VALUES (?, ?)"

# check if couple exists in dat_sources
sourceCheck = "SELECT id FROM dat_sources WHERE concept_id = ? AND link_id =?"

# insert a source link
sourceInsert = "INSERT INTO dat_sources(concept_id, link_id) VALUES (?, ?)"


## Spiders

Glossary spider

In [8]:
class glossarySpider(scrapy.Spider):
    name = "glossary"

    custom_settings = {
        # limit the logs
        'LOG_LEVEL': logging.WARNING,
        # exports
        'FEEDS': {
            'glossary.json': {
                'format': 'json',
                'encoding': 'utf8',
                'fields': None,
                'indent': 4,
                'item_export_kwargs': {
                    'export_empty_fields': False
                }
            },
            'glossary.csv': {
                'format': 'csv',
                'encoding': 'utf8',
                'item_export_kwargs': {
                    'include_headers_line': True,
                    'delimiter': '#'
                }
            }
        }
    }

    start_urls = ['https://ec.europa.eu/eurostat/statistics-explained' +
                  '/index.php?title=Category:Glossary']

    def parse(self, response):
        # Gather the links on the page
        # starting with the start_urls link
        for page in response.css('#mw-pages').css('.mw-content-ltr'):
            for link in page.css('a ::attr(href)'):
                cptLink = 'https://ec.europa.eu' + link.extract()
                yield scrapy.Request(url=cptLink, callback=self.parse_concept)

        # Check if there is another page
        # if so re-launch the parse function
        # with nextPage url as start_urls
        nextPage = response.xpath("//a[contains(.//text(), 'next 200')]" +
                                  "/@href").get()
        if nextPage is not None:
            nextPage = response.urljoin('https://ec.europa.eu' + nextPage)
            yield scrapy.Request(nextPage, callback=self.parse)

    def parse_concept(self, response):

        # html page
        pageContent = BeautifulSoup(response.css('#mw-content-text').get())

        # split around the part titles (ex: Related concepts , etc.)
        # list of strings (html)
        splitContent = re.split('<h2>|</h2>', pageContent.prettify())

        titleRaw = normalize(response.css('#firstHeading::text').get())
        definitionRaw = BeautifulSoup(splitContent[0])
        redirected = response.xpath("//div[@id = 'contentSub']" +
                                    "[text()[contains(.,'Redirected')]]" +
                                    "/a/text()").get()

        concept = Concept()
        concept['url'] = response.request.url
        # check if already exists in DB
        cursor.execute(estatLinkSelectId, concept['url'])
        c.commit()
        row = cursor.fetchone()
        if row is None:

            concept['title'] = titleRaw.replace('Glossary:', '')
            # check if there was a redirection
            if redirected is not None:
                concept['original_title'] = redirected.replace('Glossary:', '')

            if concept['title'] is None:
                concept['title'] = 'ERROR'

            cursor.execute(estatLinkInsert, concept['title'], concept['url'])
            c.commit()
            # get id
            cursor.execute(estatLinkSelectId, concept['url'])
            c.commit()
            row = cursor.fetchone()
            concept['id'] = row.id
        else:
            concept['id'] = row.id

        # last update
        updateStrRaw = response.xpath('//div[@id="footer"]' +
                                      '//li[@id="lastmod"]/text()').get()
        if updateStrRaw is not None:
            dateFormat = "%d %B %Y, at %H:%M."
            updateStr = re.split('modified on ', normalize(updateStrRaw))
            update = datetime.strptime(updateStr[-1], dateFormat)
            concept['last_update'] = datetime.timestamp(update)

        # check if already in DB
        cursor.execute(conceptSelect, concept['id'])
        c.commit()
        row = cursor.fetchone()
        if row is None:

            concept['definition'] = normalize(definitionRaw.get_text())

            if updateStrRaw is not None:
                cursor.execute(conceptFullInsert,
                               concept['id'], concept['definition'],
                               concept['last_update'])
            else:
                cursor.execute(conceptInsert,
                               concept['id'], concept['definition'])
            c.commit()

            concept['further_info'] = []
            concept['related_concepts'] = []
            concept['statistical_data'] = []
            concept['sources'] = []

            # to identify which sub-categories are in the page
            titlesList = pageContent.find_all('h2')

            # go through each sub-category to assign the right data
            # to the right category
            # if a new/undetected sub-category has to be added,
            # add an elif paragraph
            for i in range(len(titlesList)):
                # index to gather the right info from splitContent
                a = 2*i + 2
                titleTemp = normalize(titlesList[i].get_text())
                if 'Further information' in titleTemp:
                    for elmt in BeautifulSoup(splitContent[a]).find_all('a'):
                        furtherInfo = LinkInfo()
                        furtherInfo['title'] = normalize(elmt.get_text())
                        url = elmt.get('href')
                        if url.startswith('/eurostat'):
                            furtherInfo['url'] = 'https://ec.europa.eu' + url
                        else:
                            furtherInfo['url'] = url
                        # select, check if in Link Info
                        if 'eurostat' in furtherInfo['url']:
                            cursor.execute(estatLinkSelectId,
                                           furtherInfo['url'])
                            c.commit()
                            row = cursor.fetchone()

                            if row is None:
                                # add a document
                                cursor.execute(estatLinkInsert,
                                               furtherInfo['title'],
                                               furtherInfo['url'])
                                c.commit()
                                # get id
                                cursor.execute(estatLinkSelectId,
                                               furtherInfo['url'])
                                c.commit()
                                row = cursor.fetchone()
                                # add a link between the concept and the doc
                                cursor.execute(furtherInfoInsert,
                                               concept['id'], row.id)
                                c.commit()
                            else:
                                idLink = row.id
                                cursor.execute(furtherInfoCheck,
                                               concept['id'], idLink)
                                c.commit()
                                row = cursor.fetchone()
                                if row is None:
                                    # add link between the concept and the doc
                                    cursor.execute(furtherInfoInsert,
                                                   concept['id'], idLink)

                        else:
                            cursor.execute(foreignLinkSelectId,
                                           furtherInfo['url'])
                            c.commit()
                            row = cursor.fetchone()

                            if row is None:
                                # add a document
                                cursor.execute(foreignLinkInsert,
                                               furtherInfo['title'],
                                               furtherInfo['url'])
                                c.commit()
                                # get id
                                cursor.execute(foreignLinkSelectId,
                                               furtherInfo['url'])
                                c.commit()
                                row = cursor.fetchone()
                                # add a link between the concept and the doc
                                cursor.execute(furtherInfoInsert,
                                               concept['id'], row.id)
                                c.commit()
                            else:
                                cursor.execute(furtherInfoCheck,
                                               concept['id'], row.id)
                                c.commit()
                                row = cursor.fetchone()
                                if row is None:
                                    # add link between the concept and the doc
                                    cursor.execute(furtherInfoInsert,
                                                   concept['id'], idLink)

                        concept['further_info'].append(furtherInfo)

                elif 'Related concepts' in titleTemp:
                    for elmt in BeautifulSoup(splitContent[a]).find_all('a'):
                        relCpt = LinkInfo()
                        relCpt['title'] = normalize(elmt.get_text())
                        relCpt['url'] = 'https://ec.europa.eu' +
                        elmt.get('href')
                        # check if the doc already is in the DB
                        cursor.execute(estatLinkSelectId, relCpt['url'])
                        c.commit()
                        row = cursor.fetchone()

                        if row is None:
                            # add a document
                            cursor.execute(estatLinkInsert,
                                           relCpt['title'], relCpt['url'])
                            c.commit()
                            # get id
                            cursor.execute(estatLinkSelectId, relCpt['url'])
                            c.commit()
                            row = cursor.fetchone()
                            # add a link between the concept and the doc
                            cursor.execute(relCptInsert, concept['id'], row.id)
                            c.commit()
                        else:
                            idLink = row.id
                            cursor.execute(relCptCheck, concept['id'], idLink)
                            c.commit()
                            row = cursor.fetchone()
                            if row is None:
                                # add a link between the concept and the doc
                                cursor.execute(relCptInsert,
                                               concept['id'], idLink)
                        concept['related_concepts'].append(relCpt)

                elif 'Statistical data' in titleTemp:
                    for elmt in BeautifulSoup(splitContent[a]).find_all('a'):
                        statData = LinkInfo()
                        statData['title'] = elmt.get('title')
                        statData['url'] = 'https://ec.europa.eu' +
                        elmt.get('href')
                        # check if the doc already is in the DB
                        cursor.execute(estatLinkSelectId, statData['url'])
                        c.commit()
                        row = cursor.fetchone()

                        if row is None:
                            # add a document
                            cursor.execute(estatLinkInsert,
                                           statData['title'], statData['url'])
                            c.commit()
                            # get id
                            cursor.execute(estatLinkSelectId, statData['url'])
                            c.commit()
                            row = cursor.fetchone()
                            # add a link between the concept and the doc
                            cursor.execute(statDataInsert,
                                           concept['id'], row.id)
                            c.commit()
                        else:
                            idLink = row.id
                            cursor.execute(statDataCheck,
                                           concept['id'], idLink)
                            c.commit()
                            row = cursor.fetchone()
                            if row is None:
                                # add link between the concept and the doc
                                cursor.execute(statDataInsert,
                                               concept['id'], idLink)
                        concept['statistical_data'].append(statData)

                elif 'Source' in titleTemp:
                    for elmt in BeautifulSoup(splitContent[a]).find_all('a'):
                        source = LinkInfo()
                        source['title'] = normalize(elmt.get_text())
                        url = elmt.get('href')
                        if url.startswith('/eurostat'):
                            source['url'] = 'https://ec.europa.eu' + url
                        else:
                            source['url'] = url
                        # select, check if in Link Info
                        if 'eurostat' in source['url']:
                            cursor.execute(estatLinkSelectId, source['url'])
                            c.commit()
                            row = cursor.fetchone()

                            if row is None:
                                # add a document
                                cursor.execute(estatLinkInsert,
                                               source['title'], source['url'])
                                c.commit()
                                # get id
                                cursor.execute(estatLinkSelectId,
                                               source['url'])
                                c.commit()
                                row = cursor.fetchone()
                                # add a link between the concept and the doc
                                cursor.execute(sourceInsert,
                                               concept['id'], row.id)
                                c.commit()
                            else:
                                idLink = row.id
                                cursor.execute(sourceCheck,
                                               concept['id'], idLink)
                                c.commit()
                                row = cursor.fetchone()
                                if row is None:
                                    # add link between the concept and the doc
                                    cursor.execute(sourceInsert,
                                                   concept['id'], idLink)

                        else:
                            cursor.execute(foreignLinkSelectId,
                                           source['url'])
                            c.commit()
                            row = cursor.fetchone()

                            if row is None:
                                # add a document
                                cursor.execute(foreignLinkInsert,
                                               source['title'], source['url'])
                                c.commit()
                                # get id
                                cursor.execute(foreignLinkSelectId,
                                               source['url'])
                                c.commit()
                                row = cursor.fetchone()
                                # add a link between the concept and the doc
                                cursor.execute(sourceInsert,
                                               concept['id'], row.id)
                                c.commit()
                            else:
                                cursor.execute(sourceCheck,
                                               concept['id'], row.id)
                                c.commit()
                                row = cursor.fetchone()
                                if row is None:
                                    # add link between the concept and the doc
                                    cursor.execute(sourceInsert,
                                                   concept['id'], idLink)
                        concept['sources'].append(source)

            categories = response.xpath('//div[@id="mw-normal-catlinks"]' +
                                        '/ul/li/a/text()').getall()

            concept['categories'] = categories
        # elif row.last_update == concept['last_update']:
            # To complete in order to update the DB

        # add record to pd dataframe
        pos = len(conceptsDf)
        keys = list(concept.keys())

        for key in keys:
            conceptsDf.loc[pos, key] = concept[key]

        yield concept


Articles spider

In [None]:
class articlesSpider(scrapy.Spider):
    name = 'articles'

    custom_settings = {
        # limit the logs
        'LOG_LEVEL': logging.WARNING,
        # exports
        'FEEDS': {
            'articles.json': {
                'format': 'json',
                'encoding': 'utf8',
                'fields': None,
                'indent': 4,
                'item_export_kwargs': {
                    'export_empty_fields': False
                }
            },
            'articles.csv': {
                'format': 'csv',
                'encoding': 'utf8',
                'item_export_kwargs': {
                    'include_headers_line': True,
                    'delimiter': '#'
                }
            }
        }
    }

    start_urls = ['https://ec.europa.eu/eurostat/statistics-explained' +
                  '/index.php?title=Category:Statistical_article']

    # go through all the articles
    def parse(self, response):
        # Gather the links on the page
        # starting with the start_urls link
        for page in response.css('#mw-pages').css('.mw-content-ltr'):
            for link in page.css('a ::attr(href)'):
                artLink = 'https://ec.europa.eu' + link.extract()
                yield scrapy.Request(url=artLink, callback=self.parse_article)

        # Check if there is another page
        # if so re-launch the parse function
        # with next_page url as start_urls
        nextPage = response.xpath("//a[contains(.//text(), 'next 200')]" +
                                   "/@href").get()
        if nextPage is not None:
            nextPage = response.urljoin('https://ec.europa.eu' + nextPage)
            yield scrapy.Request(nextPage, callback=self.parse)

    # get the information from one article
    def parse_article(self, response):

        article = Article()

        # abstract
        abstractRaw = response.xpath('//div[@class="col-lg-12 se-content"]' +
                                     '/p/descendant-or-self::*' +
                                     '/text()').getall()
        if len(abstractRaw) == 0:
            abstractRaw = response.xpath('//div[@id="mw-content-text"]' +
                                         '/p/descendant-or-self::*' +
                                         '/text()').getall()
            print('############################################')
            print(abstractRaw)
            print('############################################')
            
        if len(abstractRaw) == 0:
            print('*******************************************')
            print(BeautifulSoup(response.xpath('//div[@id="mw-content-text"]').get()))
            print(response.request.url)
            print('*********************************************')
            abstractRaw = response.xpath('//div[@class="col-lg-12 se-content"]' +
                                         'div/following-sibling::text()').getall()
            print(abstractRaw)
            
        abstract = ''
        for paragraph in abstractRaw:
            abstract = abstract + normalize(paragraph) + ' '

        if abstract == '' or abstract == ' ':
            print('là')
            print('----------------------------------------------------------------------')
            


        # full article
        fullArtRaw = response.xpath('//div[@class="panel-body-content"]' +
                                    '/div[@class="content-section"]').getall()
        fullArticle = []

        for seg in fullArtRaw:
            seg = BeautifulSoup(seg)
            articleParagraph = Paragraph()
            titles = seg.find_all('span', {'class': 'mw-headline'})

            if len(titles) > 0:

                titleTag = '<h2>|</h2>|<h3>|</h3>|<h4>|</h4>'
                splitContent = re.split(titleTag, seg.prettify())
                for i in range(len(titles)):
                    # title
                    title = titles[i].get_text()

                    text = BeautifulSoup(splitContent[2*i + 2])
                    # gather the text of each paragraph
                    contentRaw = text.find_all(['p', 'ul'])
                    content = ''
                    for part in contentRaw:
                        content = content + normalize(part.get_text()) + ' '

                    # figures
                    figures = text.find_all('div', {'class': 'thumbcaption'})

                    # assign the results to the right element
                    if title == 'Context':
                        article['context'] = content
                    elif title == 'Data Sources' or title == 'Data sources':
                        article['data_sources'] = content
                    else:
                        articleParagraph['title'] = title
                        articleParagraph['content'] = content

                        # figures
                        if (figures is not None) and (len(figures) != 0):
                            articleParagraph['figures'] = []
                            for fig in figures:
                                caption = re.split('<i>|</i>', fig.prettify())
                                figTemp = LinkInfo()

                                figTitle = BeautifulSoup(caption[0]).get_text()
                                figTemp['title'] = normalize(figTitle)

                                urls = BeautifulSoup(caption[-1]).find_all('a')
                                figTemp['url'] = []
                                for url in urls:
                                    figTemp['url'] = url.get('href')
                                    articleParagraph['figures'].append(figTemp)

                    if 'title' in articleParagraph:
                        fullArticle.append(articleParagraph)

            # context
            if 'context' not in article:
                contextRaw = response.xpath('//div[@id="content-context"]' +
                                            '/p/descendant-or-self::*' +
                                            '/text()').getall()
                ctxt = ''
                for part in contextRaw:
                    ctxt = ctxt + normalize(part) + ' '
                article['context'] = ctxt

            # data sources
            if 'data_sources' not in article:
                dataSourcesRaw = response.xpath('//div[@id="data-details"]' +
                                                '/p/descendant-or-self::*' +
                                                '/text()').getall()
                dataSources = ''
                for part in dataSourcesRaw:
                    dataSources = dataSources + normalize(part) + ' '
                article['data_sources'] = dataSources

            # excel
            excelRaw = response.xpath('//div[@id="content-excel"]').get()
            if excelRaw is not None:
                excelTab = BeautifulSoup(excelRaw).find_all('a')
                article['excel'] = []
                for a in excelTab:
                    linkTemp = LinkInfo()
                    linkTemp['title'] = a.get('title')
                    linkTemp['url'] = a.get('href')
                    article['excel'].append(linkTemp)
        # alerts
        alertsRaw = response.xpath('//div[@class="content"]' +
                                   '//div[@class="alert alert-th3"]').getall()
        if alertsRaw is not None:
            alerts = []
            for alertRaw in alertsRaw:
                alertTab = BeautifulSoup(alertRaw).find_all('p')
                alertTemp = Paragraph()
                alertTemp['title'] = normalize(alertTab[0].get_text())
                alertTxt = ''
                for p in alertTab[1:]:
                    alertTxt = alertTxt + normalize(p.get_text()) + ' '
                alertTemp['content'] = alertTxt

                alerts.append(alertTemp)

        categories = response.xpath('//div[@id="mw-normal-catlinks"]' +
                                    '/ul/li/a/text()').getall()

        article['url'] = response.request.url
        article['title'] = normalize(response.css('#firstHeading::text').get())
        article['abstract'] = normalize(abstract)
        article['full_article'] = fullArticle
        article['alerts'] = alerts
        article['categories'] = categories
        
        # last update
        updateStrRaw = response.xpath('//div[@id="footer"]' +
                                      '//li[@id="lastmod"]/text()').get()
        if updateStrRaw is not None:
            dateFormat = "%d %B %Y, at %H:%M."
            updateStr = re.split('modified on ', normalize(updateStrRaw))
            update = datetime.strptime(updateStr[-1], dateFormat)
            article['last_update'] = update

        # direct access
        directAccess = response.xpath('//div[@class="dat-section"]').getall()

        for elmnt in directAccess:
            elmntBs = BeautifulSoup(elmnt)
            tabLinks = elmntBs.find_all('a')
            linkslist = []
            for a in tabLinks:
                linkTemp = LinkInfo()
                linkTemp['title'] = normalize(a.get_text())
                linkTemp['url'] = a.get('href')
                linkslist.append(linkTemp)

            sectionTitle = elmntBs.find('div').get('id')

            if sectionTitle == 'seealso':
                article['other_articles'] = linkslist
            elif sectionTitle == 'maintables':
                article['tables'] = linkslist
            elif sectionTitle == 'database':
                article['database'] = linkslist
            elif sectionTitle == 'dedicatedsection':
                article['dedicated_section'] = linkslist
            elif sectionTitle == 'publications':
                article['publications'] = linkslist
            elif sectionTitle == 'methodology':
                article['methodology'] = linkslist
            elif sectionTitle == 'legal':
                article['legislation'] = linkslist
            elif sectionTitle == 'visualisation':
                article['visualisations'] = linkslist
            elif sectionTitle == 'externallinks':
                article['external_links'] = linkslist

        # add record to pd dataframe
        pos = len(articlesDf)
        keys = list(article.keys())
        for key in keys:
            articlesDf.loc[pos, key] = article[key]

        yield article


## Launch process

In [9]:
# Specific to Jupyter
process = CrawlerProcess({
    'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
})
#process.crawl(articlesSpider)
process.crawl(glossarySpider)
process.start()


2021-03-19 14:03:34 [scrapy.utils.log] INFO: Scrapy 2.4.1 started (bot: scrapybot)
2021-03-19 14:03:34 [scrapy.utils.log] INFO: Versions: lxml 4.5.2.0, libxml2 2.9.10, cssselect 1.1.0, parsel 1.6.0, w3lib 1.22.0, Twisted 20.3.0, Python 3.8.3 (default, Jul  2 2020, 17:30:36) [MSC v.1916 64 bit (AMD64)], pyOpenSSL 19.1.0 (OpenSSL 1.1.1g  21 Apr 2020), cryptography 2.9.2, Platform Windows-10-10.0.19041-SP0
2021-03-19 14:03:34 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.selectreactor.SelectReactor
2021-03-19 14:03:34 [scrapy.crawler] INFO: Overridden settings:
{'LOG_LEVEL': 30,
 'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'}
2021-03-19 14:04:26 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Car_theft> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\sit

2021-03-19 14:07:33 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:EFTA> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=District+heating)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib\s

2021-03-19 14:07:47 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Economic_size> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=District+heating)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anacon

2021-03-19 14:08:05 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:EC> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=District+heating)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib\sit

2021-03-19 14:10:59 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Contaminant> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=CC)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib\site-pac

2021-03-19 14:12:17 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:CIS_(2)> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=CC)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib\site-package

2021-03-19 14:13:17 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Business_and_consumer_confidence> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib

2021-03-19 14:13:32 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:BRIC> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\s

2021-03-19 14:14:28 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Avoidable_mortality> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib\site-package

2021-03-19 14:15:10 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Asia-Europe_Meeting_(ASEM)> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib\site-

2021-03-19 14:15:27 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Andean_Pact> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy

2021-03-19 14:15:49 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:African_Union_(AU)> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib\site-packages

2021-03-19 14:17:00 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:European_Patent_Convention_(EPC)> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=District+heating)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:

2021-03-19 14:17:03 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:European_Monetary_System_(EMS)> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=District+heating)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\P

2021-03-19 14:17:13 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:European_Research_Area_(ERA)> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=European+rail+traffic+management+system+%28ERTMS%29%0AEuropean+rail+traffic+management+system+%28ERTMS%29)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddl

2021-03-19 14:19:07 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Mineral_wastes_from_waste_treatment> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Intra-EU+flow)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:

2021-03-19 14:20:06 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:SAPM> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Population+growth)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib\

2021-03-19 14:20:41 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Treaty_establishing_the_European_Economic_Community> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Tourism%0ATourism)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x i

2021-03-19 14:20:52 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Yield> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Tourism%0ATourism)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib

2021-03-19 14:21:38 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:VAT> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Tourism%0ATourism)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib\s

2021-03-19 14:22:11 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:UNESCO> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Tourism%0ATourism)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\li

2021-03-19 14:22:20 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:UNCTAD> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Tourism%0ATourism)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\li

2021-03-19 14:22:34 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Treaty_on_European_Union> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Tourism%0ATourism)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\Progra

2021-03-19 14:22:36 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Treaty_establishing_the_European_Atomic_Energy_Community> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Tourism%0ATourism)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    fo

2021-03-19 14:22:57 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Toxic_chemicals> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Tourism%0ATourism)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Ana

2021-03-19 14:23:22 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:TFEU> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=SDR)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib\site-packages\

2021-03-19 14:24:17 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:STS> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=SDR)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib\site-packages\s

2021-03-19 14:24:38 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Standard_output_(SO)> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=SDR)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\li

2021-03-19 14:25:33 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Short-term_business_statistics_(STS)> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=SDR)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramD

2021-03-19 14:25:45 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Sexual_violence> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=SDR)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib\sit

2021-03-19 14:26:06 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Secondary_waste> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=SDR)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib\sit

2021-03-19 14:27:29 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:REACH> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Population+growth)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib

2021-03-19 14:28:35 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:PRODCOM_list> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Population+growth)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anacon

2021-03-19 14:29:08 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Pre-primary_education> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Population+growth)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramDa

2021-03-19 14:30:24 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:OPEC> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Morbidity+rate)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib\sit

2021-03-19 14:30:53 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:NST_2007> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Morbidity+rate)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib

2021-03-19 14:32:06 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:National_Center_for_Scientific_Research> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Morbidity+rate)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  Fil

2021-03-19 14:32:19 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Murder> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Morbidity+rate)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib\s

2021-03-19 14:33:02 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:MBT_plant> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Intra-EU+flow)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib

2021-03-19 14:33:38 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Lower_middle_income_countries> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Intra-EU+flow)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\Progr

2021-03-19 14:33:54 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Livestock_survey> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Intra-EU+flow)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anacon

2021-03-19 14:34:13 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Lisbon_objectives> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Intra-EU+flow)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaco

2021-03-19 14:34:28 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Least_developed_countries> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Intra-EU+flow)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramDa

2021-03-19 14:35:17 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Judges> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Intra-EU+flow)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Anaconda3\lib\si

2021-03-19 14:35:42 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Internal_market> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Gross+electricity+consumption)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\Pro

2021-03-19 14:36:05 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Insecticide> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Gross+electricity+consumption)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\Program

2021-03-19 14:36:55 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:IMF> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Gross+electricity+consumption)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\ProgramData\Ana

2021-03-19 14:37:37 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Home_burglary> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Gross+electricity+consumption)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:\Progr

2021-03-19 14:38:08 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Hazardous_chemicals> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=Gross+electricity+consumption)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "C:

2021-03-19 14:39:13 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:GISCO> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=European+rail+traffic+management+system+%28ERTMS%29%0AEuropean+rail+traffic+management+system+%28ERTMS%29)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", lin

2021-03-19 14:39:41 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:G20> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=European+rail+traffic+management+system+%28ERTMS%29%0AEuropean+rail+traffic+management+system+%28ERTMS%29)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 

2021-03-19 14:39:43 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:GATS> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=European+rail+traffic+management+system+%28ERTMS%29%0AEuropean+rail+traffic+management+system+%28ERTMS%29)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line

2021-03-19 14:40:34 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Fish_catch> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=European+rail+traffic+management+system+%28ERTMS%29%0AEuropean+rail+traffic+management+system+%28ERTMS%29)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py"

2021-03-19 14:41:37 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Exchange_rate_mechanism_(ERM)> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=European+rail+traffic+management+system+%28ERTMS%29%0AEuropean+rail+traffic+management+system+%28ERTMS%29)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spidermidd

2021-03-19 14:41:50 [scrapy.core.scraper] ERROR: Spider error processing <GET https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:European_System_of_Central_Banks> (referer: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Category:Glossary&pagefrom=European+rail+traffic+management+system+%28ERTMS%29%0AEuropean+rail+traffic+management+system+%28ERTMS%29)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\spiderm

## Saving df to excel

In [None]:
# won't work if the xlsx file is open
conceptsDf.to_excel('glossary.xlsx')
#articlesDf.to_excel('articles.xlsx')
