# Crawl diputades

In [9]:
import logging
import json
import time
from pathlib import Path
import gzip
from multiprocessing.pool import ThreadPool

from sqlalchemy import Column, Integer, String, ForeignKey, Boolean, UniqueConstraint, Text, Index
from sqlalchemy.orm import declarative_base, relationship, sessionmaker
from sqlalchemy import update
from pathlib import Path
from sqlalchemy import create_engine

import requests
from selenium import webdriver
import seleniumrequests
#from seleniumrequests import Chrome
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By

from sqlalchemy import create_engine, select
from sqlalchemy.orm import sessionmaker
from sqlalchemy import and_, or_
from sqlalchemy import update
from sqlalchemy import distinct, inspect
from json_stream import streamable_list

import pypdfium2 as pdfium
from bs4 import BeautifulSoup

## Database creation

In [14]:
Base = declarative_base()

class Document(Base):
    __tablename__ = 'document'
    
    pdf_url = Column(String, primary_key=True)  
    cve = Column(String)
    fecha = Column(String)
    fecha_mensaje = Column(String)
    mensaje = Column(String)
    ndia = Column(String)
    numdoc = Column(String)
    orga = Column(String)
    seri = Column(String)
    subi = Column(String)
    texto = Column(Text)
    secc = Column(String)
    legislatura = Column(String, ForeignKey('term.term'))
    desu = Column(String)
    desu1 = Column(String)
    desu2 = Column(String)
    
    term = relationship("Term", back_populates="documents")
    interventions = relationship("Intervention", back_populates="document")


class Term(Base):
    __tablename__ = 'term'
    term = Column(String, primary_key=True) 
    term_id = Column(Integer)  
    president = Column(String)
    init_date = Column(String)
    finish_date = Column(String)
    
    diputades = relationship("Diputades", back_populates="term")
    documents = relationship("Document", back_populates="term")


class Diputades(Base):
    __tablename__ = 'diputades'
    id = Column(Integer, primary_key=True) # ID únic per a cada orador
    apellidos = Column(String)
    formacion = Column(String)
    apellidosNombre = Column(String)
    fchBaja = Column(String)
    genero = Column(Integer)
    fchAlta = Column(String)
    idLegislatura = Column(String, ForeignKey('term.term'))
    grupo = Column(String)
    idCircunscripcion = Column(Integer)
    nombreCircunscripcion = Column(String)
    nombre = Column(String)
    codParlamentario = Column(Integer)
    
    charge = Column(String)

    term = relationship("Term", back_populates="diputades")
    interventions = relationship("Intervention", back_populates="speaker")



class Intervention(Base):
    __tablename__ = 'intervention'
    
    id = Column(String, primary_key=True)
    speaker_id = Column(Integer, ForeignKey('diputades.id'))
    text = Column(String)
    document_id = Column(String, ForeignKey('document.pdf_url'))
    fecha = Column(String)
    num_int = Column(Integer)

    document = relationship("Document", back_populates="interventions")
    speaker = relationship("Diputades", back_populates="interventions")



def create_database(engine):
    Base.metadata.create_all(engine)

## Class to crawl diputades

In [27]:
class DiputadesGenerator:
    def __init__(self, outer, id_legislatura):
        self.id_legislatura = id_legislatura
        self.outer = outer

    def __iter__(self):
        diputades_data = self.crawl_diputades_list(self.id_legislatura)
        print("Raw response:", diputades_data)  # Depuració
        diputades_list = diputades_data.get("data", [])  # Accedir correctament a la llista de diputats
        print(f"Number of diputades extracted: {len(diputades_list)}")

        for diputada in diputades_list:
            diputada["id_legislatura"] = self.id_legislatura
            yield diputada

    def crawl_diputades_list(self, id_legislatura):
        url = ("https://www.congreso.es/ca/busqueda-de-diputados?p_p_id=diputadomodule&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_resource_id=searchDiputados&p_p_cacheability=cacheLevelPage_diputadomodule_idLegislatura={id_legislatura}&_diputadomodule_genero=0&_diputadomodule_grupo=all&_diputadomodule_tipo=0&_diputadomodule_nombre=&_diputadomodule_apellidos=&_diputadomodule_formacion=all&_diputadomodule_filtroProvincias=%5B%5D&_diputadomodule_nombreCircunscripcion="
        )

        self.outer.driver.get(url)
        e = self.outer.driver.find_element(By.TAG_NAME, 'body')
        data = e.text.replace("\n", " ")
        print("Response text from Congreso:", data[:500])

        try:
            return json.loads(data)
        
        except json.JSONDecodeError:
            print("Error: La resposta no és JSON vàlid. Mostrant dades rebudes:")
            print(data[:500])  # Mostra només els primers 500 caràcters per depurar
            return {"data": []}  # Retornem una estructura buida per evitar errors


## Class to crawl

In [28]:
class CongresoCrawler:
    def __init__(self, db: str):
        self.engine = None
        self.session = None
        self.browser = None
        self.base_url = "https://www.congreso.es"
        self.base_dir = "doc"
        self.db = db

    def create_db(self):
        self.engine = create_engine(self.db, echo=True, future=True)
        create_database(self.engine)

    def connect_db(self, echo=True):
        self.engine = create_engine(self.db, echo=echo, future=True)
        session_class = sessionmaker(bind=self.engine)
        self.session = session_class()

    def crawl(self, echo=True):
        self.connect_db(echo)
        self.create_driver() #calling other functions
        self.click_accept_cookies()
        self.get_terms()
        terms = {term.term: term for term in self.session.query(Term).all()}
        sorted_term_ids = sorted(terms.keys(), reverse=False)
        for term_id in sorted_term_ids:
            self.crawl_term(terms[term_id])
        logging.info("Finished crawling Congreso de los Diputados")

    def create_driver(self):
        options = Options()
        #options.add_argument('--headless')
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        self.driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
        
    def click_accept_cookies(self):
        self.driver.get(self.base_url)
        # Accept cookies
        continue_link = self.driver.find_element(By.LINK_TEXT, 'Aceptar todas')
        continue_link.click()
        
    def get_terms(self):
        self.driver.get("https://www.congreso.es/es/busqueda-de-diputados")
        e = self.driver.find_element(By.XPATH,'//*[@id="_diputadomodule_legislatura"]') #find XPATH containing publicaciones...
        for term_option in e.find_elements(By.TAG_NAME, 'option'): 
            term_id = int(term_option.get_attribute("value"))  # Integer ID
            term_name = term_option.text.split(" ")[0]  # Extract Roman numeral
            t = Term(term_id=term_id, term=term_name)
            self.session.merge(t)  # Merge to avoid duplicates
        self.session.commit()
        logging.info("Terms committed to DB")
        
    def crawl_term(self, term):
        logging.info(f"Starting crawling term {term.term}")
        it = DiputadesGenerator(self, term.term_id)
        for i, diputada_dict in enumerate(it):
            d = Diputades(**diputada_dict)
            print("Inserting into DB:", diputada_dict)
            self.session.add(d)
            if i % 10 == 0:
                self.session.commit()
        self.session.commit()
        logging.info(f"Finished crawling term {term.term}")
        

In [29]:
logging.basicConfig(level=logging.DEBUG)

# Initialize and run the crawler
cc = CongresoCrawler("sqlite:///interventions.db")
cc.create_db()
cc.crawl()

2025-03-06 12:07:45,287 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2025-03-06 12:07:45,289 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("document")


INFO:sqlalchemy.engine.Engine:PRAGMA main.table_info("document")


2025-03-06 12:07:45,290 INFO sqlalchemy.engine.Engine [raw sql] ()


INFO:sqlalchemy.engine.Engine:[raw sql] ()


2025-03-06 12:07:45,293 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("term")


INFO:sqlalchemy.engine.Engine:PRAGMA main.table_info("term")


2025-03-06 12:07:45,294 INFO sqlalchemy.engine.Engine [raw sql] ()


INFO:sqlalchemy.engine.Engine:[raw sql] ()


2025-03-06 12:07:45,295 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("diputades")


INFO:sqlalchemy.engine.Engine:PRAGMA main.table_info("diputades")


2025-03-06 12:07:45,296 INFO sqlalchemy.engine.Engine [raw sql] ()


INFO:sqlalchemy.engine.Engine:[raw sql] ()


2025-03-06 12:07:45,297 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("intervention")


INFO:sqlalchemy.engine.Engine:PRAGMA main.table_info("intervention")


2025-03-06 12:07:45,298 INFO sqlalchemy.engine.Engine [raw sql] ()


INFO:sqlalchemy.engine.Engine:[raw sql] ()


2025-03-06 12:07:45,299 INFO sqlalchemy.engine.Engine COMMIT


INFO:sqlalchemy.engine.Engine:COMMIT
INFO:WDM:Get LATEST chromedriver version for google-chrome
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): googlechromelabs.github.io:443
DEBUG:urllib3.connectionpool:https://googlechromelabs.github.io:443 "GET /chrome-for-testing/latest-patch-versions-per-build.json HTTP/1.1" 200 10004
INFO:WDM:Get LATEST chromedriver version for google-chrome
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): googlechromelabs.github.io:443
DEBUG:urllib3.connectionpool:https://googlechromelabs.github.io:443 "GET /chrome-for-testing/latest-patch-versions-per-build.json HTTP/1.1" 200 10004
INFO:WDM:Driver [/Users/paula/.wdm/drivers/chromedriver/mac64/133.0.6943.141/chromedriver-mac-x64/chromedriver] found in cache
DEBUG:selenium.webdriver.common.service:Started executable: `/Users/paula/.wdm/drivers/chromedriver/mac64/133.0.6943.141/chromedriver-mac-x64/chromedriver` in a child process with pid: 2378 using 0 to output -3
DEBUG:selenium.we

2025-03-06 12:07:55,321 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2025-03-06 12:07:55,325 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:55,326 INFO sqlalchemy.engine.Engine [generated in 0.00115s] ('XV',)


INFO:sqlalchemy.engine.Engine:[generated in 0.00115s] ('XV',)
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync {'script': '/* getAttribute */return (function(){return (function(){var d=this||self;function f(a,b){function c(...', 'args': [{'element-6066-11e4-a52e-4f735466cecf': 'f.B9A810928C24961026ED4A7AA8D3727D.d.74EEA24759043D9865290D7BEC40BD7D.e.111'}, 'value']}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":"14"} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:GET http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/ele

2025-03-06 12:07:55,349 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:55,350 INFO sqlalchemy.engine.Engine [cached since 0.02436s ago] ('XIV',)


INFO:sqlalchemy.engine.Engine:[cached since 0.02436s ago] ('XIV',)
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync {'script': '/* getAttribute */return (function(){return (function(){var d=this||self;function f(a,b){function c(...', 'args': [{'element-6066-11e4-a52e-4f735466cecf': 'f.B9A810928C24961026ED4A7AA8D3727D.d.74EEA24759043D9865290D7BEC40BD7D.e.113'}, 'value']}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":"13"} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:GET http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d

2025-03-06 12:07:55,372 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:55,374 INFO sqlalchemy.engine.Engine [cached since 0.04832s ago] ('XIII',)


INFO:sqlalchemy.engine.Engine:[cached since 0.04832s ago] ('XIII',)
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync {'script': '/* getAttribute */return (function(){return (function(){var d=this||self;function f(a,b){function c(...', 'args': [{'element-6066-11e4-a52e-4f735466cecf': 'f.B9A810928C24961026ED4A7AA8D3727D.d.74EEA24759043D9865290D7BEC40BD7D.e.115'}, 'value']}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":"12"} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:GET http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127

2025-03-06 12:07:55,392 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:55,393 INFO sqlalchemy.engine.Engine [cached since 0.06808s ago] ('XII',)


INFO:sqlalchemy.engine.Engine:[cached since 0.06808s ago] ('XII',)
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync {'script': '/* getAttribute */return (function(){return (function(){var d=this||self;function f(a,b){function c(...', 'args': [{'element-6066-11e4-a52e-4f735466cecf': 'f.B9A810928C24961026ED4A7AA8D3727D.d.74EEA24759043D9865290D7BEC40BD7D.e.117'}, 'value']}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":"11"} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:GET http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d

2025-03-06 12:07:55,414 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:55,415 INFO sqlalchemy.engine.Engine [cached since 0.08969s ago] ('XI',)


INFO:sqlalchemy.engine.Engine:[cached since 0.08969s ago] ('XI',)
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync {'script': '/* getAttribute */return (function(){return (function(){var d=this||self;function f(a,b){function c(...', 'args': [{'element-6066-11e4-a52e-4f735466cecf': 'f.B9A810928C24961026ED4A7AA8D3727D.d.74EEA24759043D9865290D7BEC40BD7D.e.119'}, 'value']}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":"10"} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:GET http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4

2025-03-06 12:07:55,471 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:55,472 INFO sqlalchemy.engine.Engine [cached since 0.1472s ago] ('X',)


INFO:sqlalchemy.engine.Engine:[cached since 0.1472s ago] ('X',)
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync {'script': '/* getAttribute */return (function(){return (function(){var d=this||self;function f(a,b){function c(...', 'args': [{'element-6066-11e4-a52e-4f735466cecf': 'f.B9A810928C24961026ED4A7AA8D3727D.d.74EEA24759043D9865290D7BEC40BD7D.e.121'}, 'value']}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync HTTP/1.1" 200 13
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":"9"} | headers=HTTPHeaderDict({'Content-Length': '13', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:GET http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/el

2025-03-06 12:07:55,495 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:55,496 INFO sqlalchemy.engine.Engine [cached since 0.1709s ago] ('IX',)


INFO:sqlalchemy.engine.Engine:[cached since 0.1709s ago] ('IX',)
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync {'script': '/* getAttribute */return (function(){return (function(){var d=this||self;function f(a,b){function c(...', 'args': [{'element-6066-11e4-a52e-4f735466cecf': 'f.B9A810928C24961026ED4A7AA8D3727D.d.74EEA24759043D9865290D7BEC40BD7D.e.123'}, 'value']}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync HTTP/1.1" 200 13
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":"8"} | headers=HTTPHeaderDict({'Content-Length': '13', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:GET http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/e

2025-03-06 12:07:55,518 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:55,521 INFO sqlalchemy.engine.Engine [cached since 0.1954s ago] ('VIII',)


INFO:sqlalchemy.engine.Engine:[cached since 0.1954s ago] ('VIII',)
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync {'script': '/* getAttribute */return (function(){return (function(){var d=this||self;function f(a,b){function c(...', 'args': [{'element-6066-11e4-a52e-4f735466cecf': 'f.B9A810928C24961026ED4A7AA8D3727D.d.74EEA24759043D9865290D7BEC40BD7D.e.125'}, 'value']}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync HTTP/1.1" 200 13
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":"7"} | headers=HTTPHeaderDict({'Content-Length': '13', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:GET http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4

2025-03-06 12:07:55,540 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:55,541 INFO sqlalchemy.engine.Engine [cached since 0.216s ago] ('VII',)


INFO:sqlalchemy.engine.Engine:[cached since 0.216s ago] ('VII',)
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync {'script': '/* getAttribute */return (function(){return (function(){var d=this||self;function f(a,b){function c(...', 'args': [{'element-6066-11e4-a52e-4f735466cecf': 'f.B9A810928C24961026ED4A7AA8D3727D.d.74EEA24759043D9865290D7BEC40BD7D.e.127'}, 'value']}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync HTTP/1.1" 200 13
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":"6"} | headers=HTTPHeaderDict({'Content-Length': '13', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:GET http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/e

2025-03-06 12:07:55,561 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:55,562 INFO sqlalchemy.engine.Engine [cached since 0.237s ago] ('VI',)


INFO:sqlalchemy.engine.Engine:[cached since 0.237s ago] ('VI',)
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync {'script': '/* getAttribute */return (function(){return (function(){var d=this||self;function f(a,b){function c(...', 'args': [{'element-6066-11e4-a52e-4f735466cecf': 'f.B9A810928C24961026ED4A7AA8D3727D.d.74EEA24759043D9865290D7BEC40BD7D.e.129'}, 'value']}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync HTTP/1.1" 200 13
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":"5"} | headers=HTTPHeaderDict({'Content-Length': '13', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:GET http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/el

2025-03-06 12:07:55,582 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:55,583 INFO sqlalchemy.engine.Engine [cached since 0.2578s ago] ('V',)


INFO:sqlalchemy.engine.Engine:[cached since 0.2578s ago] ('V',)
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync {'script': '/* getAttribute */return (function(){return (function(){var d=this||self;function f(a,b){function c(...', 'args': [{'element-6066-11e4-a52e-4f735466cecf': 'f.B9A810928C24961026ED4A7AA8D3727D.d.74EEA24759043D9865290D7BEC40BD7D.e.131'}, 'value']}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync HTTP/1.1" 200 13
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":"4"} | headers=HTTPHeaderDict({'Content-Length': '13', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:GET http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/el

2025-03-06 12:07:55,603 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:55,605 INFO sqlalchemy.engine.Engine [cached since 0.2794s ago] ('IV',)


INFO:sqlalchemy.engine.Engine:[cached since 0.2794s ago] ('IV',)
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync {'script': '/* getAttribute */return (function(){return (function(){var d=this||self;function f(a,b){function c(...', 'args': [{'element-6066-11e4-a52e-4f735466cecf': 'f.B9A810928C24961026ED4A7AA8D3727D.d.74EEA24759043D9865290D7BEC40BD7D.e.133'}, 'value']}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync HTTP/1.1" 200 13
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":"3"} | headers=HTTPHeaderDict({'Content-Length': '13', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:GET http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/e

2025-03-06 12:07:55,624 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:55,626 INFO sqlalchemy.engine.Engine [cached since 0.3008s ago] ('III',)


INFO:sqlalchemy.engine.Engine:[cached since 0.3008s ago] ('III',)
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync {'script': '/* getAttribute */return (function(){return (function(){var d=this||self;function f(a,b){function c(...', 'args': [{'element-6066-11e4-a52e-4f735466cecf': 'f.B9A810928C24961026ED4A7AA8D3727D.d.74EEA24759043D9865290D7BEC40BD7D.e.135'}, 'value']}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync HTTP/1.1" 200 13
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":"2"} | headers=HTTPHeaderDict({'Content-Length': '13', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:GET http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/

2025-03-06 12:07:55,660 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:55,661 INFO sqlalchemy.engine.Engine [cached since 0.3362s ago] ('II',)


INFO:sqlalchemy.engine.Engine:[cached since 0.3362s ago] ('II',)
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync {'script': '/* getAttribute */return (function(){return (function(){var d=this||self;function f(a,b){function c(...', 'args': [{'element-6066-11e4-a52e-4f735466cecf': 'f.B9A810928C24961026ED4A7AA8D3727D.d.74EEA24759043D9865290D7BEC40BD7D.e.137'}, 'value']}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync HTTP/1.1" 200 13
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":"1"} | headers=HTTPHeaderDict({'Content-Length': '13', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:GET http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/e

2025-03-06 12:07:55,692 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:55,694 INFO sqlalchemy.engine.Engine [cached since 0.3687s ago] ('I',)


INFO:sqlalchemy.engine.Engine:[cached since 0.3687s ago] ('I',)
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync {'script': '/* getAttribute */return (function(){return (function(){var d=this||self;function f(a,b){function c(...', 'args': [{'element-6066-11e4-a52e-4f735466cecf': 'f.B9A810928C24961026ED4A7AA8D3727D.d.74EEA24759043D9865290D7BEC40BD7D.e.139'}, 'value']}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync HTTP/1.1" 200 13
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":"0"} | headers=HTTPHeaderDict({'Content-Length': '13', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:GET http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/el

2025-03-06 12:07:55,723 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:55,725 INFO sqlalchemy.engine.Engine [cached since 0.3995s ago] ('Legislatura',)


INFO:sqlalchemy.engine.Engine:[cached since 0.3995s ago] ('Legislatura',)
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync {'script': '/* getAttribute */return (function(){return (function(){var d=this||self;function f(a,b){function c(...', 'args': [{'element-6066-11e4-a52e-4f735466cecf': 'f.B9A810928C24961026ED4A7AA8D3727D.d.74EEA24759043D9865290D7BEC40BD7D.e.141'}, 'value']}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/execute/sync HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":"-1"} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:GET http://localhost:52805/session/cf3a6d51a720300ec49e10c9

2025-03-06 12:07:55,763 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:55,764 INFO sqlalchemy.engine.Engine [cached since 0.439s ago] ('Todas',)


INFO:sqlalchemy.engine.Engine:[cached since 0.439s ago] ('Todas',)


2025-03-06 12:07:55,766 INFO sqlalchemy.engine.Engine COMMIT


INFO:sqlalchemy.engine.Engine:COMMIT
INFO:root:Terms committed to DB


2025-03-06 12:07:55,769 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2025-03-06 12:07:55,771 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term


2025-03-06 12:07:55,774 INFO sqlalchemy.engine.Engine [generated in 0.00310s] ()


INFO:sqlalchemy.engine.Engine:[generated in 0.00310s] ()
INFO:root:Starting crawling term I
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/url {'url': 'https://www.congreso.es/ca/busqueda-de-diputados?p_p_id=diputadomodule&p_p_lifecycle=2&p_p_state=nor...'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/url HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/element {'using': 'tag name', 'value': 'body'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec4

Response text from Congreso: {"data":[]}
Raw response: {'data': []}
Number of diputades extracted: 0
2025-03-06 12:07:55,952 INFO sqlalchemy.engine.Engine COMMIT


INFO:sqlalchemy.engine.Engine:COMMIT


2025-03-06 12:07:55,959 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2025-03-06 12:07:55,962 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:55,963 INFO sqlalchemy.engine.Engine [generated in 0.00142s] ('I',)


INFO:sqlalchemy.engine.Engine:[generated in 0.00142s] ('I',)
INFO:root:Finished crawling term I


2025-03-06 12:07:55,966 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:55,968 INFO sqlalchemy.engine.Engine [cached since 0.006182s ago] ('II',)


INFO:sqlalchemy.engine.Engine:[cached since 0.006182s ago] ('II',)
INFO:root:Starting crawling term II
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/url {'url': 'https://www.congreso.es/ca/busqueda-de-diputados?p_p_id=diputadomodule&p_p_lifecycle=2&p_p_state=nor...'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/url HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/element {'using': 'tag name', 'value': 'body'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d5

Response text from Congreso: {"data":[]}
Raw response: {'data': []}
Number of diputades extracted: 0
2025-03-06 12:07:56,066 INFO sqlalchemy.engine.Engine COMMIT


INFO:sqlalchemy.engine.Engine:COMMIT


2025-03-06 12:07:56,068 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2025-03-06 12:07:56,069 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:56,070 INFO sqlalchemy.engine.Engine [cached since 0.1086s ago] ('II',)


INFO:sqlalchemy.engine.Engine:[cached since 0.1086s ago] ('II',)
INFO:root:Finished crawling term II


2025-03-06 12:07:56,073 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:56,074 INFO sqlalchemy.engine.Engine [cached since 0.1127s ago] ('III',)


INFO:sqlalchemy.engine.Engine:[cached since 0.1127s ago] ('III',)
INFO:root:Starting crawling term III
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/url {'url': 'https://www.congreso.es/ca/busqueda-de-diputados?p_p_id=diputadomodule&p_p_lifecycle=2&p_p_state=nor...'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/url HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/element {'using': 'tag name', 'value': 'body'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d5

Response text from Congreso: {"data":[]}
Raw response: {'data': []}
Number of diputades extracted: 0
2025-03-06 12:07:56,187 INFO sqlalchemy.engine.Engine COMMIT


INFO:sqlalchemy.engine.Engine:COMMIT


2025-03-06 12:07:56,193 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2025-03-06 12:07:56,194 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:56,195 INFO sqlalchemy.engine.Engine [cached since 0.2336s ago] ('III',)


INFO:sqlalchemy.engine.Engine:[cached since 0.2336s ago] ('III',)
INFO:root:Finished crawling term III


2025-03-06 12:07:56,199 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:56,200 INFO sqlalchemy.engine.Engine [cached since 0.2385s ago] ('IV',)


INFO:sqlalchemy.engine.Engine:[cached since 0.2385s ago] ('IV',)
INFO:root:Starting crawling term IV
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/url {'url': 'https://www.congreso.es/ca/busqueda-de-diputados?p_p_id=diputadomodule&p_p_lifecycle=2&p_p_state=nor...'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/url HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/element {'using': 'tag name', 'value': 'body'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a

Response text from Congreso: {"data":[]}
Raw response: {'data': []}
Number of diputades extracted: 0
2025-03-06 12:07:56,334 INFO sqlalchemy.engine.Engine COMMIT


INFO:sqlalchemy.engine.Engine:COMMIT


2025-03-06 12:07:56,337 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2025-03-06 12:07:56,342 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:56,343 INFO sqlalchemy.engine.Engine [cached since 0.3815s ago] ('IV',)


INFO:sqlalchemy.engine.Engine:[cached since 0.3815s ago] ('IV',)
INFO:root:Finished crawling term IV


2025-03-06 12:07:56,346 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:56,348 INFO sqlalchemy.engine.Engine [cached since 0.386s ago] ('IX',)


INFO:sqlalchemy.engine.Engine:[cached since 0.386s ago] ('IX',)
INFO:root:Starting crawling term IX
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/url {'url': 'https://www.congreso.es/ca/busqueda-de-diputados?p_p_id=diputadomodule&p_p_lifecycle=2&p_p_state=nor...'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/url HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/element {'using': 'tag name', 'value': 'body'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a7

Response text from Congreso: {"data":[]}
Raw response: {'data': []}
Number of diputades extracted: 0
2025-03-06 12:07:56,468 INFO sqlalchemy.engine.Engine COMMIT


INFO:sqlalchemy.engine.Engine:COMMIT


2025-03-06 12:07:56,471 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2025-03-06 12:07:56,473 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:56,474 INFO sqlalchemy.engine.Engine [cached since 0.5127s ago] ('IX',)


INFO:sqlalchemy.engine.Engine:[cached since 0.5127s ago] ('IX',)
INFO:root:Finished crawling term IX


2025-03-06 12:07:56,477 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:56,479 INFO sqlalchemy.engine.Engine [cached since 0.5171s ago] ('Legislatura',)


INFO:sqlalchemy.engine.Engine:[cached since 0.5171s ago] ('Legislatura',)
INFO:root:Starting crawling term Legislatura
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/url {'url': 'https://www.congreso.es/ca/busqueda-de-diputados?p_p_id=diputadomodule&p_p_lifecycle=2&p_p_state=nor...'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/url HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/element {'using': 'tag name', 'value': 'body'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST 

Response text from Congreso: {"data":[]}
Raw response: {'data': []}
Number of diputades extracted: 0
2025-03-06 12:07:56,575 INFO sqlalchemy.engine.Engine COMMIT


INFO:sqlalchemy.engine.Engine:COMMIT


2025-03-06 12:07:56,577 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2025-03-06 12:07:56,579 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:56,580 INFO sqlalchemy.engine.Engine [cached since 0.6187s ago] ('Legislatura',)


INFO:sqlalchemy.engine.Engine:[cached since 0.6187s ago] ('Legislatura',)
INFO:root:Finished crawling term Legislatura


2025-03-06 12:07:56,584 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:56,585 INFO sqlalchemy.engine.Engine [cached since 0.6235s ago] ('Todas',)


INFO:sqlalchemy.engine.Engine:[cached since 0.6235s ago] ('Todas',)
INFO:root:Starting crawling term Todas
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/url {'url': 'https://www.congreso.es/ca/busqueda-de-diputados?p_p_id=diputadomodule&p_p_lifecycle=2&p_p_state=nor...'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/url HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/element {'using': 'tag name', 'value': 'body'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3

Response text from Congreso: {"data":[]}
Raw response: {'data': []}
Number of diputades extracted: 0
2025-03-06 12:07:56,694 INFO sqlalchemy.engine.Engine COMMIT


INFO:sqlalchemy.engine.Engine:COMMIT


2025-03-06 12:07:56,696 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2025-03-06 12:07:56,697 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:56,699 INFO sqlalchemy.engine.Engine [cached since 0.7374s ago] ('Todas',)


INFO:sqlalchemy.engine.Engine:[cached since 0.7374s ago] ('Todas',)
INFO:root:Finished crawling term Todas


2025-03-06 12:07:56,702 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:56,703 INFO sqlalchemy.engine.Engine [cached since 0.7416s ago] ('V',)


INFO:sqlalchemy.engine.Engine:[cached since 0.7416s ago] ('V',)
INFO:root:Starting crawling term V
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/url {'url': 'https://www.congreso.es/ca/busqueda-de-diputados?p_p_id=diputadomodule&p_p_lifecycle=2&p_p_state=nor...'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/url HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/element {'using': 'tag name', 'value': 'body'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a72

Response text from Congreso: {"data":[]}
Raw response: {'data': []}
Number of diputades extracted: 0
2025-03-06 12:07:56,867 INFO sqlalchemy.engine.Engine COMMIT


INFO:sqlalchemy.engine.Engine:COMMIT


2025-03-06 12:07:56,869 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2025-03-06 12:07:56,870 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:56,871 INFO sqlalchemy.engine.Engine [cached since 0.9101s ago] ('V',)


INFO:sqlalchemy.engine.Engine:[cached since 0.9101s ago] ('V',)
INFO:root:Finished crawling term V


2025-03-06 12:07:56,875 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:56,876 INFO sqlalchemy.engine.Engine [cached since 0.9143s ago] ('VI',)


INFO:sqlalchemy.engine.Engine:[cached since 0.9143s ago] ('VI',)
INFO:root:Starting crawling term VI
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/url {'url': 'https://www.congreso.es/ca/busqueda-de-diputados?p_p_id=diputadomodule&p_p_lifecycle=2&p_p_state=nor...'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/url HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/element {'using': 'tag name', 'value': 'body'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a

Response text from Congreso: {"data":[]}
Raw response: {'data': []}
Number of diputades extracted: 0
2025-03-06 12:07:56,968 INFO sqlalchemy.engine.Engine COMMIT


INFO:sqlalchemy.engine.Engine:COMMIT


2025-03-06 12:07:56,971 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2025-03-06 12:07:56,973 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:56,974 INFO sqlalchemy.engine.Engine [cached since 1.013s ago] ('VI',)


INFO:sqlalchemy.engine.Engine:[cached since 1.013s ago] ('VI',)
INFO:root:Finished crawling term VI


2025-03-06 12:07:56,977 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:56,979 INFO sqlalchemy.engine.Engine [cached since 1.017s ago] ('VII',)


INFO:sqlalchemy.engine.Engine:[cached since 1.017s ago] ('VII',)
INFO:root:Starting crawling term VII
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/url {'url': 'https://www.congreso.es/ca/busqueda-de-diputados?p_p_id=diputadomodule&p_p_lifecycle=2&p_p_state=nor...'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/url HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/element {'using': 'tag name', 'value': 'body'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51

Response text from Congreso: {"data":[]}
Raw response: {'data': []}
Number of diputades extracted: 0
2025-03-06 12:07:57,115 INFO sqlalchemy.engine.Engine COMMIT


INFO:sqlalchemy.engine.Engine:COMMIT


2025-03-06 12:07:57,117 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2025-03-06 12:07:57,119 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:57,120 INFO sqlalchemy.engine.Engine [cached since 1.159s ago] ('VII',)


INFO:sqlalchemy.engine.Engine:[cached since 1.159s ago] ('VII',)
INFO:root:Finished crawling term VII


2025-03-06 12:07:57,125 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:57,127 INFO sqlalchemy.engine.Engine [cached since 1.165s ago] ('VIII',)


INFO:sqlalchemy.engine.Engine:[cached since 1.165s ago] ('VIII',)
INFO:root:Starting crawling term VIII
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/url {'url': 'https://www.congreso.es/ca/busqueda-de-diputados?p_p_id=diputadomodule&p_p_lifecycle=2&p_p_state=nor...'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/url HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/element {'using': 'tag name', 'value': 'body'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d

Response text from Congreso: {"data":[]}
Raw response: {'data': []}
Number of diputades extracted: 0
2025-03-06 12:07:57,216 INFO sqlalchemy.engine.Engine COMMIT


INFO:sqlalchemy.engine.Engine:COMMIT


2025-03-06 12:07:57,218 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2025-03-06 12:07:57,219 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:57,223 INFO sqlalchemy.engine.Engine [cached since 1.262s ago] ('VIII',)


INFO:sqlalchemy.engine.Engine:[cached since 1.262s ago] ('VIII',)
INFO:root:Finished crawling term VIII


2025-03-06 12:07:57,227 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:57,228 INFO sqlalchemy.engine.Engine [cached since 1.267s ago] ('X',)


INFO:sqlalchemy.engine.Engine:[cached since 1.267s ago] ('X',)
INFO:root:Starting crawling term X
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/url {'url': 'https://www.congreso.es/ca/busqueda-de-diputados?p_p_id=diputadomodule&p_p_lifecycle=2&p_p_state=nor...'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/url HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/element {'using': 'tag name', 'value': 'body'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720

Response text from Congreso: {"data":[]}
Raw response: {'data': []}
Number of diputades extracted: 0
2025-03-06 12:07:57,317 INFO sqlalchemy.engine.Engine COMMIT


INFO:sqlalchemy.engine.Engine:COMMIT


2025-03-06 12:07:57,319 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2025-03-06 12:07:57,321 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:57,322 INFO sqlalchemy.engine.Engine [cached since 1.361s ago] ('X',)


INFO:sqlalchemy.engine.Engine:[cached since 1.361s ago] ('X',)
INFO:root:Finished crawling term X


2025-03-06 12:07:57,325 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:57,326 INFO sqlalchemy.engine.Engine [cached since 1.364s ago] ('XI',)


INFO:sqlalchemy.engine.Engine:[cached since 1.364s ago] ('XI',)
INFO:root:Starting crawling term XI
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/url {'url': 'https://www.congreso.es/ca/busqueda-de-diputados?p_p_id=diputadomodule&p_p_lifecycle=2&p_p_state=nor...'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/url HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/element {'using': 'tag name', 'value': 'body'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a7

Response text from Congreso: {"data":[]}
Raw response: {'data': []}
Number of diputades extracted: 0
2025-03-06 12:07:57,415 INFO sqlalchemy.engine.Engine COMMIT


INFO:sqlalchemy.engine.Engine:COMMIT


2025-03-06 12:07:57,418 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2025-03-06 12:07:57,419 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:57,423 INFO sqlalchemy.engine.Engine [cached since 1.462s ago] ('XI',)


INFO:sqlalchemy.engine.Engine:[cached since 1.462s ago] ('XI',)
INFO:root:Finished crawling term XI


2025-03-06 12:07:57,427 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:57,428 INFO sqlalchemy.engine.Engine [cached since 1.466s ago] ('XII',)


INFO:sqlalchemy.engine.Engine:[cached since 1.466s ago] ('XII',)
INFO:root:Starting crawling term XII
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/url {'url': 'https://www.congreso.es/ca/busqueda-de-diputados?p_p_id=diputadomodule&p_p_lifecycle=2&p_p_state=nor...'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/url HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/element {'using': 'tag name', 'value': 'body'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51

Response text from Congreso: {"data":[]}
Raw response: {'data': []}
Number of diputades extracted: 0
2025-03-06 12:07:57,535 INFO sqlalchemy.engine.Engine COMMIT


INFO:sqlalchemy.engine.Engine:COMMIT


2025-03-06 12:07:57,537 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2025-03-06 12:07:57,539 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:57,541 INFO sqlalchemy.engine.Engine [cached since 1.58s ago] ('XII',)


INFO:sqlalchemy.engine.Engine:[cached since 1.58s ago] ('XII',)
INFO:root:Finished crawling term XII


2025-03-06 12:07:57,544 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:57,545 INFO sqlalchemy.engine.Engine [cached since 1.584s ago] ('XIII',)


INFO:sqlalchemy.engine.Engine:[cached since 1.584s ago] ('XIII',)
INFO:root:Starting crawling term XIII
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/url {'url': 'https://www.congreso.es/ca/busqueda-de-diputados?p_p_id=diputadomodule&p_p_lifecycle=2&p_p_state=nor...'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/url HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/element {'using': 'tag name', 'value': 'body'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d

Response text from Congreso: {"data":[]}
Raw response: {'data': []}
Number of diputades extracted: 0
2025-03-06 12:07:57,655 INFO sqlalchemy.engine.Engine COMMIT


INFO:sqlalchemy.engine.Engine:COMMIT


2025-03-06 12:07:57,658 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2025-03-06 12:07:57,660 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:57,661 INFO sqlalchemy.engine.Engine [cached since 1.7s ago] ('XIII',)


INFO:sqlalchemy.engine.Engine:[cached since 1.7s ago] ('XIII',)
INFO:root:Finished crawling term XIII


2025-03-06 12:07:57,665 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:57,666 INFO sqlalchemy.engine.Engine [cached since 1.705s ago] ('XIV',)


INFO:sqlalchemy.engine.Engine:[cached since 1.705s ago] ('XIV',)
INFO:root:Starting crawling term XIV
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/url {'url': 'https://www.congreso.es/ca/busqueda-de-diputados?p_p_id=diputadomodule&p_p_lifecycle=2&p_p_state=nor...'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/url HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/element {'using': 'tag name', 'value': 'body'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51

Response text from Congreso: {"data":[]}
Raw response: {'data': []}
Number of diputades extracted: 0
2025-03-06 12:07:57,783 INFO sqlalchemy.engine.Engine COMMIT


INFO:sqlalchemy.engine.Engine:COMMIT


2025-03-06 12:07:57,785 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2025-03-06 12:07:57,786 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:57,790 INFO sqlalchemy.engine.Engine [cached since 1.829s ago] ('XIV',)


INFO:sqlalchemy.engine.Engine:[cached since 1.829s ago] ('XIV',)
INFO:root:Finished crawling term XIV


2025-03-06 12:07:57,793 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:57,794 INFO sqlalchemy.engine.Engine [cached since 1.833s ago] ('XV',)


INFO:sqlalchemy.engine.Engine:[cached since 1.833s ago] ('XV',)
INFO:root:Starting crawling term XV
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/url {'url': 'https://www.congreso.es/ca/busqueda-de-diputados?p_p_id=diputadomodule&p_p_lifecycle=2&p_p_state=nor...'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a720300ec49e10c9dcb127d4/url HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:52805/session/cf3a6d51a720300ec49e10c9dcb127d4/element {'using': 'tag name', 'value': 'body'}
DEBUG:urllib3.connectionpool:http://localhost:52805 "POST /session/cf3a6d51a7

Response text from Congreso: {"data":[]}
Raw response: {'data': []}
Number of diputades extracted: 0
2025-03-06 12:07:57,894 INFO sqlalchemy.engine.Engine COMMIT


INFO:sqlalchemy.engine.Engine:COMMIT


2025-03-06 12:07:57,898 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2025-03-06 12:07:57,900 INFO sqlalchemy.engine.Engine SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


INFO:sqlalchemy.engine.Engine:SELECT term.term AS term_term, term.term_id AS term_term_id, term.president AS term_president, term.init_date AS term_init_date, term.finish_date AS term_finish_date 
FROM term 
WHERE term.term = ?


2025-03-06 12:07:57,901 INFO sqlalchemy.engine.Engine [cached since 1.94s ago] ('XV',)


INFO:sqlalchemy.engine.Engine:[cached since 1.94s ago] ('XV',)
INFO:root:Finished crawling term XV
INFO:root:Finished crawling Congreso de los Diputados
