In [1]:
import requests
import urllib.parse
from src.ParliamentDataHandler import ParliamentDataHandler
from src.ep_talker import EuropeanParliamentTalker
import pandas as pd

In [None]:
data_handler = ParliamentDataHandler()
procedures = data_handler.get_procedures()

In [3]:
# Example of usage
rdf_url = "https://data.europarl.europa.eu/eli/dl/proc/2022-0147"

# Instantiate the class
talker = EuropeanParliamentTalker()

# Step 1: Download and load RDF data
talker.download_rdf(rdf_url)

# Step 2: Perform a SPARQL query to extract all person data
query = """
PREFIX eli: <http://data.europa.eu/eli/ontology#>
PREFIX eli-dl: <http://data.europa.eu/eli/eli-draft-legislation-ontology#>
PREFIX ep-activities: <https://data.europarl.europa.eu/def/ep-activities/>
PREFIX ep-procedure-types: <https://data.europarl.europa.eu/def/ep-procedure-types/>
PREFIX ep-roles: <https://data.europarl.europa.eu/def/ep-roles/>
PREFIX ept: <https://data.europarl.europa.eu/def/>
PREFIX epvoc: <https://data.europarl.europa.eu/def/epvoc#>
PREFIX op-aut: <http://publications.europa.eu/resource/authority/>
PREFIX org: <http://www.w3.org/ns/org#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?process ?processId ?processTitle ?currentStage ?procedureType ?procedureTypeLabel 
       ?initiator ?stakeholder ?activity ?activityId ?activityDate ?activityType 
       ?role ?document ?documentId ?documentTitle ?documentDate 
       ?relatedProcess ?relatedProcessTitle ?status
WHERE {
    ?process a eli-dl:Process ;
             eli-dl:process_id ?processId ;
             eli-dl:process_title ?processTitle ;
             eli-dl:current_stage ?currentStage .

    OPTIONAL { 
        ?process eli-dl:process_type ?procedureType .
        ?procedureType rdfs:label ?procedureTypeLabel .
    }

    OPTIONAL { ?process eli-dl:initiator ?initiator . }
    OPTIONAL { ?process eli-dl:stakeholder ?stakeholder . }

    OPTIONAL {
        ?activity a eli-dl:Activity ;
                  eli-dl:activity_id ?activityId ;
                  eli-dl:activity_date ?activityDate ;
                  eli-dl:activity_type ?activityType ;
                  eli-dl:belongs_to_process ?process .
    }

    OPTIONAL {
        ?role a ep-roles:Role ;
              ep-roles:has_role ?role ;
              ep-roles:related_to_process ?process .
    }

    OPTIONAL {
        ?document a eli:LegalResource ;
                  eli:identifier ?documentId ;
                  dcterms:title ?documentTitle ;
                  dcterms:date ?documentDate ;
                  eli:related_to_process ?process .
    }

    OPTIONAL {
        ?relatedProcess eli-dl:related_process ?process ;
                        eli-dl:process_title ?relatedProcessTitle ;
                        eli-dl:status ?status .
    }
    FILTER(LANG(?processTitle) = "en")
}
"""
results = talker.query(query)

results_list = []
for row in results:
    row_dict = {}
    for var in row.labels:
        row_dict[var] = str(row[var])  # Convert RDF nodes to strings
    results_list.append(row_dict)
print("Total: ", len(results_list))

Total:  1


In [4]:
results_list

[{'process': 'https://data.europarl.europa.eu/eli/dl/proc/2022-0147',
  'processId': '2022-0147',
  'processTitle': 'Amending Directive 2011/83/EU concerning financial services contracts concluded at a distance and repealing Directive 2002/65/EC',
  'currentStage': 'http://publications.europa.eu/resource/authority/procedure-phase/RDG1',
  'procedureType': 'None',
  'procedureTypeLabel': 'None',
  'initiator': 'None',
  'stakeholder': 'None',
  'activity': 'None',
  'activityId': 'None',
  'activityDate': 'None',
  'activityType': 'None',
  'role': 'None',
  'document': 'None',
  'documentId': 'None',
  'documentTitle': 'None',
  'documentDate': 'None',
  'relatedProcess': 'None',
  'relatedProcessTitle': 'None',
  'status': 'None'}]

In [4]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm

def fetch_procedure_details(procedure):
    id_ = procedure["id"]
    return data_handler.get_procedure_details(id_)

# Use ThreadPoolExecutor to run the tasks in parallel
with ThreadPoolExecutor(max_workers=10) as executor:
    futures = [executor.submit(fetch_procedure_details, procedure) for procedure in procedures[:100]]
    
    # Use tqdm to show progress
    for future in tqdm(as_completed(futures), total=len(futures)):
        details = future.result()
        # Process the details as needed

0it [00:00, ?it/s]


In [3]:
len(procedures)

16997