In [1]:
import os
import xml.etree.ElementTree as ET
from xml.sax.saxutils import escape

import requests
from eurlex import get_html_by_celex_id

# Look up a document through the EUR-Lex SOAP search web service, read the
# CELEX id from the first hit, download that document as HTML and save it
# under out/<CELEX>.html.

url = "https://eur-lex.europa.eu/EURLexWebService"

headers = {
    "Content-Type": "application/soap+xml;charset=UTF-8",
}

# Web-service credentials are read from the environment instead of being
# embedded in the notebook. Any credentials that were previously committed
# in this cell should be treated as leaked and rotated.
eurlex_username = os.environ.get("EURLEX_USERNAME")
eurlex_password = os.environ.get("EURLEX_PASSWORD")
if not eurlex_username or not eurlex_password:
    raise RuntimeError(
        "Set the EURLEX_USERNAME and EURLEX_PASSWORD environment variables "
        "before running this cell."
    )

# The payload contains no literal braces, so str.format is safe here.
# Credentials are XML-escaped so characters such as '&' or '<' cannot break
# the envelope.
payload = """<?xml version="1.0" encoding="utf-8"?>
            <soap:Envelope xmlns:soap="http://www.w3.org/2003/05/soap-envelope" xmlns:sear="http://eur-lex.europa.eu/search">
    <soap:Header>
        <wsse:Security xmlns:wsse="http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-secext-1.0.xsd" soap:mustUnderstand="true">
            <wsse:UsernameToken xmlns:wsu="http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd" wsu:Id="UsernameToken-1">
                <wsse:Username>{username}</wsse:Username>
                <wsse:Password Type="http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-username-token-profile-1.0#PasswordText">{password}</wsse:Password>
            </wsse:UsernameToken>
        </wsse:Security>
    </soap:Header>
    <soap:Body>
        <sear:searchRequest>
            <sear:expertQuery>
                <![CDATA[Titel ~ 2019/2144]]>
            </sear:expertQuery>
            <sear:page>1</sear:page>
            <sear:pageSize>1</sear:pageSize>
            <sear:searchLanguage>en</sear:searchLanguage>
        </sear:searchRequest>
    </soap:Body>
</soap:Envelope>""".format(
    username=escape(eurlex_username),
    password=escape(eurlex_password),
)

# Send the body as UTF-8 bytes (matching the declared charset) and bound the
# wait so a stalled connection cannot hang the kernel indefinitely.
response = requests.post(
    url,
    headers=headers,
    data=payload.encode("utf-8"),
    timeout=30,
)

if response.status_code != 200:
    raise Exception(
        f"Error accessing EUR-Lex service (HTTP {response.status_code})"
    )


namespaces = {
    'soap': 'http://www.w3.org/2003/05/soap-envelope',
    'sear': 'http://eur-lex.europa.eu/search'
}

# Parse the raw bytes so the XML parser honours the encoding declared in the
# document rather than whatever text decoding requests guessed.
root = ET.fromstring(response.content)
celex_element = root.find('.//sear:ID_CELEX', namespaces)

# The value is read from the first child of ID_CELEX when there is one;
# fall back to the element's own text so a childless element does not raise
# IndexError. An empty value is treated the same as a missing tag.
celex_value = None
if celex_element is not None:
    value_node = celex_element[0] if len(celex_element) else celex_element
    celex_value = (value_node.text or "").strip() or None

if celex_value is not None:
    html_string = get_html_by_celex_id(celex_value)
    folder_path = "out/"
    # Make sure the output folder exists on a fresh checkout.
    os.makedirs(folder_path, exist_ok=True)
    file_name = celex_value + ".html"
    file_path = folder_path + file_name

    # Explicit encoding: the platform default may not be able to encode
    # every character in the document.
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(html_string)

    print("HTML file saved successfully at:", file_path)

else:
    print("ID_CELEX tag not found.")


HTML file saved successfully at: out/32019R2144.html


In [26]:
from helper.eurlex_loader import get_html_by_title

# Number of characters of the returned document to show in the cell output.
PREVIEW_CHARS = 2000

# Fetch the HTML of the document whose title matches "2021/535".
# NOTE(review): the output saved with this cell shows an Official Journal
# issue dated 16.12.2019 (L_2019325EN), which may not be the 2021/535
# document - re-run and confirm the helper returns the expected document.
res = get_html_by_title("2021/535")

# Print only the beginning of the document: a full regulation is very large
# and would flood the notebook output. Non-string results are printed as-is.
print(res[:PREVIEW_CHARS] if isinstance(res, str) else res)

<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML//EN" "xhtml-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><!-- CONVEX # converter_version:9.15.0 # generated_on:20231207-2058 # ELI version:0.10 --><head>
      <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
      <link type="text/css" rel="stylesheet" href="oj-convex.css"/>
      <title>L_2019325EN.01000101.xml</title>
   </head>
   <body>
      <table width="100%" border="0" cellspacing="0" cellpadding="0">
         <col width="10%"/>
         <col width="10%"/>
         <col width="60%"/>
         <col width="20%"/>
         <tbody>
            <tr>
               <td  >
                  <p class="oj-hd-date">16.12.2019   </p>
               </td>
               <td  >
                  <p class="oj-hd-lg">EN</p>
               </td>
               <td  >
                  <p class="oj-hd-ti">Official Journal of the European Union</p>
               </td>
               <td

In [14]:
import eurlex
import pandas as pd

# Download up to 150 recommendation/regulation documents, parse each one and
# write all parsed rows to out.csv.
docs = eurlex.get_documents(types=["RECO", "REG"] , limit=150)
print(len(docs))

# Collect the per-document frames and concatenate once at the end.
# Concatenating inside the loop re-copies every accumulated row on each
# iteration, which is quadratic in the number of documents.
df_list = []
for doc in docs:
    html_doc = eurlex.get_html_by_celex_id(doc["celex"])
    df = eurlex.parse_html(html_doc)  # presumably a DataFrame - it is passed to pd.concat
    df_list.append(df)

# pd.concat raises on an empty list, so keep the empty-frame result the
# original loop produced when no documents are returned.
result = pd.concat(df_list) if df_list else pd.DataFrame()

result.to_csv('out.csv', index=False)

150


In [13]:
# Fetch one document by CELEX id, parse it and copy the parsed table to the
# system clipboard. This cell has no import of its own and relies on
# `eurlex` already being imported by another cell.
celex_id = "42016X1723"
html = eurlex.get_html_by_celex_id(celex_id)
df = eurlex.parse_html(html)
df.to_clipboard()

In [5]:
from helper.eurlex_loader import get_documents_date_range

# List the documents returned for the given date window; the bounds are
# passed through unchanged as the `start` and `end` keyword arguments.
start_date = "2023-10-01"
end_date = "2024-02-01"

docs = get_documents_date_range(start=start_date, end=end_date)
docs

[{'celex': '32022R2474R(05)',
  'date': '2023-12-18',
  'link': 'http://publications.europa.eu/resource/cellar/9fef36d4-9d46-11ee-b164-01aa75ed71a1',
  'type': 'REG'},
 {'celex': '32023R2841',
  'date': '2023-12-13',
  'link': 'http://publications.europa.eu/resource/cellar/aca5f4e8-9d47-11ee-b164-01aa75ed71a1',
  'type': 'REG'},
 {'celex': '32023R2873',
  'date': '2023-12-18',
  'link': 'http://publications.europa.eu/resource/cellar/a7c21c30-9dbd-11ee-b164-01aa75ed71a1',
  'type': 'REG'},
 {'celex': '32024R0259',
  'date': '2024-01-10',
  'link': 'http://publications.europa.eu/resource/cellar/b268121e-b024-11ee-b164-01aa75ed71a1',
  'type': 'REG'},
 {'celex': '32024R0257',
  'date': '2024-01-10',
  'link': 'http://publications.europa.eu/resource/cellar/da4acb75-b024-11ee-b164-01aa75ed71a1',
  'type': 'REG'},
 {'celex': '32023R2638',
  'date': '2023-11-20',
  'link': 'http://publications.europa.eu/resource/cellar/545919a5-88d9-11ee-99ba-01aa75ed71a1',
  'type': 'REG'},
 {'celex': '32023