In [35]:
import requests

# Exploratory request against the EUR-Lex quick-search endpoint: send the
# query parameters below, then display the final response next to the list
# of intermediate responses that requests followed to get there.
url = "https://eur-lex.europa.eu/search.html"

params = dict(
    scope="EURLEX",
    text='aaaaaaaaaaaaaaaaaaaaa',
    lang='en',
    type="quick",
    DTS_DOM="EU_LAW",
    sortOne="DD",
    sortOneOrder="desc",
    DD_YEAR=2020,
    page=2,
)
r = requests.get(url, params=params)
r, r.history

(<Response [200]>, [<Response [302]>])

In [36]:
# URL of the final response held in `r` (the recorded history above shows one
# 302 response before it).
r.url

'https://eur-lex.europa.eu/search.html?scope=EURLEX&DD_YEAR=2020&sortOneOrder=desc&DTS_DOM=EU_LAW&sortOne=DD&page=2&text=aaaaaaaaaaaaaaaaaaaaa&lang=en&type=quick&qid=1622553666155'

In [62]:
import requests
from bs4 import BeautifulSoup
import unicodedata

import meilisearch
import json
import re

In [161]:
def search_page(query, page=1, language='en', timeout=30):
    """Fetch one page of EUR-Lex quick-search results and parse it into dicts.

    Sends a GET request to the EUR-Lex search endpoint (``scope=EURLEX``,
    ``DTS_DOM=EU_LAW``, ``sortOne=DD``, ``sortOneOrder=desc``) and builds one
    entry per ``div.SearchResult`` element found in the returned HTML.

    Args:
        query: Search text, sent as the ``text`` parameter.
        page: Result page number, sent as the ``page`` parameter.
        language: Language code, sent as the ``lang`` parameter.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``author``, ``date``
        and ``link``. Every value is an NFKD-normalised string. ``date`` and
        ``author`` are empty strings when the page does not provide them.
        Results without a title link, without two ``col-sm-6`` columns or
        without an identifier element are skipped instead of raising.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        requests.RequestException: On connection problems or timeouts.
    """
    url = "https://eur-lex.europa.eu/search.html"

    params = {
        "scope": "EURLEX",
        "text": query,
        "lang": language,
        "type": "quick",
        "DTS_DOM": "EU_LAW",
        "sortOne": "DD",
        "sortOneOrder": "desc",
        "page": page
    }

    response = requests.get(url, params=params, timeout=timeout)
    # Fail loudly on an HTTP error rather than parsing an error page into an
    # empty (and misleading) result list.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Raw string: "\d" inside a normal string literal is an invalid escape.
    date_pattern = re.compile(r"\d{2}/\d{2}/\d{4}")
    final_results = []

    for result in soup.find_all('div', class_='SearchResult'):
        # Look the columns up once; both the identifier and the details use them.
        columns = result.find_all('div', class_='col-sm-6')
        title = result.find('a', class_='title')
        if title is None or len(columns) < 2:
            continue

        # The identifier normally sits in the first <dd> of the first column;
        # fall back to the first <p> of the result when it does not.
        celex_node = columns[0].find('dd')
        if celex_node is None:
            celex_node = result.find('p')
        if celex_node is None:
            continue
        # Keep only ASCII letters and digits in the identifier.
        celex = re.sub(r'[^a-zA-Z0-9]', '', celex_node.text)

        details = columns[1].find_all('dd')
        author = details[0].text if details else ''
        # First <dd> whose text starts with dd/mm/yyyy; only those ten
        # characters are kept. Empty string when no such value exists.
        date = next(
            (dd.text[:10] for dd in details if date_pattern.match(dd.text)),
            '',
        )

        final_results.append({
            'id': unicodedata.normalize('NFKD', celex),
            'title': unicodedata.normalize('NFKD', title.text),
            'author': unicodedata.normalize('NFKD', author),
            'date': unicodedata.normalize('NFKD', date),
            # The document URL is read from the anchor's `name` attribute.
            'link': unicodedata.normalize('NFKD', title.get('name', '')),
        })
    return final_results

In [162]:
def search_many(query, pages=10, language='en'):
    """Collect the parsed results of several consecutive search pages.

    Calls ``search_page`` for page 1 up to and including ``pages`` and
    concatenates what each call returns, in page order.

    Args:
        query: Search text forwarded to ``search_page``.
        pages: Number of pages to request; values below 1 request nothing.
        language: Language code forwarded to ``search_page``.

    Returns:
        A single flat list holding the entries of every requested page.
    """
    results = []
    for page in range(1, pages + 1):
        results.extend(search_page(query, page, language))
    return results

In [163]:
# Scrape pages 1-30 for the query 'agriculture'; search_many issues one
# search_page call (one HTTP request) per page.
results = search_many('agriculture', 30)

In [229]:
# MeiliSearch client pointed at 127.0.0.1:7700. The cells below fail with a
# connection error when nothing is listening there (see the recorded
# "Connection refused" output).
client = meilisearch.Client('http://127.0.0.1:7700')

In [228]:
# Send the scraped entries to the 'eurlex' index.
# NOTE(review): `client` is created in the cell numbered In [229], after this
# cell's In [228]; on a fresh kernel, run the client cell first.
client.index('eurlex').add_documents(results)

MeiliSearchCommunicationError: MeiliSearchCommunicationError, HTTPConnectionPool(host='127.0.0.1', port=7700): Max retries exceeded with url: /indexes/eurlex/documents (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fd935fa2610>: Failed to establish a new connection: [Errno 111] Connection refused'))

In [225]:
# Read back documents from the 'eurlex' index, passing a limit of 2.
client.index('eurlex').get_documents({'limit':2})

[{'id': '02013R057520230628',
  'title': 'Consolidated text: Regulation (EU) No 575/2013 of the European Parliament and of the Council of 26 June 2013 on prudential requirements for credit institutions and amending Regulation (EU) No 648/2012 (Text with EEA relevance)Text with EEA relevance',
  'author': 'Not available',
  'date': '28/06/2023',
  'link': 'https://eur-lex.europa.eu/legal-content/AUTO/?uri=CELEX:02013R0575-20230628'},
 {'id': '02009L013820210630',
  'title': 'Consolidated text: Directive 2009/138/EC of the European Parliament and of the Council of 25 November 2009 on the taking-up and pursuit of the business of Insurance and Reinsurance (Solvency II) (recast) (Text with EEA relevance)Text with EEA relevance',
  'author': 'Not available',
  'date': '30/06/2021',
  'link': 'https://eur-lex.europa.eu/legal-content/AUTO/?uri=CELEX:02009L0138-20210630'}]

In [221]:
# Show the status of the updates recorded for the 'eurlex' index.
client.index('eurlex').get_all_update_status()

[{'status': 'processed',
  'updateId': 0,
  'type': {'name': 'DocumentsAddition', 'number': 300},
  'duration': 0.17600354,
  'enqueuedAt': '2021-06-01T09:52:10.435180921Z',
  'processedAt': '2021-06-01T09:52:10.616635283Z'}]

In [226]:
# Query the 'eurlex' index for 'agriculture'; the reply is a dict whose
# 'hits' key lists the matching documents.
client.index('eurlex').search('agriculture')

{'hits': [{'id': '32021D0762',
   'title': 'Commission Implementing Decision (EU) 2021/762 of 6 May 2021 concerning the extension of the action taken by the Irish Department of Agriculture, Food and the Marine permitting the making available on the market and use of biocidal products containing propan-2-ol for use as human hygiene products in accordance with Article 55(1) of Regulation (EU) No 528/2012 of the European Parliament and of the Council (notified under document C(2021) 3127) (Only the English text is authentic)',
   'author': 'European Commission, Directorate-General for Health and Food Safety',
   'date': '06/05/2021',
   'link': 'https://eur-lex.europa.eu/legal-content/AUTO/?uri=CELEX:32021D0762'},
  {'id': '32021D0735',
   'title': 'Commission Implementing Decision (EU) 2021/735 of 4 May 2021 concerning the extension of the action taken by the Irish Department of Agriculture, Food and the Marine permitting the making available on the market and use of the biocidal product

In [232]:
# Request a dump from the server; the reply carries the dump's uid and its
# current status.
client.create_dump()

{'uid': '20210601-102040441', 'status': 'in_progress'}

In [193]:
# Look up the status of a dump by uid.
# NOTE(review): the uid is hard-coded and is not the one shown in the recorded
# create_dump() output ('20210601-102040441'); replace it with the uid of the
# dump you actually want to check.
client.get_dump_status('20210601-092338997')

{'uid': '20210601-092338997', 'status': 'done'}