In [8]:
import boto3
import instructor
import os
from IPython.display import Markdown, display
from pydantic import BaseModel

In [6]:
# Install instructor together with its "bedrock" extra into the running kernel's environment.
# NOTE(review): this cell appears after the cell that does `import instructor`; on a fresh
# environment it has to run first (pip's output suggests a kernel restart may be needed).
# Consider moving it to the top of the notebook and pinning a version.
%pip install "instructor[bedrock]"

Note: you may need to restart the kernel to use updated packages.


In [13]:
# Low-level S3 client, used to download the source document.
s3 = boto3.client("s3")

# Bedrock runtime client, wrapped by instructor so that completions can be
# requested with a pydantic `response_model`.
bedrock_client = boto3.client("bedrock-runtime")
client = instructor.from_bedrock(bedrock_client)


# Structured-output schema passed to instructor as `response_model`.
# Field meanings below mirror the numbered list in the prompt built by getLawInformations.
# NOTE(review): documented with `#` comments rather than a docstring on purpose —
# presumably a class docstring would be included in the schema sent to the model; confirm
# before adding one.
class Law(BaseModel):
    # Countries where the regulation applies (the prompt asks for individual countries,
    # not groups). The spelling `countrys` is part of the schema and is used by the prompt.
    countrys: list[str]
    # Industries or business sectors mentioned in the text.
    sectors_of_activity: list[str]
    # Kind(s) of regulation, e.g. environmental, financial, privacy.
    regulation_types: list[str]
    # When the law or measure starts to apply; free-form strings, not parsed dates.
    date_of_application: list[str]
    # Specific actions, limits or obligations imposed, ideally with numbers and details.
    measures_imposed: list[str]


# Default S3 location of the document to analyse; both values are the defaults of
# getLawInformations and can be overridden per call.
BUCKET = "csv-file-store-ec51f700"
KEY = (
    "dzd-3lz7fcr1rwmmkw/5h6d6xccl72dn4/dev/data/directives/"
    "1.DIRECTIVE (UE) 20192161 DU PARLEMENT EUROPÉEN ET DU CONSEIL.html"
)


def getLawInformations(bucket: str = BUCKET, key: str = KEY) -> Law:
    """Download a document from S3 and request a ``Law`` extraction from the model.

    The object is read fully into memory, decoded as UTF-8 and appended verbatim
    to a fixed instruction prompt, which is sent as a single user message through
    the module-level instructor ``client``.

    Args:
        bucket: S3 bucket that holds the document. Defaults to ``BUCKET``.
        key: Object key of the document inside ``bucket``. Defaults to ``KEY``.

    Returns:
        The result of ``client.chat.completions.create`` called with
        ``response_model=Law``.
    """
    # Fetch the object and turn its bytes into text.
    s3_object = s3.get_object(Bucket=bucket, Key=key)
    raw_bytes = s3_object["Body"].read()
    law_text = raw_bytes.decode("utf-8")

    # Fixed instructions; the numbered items correspond to the fields of Law.
    instructions = (
        "Extract the following information from the text below, according to the schema:\n\n"
        "1. **countrys** – list of countries where the regulation applies, direct list of the countries affected, no groupes of countries\n"
        "2. **sectors_of_activity** – list of industries or business sectors mentioned\n"
        "3. **regulation_types** – type(s) of regulation (environmental, financial, privacy...)\n"
        "4. **date_of_application** – when the law or measure starts to apply\n"
        "5. **measures_imposed** – the specific actions, limits or obligations imposed, try to use numbers and details as much as possible\n\n"
        "Text to analyze:\n\n"
    )
    user_message = {"role": "user", "content": instructions + law_text}

    return client.chat.completions.create(
        modelId="global.anthropic.claude-haiku-4-5-20251001-v1:0",
        messages=[user_message],
        response_model=Law,
        inferenceConfig={"maxTokens": 64000},
    )


# Entry point: run the extraction with the default BUCKET/KEY and show the result.
if __name__ == "__main__":
    # Result stays bound to `law_info` at module level after this runs.
    law_info = getLawInformations()
    # Prints the plain-text repr of the returned object.
    print(law_info)

countrys=['European Union Member States'] sectors_of_activity=['Retail and e-commerce', 'Online marketplaces', 'Digital services', 'Consumer goods', 'Food and beverages', 'Entertainment and ticketing', 'Transportation', 'Energy supply', 'Real estate'] regulation_types=['Consumer Protection', 'Unfair Commercial Practices', 'Distance Selling', 'Consumer Rights', 'Price Transparency', 'Digital Markets', 'Data Protection'] date_of_application=['28 November 2021 - Adoption deadline for Member States', '28 May 2022 - Application date', '27 November 2019 - Directive adoption date'] measures_imposed=['Effective, proportionate and dissuasive sanctions for violations', 'Minimum fines of 4% of annual turnover for large-scale or Union-wide infringements', 'Minimum fines of 2 million euros when turnover data unavailable', 'Mandatory disclosure of ranking parameters on online marketplaces', 'Clear indication of paid advertising in search results', 'Right to withdrawal of 14 days (30 days in certain 