# Load Data

In [None]:
from bs4 import BeautifulSoup
import os

def extract_text_from_html(file_path):
    """Return the text of every ``<p>`` element in an HTML file.

    Args:
        file_path: Path of the HTML file; it is read as UTF-8.

    Returns:
        A single string holding the text of each paragraph, in the order
        the parser returns them, separated by newline characters.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        markup = handle.read()
    parsed = BeautifulSoup(markup, 'html.parser')
    # Only <p> tags are collected here; headings and other elements are skipped.
    return "\n".join(tag.get_text() for tag in parsed.find_all('p'))
# Short law identifier -> path of the English HTML file for that law.
file_paths = {
    'gdpr': '/kaggle/input/english-dataset/english_gdpr.html',
    'ai_act': '/kaggle/input/english-dataset/english_AI_act.html',
    'dma': '/kaggle/input/english-dataset/english_dma.html',
    'dsa': '/kaggle/input/english-dataset/english_dsa.html'
}

# Extract the paragraph text of every law once, keyed by the same identifier.
texts = {law: extract_text_from_html(path) for law, path in file_paths.items()}
# Sanity check: show the start of each extracted text.
for law, text in texts.items():
    print(f"First 1000 characters of {law.upper()}:\n{text[:1000]}\n")

# Chunk the Legal Documents for Processing and Set Up the Vector Database


In [None]:
from nltk.tokenize import sent_tokenize
from transformers import AutoTokenizer, AutoModel
import nltk

# Download the 'punkt' NLTK resource before sent_tokenize is used below.
# NOTE(review): newer NLTK releases may require the 'punkt_tab' resource instead — confirm against the installed version.
nltk.download('punkt')

# Shared tokenizer: the chunking helpers below use it to count tokens per sentence
# and to split over-long chunks.
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')

def chunk_text_based_on_tokens(text, max_tokens=300):
    """Greedily pack consecutive sentences of ``text`` into token-bounded chunks.

    The text is split into sentences with ``sent_tokenize``; each sentence is
    measured with the module-level ``tokenizer``. Sentences are appended to the
    current chunk until adding the next one would exceed ``max_tokens``, at
    which point the current chunk is closed and a new one is started.

    Args:
        text: The text to split.
        max_tokens: Maximum number of tokens per chunk. A single sentence that
            is longer than this limit still becomes a chunk of its own (it is
            not split here).

    Returns:
        A list of chunk strings, each made of whole sentences joined by a
        single space. No empty chunk is ever produced.
    """
    chunks = []
    current_chunk = []
    current_length = 0

    for sentence in sent_tokenize(text):
        sentence_length = len(tokenizer.tokenize(sentence))
        # Close the running chunk only when it actually holds something.
        # Without the emptiness check, a first sentence longer than max_tokens
        # would flush an empty chunk ("") into the result.
        if current_chunk and current_length + sentence_length > max_tokens:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_length = 0
        current_chunk.append(sentence)
        current_length += sentence_length

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    return chunks

def truncate_or_split_chunks(chunks, max_length=512):
    """Break up any chunk whose token count exceeds ``max_length``.

    Each chunk is encoded with the module-level ``tokenizer`` (without special
    tokens). Chunks within the limit are passed through unchanged; longer ones
    are cut into consecutive windows of ``max_length`` token ids and each
    window is decoded back to text.

    Args:
        chunks: Iterable of chunk strings.
        max_length: Maximum number of token ids per output chunk.

    Returns:
        A list of chunk strings in the original order, with over-long chunks
        replaced by their decoded pieces.
    """
    result = []
    for piece in chunks:
        token_ids = tokenizer.encode(piece, add_special_tokens=False)
        if len(token_ids) <= max_length:
            # Short enough: keep the original text untouched.
            result.append(piece)
            continue
        for start in range(0, len(token_ids), max_length):
            result.append(tokenizer.decode(token_ids[start:start + max_length]))
    return result

def extract_sections_articles_chapters(soup):
    """Group heading and paragraph text into sections delimited by headings.

    Walks the ``h1``/``h2``/``h3``/``p`` elements returned by
    ``soup.find_all``. Every heading starts a new section; the heading text
    and the text of the elements that follow it are joined with single spaces.
    Paragraphs that appear before the first heading form a section of their
    own.

    Args:
        soup: Parsed document exposing ``find_all``; each returned element
            must provide ``name`` and ``get_text()``.

    Returns:
        A list of section strings, in document order.
    """
    heading_tags = ('h1', 'h2', 'h3')
    grouped = []
    buffer = []
    for node in soup.find_all(['h1', 'h2', 'h3', 'p']):
        # A heading closes the section collected so far (if there is one).
        if node.name in heading_tags and buffer:
            grouped.append(" ".join(buffer))
            buffer = []
        buffer.append(node.get_text())
    if buffer:
        grouped.append(" ".join(buffer))
    return grouped

def load_and_process_html(file_path, max_tokens=300, max_length=512):
    """Read an HTML file and turn it into a flat list of text chunks.

    The file is parsed, split into heading-delimited sections, each section is
    packed into sentence-based chunks, and finally any chunk that is still too
    long is split again.

    Args:
        file_path: Path of the HTML file; it is read as UTF-8.
        max_tokens: Forwarded to ``chunk_text_based_on_tokens``.
        max_length: Forwarded to ``truncate_or_split_chunks``.

    Returns:
        The list of chunk strings produced by ``truncate_or_split_chunks``.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        markup = handle.read()
    soup = BeautifulSoup(markup, 'html.parser')
    sentence_chunks = [
        chunk
        for section in extract_sections_articles_chapters(soup)
        for chunk in chunk_text_based_on_tokens(section, max_tokens=max_tokens)
    ]
    return truncate_or_split_chunks(sentence_chunks, max_length=max_length)

# Install necessary Libraries

In [None]:
!pip install -U langchain-community
!pip install sentence_transformers

In [None]:
from langchain.embeddings import HuggingFaceBgeEmbeddings

# Name of the Hugging Face checkpoint used for both document and query embeddings.
# process_and_store_embeddings reads this module-level name.
model_name = "distilbert-base-uncased"
encode_kwargs = {'normalize_embeddings': True}
# Raw transformer model; encode_query uses it to embed queries.
model = AutoModel.from_pretrained(model_name)

# NOTE(review): this module-level embedder is not referenced by any code visible
# in this notebook section (embed_chunks builds its own instance) — confirm it is
# still needed. It also requires a CUDA device to be available.
model_norm = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs={'device': 'cuda'},
    encode_kwargs=encode_kwargs
)

# Embedders already built in this kernel session, keyed by model name, so the
# model weights are loaded once instead of once per embed_chunks call.
_EMBEDDER_CACHE = {}


def _get_embedder(model_name):
    """Return a cached, normalizing HuggingFaceBgeEmbeddings for ``model_name``."""
    if model_name not in _EMBEDDER_CACHE:
        import torch  # function-scope import: only needed to pick the device

        # Fall back to CPU so the notebook still runs on kernels without a GPU.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        _EMBEDDER_CACHE[model_name] = HuggingFaceBgeEmbeddings(
            model_name=model_name,
            model_kwargs={'device': device},
            encode_kwargs={'normalize_embeddings': True},
        )
    return _EMBEDDER_CACHE[model_name]


def embed_chunks(chunks, model_name):
    """Embed a list of text chunks with the given embedding model.

    Args:
        chunks: List of chunk strings to embed.
        model_name: Hugging Face model name passed to HuggingFaceBgeEmbeddings.

    Returns:
        The result of ``embed_documents(chunks)`` (embeddings are requested
        with ``normalize_embeddings=True``), or an empty list when ``chunks``
        is empty — in that case no model is loaded at all.
    """
    if not chunks:
        return []
    return _get_embedder(model_name).embed_documents(chunks)

In [None]:
!pip install elasticsearch torch transformers tqdm
from elasticsearch import Elasticsearch
from tqdm import tqdm
import torch
from transformers import AutoModel

In [None]:
from elasticsearch import Elasticsearch
import logging

# NOTE(review): DEBUG is very verbose for the HTTP client; consider INFO once the
# connection is known to work.
logging.basicConfig(level=logging.DEBUG)

# Credentials are read from the environment instead of being committed with the
# notebook. `os` is imported in the first code cell.
# The cloud id / password that were previously hard-coded here must be treated as
# leaked and rotated in the Elastic Cloud console.
cloud_id = os.environ.get("ES_CLOUD_ID")
es_username = os.environ.get("ES_USERNAME", "elastic")
es_password = os.environ.get("ES_PASSWORD")
if not cloud_id or not es_password:
    raise RuntimeError(
        "Set the ES_CLOUD_ID and ES_PASSWORD environment variables "
        "(and optionally ES_USERNAME) before running this cell."
    )

# Shared client used by the indexing and query helpers below.
es = Elasticsearch(
    cloud_id=cloud_id,
    basic_auth=(es_username, es_password)
)

if es.ping():
    print("Connected to Elasticsearch!")
else:
    print("Could not connect to Elasticsearch.")

# Retrieve the most relevant chunk for each law using a specific test query

In [None]:
# Per-law configuration, keyed by a short law identifier:
#   'file_path'       - HTML source file that gets chunked and embedded,
#   'collection_name' - passed as the Elasticsearch index name when storing and querying,
#   'query'           - the test question used to retrieve the most relevant chunk.
laws_info = {
    'gdpr': {
        'file_path': '/kaggle/input/english-dataset/english_gdpr.html',
        'collection_name': 'embeddings_gdpr',
        'query': "What are the key considerations for Member States when reconciling the right to freedom of expression and information with the right to the protection of personal data under this Regulation, and how should exemptions and derogations be applied in this context?"
    },
    'ai_act': {
        'file_path': '/kaggle/input/english-dataset/english_AI_act.html',
        'collection_name': 'embeddings_ai_act',
        'query': "What are the implications of the proposed Regulation on the placement and use of high-risk AI systems with respect to existing Union laws, particularly in areas such as data protection, consumer rights, employment, and national labor laws?"
    },
    # NOTE(review): this query talks about "very large online platforms and search
    # engines", which reads like DSA rather than DMA subject matter — confirm it is
    # the intended test question for the DMA index.
    'dma': {
        'file_path': '/kaggle/input/english-dataset/english_dma.html',
        'collection_name': 'embeddings_dma',
        'query': "What are the key steps and responsibilities of the Commission in addressing and remedying infringements by very large online platforms and search engines according to the text provided?"
    },
    'dsa': {
        'file_path': '/kaggle/input/english-dataset/english_dsa.html',
        'collection_name': 'embeddings_dsa',
        'query': "What distinguishes online platforms from other providers of hosting services according to the regulation, and why are cloud computing and web-hosting services generally not considered online platforms?"
    }
}

def process_and_store_embeddings(file_path, index_name):
    """Chunk one HTML file, embed the chunks and index them in Elasticsearch.

    Args:
        file_path: Path of the HTML file to process.
        index_name: Name of the Elasticsearch index that receives the chunks.
    """
    # Embeddings are computed with the module-level ``model_name``.
    law_chunks = load_and_process_html(file_path)
    chunk_vectors = embed_chunks(law_chunks, model_name)
    store_embeddings_in_es(index_name, law_chunks, chunk_vectors)
def store_embeddings_in_es(index_name, chunks, embeddings):
    """Create the vector index (if needed) and index every chunk with its embedding.

    Uses the module-level ``es`` client. Documents get the ids ``0..n-1`` in
    chunk order, so re-running for the same index overwrites the same ids.

    Args:
        index_name: Name of the Elasticsearch index to create and fill.
        chunks: List of chunk strings.
        embeddings: Sequence of embedding vectors, parallel to ``chunks``.
            The mapping declares 768 dimensions, so vectors must have that size.

    Raises:
        elasticsearch.helpers.BulkIndexError: if any document fails to index.
    """
    # Function-scope import: the bulk helper is only needed here.
    from elasticsearch import helpers

    # HTTP 400 is ignored so that re-running against an index that already
    # exists does not abort. ``options(ignore_status=...)`` replaces the
    # deprecated per-request ``ignore=400``, and ``mappings=`` replaces ``body=``.
    es.options(ignore_status=400).indices.create(
        index=index_name,
        mappings={
            "properties": {
                "chunk": {"type": "text"},
                "embedding": {
                    "type": "dense_vector",
                    "dims": 768,
                    "index": True,
                    "similarity": "cosine",
                    "index_options": {
                        "type": "int8_hnsw",
                        "m": 16,
                        "ef_construction": 100
                    }
                }
            }
        },
    )

    # One bulk action per chunk; sent in batches instead of one request each.
    actions = (
        {
            "_index": index_name,
            "_id": i,
            "_source": {"chunk": chunk, "embedding": embedding},
        }
        for i, (chunk, embedding) in enumerate(zip(chunks, embeddings))
    )
    # ``total`` lets tqdm show real progress; a bare generator has no length.
    helpers.bulk(es, tqdm(actions, total=len(chunks)))
# Chunk, embed and index every configured law into its own index.
for law, info in laws_info.items():
    print(f"Processing and indexing {law}...")
    try:
        process_and_store_embeddings(info['file_path'], info['collection_name'])
    except Exception as e:
        # Best effort: a failure for one law is reported and the loop continues
        # with the next one. NOTE(review): later query cells will then run
        # against a missing or partial index for that law.
        print(f"Error processing {law}: {e}")

In [None]:
def encode_query(query):
    """Embed a query string with the module-level ``tokenizer`` and ``model``.

    The query is tokenized (with truncation), run through the model without
    gradient tracking, and the last hidden states are averaged over the token
    dimension.

    Args:
        query: The query text.

    Returns:
        The pooled embedding converted with ``tolist()``.
    """
    encoded = tokenizer(query, return_tensors='pt', padding=True, truncation=True)
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
        # Mean over dim 1 (tokens), then drop the size-1 dimensions.
        pooled = hidden_states.mean(dim=1).squeeze()
        vector = pooled.tolist()
    return vector

def query_es(index_name, query_embedding, top_k=10):
    """Return the text of the ``top_k`` chunks most similar to a query vector.

    Scores every document of the index with a ``script_score`` query that
    computes the cosine similarity between ``query_embedding`` and the stored
    ``embedding`` field, using the module-level ``es`` client.

    Args:
        index_name: Elasticsearch index to search.
        query_embedding: Query vector as a list of floats; it must have the
            same dimensionality as the indexed ``embedding`` field.
        top_k: Maximum number of hits to return.

    Returns:
        A list with the ``chunk`` text of each hit, best match first.
    """
    search_query = {
        "size": top_k,
        "query": {
            "script_score": {
                "query": {"match_all": {}},
                "script": {
                    # "+ 1.0" shifts cosine similarity from [-1, 1] to [0, 2];
                    # script_score does not allow negative scores.
                    "source": "cosineSimilarity(params.query_vector, 'embedding') + 1.0",
                    "params": {"query_vector": query_embedding}
                }
            }
        }
    }

    response = es.search(index=index_name, body=search_query)
    return [hit["_source"]["chunk"] for hit in response['hits']['hits']]

In [None]:
# Smoke test of retrieval: for each law, embed its test query and print the
# single best-matching chunk from that law's index.
for law, info in laws_info.items():
    print(f"\nQuerying {law.upper()} collection:")
    query_embedding = encode_query(info['query'])
    results = query_es(info['collection_name'], query_embedding, top_k=1)
    if results:
        print(f"Retrieved chunk from {law.upper()}:")
        print(results[0])
    else:
        print(f"No results found for {law.upper()}.")

# Measuring the relevance of the retrieved texts to the reference answers

In [None]:
from transformers import AutoTokenizer, AutoModel
from sentence_transformers import SentenceTransformer, util
import torch
import numpy as np

# Rebinds the module-level `tokenizer` and `model` to the same
# "distilbert-base-uncased" checkpoint that earlier cells already loaded.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = AutoModel.from_pretrained("distilbert-base-uncased")

# Separate sentence-embedding model, passed to calculate_semantic_similarity below.
semantic_model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')

def generate_bert_embedding(text, tokenizer, model):
    """Embed ``text`` as the hidden state of its first token.

    Args:
        text: Text to embed.
        tokenizer: Callable that accepts the text plus ``return_tensors``,
            ``truncation`` and ``padding`` keywords and returns a mapping of
            model inputs.
        model: Callable that accepts those inputs as keyword arguments and
            returns an object with a ``last_hidden_state`` tensor.

    Returns:
        A NumPy array holding ``last_hidden_state[:, 0, :]``, i.e. the vector
        at position 0 of the sequence for every item in the batch.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
    # Position 0 along the sequence dimension is used as the text embedding.
    first_token_vectors = hidden_states[:, 0, :]
    return first_token_vectors.numpy()

def calculate_cosine_similarity(reference_embedding, retrieved_embedding):
    """Cosine similarity between two embedding arrays, as a Python scalar.

    Args:
        reference_embedding: NumPy array holding the reference vector.
        retrieved_embedding: NumPy array holding the retrieved vector; its
            transpose is used in the dot product.

    Returns:
        The dot product divided by the product of the two norms, extracted
        with ``.item()`` (so the result must contain exactly one element).
    """
    dot_product = np.dot(reference_embedding, retrieved_embedding.T)
    norm_product = np.linalg.norm(reference_embedding) * np.linalg.norm(retrieved_embedding)
    return (dot_product / norm_product).item()

def calculate_semantic_similarity(reference_text, retrieved_text, model):
    """Cosine similarity between two texts under a sentence-embedding model.

    Args:
        reference_text: The reference (expected) text.
        retrieved_text: The retrieved text to compare against it.
        model: Object with an ``encode(text, convert_to_tensor=True)`` method.

    Returns:
        The value of ``util.pytorch_cos_sim`` for the two encodings, extracted
        with ``.item()``.
    """
    # Reference text is encoded first, then the retrieved text.
    reference_vec, retrieved_vec = (
        model.encode(text, convert_to_tensor=True)
        for text in (reference_text, retrieved_text)
    )
    return util.pytorch_cos_sim(reference_vec, retrieved_vec).item()

# Reference ("gold") passage for each law's test query in laws_info, keyed by the
# same law identifier. The evaluation loop compares the retrieved chunk against it.
# The 'ai_act' and 'dsa' entries are written as adjacent string literals: the
# original single literals contained raw line breaks, which is a SyntaxError.
# NOTE(review): the 'dma' passage discusses very large online platforms and search
# engines, which reads like DSA text — confirm it is the intended DMA reference.
reference_answers = {
    'gdpr': "Member States law should reconcile the rules governing freedom of expression and information, including journalistic, academic, artistic and or literary expression with the right to the protection of personal data pursuant to this Regulation. The processing of personal data solely for journalistic purposes, or for the purposes of academic, artistic or literary expression should be subject to derogations or exemptions from certain provisions of this Regulation if necessary to reconcile the right to the protection of personal data with the right to freedom of expression and information, as enshrined in Article 11 of the Charter. This should apply in particular to the processing of personal data in the audiovisual field and in news archives and press libraries. Therefore, Member States should adopt legislative measures which lay down the exemptions and derogations necessary for the purpose of balancing those fundamental rights. Member States should adopt such exemptions and derogations on general principles, the rights of the data subject, the controller and the processor, the transfer of personal data to third countries or international organisations, the independent supervisory authorities, cooperation and consistency, and specific data-processing situations. Where such exemptions or derogations differ from one Member State to another, the law of the Member State to which the controller is subject should apply. In order to take account of the importance of the right to freedom of expression in every democratic society, it is necessary to interpret notions relating to that freedom, such as journalism, broadly.",
    'ai_act': (
        "Harmonised rules applicable to the placing on the market, the putting into service and the use of high-risk AI systems should be laid down consistently with Regulation (EC) No 765/2008 of the European Parliament and of the Council (7), Decision No 768/2008/EC of the European Parliament and of the Council (8) and Regulation (EU) 2019/1020 of the European Parliament and of the Council (9) (New Legislative Framework). The harmonised rules laid down in this Regulation should apply across sectors and, in line with the New Legislative Framework, should be without prejudice to existing Union law, in particular on data protection, consumer protection, fundamental rights, employment, and protection of workers, and product safety, to which this Regulation is complementary. As a consequence, all rights and remedies provided for by such Union law to consumers, and other persons on whom AI systems may have a negative impact, including as regards the compensation of possible damages pursuant to Council Directive 85/374/EEC (10) remain unaffected and fully applicable. Furthermore, in the context of employment and protection of workers, this Regulation should therefore not affect Union law on social policy and national labour law, in compliance with Union law, concerning employment and working conditions, including health and safety at work and the relationship between employers and workers. This Regulation should also not affect the exercise of fundamental rights as recognised in the Member States and at Union level, including the right or freedom to strike or to take other action covered by the specific industrial relations systems in Member States as well as the right to negotiate, to conclude and enforce collective agreements or to take collective action in accordance with national law. "
        "This Regulation should not affect the provisions aiming to improve working conditions in platform work laid down in a Directive of the European Parliament and of the Council on improving working conditions in platform work. Moreover, this Regulation aims to strengthen the effectiveness of such existing rights and remedies by establishing specific requirements and obligations, including in respect of the transparency, technical documentation and record-keeping of AI systems. Furthermore, the obligations placed on various operators involved in the AI value chain under this Regulation should apply without prejudice to national law, in compliance with Union law, having the effect of limiting the use of certain AI systems where such law falls outside the scope of this Regulation or pursues legitimate public interest objectives other than those pursued by this Regulation. For example, national labour law and law on the protection of minors, namely persons below the age of 18, taking into account the UNCRC General Comment No 25 (2021) on children’s rights in relation to the digital environment, insofar as they are not specific to AI systems and pursue other legitimate public interest objectives, should not be affected by this Regulation."
    ),
    'dma': "Given the potential significant societal effects of an infringement of the additional obligations to manage systemic risks that solely apply to very large online platforms and very large online search engines and in order to address those public policy concerns, it is necessary to provide for a system of enhanced supervision of any action undertaken to effectively terminate and remedy infringements of this Regulation. Therefore, once an infringement of one of the provisions of this Regulation that solely apply to very large online platforms or very large online search engines has been ascertained and, where necessary, sanctioned, the Commission should request the provider of such platform or of such search engine to draw a detailed action plan to remedy any effect of the infringement for the future and communicate such action plan within a timeline set by the Commission, to the Digital Services Coordinators, the Commission and the Board. The Commission, taking into account the opinion of the Board, should establish whether the measures included in the action plan are sufficient to address the infringement, taking also into account whether adherence to relevant code of conduct is included among the measures proposed. The Commission should also monitor any subsequent measure taken by the provider of a very large online platform or of a very large online search engine concerned as set out in its action plan, taking into account also an independent audit of the provider. If following the implementation of the action plan the Commission still considers that the infringement has not been fully remedied, or if the action plan has not been provided or is not considered suitable, it should be able to use any investigative or enforcement powers pursuant to this Regulation, including the power to impose periodic penalty payments and initiating the procedure to disable access to the infringing service.",
    'dsa': (
        "Considering the particular characteristics of the services concerned and the corresponding need to make the providers thereof subject to certain specific obligations, it is necessary to distinguish, within the broader category of providers of hosting services as defined in this Regulation, the subcategory of online platforms. Online platforms, such as social networks or online platforms allowing consumers to conclude distance contracts with traders, should be defined as providers of hosting services that not only store information provided by the recipients of the service at their request, but that also disseminate that information to the public at the request of the recipients of the service. However, in order to avoid imposing overly broad obligations, providers of hosting services should not be considered as online platforms where the dissemination to the public is merely a minor and purely ancillary feature that is intrinsically linked to another service, or a minor functionality of the principal service, and that feature or functionality cannot, for objective technical reasons, be used without that other or principal service, and the integration of that feature or functionality is not a means to circumvent the applicability of the rules of this Regulation applicable to online platforms. For example, the comments section in an online newspaper could constitute such a feature, where it is clear that it is ancillary to the main service represented by the publication of news under the editorial responsibility of the publisher. In contrast, the storage of comments in a social network should be considered an online platform service where it is clear that it is not a minor feature of the service offered, even if it is ancillary to publishing the posts of recipients of the service. "
        "For the purposes of this Regulation, cloud computing or web-hosting services should not be considered to be an online platform where dissemination of specific information to the public constitutes a minor and ancillary feature or a minor functionality of such services.Moreover, cloud computing services and web-hosting services, when serving as infrastructure, such as the underlying infrastructural storage and computing services of an internet-based application, website or online platform, should not in themselves be considered as disseminating to the public information stored or processed at the request of a recipient of the application, website or online platform which they host."
    )
}

# One result record per law for which a chunk was retrieved.
similarities = []

# For each law: retrieve the best chunk for its test query, then score it against
# the reference answer with two measures.
for law, info in laws_info.items():
    print(f"\nQuerying {law.upper()} collection:")
    
    query_embedding = encode_query(info['query'])
    retrieved_chunks = query_es(info['collection_name'], query_embedding, top_k=1)
    
    if retrieved_chunks:
        retrieved_text = retrieved_chunks[0]         
        # Measure 1: cosine similarity of the first-token embeddings from `model`.
        retrieved_embedding = generate_bert_embedding(retrieved_text, tokenizer, model)
        reference_embedding = generate_bert_embedding(reference_answers[law], tokenizer, model)
        cosine_sim = calculate_cosine_similarity(reference_embedding, retrieved_embedding)        
        # Measure 2: cosine similarity of the `semantic_model` sentence embeddings.
        semantic_sim = calculate_semantic_similarity(reference_answers[law], retrieved_text, semantic_model)

        similarities.append({
            'law': law,
            'retrieved_answer': retrieved_text,
            'cosine_similarity': cosine_sim,
            'semantic_similarity': semantic_sim
        })

        print(f"Retrieved chunk from {law.upper()}")
        print(retrieved_text)
        print(f"Cosine Similarity with reference answer: {cosine_sim:.4f}")
        print(f"Semantic Similarity with reference answer: {semantic_sim:.4f}")
        print("----\n")
    else:
        print(f"No valid results found for {law.upper()} in the query.")

# Setting 20 questions and their answers for each law, making embeddings of them, and summarizing the most relevant chunks that have been retrieved

In [None]:
from collections import defaultdict
from transformers import pipeline
import numpy as np
from elasticsearch import Elasticsearch
import torch
import time

integrated_questions_answers = [
    # Question 1 from GDPR
    {
        'law': 'gdpr',
        'question': "What is the fundamental right regarding the processing of personal data as per the Charter of Fundamental Rights of the European Union?",
        'answer': "The protection of natural persons in relation to the processing of personal data is a fundamental right. Article 8(1) of the Charter of Fundamental Rights of the European Union (‘the Charter’) and Article 16(1) of the Treaty on the Functioning of the European Union (TFEU) provide that everyone has the right to the protection of personal data concerning them. This Regulation is intended to contribute to the accomplishment of an area of freedom, security, and justice and of an economic union, to economic and social progress, to the strengthening and the convergence of the economies within the internal market, and to the well-being of natural persons."
    },
    # Question 1 from AI Act
    {
        'law': 'ai_act',
        'question': "What are the main objectives of the AI Act concerning the development and use of AI in the European Union?",
        'answer': "The AI Act aims to ensure that AI systems placed on the market and used in the Union are safe, respect existing law on fundamental rights and Union values, and do not undermine fundamental rights. The Act aims to establish a legal framework that addresses the risks posed by AI, in particular high-risk AI systems, and aims to enhance transparency, accountability, and trust in AI while promoting innovation and competitiveness."
    },
    
    # Question 1 from DMA
    {
        'law': 'dma',
        'question': "What criteria are used to define a 'gatekeeper' under the Digital Markets Act?",
        'answer': "A gatekeeper under the DMA is defined as a provider of core platform services that has a significant impact on the internal market, serves as an important gateway for business users to reach end users, and enjoys an entrenched and durable position in the market. The criteria include having a strong economic position, a large number of users, and control over an ecosystem that is difficult for other companies to contest."
    },
    # Question 1 from DSA
    {
        'law': 'dsa',
        'question': "What are the main responsibilities of online platforms under the Digital Services Act?",
        'answer': "Under the DSA, online platforms are responsible for taking effective measures to mitigate risks related to illegal content, ensure the safety of users, and protect fundamental rights. Platforms must implement mechanisms for reporting and removing illegal content, provide users with clear terms and conditions, and establish processes for handling complaints and appeals. Platforms that reach a significant number of users are also required to assess and mitigate systemic risks, such as the spread of disinformation and harmful content."
    },
    # Question 2 from GDPR
    {
        'law': 'gdpr',
        'question': "How does GDPR aim to balance the right to the protection of personal data with other fundamental rights?",
        'answer': "This Regulation respects all fundamental rights and observes the freedoms and principles recognized in the Charter as enshrined in the Treaties, in particular the respect for private and family life, home and communications, the protection of personal data, freedom of thought, conscience and religion, freedom of expression and information, freedom to conduct a business, the right to an effective remedy and to a fair trial, and cultural, religious and linguistic diversity. The right to the protection of personal data must be considered in relation to its function in society and be balanced against other fundamental rights, in accordance with the principle of proportionality."
    },
    # Question 2 from AI Act
    {
        'law': 'ai_act',
        'question': "How does the AI Act propose to regulate high-risk AI systems?",
        'answer': "The AI Act classifies AI systems based on the risk they pose and subjects high-risk AI systems to strict requirements. High-risk AI systems include those used in critical infrastructure, education, employment, essential public and private services, law enforcement, and migration, asylum, and border control management. These systems must comply with requirements related to risk management, data governance, technical documentation, record-keeping, transparency, provision of information to users, human oversight, accuracy, and robustness. Providers of these systems must establish a quality management system and ensure continuous monitoring and post-market surveillance."
    },
    # Question 2 from DMA
    {
        'law': 'dma',
        'question': "How does the DMA propose to regulate the behavior of gatekeepers in digital markets?",
        'answer': "The DMA imposes specific obligations on gatekeepers to prevent them from engaging in unfair practices that harm competition and consumers. This includes prohibiting gatekeepers from favoring their own services over those of competitors (self-preferencing), requiring them to allow interoperability with third-party services, and ensuring that they do not unfairly limit access to their platforms. Gatekeepers are also required to provide data portability, offer fair terms to business users, and ensure transparency in their operations."
    },
    # Question 2 from DSA
    {
        'law': 'dsa',
        'question': "How does the DSA aim to protect users from illegal content on digital platforms?",
        'answer': "The DSA aims to protect users from illegal content by requiring platforms to implement notice-and-action mechanisms, allowing users to report illegal content easily. Platforms must act expeditiously to remove or disable access to illegal content upon receiving a notice. The DSA also introduces obligations for platforms to cooperate with law enforcement and provide transparency reports on their content moderation activities. Platforms must take proactive measures to prevent the spread of illegal content and ensure that their algorithms do not promote harmful or illegal content."
    },
    # Question 3 from GDPR
    {
        'law': 'gdpr',
        'question': "What challenges have arisen due to technological developments and globalization in the context of personal data protection?",
        'answer': "Technological developments and globalization have brought new challenges for the protection of personal data. The scale of the collection and sharing of personal data has increased significantly. Technology allows both private companies and public authorities to make use of personal data on an unprecedented scale in order to pursue their activities. Natural persons increasingly make personal information available publicly and globally. Technology has transformed both the economy and social life, and should further facilitate the free flow of personal data within the Union and the transfer to third countries and international organizations, while ensuring a high level of the protection of personal data."
    },
    # Question 3 from AI Act
    {
        'law': 'ai_act',
        'question': "What responsibilities does the AI Act place on AI providers to ensure ethical AI practices?",
        'answer': "Providers of high-risk AI systems are responsible for ensuring that their systems comply with the requirements set out in the Act. This includes the obligation to conduct a conformity assessment before placing the system on the market, ensure the system undergoes proper testing, provide clear instructions and information to users, implement human oversight measures, and monitor the system throughout its lifecycle. Providers must also report serious incidents and malfunctions to the authorities."
    },
    # Question 3 from DMA
    {
        'law': 'dma',
        'question': "What are the key obligations imposed on gatekeepers by the DMA?",
        'answer': "The key obligations for gatekeepers under the DMA include prohibitions on combining personal data from different sources without user consent, restrictions on pre-installing software or apps, and requirements to allow business users access to data generated on their platform. Gatekeepers must also ensure that their platforms are open and interoperable with third-party services, and they are prohibited from using non-public data from their business users to compete against them."
    },
    # Question 3 from DSA
    {
        'law': 'dsa',
        'question': "What transparency requirements are imposed on online platforms by the DSA?",
        'answer': "The DSA imposes extensive transparency requirements on online platforms, including the obligation to publish transparency reports detailing the number of content removal actions, the reasons for these actions, and the outcomes of user appeals. Platforms must also disclose how their content moderation systems and recommendation algorithms work, including the criteria used to rank and display content. Users must be informed about the terms and conditions governing the use of the platform and any changes made to these terms. Additionally, platforms must provide clear information about the advertising they serve, including the identity of advertisers and the targeting criteria used."
    },
    # Question 4 from GDPR
    {
        'law': 'gdpr',
        'question': "How does the GDPR address the transfer of personal data to third countries or international organizations?",
        'answer': "The transfer of personal data to third countries or international organizations is allowed only where the conditions laid down in this Regulation are met, in order to ensure that the level of protection of natural persons guaranteed by this Regulation is not undermined. In any event, transfers to third countries and international organizations may only be carried out in full compliance with this Regulation. This Regulation is without prejudice to international agreements concluded between the Union and third countries regulating the transfer of personal data, including appropriate safeguards for the data subjects."
    },
    # Question 4 from AI Act
    {
        'law': 'ai_act',
        'question': "How does the AI Act address transparency and accountability in AI systems?",
        'answer': "The AI Act mandates that AI systems, particularly high-risk ones, must be transparent and provide clear information about their purpose, capabilities, and limitations. Users should be able to understand how decisions are made by AI systems and what data is being processed. The Act requires that AI systems be designed with features that ensure accountability, including auditability, traceability of decisions, and the ability to provide explanations for decisions made by the AI."
    },
    # Question 4 from DMA
    {
        'law': 'dma',
        'question': "How does the DMA aim to prevent unfair practices in the digital market?",
        'answer': "The DMA aims to prevent unfair practices by setting out clear rules for gatekeepers, including prohibitions on self-preferencing, restrictions on unfair terms and conditions for business users, and requirements for transparency in how they operate. The DMA also ensures that gatekeepers cannot use their dominant position to stifle competition or innovation by smaller firms. The European Commission is empowered to investigate and sanction gatekeepers that do not comply with these rules."
    },
    # Question 4 from DSA
    {
        'law': 'dsa',
        'question': "How does the DSA propose to handle the dissemination of harmful content?",
        'answer': "The DSA proposes to handle the dissemination of harmful content by requiring platforms to assess the risks associated with the dissemination of harmful or illegal content and to take appropriate measures to mitigate these risks. Platforms must implement safeguards to ensure that their algorithms do not promote harmful content, and they must provide users with tools to control the content they are exposed to. The DSA also encourages platforms to cooperate with trusted flaggers and fact-checkers to identify and address harmful content more effectively. In cases where platforms fail to mitigate risks adequately, they may be subject to regulatory action, including fines and other penalties."
    },
    # Question 5 from GDPR
    {
        'law': 'gdpr',
        'question': "What specific protections does GDPR offer to children regarding their personal data?",
        'answer': "Children merit specific protection with regard to their personal data, as they may be less aware of the risks, consequences, safeguards, and rights in relation to the processing of personal data. Such specific protection should, in particular, apply to the use of personal data of children for the purposes of marketing or creating personality or user profiles and the collection of personal data with regard to children when using services offered directly to a child. The consent of the holder of parental responsibility should not be necessary in the context of preventive or counselling services offered directly to a child."
    },
    # Question 5 from AI Act
    {
        'law': 'ai_act',
        'question': "What measures are suggested by the AI Act to protect fundamental rights in the deployment of AI technologies?",
        'answer': "The AI Act incorporates several measures to protect fundamental rights, such as requiring AI systems to be designed and used in a manner that is consistent with respect for human dignity, privacy, non-discrimination, and other fundamental rights. This includes embedding human oversight mechanisms, ensuring that AI systems do not lead to biased or discriminatory outcomes, and providing avenues for individuals to contest decisions made by AI systems that affect them significantly. The Act also promotes the development of codes of conduct and voluntary measures by providers to ensure that AI is used ethically and in alignment with societal values."
    },
    # Question 5 from DMA
    {
        'law': 'dma',
        'question': "What enforcement mechanisms are included in the DMA to ensure compliance by gatekeepers?",
        'answer': "The DMA includes robust enforcement mechanisms, such as the ability for the European Commission to impose fines of up to 10% of the gatekeeper’s total worldwide annual turnover for non-compliance. In cases of repeated infringements, the Commission can impose additional penalties, including structural remedies, such as the divestiture of businesses. The DMA also allows for periodic penalty payments to ensure that gatekeepers comply with the obligations and prohibitions set out in the regulation."
    },
    # Question 5 from DSA
    {
        'law': 'dsa',
        'question': "What measures does the DSA include to protect freedom of expression while combating illegal content?",
        'answer': "The DSA includes measures to protect freedom of expression by ensuring that any restrictions on content are necessary, proportionate, and legally justified. Platforms must provide users with clear explanations when content is removed or access is restricted, and users must have the right to appeal such decisions. The DSA also requires platforms to ensure that content moderation processes are fair and transparent, with safeguards in place to prevent the arbitrary removal of content. In addition, the DSA encourages platforms to develop codes of conduct in collaboration with stakeholders to balance the need to combat illegal content with the protection of free speech."
    },
    # Question 6 from GDPR
    {
        'law': 'gdpr',
        'question': "How does the GDPR define personal data, and what are some examples?",
        'answer': "Personal data under the GDPR is defined as any information relating to an identified or identifiable natural person (‘data subject’). Examples include a person’s name, identification number, location data, online identifier, or one or more factors specific to the physical, physiological, genetic, mental, economic, cultural, or social identity of that natural person. The definition is broad, capturing various forms of data that could be used to directly or indirectly identify an individual."
    },
    # Question 6 from AI Act
    {
        'law': 'ai_act',
        'question': "What categories of AI systems are considered high-risk under the AI Act?",
        'answer': "High-risk AI systems under the AI Act include those used in critical infrastructure (such as transport, energy, and water supply), educational and vocational training, employment and worker management, access to essential private and public services (such as credit scoring and social benefits), law enforcement (such as predictive policing), migration, asylum, and border control management, and administration of justice and democratic processes. These systems are subject to stringent requirements due to the significant risks they pose to fundamental rights and safety."
    },
    # Question 6 from DMA
    {
        'law': 'dma',
        'question': "How does the DMA address the issue of self-preferencing by gatekeepers?",
        'answer': "The DMA specifically prohibits gatekeepers from engaging in self-preferencing practices, where they favor their own products or services over those of competitors on their platforms. This includes practices such as ranking their own products higher in search results or giving preferential access to data. The aim is to ensure a level playing field in digital markets, where competition is based on merit rather than the market power of the gatekeeper. The prohibition on self-preferencing is one of the key obligations imposed on gatekeepers to prevent anti-competitive behavior."
    },
    # Question 6 from DSA
    {
        'law': 'dsa',
        'question': "How does the DSA address the issue of content moderation on online platforms?",
        'answer': "The DSA requires online platforms to implement content moderation policies that are transparent, consistent, and aligned with fundamental rights. Platforms must establish clear terms and conditions for content moderation and provide users with detailed information on how content is assessed, removed, or restricted. The DSA also mandates that platforms implement mechanisms for users to appeal content moderation decisions, ensuring that users have the opportunity to contest unjustified removals or restrictions. These measures aim to create a fair and accountable content moderation system that respects freedom of expression while combating illegal content."
    },
    # Question 7 from GDPR
    {
        'law': 'gdpr',
        'question': "What is the legal basis for processing personal data under the GDPR?",
        'answer': "The GDPR outlines several legal bases for processing personal data, including: the data subject has given consent to the processing; processing is necessary for the performance of a contract to which the data subject is a party; processing is necessary for compliance with a legal obligation; processing is necessary to protect the vital interests of the data subject or another natural person; processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority; and processing is necessary for the purposes of the legitimate interests pursued by the controller or a third party, except where such interests are overridden by the interests or fundamental rights and freedoms of the data subject."
    },
    # Question 7 from AI Act
    {
        'law': 'ai_act',
        'question': "How does the AI Act define 'AI system' and what technologies fall under this definition?",
        'answer': "The AI Act defines an 'AI system' as software that is developed with one or more of the techniques and approaches listed in the Act, such as machine learning, logic- and knowledge-based approaches, and statistical approaches. These systems can, for a given set of human-defined objectives, generate outputs such as content, predictions, recommendations, or decisions influencing the environments they interact with. The definition is broad and includes a variety of AI technologies, from simple algorithms to complex machine learning models."
    },
    # Question 7 from DMA
    {
        'law': 'dma',
        'question': "What are the criteria for identifying core platform services under the DMA?",
        'answer': "Core platform services under the DMA include a range of digital services that serve as important gateways for business users to reach end users. These services include online intermediation services, such as app stores and marketplaces, online search engines, social networking services, video-sharing platform services, number-independent interpersonal communication services, operating systems, cloud computing services, and advertising services. A service is considered a core platform service if it has a significant impact on the internal market and is an essential gateway for business users to access end users."
    },
    # Question 7 from DSA
    {
        'law': 'dsa',
        'question': "What obligations do very large online platforms (VLOPs) have under the DSA?",
        'answer': "VLOPs, defined as platforms with more than 45 million users in the EU, have additional obligations under the DSA due to their significant impact on society and public discourse. VLOPs must conduct annual risk assessments to identify and mitigate systemic risks, such as the dissemination of illegal content, disinformation, and harmful content. They are also required to provide greater transparency in their content recommendation algorithms, offer users more control over the content they see, and cooperate with authorities to prevent and address systemic risks. These obligations are intended to ensure that VLOPs operate in a manner that is safe, transparent, and respectful of fundamental rights."
    },
    # Question 8 from GDPR
    {
        'law': 'gdpr',
        'question': "What are the rights of data subjects under the GDPR?",
        'answer': "The GDPR grants data subjects several rights, including the right to be informed, the right of access, the right to rectification, the right to erasure (‘right to be forgotten’), the right to restrict processing, the right to data portability, the right to object to processing, and rights in relation to automated decision-making and profiling. These rights empower individuals to have control over their personal data and ensure transparency and accountability in data processing."
    },
    # Question 8 from AI Act
    {
        'law': 'ai_act',
        'question': "What obligations do users of high-risk AI systems have under the AI Act?",
        'answer': "Users of high-risk AI systems are required to operate the systems in accordance with the instructions provided by the AI system provider, monitor the operation of the AI system, and promptly report any serious incidents or malfunctions to the provider and the competent authorities. Users must also keep logs generated by the AI system, ensure that human oversight is maintained, and ensure that the AI system is used only for its intended purpose. Additionally, users are responsible for implementing measures to mitigate risks to fundamental rights and safety."
    },
    # Question 8 from DMA
    {
        'law': 'dma',
        'question': "How does the DMA promote interoperability between digital services?",
        'answer': "The DMA promotes interoperability by requiring gatekeepers to ensure that their core platform services can interact with third-party services. This includes making available the necessary technical interfaces and documentation to allow for interoperability. The goal is to prevent gatekeepers from locking in users and business users to their platforms and to enable competition by allowing new entrants and smaller competitors to offer complementary or competing services. Interoperability is seen as a key measure to promote innovation and consumer choice in digital markets."
    },
    # Question 8 from DSA
    {
        'law': 'dsa',
        'question': "How does the DSA enhance the protection of minors online?",
        'answer': "The DSA includes specific provisions to enhance the protection of minors online, recognizing that children are particularly vulnerable to harmful content and practices. Platforms must implement measures to ensure that their services are safe for minors, including age-appropriate content moderation, parental controls, and restrictions on targeted advertising to minors. The DSA also requires platforms to provide clear and accessible information to minors and their parents about the risks associated with online activities and how to protect themselves. These measures are designed to create a safer online environment for children and to empower them and their guardians to make informed decisions."
    },
    # Question 9 from GDPR
    {
        'law': 'gdpr',
        'question': "How does the GDPR address data protection by design and by default?",
        'answer': "The GDPR requires data controllers to implement data protection by design and by default. This means that data protection measures must be integrated into the processing activities from the outset and that only personal data necessary for each specific purpose of the processing is processed. The controller must take appropriate technical and organizational measures, such as pseudonymization, to ensure that, by default, personal data is not made accessible to an indefinite number of people without the individual's consent."
    },
    # Question 9 from AI Act
    {
        'law': 'ai_act',
        'question': "How does the AI Act address the use of biometric identification systems?",
        'answer': "The AI Act imposes strict regulations on the use of biometric identification systems, particularly those used in public spaces for law enforcement purposes. The use of real-time remote biometric identification systems in publicly accessible spaces is generally prohibited, with exceptions granted under specific conditions, such as preventing a terrorist attack, locating a missing child, or identifying a suspect of a serious crime. Even in these cases, the use must be authorized by judicial or other independent authorities, and subject to strict safeguards to protect fundamental rights."
    },
    # Question 9 from DMA
    {
        'law': 'dma',
        'question': "What obligations does the DMA impose on gatekeepers regarding data access and portability?",
        'answer': "The DMA imposes obligations on gatekeepers to provide business users and end users with access to the data generated through their interactions on the platform. This includes providing data in a structured, commonly used, and machine-readable format to facilitate data portability. Gatekeepers are also required to allow business users to access data that is necessary for the development and improvement of their own products and services. These obligations are intended to prevent gatekeepers from using their control over data to stifle competition and innovation."
    },
    # Question 9 from DSA
    {
        'law': 'dsa',
        'question': "What are the transparency obligations for online platforms regarding their algorithms?",
        'answer': "The DSA imposes transparency obligations on online platforms to provide clear and accessible information about how their algorithms work, particularly those used for content moderation, recommendation, and ranking. Platforms must explain the criteria and logic behind their algorithms, allowing users to understand how decisions are made and how content is presented to them. VLOPs have additional obligations to conduct algorithmic audits and to allow independent researchers to assess the impact of their algorithms on society. These transparency measures are intended to increase accountability and trust in the digital ecosystem."
    },
    # Question 10 from GDPR
    {
        'law': 'gdpr',
        'question': "What is the role of the Data Protection Officer (DPO) under the GDPR?",
        'answer': "The Data Protection Officer (DPO) is responsible for overseeing data protection strategies and ensuring compliance with GDPR requirements. The DPO must be appointed by public authorities and bodies, and by organizations that engage in regular and systematic monitoring of data subjects on a large scale or process special categories of data on a large scale. The DPO’s responsibilities include advising the organization on GDPR obligations, monitoring compliance, providing training to staff, conducting audits, and serving as the contact point for supervisory authorities and data subjects."
    },
    # Question 10 from AI Act
    {
        'law': 'ai_act',
        'question': "What are the requirements for conformity assessments under the AI Act?",
        'answer': "High-risk AI systems must undergo a conformity assessment before they can be placed on the market or put into service. This assessment involves evaluating whether the AI system meets the requirements set out in the AI Act, including risk management, data governance, transparency, human oversight, and accuracy. The assessment can be conducted by the provider or by a notified body, depending on the nature of the AI system. The conformity assessment must be documented, and the AI system must bear a CE marking indicating compliance with the regulation."
    },
    # Question 10 from DMA
    {
        'law': 'dma',
        'question': "How does the DMA address the issue of tying and bundling practices by gatekeepers?",
        'answer': "The DMA prohibits gatekeepers from engaging in tying and bundling practices that require users to purchase or use additional services as a condition for accessing the gatekeeper's core platform service. For example, a gatekeeper cannot require users to install or use a specific app or service as a precondition for using their platform. The prohibition on tying and bundling is intended to prevent gatekeepers from leveraging their market power to extend their dominance into other markets and to ensure that users have the freedom to choose the services they want to use."
    },
    # Question 10 from DSA
    {
        'law': 'dsa',
        'question': "How does the DSA address the issue of disinformation and fake news on digital platforms?",
        'answer': "The DSA requires platforms, particularly VLOPs, to take proactive measures to combat the spread of disinformation and fake news. This includes implementing mechanisms to detect, assess, and mitigate the risks associated with disinformation, collaborating with independent fact-checkers, and providing users with accurate information and context. Platforms must also ensure that their content moderation and recommendation systems do not amplify or promote disinformation. The DSA promotes transparency by requiring platforms to report on their efforts to combat disinformation and to provide users with tools to identify and report false information."
    },
    # Question 11 from GDPR
    {
        'law': 'gdpr',
        'question': "What are the implications of the GDPR for cross-border data processing activities?",
        'answer': "The GDPR establishes a framework for cross-border data processing activities to ensure that data protection is consistent across the EU. Organizations that process personal data across multiple EU member states must designate a lead supervisory authority, which acts as the single point of contact for overseeing compliance. The GDPR also facilitates cooperation between supervisory authorities through mechanisms such as the consistency mechanism and the European Data Protection Board (EDPB)."
    },
    # Question 11 from AI Act
    {
        'law': 'ai_act',
        'question': "What role do national supervisory authorities play under the AI Act?",
        'answer': "National supervisory authorities are responsible for overseeing the implementation and enforcement of the AI Act within their respective jurisdictions. They are tasked with monitoring the compliance of AI systems with the Act's requirements, conducting inspections and investigations, and taking enforcement actions where necessary. These authorities also play a key role in coordinating with other national authorities and the European Commission to ensure a harmonized approach to AI regulation across the EU."
    },
    # Question 11 from DMA
    {
        'law': 'dma',
        'question': "What are the consequences for gatekeepers that fail to comply with the DMA?",
        'answer': "Gatekeepers that fail to comply with the obligations and prohibitions set out in the DMA face significant consequences, including fines of up to 10% of their total worldwide annual turnover. In cases of repeated non-compliance, the European Commission can impose additional measures, such as structural remedies, including the divestiture of parts of the business. The DMA also provides for periodic penalty payments to ensure that gatekeepers comply with the obligations on an ongoing basis. The enforcement of the DMA is designed to be robust to prevent gatekeepers from engaging in anti-competitive behavior."
    },
    # Question 11 from DSA
    {
        'law': 'dsa',
        'question': "What role do trusted flaggers play under the DSA?",
        'answer': "The DSA recognizes the role of trusted flaggers—entities with expertise in identifying illegal content—as important partners in content moderation. Trusted flaggers are granted priority in the notice-and-action mechanisms, meaning that their reports are processed more quickly and with higher accuracy. Platforms must ensure that trusted flaggers' reports are handled by experienced moderators and that they receive feedback on the actions taken. The designation of trusted flaggers is intended to improve the efficiency and effectiveness of content moderation, particularly in combating illegal content and harmful activities online."
    },
    # Question 12 from GDPR
    {
        'law': 'gdpr',
        'question': "How does the GDPR handle data breaches, and what are the obligations of data controllers in such cases?",
        'answer': "Under the GDPR, data controllers are required to report data breaches to the relevant supervisory authority within 72 hours of becoming aware of the breach, unless the breach is unlikely to result in a risk to the rights and freedoms of individuals. If the breach poses a high risk to the affected individuals, the data controller must also inform the data subjects without undue delay. The GDPR mandates that organizations implement appropriate technical and organizational measures to prevent data breaches and mitigate their impact."
    },
    # Question 12 from AI Act
    {
        'law': 'ai_act',
        'question': "How does the AI Act encourage innovation while ensuring safety and compliance?",
        'answer': "The AI Act encourages innovation by providing regulatory sandboxes, which are controlled environments where AI developers can test their systems under the supervision of competent authorities without immediately facing the full regulatory requirements. These sandboxes allow for experimentation and development of innovative AI solutions while ensuring that safety, ethical, and legal standards are maintained. The Act also promotes the adoption of voluntary codes of conduct for non-high-risk AI systems, allowing providers to demonstrate their commitment to ethical AI practices."
    },
    # Question 12 from DMA
    {
        'law': 'dma',
        'question': "How does the DMA enhance consumer protection in digital markets?",
        'answer': "The DMA enhances consumer protection by ensuring that gatekeepers do not engage in practices that harm consumers, such as self-preferencing, unfair terms and conditions, or limiting access to data. The DMA also promotes transparency in how gatekeepers operate, requiring them to provide clear and accessible information to consumers about their practices. Additionally, the DMA ensures that consumers have more choice and control over the digital services they use, by promoting interoperability and data portability. By fostering competition, the DMA aims to improve the quality and affordability of digital services for consumers."
    },
    # Question 12 from DSA
    {
        'law': 'dsa',
        'question': "How does the DSA promote the accountability of online platforms?",
        'answer': "The DSA promotes accountability by imposing rigorous reporting and transparency requirements on online platforms. Platforms must publish regular transparency reports detailing their content moderation activities, including the number of removal actions, reasons for removals, and outcomes of user appeals. VLOPs are also required to undergo independent audits of their content moderation and risk management practices. These audits are intended to assess the platform's compliance with the DSA and to identify areas for improvement. By promoting transparency and accountability, the DSA aims to build trust in the digital environment and ensure that platforms act responsibly."
    },
    # Question 13 from GDPR
    {
        'law': 'gdpr',
        'question': "What are the restrictions on processing special categories of personal data under the GDPR?",
        'answer': "The GDPR imposes stricter rules on processing special categories of personal data, such as data revealing racial or ethnic origin, political opinions, religious or philosophical beliefs, trade union membership, genetic data, biometric data, health data, and data concerning a person’s sex life or sexual orientation. Processing of such data is prohibited unless specific conditions are met, such as obtaining explicit consent from the data subject, fulfilling legal obligations in the field of employment and social security, or protecting the vital interests of the data subject."
    },
    # Question 13 from AI Act
    {
        'law': 'ai_act',
        'question': "How does the AI Act address the transparency of AI systems?",
        'answer': "The AI Act mandates that AI systems, particularly high-risk ones, be designed and developed with transparency in mind. This includes providing clear and accessible information to users about the AI system’s purpose, capabilities, limitations, and how it functions. Users must be informed when they are interacting with an AI system, especially in cases where the AI is used to make decisions with significant impacts on individuals. The transparency requirements are aimed at ensuring that users and affected individuals understand how and why decisions are made by AI systems."
    },
    # Question 13 from DMA
    {
        'law': 'dma',
        'question': "How does the DMA address the issue of access to business users' data by gatekeepers?",
        'answer': "The DMA imposes obligations on gatekeepers to provide business users with access to the data they generate through their interactions on the platform. This includes access to aggregated and anonymized data, as well as data that is essential for the development and improvement of the business user's products and services. The DMA also prohibits gatekeepers from using non-public data from business users to compete against them, ensuring that gatekeepers do not exploit their access to data to gain an unfair competitive advantage."
    },
    # Question 13 from DSA
    {
        'law': 'dsa',
        'question': "What are the penalties for non-compliance with the DSA?",
        'answer': "The DSA provides for substantial penalties for non-compliance, including fines of up to 6% of the platform's total worldwide annual turnover. In cases of repeated or severe non-compliance, the DSA allows for additional measures, such as temporary suspension of the platform's services or other corrective actions. The enforcement of the DSA is overseen by national regulatory authorities, which have the power to investigate and sanction platforms that violate the regulation. These penalties are designed to ensure that platforms take their obligations seriously and that the DSA's provisions are effectively implemented."
    },
    # Question 14 from GDPR
    {
        'law': 'gdpr',
        'question': "How does the GDPR regulate automated decision-making and profiling?",
        'answer': "The GDPR places restrictions on automated decision-making, including profiling, where decisions are made solely based on automated processing and significantly affect individuals. Such processing is permitted only in specific situations, such as when it is necessary for entering into or performing a contract, authorized by Union or Member State law, or based on the data subject’s explicit consent. Organizations must ensure that individuals are informed about the existence of automated decision-making, the logic involved, and the potential consequences. Data subjects have the right to contest automated decisions and seek human intervention."
    },
    # Question 14 from AI Act
    {
        'law': 'ai_act',
        'question': "What are the obligations related to data quality under the AI Act?",
        'answer': "The AI Act requires that high-risk AI systems be trained, tested, and validated using high-quality datasets that are relevant, representative, free of errors, and complete. The data must be carefully selected to avoid biases that could lead to discriminatory outcomes. Providers must ensure that the data governance framework includes measures to assess and mitigate risks related to data quality, such as using diverse and representative datasets, validating the accuracy and reliability of data, and regularly updating datasets to reflect changes over time."
    },
    # Question 14 from DMA
    {
        'law': 'dma',
        'question': "How does the DMA ensure fair and non-discriminatory access to core platform services?",
        'answer': "The DMA requires gatekeepers to ensure that their core platform services are offered on fair, reasonable, and non-discriminatory terms. This means that gatekeepers cannot impose unfair terms or conditions on business users or engage in practices that favor their own services over those of competitors. The DMA also requires gatekeepers to provide transparency in how they operate, including clear and accessible information about the terms and conditions for using their services. These measures are intended to prevent gatekeepers from abusing their market power and to ensure a level playing field in digital markets."
    },
    # Question 14 from DSA
    {
        'law': 'dsa',
        'question': "How does the DSA address the issue of illegal goods, services, and content online?",
        'answer': "The DSA requires platforms to implement measures to detect and remove illegal goods, services, and content from their services. This includes ensuring that sellers and service providers on their platforms are properly identified and that they comply with applicable laws and regulations. Platforms must also provide users with clear mechanisms to report illegal goods and services, and they must act expeditiously to remove or disable access to such content. The DSA's provisions are designed to protect consumers and ensure that online marketplaces operate in a safe and lawful manner."
    },
    # Question 15 from GDPR
    {
        'law': 'gdpr',
        'question': "What penalties and enforcement actions are provided for under the GDPR?",
        'answer': "The GDPR provides for substantial penalties and enforcement actions to ensure compliance. Supervisory authorities have the power to impose administrative fines of up to 20 million euros or 4% of the total worldwide annual turnover of the preceding financial year, whichever is higher, for the most serious violations. Penalties are determined based on factors such as the nature, gravity, and duration of the infringement, the intentional or negligent character of the infringement, and the measures taken by the organization to mitigate the damage."
    },
    # Question 15 from AI Act
    {
        'law': 'ai_act',
        'question': "How does the AI Act regulate the use of AI in law enforcement and public safety?",
        'answer': "The AI Act imposes strict regulations on the use of AI systems in law enforcement and public safety, particularly those used for predictive policing, biometric identification, and surveillance. These systems are considered high-risk and are subject to rigorous scrutiny to ensure that they do not infringe on fundamental rights, such as privacy and non-discrimination. Law enforcement agencies must conduct a detailed risk assessment and implement safeguards to ensure that the use of AI systems is necessary, proportionate, and respectful of human rights."
    },
    # Question 15 from DMA
    {
        'law': 'dma',
        'question': "How does the DMA promote innovation and competition in digital markets?",
        'answer': "The DMA promotes innovation and competition by preventing gatekeepers from engaging in practices that stifle competition, such as self-preferencing, tying, and bundling. By ensuring that gatekeepers operate on fair, reasonable, and non-discriminatory terms, the DMA creates opportunities for new entrants and smaller competitors to compete on a level playing field. The DMA also promotes interoperability and data portability, enabling businesses to develop innovative services that can interact with the gatekeeper's platform. These measures are designed to foster a dynamic and competitive digital market that benefits consumers and businesses alike."
    },
    # Question 15 from DSA
    {
        'law': 'dsa',
        'question': "How does the DSA support the rights of consumers in the digital marketplace?",
        'answer': "The DSA strengthens consumer rights by ensuring that online platforms provide clear and accessible information about the goods, services, and content available on their platforms. This includes requiring platforms to disclose information about the identity of sellers, the terms and conditions of transactions, and the nature of the goods and services offered. Consumers must also be informed about their rights, including the right to withdraw from a transaction, the right to a refund, and the right to access effective dispute resolution mechanisms. The DSA's consumer protection provisions are designed to create a safe and transparent digital marketplace."
    },
    # Question 16 from GDPR
    {
        'law': 'gdpr',
        'question': "What is the role of the European Data Protection Board (EDPB) under the GDPR?",
        'answer': "The European Data Protection Board (EDPB) is an independent body established by the GDPR to ensure the consistent application of data protection rules across the EU. The EDPB is composed of representatives of the national data protection authorities and the European Data Protection Supervisor (EDPS). Its responsibilities include issuing guidelines, recommendations, and best practices on the interpretation and application of the GDPR, resolving disputes between supervisory authorities, and advising the European Commission on data protection matters."
    },
    # Question 16 from AI Act
    {
        'law': 'ai_act',
        'question': "How does the AI Act address the issue of bias and discrimination in AI systems?",
        'answer': "The AI Act mandates that AI systems, particularly high-risk ones, be designed and developed in a manner that prevents, identifies, and mitigates biases that could lead to discriminatory outcomes. Providers must take measures to ensure that AI systems do not produce results that unfairly disadvantage individuals or groups based on protected characteristics such as race, gender, or religion. This includes using diverse datasets, conducting bias audits, and implementing corrective measures to address any identified biases. The Act also emphasizes the importance of human oversight in preventing and addressing bias."
    },
    # Question 16 from DMA
    {
        'law': 'dma',
        'question': "How does the DMA address the issue of mergers and acquisitions by gatekeepers?",
        'answer': "The DMA requires gatekeepers to inform the European Commission of any intended mergers, acquisitions, or concentrations involving other providers of core platform services or digital services. This notification requirement allows the Commission to assess whether the proposed transaction would undermine the objectives of the DMA, such as by reinforcing the gatekeeper's market power or reducing competition in digital markets. The DMA's provisions on mergers and acquisitions are intended to prevent gatekeepers from consolidating their dominance through strategic acquisitions and to ensure that competition remains robust in digital markets."
    },
    # Question 16 from DSA
    {
        'law': 'dsa',
        'question': "How does the DSA handle the issue of online harassment and abuse?",
        'answer': "The DSA requires platforms to implement measures to combat online harassment and abuse, including providing users with tools to report and block abusive content and behavior. Platforms must act swiftly to remove or disable access to content that constitutes harassment or abuse, and they must provide support to victims. The DSA also encourages platforms to collaborate with law enforcement and civil society organizations to address online harassment and to develop best practices for creating a safe online environment. These measures are intended to protect users from harm and to promote a respectful and inclusive digital space."
    },
    # Question 17 from GDPR
    {
        'law': 'gdpr',
        'question': "How does the GDPR address the issue of consent in data processing?",
        'answer': "Under the GDPR, consent must be freely given, specific, informed, and unambiguous. Organizations must ensure that consent is obtained through a clear affirmative action, such as ticking a box on a website, and that it is distinguishable from other matters. The data subject must be informed of their right to withdraw consent at any time, and withdrawal must be as easy as giving consent. Additionally, for children below the age of 16, parental consent is required for processing their data."
    },
    # Question 17 from AI Act
    {
        'law': 'ai_act',
        'question': "What is the role of the European Artificial Intelligence Board (EAIB) under the AI Act?",
        'answer': "The European Artificial Intelligence Board (EAIB) is established under the AI Act to facilitate cooperation and coordination among national supervisory authorities and the European Commission. The EAIB is responsible for issuing guidelines, recommendations, and best practices on the implementation of the AI Act, providing advice to the European Commission on AI-related matters, and promoting the harmonized application of the Act across the EU. The EAIB also plays a role in resolving disputes between national authorities and ensuring consistency in the interpretation and enforcement of the AI Act."
    },
    # Question 17 from DMA
    {
        'law': 'dma',
        'question': "How does the DMA address the issue of dark patterns and deceptive design practices by gatekeepers?",
        'answer': "The DMA prohibits gatekeepers from using dark patterns and deceptive design practices that manipulate or deceive users into making decisions that are not in their best interests. This includes practices such as hiding important information, making it difficult for users to exercise their rights, or nudging users toward certain choices. The DMA requires gatekeepers to provide clear and accessible information to users and to design their interfaces in a way that respects user autonomy and choice. These provisions are intended to protect consumers from manipulative practices and to ensure that digital services are transparent and user-friendly."
    },
    # Question 17 from DSA
    {
        'law': 'dsa',
        'question': "How does the DSA ensure that users have control over their data and privacy?",
        'answer': "The DSA enhances user control over data and privacy by requiring platforms to provide clear and accessible information about how user data is collected, processed, and used. Users must be informed about their rights to access, rectify, and delete their data, as well as their right to object to data processing. The DSA also requires platforms to implement privacy-by-design and privacy-by-default principles, ensuring that users' privacy is protected from the outset. Additionally, platforms must provide users with tools to manage their privacy settings and to control the use of their data for targeted advertising."
    },
    # Question 18 from GDPR
    {
        'law': 'gdpr',
        'question': "What is the GDPR’s approach to international data transfers?",
        'answer': "The GDPR allows international data transfers only if the third country, territory, or international organization ensures an adequate level of data protection, as determined by the European Commission. In the absence of an adequacy decision, transfers are permitted under appropriate safeguards, such as binding corporate rules or standard contractual clauses. In specific circumstances, derogations for specific situations, such as explicit consent of the data subject, may allow transfers. The GDPR aims to ensure that personal data transferred outside the EU is afforded the same level of protection as within the EU."
    },
    # Question 18 from AI Act
    {
        'law': 'ai_act',
        'question': "How does the AI Act impact the use of AI in healthcare?",
        'answer': "The AI Act recognizes the potential benefits of AI in healthcare, such as improving diagnosis, treatment, and patient outcomes. However, it also acknowledges the risks associated with the use of AI in this sensitive sector. AI systems used in healthcare, particularly those that involve decision-making or provide recommendations to healthcare professionals, are classified as high-risk and are subject to strict requirements. These include ensuring the accuracy and reliability of AI systems, maintaining human oversight, and safeguarding patient data. The Act also emphasizes the importance of transparency and informed consent in the use of AI in healthcare."
    },
    # Question 18 from DMA
    {
        'law': 'dma',
        'question': "How does the DMA promote transparency in digital advertising?",
        'answer': "The DMA promotes transparency in digital advertising by requiring gatekeepers to provide advertisers and publishers with access to data related to their advertising campaigns, including information on pricing, performance, and targeting criteria. Gatekeepers must also ensure that their advertising services are offered on fair, reasonable, and non-discriminatory terms, and they are prohibited from using non-public data to gain an unfair advantage in the advertising market. These provisions are intended to promote competition and transparency in digital advertising, ensuring that advertisers and publishers have the information they need to make informed decisions."
    },
    # Question 18 from DSA
    {
        'law': 'dsa',
        'question': "How does the DSA address the issue of algorithmic transparency and accountability?",
        'answer': "The DSA requires platforms, particularly VLOPs, to provide transparency about how their algorithms work, including the criteria used for content recommendation, ranking, and removal. Platforms must explain the logic behind their algorithms and provide users with options to control how algorithms affect their online experience. The DSA also mandates that platforms conduct regular audits of their algorithms to assess their impact on users and society. These audits must be conducted by independent third parties and must evaluate whether the algorithms are fair, non-discriminatory, and aligned with fundamental rights."
    },
    # Question 19 from GDPR
    {
        'law': 'gdpr',
        'question': "What rights do data subjects have in relation to automated decision-making under the GDPR?",
        'answer': "Under the GDPR, data subjects have the right not to be subject to a decision based solely on automated processing, including profiling, which produces legal effects or similarly significant effects concerning them. Exceptions include situations where automated decision-making is necessary for entering into or performing a contract, authorized by Union or Member State law, or based on explicit consent. In such cases, organizations must implement safeguards to protect the data subject's rights, such as the right to obtain human intervention, express their point of view, and contest the decision."
    },
    # Question 19 from AI Act
    {
        'law': 'ai_act',
        'question': "How does the AI Act address the issue of AI literacy and public awareness?",
        'answer': "The AI Act encourages initiatives to promote AI literacy and public awareness, recognizing that informed and educated citizens are essential for the responsible adoption of AI technologies. The Act calls for the development of educational programs and resources to help individuals understand the capabilities, limitations, and risks associated with AI. It also promotes public consultations and stakeholder engagement to ensure that the perspectives of various groups, including civil society, are considered in the development and deployment of AI systems."
    },
    # Question 19 from DMA
    {
        'law': 'dma',
        'question': "How does the DMA address the issue of access to core platform services by end users?",
        'answer': "The DMA ensures that end users have access to core platform services on fair and non-discriminatory terms. Gatekeepers are prohibited from restricting or degrading the quality of access to their services or from engaging in practices that limit user choice, such as forcing users to install certain apps or use specific services. The DMA also promotes data portability, allowing end users to transfer their data to other services and take advantage of competitive offerings. These provisions are designed to enhance user choice and control over the digital services they use."
    },
    # Question 19 from DSA
    {
        'law': 'dsa',
        'question': "What are the requirements for online platforms to cooperate with regulatory authorities under the DSA?",
        'answer': "The DSA requires online platforms to cooperate with regulatory authorities by providing them with access to data, records, and information necessary for monitoring and enforcement purposes. Platforms must respond promptly to requests from authorities and must facilitate inspections and investigations. The DSA also mandates that platforms provide transparency reports and undergo independent audits to demonstrate compliance with the regulation. Cooperation with authorities is essential for ensuring that platforms meet their obligations and that the DSA's provisions are effectively enforced."
    },
    # Question 20 from GDPR
    {
        'law': 'gdpr',
        'question': "What is the GDPR's stance on the appointment of a Data Protection Officer (DPO) and when is it mandatory?",
        'answer': "The GDPR mandates the appointment of a Data Protection Officer (DPO) in specific cases: when processing is carried out by a public authority or body, except for courts acting in their judicial capacity; when the core activities of the controller or processor consist of processing operations that require regular and systematic monitoring of data subjects on a large scale; or when the core activities consist of processing special categories of data on a large scale. The DPO must have expert knowledge of data protection law and practices and is responsible for advising the organization on GDPR compliance and monitoring its implementation."
    },
    # Question 20 from AI Act
    {
        'law': 'ai_act',
        'question': "What measures does the AI Act include to support the ethical development of AI?",
        'answer': "The AI Act supports the ethical development of AI by encouraging the adoption of voluntary codes of conduct, fostering research on ethical AI, and promoting the development of AI systems that align with European values and fundamental rights. The Act emphasizes the importance of human-centric AI, where AI systems are designed to enhance human capabilities and well-being while respecting human dignity and autonomy. It also supports the creation of regulatory sandboxes to allow developers to experiment with innovative AI solutions in a controlled environment, ensuring that ethical considerations are integrated into the design and deployment of AI technologies."
    },
    # Question 20 from DMA
    {
        'law': 'dma',
        'question': "What role does the European Commission play in enforcing the DMA?",
        'answer': "The European Commission is responsible for enforcing the DMA, including monitoring compliance, conducting investigations, and imposing penalties for non-compliance. The Commission has the authority to impose fines, periodic penalty payments, and structural remedies on gatekeepers that violate the DMA's obligations and prohibitions. The Commission also has the power to initiate market investigations to assess whether new services should be designated as core platform services or whether additional obligations should be imposed on gatekeepers. The enforcement of the DMA is designed to be robust and effective, ensuring that gatekeepers operate in a manner that promotes competition and innovation in digital markets."
    },
    # Question 20 from DSA
    {
        'law': 'dsa',
        'question': "How does the DSA promote the development of codes of conduct for online platforms?",
        'answer': "The DSA encourages the development of codes of conduct for online platforms to address specific issues such as content moderation, algorithmic transparency, and the protection of minors. These codes of conduct are developed in collaboration with industry stakeholders, civil society organizations, and regulatory authorities. The DSA promotes the adoption of these voluntary measures to ensure that platforms operate in a responsible and ethical manner. The codes of conduct provide a framework for best practices and help platforms to align their operations with the DSA's objectives, while also allowing for flexibility and innovation."
    },
]

# Per-regulation evaluation config: source HTML path, Elasticsearch index name
# ('collection_name'), and the hand-written Q&A pairs tagged with that law.
# NOTE(review): these paths point at 'english-lawsss' while the loading cell at
# the top of the notebook reads from 'english-dataset' — confirm which dataset
# is mounted.
laws_info = {
    law: {
        'file_path': html_path,
        'collection_name': index_name,
        'questions_answers': [
            qa for qa in integrated_questions_answers if qa['law'] == law
        ],
    }
    for law, html_path, index_name in (
        ('gdpr', '/kaggle/input/english-lawsss/english_gdpr.html', 'embeddings_gdpr'),
        ('ai_act', '/kaggle/input/english-lawsss/english_AI_act.html', 'embeddings_ai_act'),
        ('dma', '/kaggle/input/english-lawsss/english_dma.html', 'embeddings_dma'),
        ('dsa', '/kaggle/input/english-lawsss/english_dsa.html', 'embeddings_dsa'),
    )
}

# Models shared by the evaluation code below.
# device=0 asks the transformers pipeline to run on accelerator index 0, so this
# cell presumably needs a GPU-enabled session — confirm before running on CPU.
summarizer = pipeline("summarization", model="t5-base", device=0)
# Rebinds the module-level `tokenizer`: the chunking cell earlier in the notebook
# bound it to 'distilbert-base-uncased'.
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = AutoModel.from_pretrained('bert-base-uncased')
# Two sentence-embedding models are loaded; the driver call in this cell passes
# `semantic_model` only.
sbert_model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
semantic_model = SentenceTransformer('all-MiniLM-L6-v2')

def summarize_text_huggingface_with_retry(text, max_length=300, min_length=100, max_retries=3):
    """Summarize `text` with the module-level `summarizer`, retrying on failure.

    Args:
        text: The text to summarize.
        max_length: Forwarded to the summarizer as `max_length`.
        min_length: Forwarded to the summarizer as `min_length`.
        max_retries: Maximum number of attempts before giving up.

    Returns:
        The `summary_text` field of the first summarizer result, or None when
        every attempt raised (or when `max_retries` is not positive).
    """
    for attempt in range(max_retries):
        try:
            # Summarize the argument itself. The previous version read a name
            # `retrieved_text` that is not defined in this function, and ignored
            # `min_length` in favour of a hard-coded 20; honouring the parameter
            # means the effective default minimum is now 100.
            summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
            return summary[0]['summary_text']
        except Exception as e:
            print(f"Attempt {attempt + 1} failed: {e}")
            if attempt < max_retries - 1:
                # Exponential backoff between attempts: 1s, 2s, 4s, ...
                time.sleep(2 ** attempt)
    return None

def query_elasticsearch(index_name, query_embedding, top_k=10, num_candidates=100):
    """Run a kNN search on an Elasticsearch index and return the matching chunks.

    Uses the module-level `es` client and searches the "embedding" field.

    Args:
        index_name: Name of the index to search.
        query_embedding: Query vector as a list or numpy array. A batch of one
            (``[[...]]``) is unwrapped to the single vector.
        top_k: Number of hits to request (sent as both `size` and `k`).
        num_candidates: kNN candidate count; raised to `top_k` when smaller,
            because Elasticsearch requires k to be <= num_candidates.

    Returns:
        A list of ``{"text": <_source.chunk>, "id": <_id>}`` dicts, one per hit.

    Raises:
        Exception: Any error from the search or from reading the hits is
            printed and re-raised.
    """
    query_vector = query_embedding.tolist() if isinstance(query_embedding, np.ndarray) else query_embedding
    # Guard the peek at element 0 so an empty embedding reaches the search call
    # (and its error reporting) instead of raising IndexError here.
    if len(query_vector) > 0 and isinstance(query_vector[0], list):
        query_vector = query_vector[0]
    search_query = {
        "knn": {
            "field": "embedding",
            "query_vector": query_vector,
            "k": top_k,
            "num_candidates": max(num_candidates, top_k)
        }
    }

    try:
        response = es.search(index=index_name, body={"size": top_k, "query": search_query})
        return [
            {"text": hit["_source"]["chunk"], "id": hit["_id"]}
            for hit in response['hits']['hits']
        ]
    except Exception as e:
        print(f"Error querying Elasticsearch index '{index_name}': {e}")
        raise

def embed_and_query_all_laws_es(laws_info, es, tokenizer, model, semantic_model, summarizer, top_k=1):
    """Retrieve, summarize and score the top chunk for every question of every law.

    For each Q&A pair the question is embedded, the law's index is queried, the
    first hit is summarized, and two scores are recorded:

    * cosine: between the embeddings of the reference answer and the summary
      (None when no summary was produced);
    * semantic: between the reference answer and the raw retrieved chunk
      (not the summary), using `semantic_model`.

    Per-question details and per-law averages are printed; nothing is returned.

    Args:
        laws_info: Mapping law -> dict with 'collection_name' and
            'questions_answers' (list of dicts with 'question' and 'answer').
            Each Q&A dict gains a 'summary' key as a side effect.
        es: Not read by this function; `query_elasticsearch` uses the
            module-level client.
        tokenizer: Passed to `generate_bert_embedding`.
        model: Passed to `generate_bert_embedding`.
        semantic_model: Passed to `calculate_semantic_similarity`.
        summarizer: Not read by this function; the summarization helper uses
            the module-level pipeline.
        top_k: Number of hits requested per query; only the first hit is used.
    """
    similarities = {law: {'cosine': [], 'semantic': []} for law in laws_info}

    for law, info in laws_info.items():
        print(f"\nQuerying {law.upper()} collection:")
        for qa in info['questions_answers']:
            query = qa['question']
            reference_answer = qa['answer']
            query_embedding = generate_bert_embedding(query, tokenizer, model)
            print(info['collection_name'])
            results = query_elasticsearch(info['collection_name'], query_embedding, top_k)

            if not results:
                print(f"No valid results found for query: {query} in {law.upper()}.")
                continue

            retrieved_text = results[0]['text']
            summary = summarize_text_huggingface_with_retry(retrieved_text)
            qa['summary'] = summary
            retrieved_embedding = generate_bert_embedding(summary, tokenizer, model) if summary else None
            reference_embedding = generate_bert_embedding(reference_answer, tokenizer, model)
            if retrieved_embedding is not None:
                cosine_sim = calculate_cosine_similarity(reference_embedding, retrieved_embedding)
            else:
                cosine_sim = None
            semantic_sim = calculate_semantic_similarity(reference_answer, retrieved_text, semantic_model)

            similarities[law]['cosine'].append(cosine_sim)
            similarities[law]['semantic'].append(semantic_sim)
            print(f"Query: {query}")
            print(f"Retrieved chunk from {law.upper()}:")
            print(f"Retrieved text: {retrieved_text}")
            print(f"Summary for {law.upper()} - Question: {query}:\n{summary}\n")
            print(f"Reference answer: {reference_answer}")
            # Compare against None explicitly: a score of exactly 0.0 is a valid
            # result and must not be reported as missing.
            print(f"Cosine Similarity: {cosine_sim:.4f}" if cosine_sim is not None else "Cosine Similarity: N/A")
            print(f"Semantic Similarity: {semantic_sim:.4f}")
            print("----\n")

    print("\nCalculated Averages:")
    for law in similarities:
        # Questions whose summary failed contribute None and are left out of the mean.
        cosine_values = [val for val in similarities[law]['cosine'] if val is not None]
        semantic_values = similarities[law]['semantic']

        avg_cosine = sum(cosine_values) / len(cosine_values) if cosine_values else None
        avg_semantic = sum(semantic_values) / len(semantic_values) if semantic_values else None

        if avg_cosine is not None:
            print(f"{law.upper()} Average Cosine Similarity: {avg_cosine:.4f}")
        else:
            print(f"{law.upper()} Average Cosine Similarity: N/A")

        if avg_semantic is not None:
            print(f"{law.upper()} Average Semantic Similarity: {avg_semantic:.4f}")
        else:
            print(f"{law.upper()} Average Semantic Similarity: N/A")

# Run retrieval, summarization and scoring over the Q&A pairs held in laws_info;
# the function prints its report and returns nothing.
embed_and_query_all_laws_es(laws_info, es, tokenizer, model, semantic_model, summarizer, top_k=1)

# Load the other 80 questions, retrieve the most relevant chunks, summarize them, and compare the summaries with the reference answers

In [None]:
import pandas as pd
# Preview the first rows of the GDPR test-question CSV.
data = pd.read_csv("/kaggle/input/english-dataset/gdpr_test_data (1) (1).csv")
data.head()

In [None]:
from transformers import pipeline, AutoTokenizer, AutoModel
import numpy as np
import torch
from sklearn.metrics.pairwise import cosine_similarity

# Summarization pipeline used by summarize_text below. device=0 asks transformers
# for accelerator index 0 — presumably a GPU session is required; confirm.
summarizer = pipeline("summarization", model="t5-base", device=0)

# BERT tokenizer/model used by generate_bert_embedding (mean-pooled embeddings).
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = AutoModel.from_pretrained('bert-base-uncased')

# Sentence-embedding model used for the "semantic" similarity score.
# NOTE(review): SentenceTransformer is not imported in this cell's import block,
# so this line relies on an import made by an earlier cell.
sbert_model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')

def load_questions_answers_from_csv(file_path):
    """Read a Q&A CSV and return its rows as question/answer dicts.

    The CSV must have 'Question' and 'Answer' columns; other columns are ignored.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list of ``{'question': ..., 'answer': ...}`` dicts in file order.
    """
    frame = pd.read_csv(file_path)
    records = []
    for _, record in frame.iterrows():
        records.append({'question': record['Question'], 'answer': record['Answer']})
    return records

# Locations of the test-question CSVs, one per regulation.
gdpr_csv_path = '/kaggle/input/english-dataset/gdpr_test_data (1) (1).csv'
ai_act_csv_path = '/kaggle/input/english-dataset/ai_test_data (1) (1).csv'
dma_csv_path = '/kaggle/input/english-dataset/digital_marketing_test_data (1) (1).csv'
dsa_csv_path = '/kaggle/input/english-dataset/digital_services_test_data (1) (1).csv'

# Parse each CSV, in the same order as above, into a list of question/answer dicts.
gdpr_qa_pairs, ai_act_qa_pairs, dma_qa_pairs, dsa_qa_pairs = (
    load_questions_answers_from_csv(csv_path)
    for csv_path in (gdpr_csv_path, ai_act_csv_path, dma_csv_path, dsa_csv_path)
)

# Per-regulation evaluation config for the CSV test sets: source HTML path,
# Elasticsearch index name ('collection_name'), and the loaded Q&A pairs.
# This rebinds `laws_info` from the previous evaluation cell.
laws_info = {
    law: {
        'file_path': html_path,
        'collection_name': index_name,
        'questions_answers': qa_pairs,
    }
    for law, html_path, index_name, qa_pairs in (
        ('gdpr', '/kaggle/input/english-dataset/english_gdpr.html', 'embeddings_gdpr', gdpr_qa_pairs),
        ('ai_act', '/kaggle/input/english-dataset/english_AI_act.html', 'embeddings_ai_act', ai_act_qa_pairs),
        ('dma', '/kaggle/input/english-dataset/english_dma.html', 'embeddings_dma', dma_qa_pairs),
        ('dsa', '/kaggle/input/english-dataset/english_dsa.html', 'embeddings_dsa', dsa_qa_pairs),
    )
}

def query_elasticsearch(index_name, query_embedding, top_k=10, num_candidates=100):
    """Run a kNN search on an Elasticsearch index and return the chunk texts.

    Uses the module-level `es` client and searches the "embedding" field.
    This redefines the `query_elasticsearch` from the previous evaluation cell;
    unlike that one, it returns bare chunk strings rather than dicts.

    Args:
        index_name: Name of the index to search.
        query_embedding: Query vector as a list. A batch of one (``[[...]]``)
            is unwrapped to the single vector.
        top_k: Number of hits to request (sent as both `size` and `k`).
        num_candidates: kNN candidate count; raised to `top_k` when smaller,
            because Elasticsearch requires k to be <= num_candidates.

    Returns:
        A list with the `_source.chunk` string of each hit.
    """
    if isinstance(query_embedding, list) and len(query_embedding) == 1 and isinstance(query_embedding[0], list):
        query_embedding = query_embedding[0]

    search_query = {
        "knn": {
            "field": "embedding",
            "query_vector": query_embedding,
            "k": top_k,
            "num_candidates": max(num_candidates, top_k)
        }
    }
    response = es.search(index=index_name, body={"size": top_k, "query": search_query})
    return [hit["_source"]["chunk"] for hit in response["hits"]["hits"]]

def generate_bert_embedding(text, tokenizer, model):
    """Embed `text` by averaging the model's last hidden states.

    The text is tokenized (with truncation and padding enabled), run through
    `model` without gradient tracking, and `last_hidden_state` is averaged over
    dim=1 — assumes the usual (batch, tokens, hidden) layout, TODO confirm.

    Args:
        text: Text to embed.
        tokenizer: Callable returning PyTorch inputs for `model`.
        model: Model whose output exposes `last_hidden_state`.

    Returns:
        The squeezed, pooled embedding converted to nested Python lists/floats.
    """
    encoded = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
    pooled = hidden_states.mean(dim=1).squeeze()
    return pooled.numpy().tolist()

def calculate_cosine_similarity(embedding1, embedding2):
    """Return the cosine similarity between two embeddings.

    Each input is converted to a numpy array; a 1-D vector is reshaped into a
    single-row matrix, anything else is passed through unchanged. The result is
    element [0][0] of the `cosine_similarity` matrix.
    """
    def _as_matrix(vector):
        arr = np.array(vector)
        return arr.reshape(1, -1) if arr.ndim == 1 else arr

    return cosine_similarity(_as_matrix(embedding1), _as_matrix(embedding2))[0][0]

def calculate_semantic_similarity(reference_text, summary_text, model):
    """Return the cosine similarity between two texts encoded by `model`.

    Both texts are encoded in a single `model.encode` call and the two
    resulting vectors are compared with `cosine_similarity`.
    """
    encoded = model.encode([reference_text, summary_text])
    reference_vec, summary_vec = encoded[0], encoded[1]
    similarity_matrix = cosine_similarity([reference_vec], [summary_vec])
    return similarity_matrix[0][0]

def summarize_text(text, max_length=350, min_length=100):
    """Summarize `text` with the module-level `summarizer` (best effort).

    Args:
        text: Text to summarize.
        max_length: Forwarded to the summarizer as `max_length`.
        min_length: Forwarded to the summarizer as `min_length`.

    Returns:
        The `summary_text` of the first result, or None if anything raised
        (the error is printed rather than propagated).
    """
    try:
        first_result = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)[0]
        summary_text = first_result['summary_text']
    except Exception as e:
        print(f"Summarization failed: {e}")
        return None
    return summary_text

def embed_summarize_and_compare_all_laws(laws_info, top_k=1):
    """Retrieve, summarize and score the top chunk for every question of every law.

    For each Q&A pair: embed the question, query the law's index, summarize the
    first returned chunk, then compare the summary with the reference answer
    using BERT-embedding cosine similarity and `sbert_model` semantic
    similarity. Questions with no hit or no summary are reported and skipped.

    Relies on the module-level `tokenizer`, `model` and `sbert_model`.

    Args:
        laws_info: Mapping law -> dict with 'collection_name' and
            'questions_answers' (list of dicts with 'question' and 'answer').
        top_k: Number of hits requested per query; only the first is used.

    Returns:
        Mapping law -> {'cosine': [...], 'semantic': [...]} with one score per
        successfully summarized question.
    """
    similarities = {}
    for law in laws_info:
        similarities[law] = {'cosine': [], 'semantic': []}

    for law, info in laws_info.items():
        print(f"\nProcessing {law.upper()} collection:")

        for qa in info['questions_answers']:
            query, reference_answer = qa['question'], qa['answer']

            question_vector = generate_bert_embedding(query, tokenizer, model)
            hits = query_elasticsearch(info['collection_name'], question_vector, top_k)

            if not hits:
                print(f"No valid results found for query: {query} in {law.upper()}")
                continue

            retrieved_text = hits[0]
            summary = summarize_text(retrieved_text)
            if not summary:
                print(f"Failed to summarize the retrieved chunk for {query} in {law.upper()}\n")
                continue

            print(f"Summary for {law.upper()} - Question: {query}:\n{summary}\n")

            reference_vector = generate_bert_embedding(reference_answer, tokenizer, model)
            summary_vector = generate_bert_embedding(summary, tokenizer, model)

            cosine_sim = calculate_cosine_similarity(reference_vector, summary_vector)
            semantic_sim = calculate_semantic_similarity(reference_answer, summary, sbert_model)

            law_scores = similarities[law]
            law_scores['cosine'].append(cosine_sim)
            law_scores['semantic'].append(semantic_sim)
            print(f"Retrieved text: {retrieved_text}")
            print(f"Reference answer: {reference_answer}")
            print(f"Cosine Similarity: {cosine_sim:.4f}")
            print(f"Semantic Similarity: {semantic_sim:.4f}")
            print("----\n")

    return similarities

def calculate_and_print_averages(similarities):
    """Print the mean cosine and semantic similarity for each law.

    Args:
        similarities: Mapping law -> {'cosine': [...], 'semantic': [...]}.
            An empty score list produces a "No valid ..." line instead of a mean.
    """
    print("\nCalculated Averages:")
    for law in similarities:
        cosine_scores = similarities[law]['cosine']
        if not cosine_scores:
            print(f"No valid cosine similarities found for {law.upper()}")
        else:
            avg_cosine = sum(cosine_scores) / len(cosine_scores)
            print(f"{law.upper()} Average Cosine Similarity: {avg_cosine:.4f}")

        semantic_scores = similarities[law]['semantic']
        if not semantic_scores:
            print(f"No valid semantic similarities found for {law.upper()}")
        else:
            avg_semantic = sum(semantic_scores) / len(semantic_scores)
            print(f"{law.upper()} Average Semantic Similarity: {avg_semantic:.4f}")
# Score every CSV question against its law's index (first hit only), then print
# the per-law averages of the collected similarities.
similarities = embed_summarize_and_compare_all_laws(laws_info, top_k=1)
calculate_and_print_averages(similarities)