<a href="https://colab.research.google.com/github/MoritzLaurer/rag-demo/blob/master/rag_haystack_ai_law.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install packages

In [None]:
%%bash
pip install --upgrade pip
pip install farm-haystack[colab,inference]~=1.23.0
# for reading pdfs
wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin

## Prepare example data

#### Download PDF data

In [5]:
## download PDF data
import os
import zipfile
import requests
from io import BytesIO

# URL of the zip file in your GitHub repo (make sure it's the raw file URL)
zip_url = 'https://github.com/MoritzLaurer/rag-demo/blob/master/data/position-papers-pdfs.zip?raw=true'

# Download the zip file
print("Downloading zip file...")
response = requests.get(zip_url)
zip_content = BytesIO(response.content)

# Define the extraction path
extract_path = '/content/data'

# Create directory if it doesn't exist
if not os.path.exists(extract_path):
    os.makedirs(extract_path)

# Extract the zip file
print("Extracting zip file...")
with zipfile.ZipFile(zip_content, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print("Extraction completed.")


Downloading zip file...
Extracting zip file...
Extraction completed.


#### Download and prepare meta data

In [7]:
## prepare meta data
import pandas as pd
import numpy as np

# load meta data
df_metadata = pd.read_csv(
    "https://raw.githubusercontent.com/MoritzLaurer/rag-demo/master/data/position-papers-metadata.csv",
    sep=";", on_bad_lines='error', encoding="cp1252"
)

df_metadata = df_metadata[[
    'Organisation name', #'Transparency register number',
    'User type', 'Organisation size', 'Country', 'Scope',
    'Feedback date', 'Language', 'Reference',
    #'Publication privacy settings', 'First name', 'Surname',
    #'You can upload a document here:\n\n'
]]

df_metadata = df_metadata.rename(columns={
    'Reference': "document_reference", 'Feedback date': "document_date", 'Language': "language",
    'User type': "stakeholder_type", 'Scope': "stakeholder_scope",
    'Organisation name': "stakeholder_name",
    #'Transparency register number': "transparency_register_number",
    #'First name': "first_name", 'Surname': "surname",
    'Organisation size': "stakeholder_size", 'Country': "stakeholder_country",
    #'Publication privacy settings', 'You can upload a document here:\n\n'
})

# add column with exact pdf names corresponding to pdf reference
# not all respondents provided PDFs
def find_string_with_substring(substring, string_list):
    for string in string_list:
        if substring in string:
            return string
    return np.nan

doc_dir = "./data"
file_names = os.listdir(doc_dir)
pdf_name_col = [find_string_with_substring(ref, file_names) for ref in df_metadata["document_reference"]]

# note that not all respondents provided PDFs
# document_name is NaN if no PDF is available
df_metadata.loc[:, "document_name"] = pdf_name_col

df_metadata

Unnamed: 0,stakeholder_name,stakeholder_type,stakeholder_size,stakeholder_country,stakeholder_scope,document_date,language,document_reference,document_name
0,Governance of AI Research Group,Academic/Research Institution,Micro (< 10 employees),United States,,19-06-2020 23:58,English,F529892,F529892-Governance_of_AI_Research_Group_EU_Com...
1,European Technology Policy Committee (EUTPC) o...,Academic/Research Institution,Large (250 or more),United States,,19-06-2020 22:38,English,F529891,
2,EIT Health e.V.,Other,Medium (< 250 employees),Germany,,19-06-2020 21:54,English,F529890,F529890-EIT_Health_Consultative_Group_on_EC_Da...
3,on behalf of: Chairman of the National Broadca...,Public authority,Medium (< 250 employees),Poland,National,19-06-2020 17:58,Polish,F529889,F529889-feedback_Consultation_on_the_White_Pap...
4,,,,,,19-06-2020 17:17,English,F529888,F529888-DIGITAL_SME_Position_Paper_AI_White_Pa...
...,...,...,...,...,...,...,...,...,...
1211,,,,,,19-02-2020 16:09,English,F518570,
1212,CUBE ROBOT X by haleez.com,Business Association,Micro (< 10 employees),Germany,,19-02-2020 15:58,German,F518569,
1213,,,,,,19-02-2020 14:33,English,F518568,
1214,,,,,,19-02-2020 13:17,English,F518567,


## Create a search index with the downloaded data

In [8]:
from haystack.utils import convert_files_to_docs

# Convert all PDF files in folder to Haystack doc object
# https://docs.haystack.deepset.ai/reference/utils-api#convert_files_to_docs
dir_path = "./data"
docs = convert_files_to_docs(dir_path)

print("Example for PDF converted to doc object:\n")
print(docs[0])

Example for PDF converted to doc object:

<Document: id=c8d68f7d442a07b88be295981f52f229, content='Stellungnahme
Zur EU-Konsultation zum Weißbuch zur Künstlichen Intelligenz - ein
europäisches Konzep...'>


In [9]:
# add meta data to docs based on unique reference
for doc in docs:
    # the unique reference of each respondent are the first 7 characters of the PDF name
    # this reference can be used to merge the PDFs with meta data from the .csv
    if doc.meta["name"]:
        doc_reference = doc.meta["name"][:7]
        for col in df_metadata.columns:
            doc.meta[col] = df_metadata[df_metadata["document_reference"] == doc_reference][col].iloc[0]

print("Example for meta data added to document")
print(docs[0].meta)

Example for meta data added to document
{'name': 'F530333-Stellungnahme_KI_Weissbuch.pdf', 'stakeholder_name': 'Deutscher Naturschutzring', 'stakeholder_type': 'Environmental Organisation', 'stakeholder_size': 'Small (< 50 employees)', 'stakeholder_country': 'Germany', 'stakeholder_scope': nan, 'document_date': '14-06-2020 16:17', 'language': 'German', 'document_reference': 'F530333', 'document_name': 'F530333-Stellungnahme_KI_Weissbuch.pdf'}


In [10]:
from haystack import Pipeline
from haystack.nodes import PreProcessor, PDFToTextConverter
from haystack.document_stores import InMemoryDocumentStore
import os

# Initialize the document store
document_store = InMemoryDocumentStore(embedding_dim=384, use_bm25=True)

# preprocessor: https://docs.haystack.deepset.ai/docs/preprocessor
preprocessor = PreProcessor(
    clean_whitespace=True,
    clean_header_footer=True,
    clean_empty_lines=True,
    split_by="word",
    split_length=200,
    split_overlap=20,
    split_respect_sentence_boundary=True,
    add_page_number=True,
    max_chars_check=100_000
)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [11]:
# adding meta data to the text passages
from haystack.nodes import BaseComponent
from haystack.schema import Document
from typing import Any, Dict, List, Tuple, Union

class MetaData2TextAugmenter(BaseComponent):
    outgoing_edges = 1

    def __init__(self, meta_data_to_add: List[str]):
        super().__init__()
        self.meta_data_to_add = meta_data_to_add

    def run(self, documents: List[Document]) -> Tuple[Dict[str, Any], str]:
        for doc in documents:
            # Append specified meta_data to text content
            #if doc.meta:
            content_with_meta = "Passage meta data: "
            for meta_data_key in self.meta_data_to_add:
                if meta_data_key in doc.meta:
                    content_with_meta += f'{meta_data_key}: {doc.meta[meta_data_key]}, '
            doc.content = content_with_meta + "\n\nPassage: " + doc.content

        return {"documents": documents}, "output_1"

    def run_batch(self, documents: List[Document]) -> Tuple[Dict[str, Any], str]:
        return self.run(documents)


meta_data_to_add = [
    "stakeholder_name",  "stakeholder_type",  "stakeholder_scope",
    "stakeholder_size", "stakeholder_country",
    "document_date", "language"
    #"document_reference", "document_name",
]

meta2text_augmenter = MetaData2TextAugmenter(meta_data_to_add=meta_data_to_add)


In [12]:
# instantiate indexing pipeline
indexing_pipeline = Pipeline()

#indexing_pipeline.add_node(component=converter, name="TextConverter", inputs=["File"])
indexing_pipeline.add_node(component=preprocessor, name="PreProcessor", inputs=["File"])
indexing_pipeline.add_node(component=meta2text_augmenter, name="meta2text_augmenter", inputs=["PreProcessor"])
indexing_pipeline.add_node(component=document_store, name="DocumentStore", inputs=["meta2text_augmenter"])

# Run the pipeline
indexing_pipeline.run(documents=docs)


Preprocessing: 100%|██████████| 440/440 [00:15<00:00, 27.82docs/s]
Updating BM25 representation...: 100%|██████████| 12585/12585 [00:01<00:00, 11070.97 docs/s]


{'documents': [<Document: {'content': 'Passage meta data: stakeholder_name: Deutscher Naturschutzring, stakeholder_type: Environmental Organisation, stakeholder_scope: nan, stakeholder_size: Small (< 50 employees), stakeholder_country: Germany, document_date: 14-06-2020 16:17, language: German, \n\nPassage: Stellungnahme\nZur EU-Konsultation zum Weißbuch zur Künstlichen Intelligenz - ein\neuropäisches Konzept für Exzellenz und Vertrauen\nGemeinsame Stellungnahme vom Bund für Umwelt und Naturschutz Deutschland (BUND),\nNaturschutzbund Deutschland (NABU), Germanwatch sowie dem Umweltdachverband Deutscher\nNaturschutzring (DNR)\nGrundsätzliche Einschätzung:\nDer digitale Wandel und der sinnvolle Einsatz algorithmischer Entscheidungssysteme/ Künstlicher\nIntelligenz (KI) können einen substantiellen Beitrag zu einer nachhaltigen Entwicklung leisten. Die\nEinführung von KI in alle Bereiche unserer Gesellschaft birgt jedoch zugleich Risiken – soziale,\nökologische und wirtschaftliche. Auf bei

In [13]:
# inspect an example document
# https://docs.haystack.deepset.ai/reference/document-store-api#inmemorydocumentstore
print(document_store.get_document_count())
print(document_store.get_document_by_id(id="1d1f0b8b1a976b696d106b0aa4443049"))

12585
<Document: id=1d1f0b8b1a976b696d106b0aa4443049, content='Passage meta data: stakeholder_name: Governance of AI Research Group, stakeholder_type: Academic/Res...'>


## RAG pipeline with open source models

In [None]:
# https://haystack.deepset.ai/tutorials/22_pipeline_with_promptnode
from haystack.nodes import EmbeddingRetriever, BM25Retriever

# the texts are in multiple languages. we therefore use a multilingual embedding model
# the best place to find the latest embedding models is the MTEB leaderboard
# https://huggingface.co/spaces/mteb/leaderboard
retriever_sbert = EmbeddingRetriever(
    document_store=document_store, embedding_model="intfloat/multilingual-e5-small"
)

# we can optionally also add bm25 indexation
#retriever_bm25 = BM25Retriever(document_store=document_store)

document_store.update_embeddings(
    retriever=retriever_sbert, filters=None,
    update_existing_embeddings=True, batch_size=256
)

In [22]:
from haystack.nodes import SentenceTransformersRanker

ranker = SentenceTransformersRanker(model_name_or_path="corrius/cross-encoder-mmarco-mMiniLMv2-L12-H384-v1")  #"cross-encoder/ms-marco-MiniLM-L-12-v2"


config.json:   0%|          | 0.00/891 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/471M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


tokenizer_config.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

In [36]:
from haystack.nodes import PromptNode, PromptTemplate
from google.colab import userdata

lfqa_prompt = PromptTemplate(
    #name="lfqa",
    prompt="""Synthesize a comprehensive answer from the following text for the given question.
              \n\n Question: {query}
              \n\n Related text: {join(documents)} \n\n Question: {query} \n\n Answer:""",
)

prompt_node = PromptNode(
    model_name_or_path = "mistralai/Mistral-7B-Instruct-v0.1",
    api_key = userdata.get('hf_api_key'),
    default_prompt_template = lfqa_prompt  #"deepset/question-generation"
)
#prompt_node = PromptNode(model_name_or_path="google/flan-t5-base", default_prompt_template=lfqa_prompt)



In [37]:
from haystack import Pipeline

querying_generation_pipeline = Pipeline()
querying_generation_pipeline.add_node(component=retriever_sbert, name="Retriever", inputs=["Query"])
querying_generation_pipeline.add_node(component=ranker, name="Ranker", inputs=["Retriever"])
querying_generation_pipeline.add_node(component=prompt_node, name="PromptNode", inputs=["Ranker"])


In [40]:
from pprint import pprint
from haystack.utils import print_answers

#query = "What are the most important aspects of AI regulation for civil society?"
#query = "What does business think about AI regulation?"
#query = "What provisions should not be included in AI regulation according to business stakeholders?"
query = "What does Microsoft think of the AI regulation?"

prediction = querying_generation_pipeline.run(
    query=query,
    params={
        "Retriever": {"top_k": 10},
        "Ranker": {"top_k": 2}
    }
)

pprint(prediction["query"])
pprint(prediction["documents"])
pprint(prediction["results"])
#pprint(prediction["answers"][0].meta["prompt"])
#print_answers(prediction, details="minimum")  # Choose from `minimum`, `medium` and `all`

Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 40.12 Batches/s]


'What does Microsoft think of the AI regulation?'
[<Document: {'content': 'Passage meta data: stakeholder_name: Microsoft Cooperation, stakeholder_type: Company/Business organisation, stakeholder_scope: nan, stakeholder_size: Large (250 or more), stakeholder_country: United States, document_date: 13-06-2020 11:59, language: English, \n\nPassage: “\n“\nAI isn’t just another piece of technology.\nIt could be one of the world’s most\nfundamental pieces of technology the\nhuman race has ever created.\nSatya Nadella, CEO, Microsoft\x0c3\nWe thus offer these comments not because we oppose AI regulation, but rather to aid the Commission\nin the difficult task of assessing where regulation might be most appropriate and how best to\nregulate consistent with European values. As elaborated in Part I, our principal suggestions on the\nEU’s proposed AI regulatory framework are as follows:\nPromote trustworthy AI through governance and tools.\nRegulatory frameworks for AI should incentivize relevant

## Generative QA with generative LLMs (closed APIs)

In [None]:
with open('/content/drive/My Drive/freelance/demos/key_openai.txt', 'r') as file:
    openai_key = file.read().replace('\n', '')

In [None]:
from haystack.nodes import BM25Retriever, EmbeddingRetriever

retriever_bm25 = BM25Retriever(document_store=document_store)

# the texts are in multiple languages. we therefore use a multilingual embedding model
# the best place to find the latest embedding models is the MTEB leaderboard
# https://huggingface.co/spaces/mteb/leaderboard
retriever_sbert = EmbeddingRetriever(
    document_store=document_store, embedding_model="intfloat/multilingual-e5-small"
)

document_store.update_embeddings(
    retriever=retriever_sbert, filters=None,
    update_existing_embeddings=True, batch_size=256
)



In [None]:
# optional: build a multiretriever which combines two different retrievers
# to increase performance
from haystack.nodes import BaseRetriever
from typing import List
from haystack import Document

class MultiRetriever(BaseRetriever):
    def __init__(self, retrievers: List[BaseRetriever]):
        self.retrievers = retrievers

    def retrieve(self, query: str, filters: dict = None, top_k: int = 10, index: str = None, **kwargs) -> List[Document]:
        # Use each retriever to retrieve documents
        all_results = []
        seen_ids = set()
        for i, retriever in enumerate(self.retrievers):
            results = retriever.retrieve(query, filters, top_k, index, **kwargs)
            for doc in results:
                if doc.id not in seen_ids:
                    all_results.append(doc)
                    seen_ids.add(doc.id)

        return all_results

    def retrieve_batch(self, query: List[str], filters: dict = None, top_k: int = 10, index: str = None, **kwargs) -> List[List[Document]]:
        # Use each retriever to retrieve documents
        all_results = []
        seen_ids = set()
        for retriever in self.retrievers:
            results = retriever.retrieve_batch(query, filters, top_k, index, **kwargs)
            for docs in results:
                unique_docs = []
                for doc in docs:
                    if doc.id not in seen_ids:
                        unique_docs.append(doc)
                        seen_ids.add(doc.id)
                all_results.append(unique_docs)

        return all_results

multi_retriever = MultiRetriever([retriever_bm25, retriever_sbert])

In [None]:
from haystack.nodes import SentenceTransformersRanker

ranker = SentenceTransformersRanker(model_name_or_path="corrius/cross-encoder-mmarco-mMiniLMv2-L12-H384-v1")  #"cross-encoder/ms-marco-MiniLM-L-12-v2"


config.json:   0%|          | 0.00/891 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/471M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


tokenizer_config.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

In [None]:
from haystack.nodes import PromptNode, PromptTemplate, AnswerParser

# prompt hub https://haystack.deepset.ai/blog/share-and-use-prompt-with-prompthub
# join function https://github.com/deepset-ai/haystack/blob/8920fd693965e9011084c87cee9afd565fdcecbf/haystack/nodes/prompt/shapers.py#L8C1-L8C1
prompt_template = PromptTemplate(
    """Here is a list of passages that may or may not be related to a user query. Your task is to answer the query only taking into account the information in the passages. List of passages:
    \n{join(documents, delimiter="\n\n")}
    \n\nQuery: {query}
    \nAnswer:""",
    output_parser=AnswerParser()
)
#prompt_template = PromptTemplate("deepset/question-answering", output_parser=AnswerParser())

prompt_node = PromptNode(
    model_name_or_path="gpt-3.5-turbo", api_key=openai_key,
    default_prompt_template=prompt_template, max_length=100
)


In [None]:
from haystack import Pipeline

querying_generation_pipeline = Pipeline()
querying_generation_pipeline.add_node(component=multi_retriever, name="Retriever", inputs=["Query"])
querying_generation_pipeline.add_node(component=ranker, name="Ranker", inputs=["Retriever"])
querying_generation_pipeline.add_node(component=prompt_node, name="PromptNode", inputs=["Ranker"])


In [None]:
from pprint import pprint
from haystack.utils import print_answers

query = "What are the most important aspects of AI regulation for civil society?"
#query = "What does business think about AI regulation?"
#query = "What provisions should not be included in AI regulation according to business stakeholders?"
query = "What does Microsoft think of the AI regulation?"

prediction = querying_generation_pipeline.run(
    query=query,
    params={
        "Retriever": {"top_k": 10},
        "Ranker": {"top_k": 5}
    }
)

pprint(prediction["query"])
pprint(prediction["answers"][0].answer)
pprint(prediction["documents"])
#pprint(prediction["answers"][0].meta["prompt"])
#print_answers(prediction, details="minimum")  # Choose from `minimum`, `medium` and `all`

## Passage Retriever Pipeline without generation

In [None]:
from haystack.nodes import BM25Retriever, EmbeddingRetriever, SentenceTransformersRanker

retriever_bm25 = BM25Retriever(document_store=document_store)

retriever_sbert = EmbeddingRetriever(
    document_store=document_store, embedding_model="intfloat/multilingual-e5-small"  #"sentence-transformers/all-MiniLM-L6-v2"  #"sentence-transformers/multi-qa-mpnet-base-dot-v1"
)

document_store.update_embeddings(retriever_sbert)


In [None]:
ranker = SentenceTransformersRanker(model_name_or_path="cross-encoder/mmarco-mMiniLMv2-L12-H384-v1")  #"cross-encoder/ms-marco-MiniLM-L-12-v2"


In [None]:
from haystack import Pipeline

retriever_pipeline = Pipeline()
retriever_pipeline.add_node(component=retriever_sbert, name="Retriever", inputs=["Query"])
retriever_pipeline.add_node(component=ranker, name="Ranker", inputs=["Retriever"])


In [None]:
from pprint import pprint
from haystack.utils import print_answers, print_documents

query = "Which aspects of AI regulation are most important for businesses?"
#query = "Welche Aspekte der geplanten AI Regulierung sind für Unternehmen besonders wichtig?"

prediction = retriever_pipeline.run(
    query=query,
    params={"Retriever": {"top_k": 20}, "Ranker": {"top_k": 5}}
)

#pprint(prediction["query"])
#pprint(prediction["documents"])
#for key_document, value_document in prediction.items():
#    pprint(value_document["content"])
#print_answers(prediction, details="minimum")  # Choose from `minimum`, `medium` and `all`
print_documents(prediction)  # Choose from `minimum`, `medium` and `all`



## Extractive Q&A pipeline

In [None]:
from haystack.nodes import BM25Retriever, EmbeddingRetriever

# example for BM25/TFIDF document representation: https://towardsdatascience.com/understanding-feature-engineering-part-3-traditional-methods-for-text-data-f6f7d70acd41
retriever = BM25Retriever(document_store=document_store)

retriever_sbert = EmbeddingRetriever(
    document_store=document_store, embedding_model="intfloat/multilingual-e5-small"  #"sentence-transformers/all-MiniLM-L6-v2"  #"sentence-transformers/multi-qa-mpnet-base-dot-v1"
)
document_store.update_embeddings(retriever_sbert)

Downloading (…)lve/main/config.json:   0%|          | 0.00/655 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/471M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

Downloading (…)tencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/167 [00:00<?, ?B/s]

Updating Embedding:   0%|          | 0/12483 [00:00<?, ? docs/s]

Inferencing Samples:   0%|          | 0/313 [00:00<?, ? Batches/s]

Inferencing Samples:   0%|          | 0/78 [00:00<?, ? Batches/s]

In [None]:
from haystack.nodes import FARMReader

reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=True)

Downloading (…)lve/main/config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/496M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/79.0 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

In [None]:
from haystack import Pipeline

querying_pipeline = Pipeline()
querying_pipeline.add_node(component=retriever_sbert, name="Retriever", inputs=["Query"])
querying_pipeline.add_node(component=reader, name="Reader", inputs=["Retriever"])


In [None]:
from pprint import pprint
from haystack.utils import print_answers

query = "What does business think about AI regulation?"

prediction = querying_pipeline.run(
    query=query,
    params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}}
)

pprint(prediction["query"])
pprint(prediction["documents"])
pprint(prediction["answers"])
#print_answers(prediction, details="minimum")  # Choose from `minimum`, `medium` and `all`

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

'What does business think about AI regulation?'
[<Document: {'content': 'Policymakers should take care to ensure that the cost of AI regulation is not so high\nthat it prevents these products from reaching the market.\nDifferentiate types of harm.\nRisks to safety and risks to fundamental rights are inherently distinct; any AI regulatory regime\nshould recognize this distinction, both in the requirements it imposes and the compliance regime it\nadopts. Both are important to address.\nClarify roles of regulated actors.\nAI regulation should be clear on which requirements fall on which regulated actors (developers,\ndeployers, etc.) and should impose responsibilities on the actor that can most efficiently and\neffectively comply with them.\nLeverage existing laws and regulatory frameworks.\nAbsent clear gaps, policymakers should rely on existing laws to the extent possible rather than adopt\nwholly new regulatory frameworks and obligations on top of them. Where new laws are needed then\n