In [1]:
import os
# import dotenv
from dotenv import load_dotenv
# Read variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Fail fast with a readable message when the key is absent: writing the None
# returned by os.getenv() into os.environ would otherwise raise an opaque
# "str expected, not NoneType" TypeError. An empty value is rejected as well,
# since it cannot authenticate any later OpenAI call.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key

In [2]:
# Data Ingestion 

from langchain_community.document_loaders import PyMuPDFLoader
from langchain_openai import ChatOpenAI
# Read the source document into a list of Document objects (the recorded
# output shows one per page). extract_images=True asks the loader to handle
# embedded images too; the loader's `mode` option is left at its default.
source_path = '../Data/EU AI Act Doc (1) (3).docx'
loader = PyMuPDFLoader(source_path, extract_images=True)
text_docs = loader.load()

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Print the full list of loaded Document objects (metadata and page text of every page).
print(text_docs)

[Document(metadata={'producer': '', 'creator': '', 'creationdate': '', 'source': '../Data/EU AI Act Doc (1) (3).docx', 'file_path': '../Data/EU AI Act Doc (1) (3).docx', 'total_pages': 10, 'format': 'Office document', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '', 'trapped': '', 'encryption': '', 'modDate': '', 'creationDate': '', 'page': 0}, page_content='High-level summary of the AI Act\n27 Feb, 2024\nUpdated on 30 May in accordance with the Corrigendum version of\nthe AI Act.\nIn this article we provide you with a high-level summary of the AI\nAct, selecting the parts which are most likely to be relevant to you\nregardless of who you are. We provide links to the original\ndocument where relevant so that you can always reference the Act\ntext.\nTo explore the full text of the AI Act yourself, use our\xa0AI Act\nExplorer. Alternatively, if you want to know which parts of the text\nare most relevant to you, use our\xa0Compliance Checker.\nFour-point summary\nT

In [4]:
# Check each loaded page for an 'images' metadata entry and report how many
# images it lists.
# NOTE(review): the metadata printed for these documents contains no 'images'
# key, so this check may never match for this loader -- confirm where the
# loader actually places its image output.
pages_with_images = 0
for doc in text_docs:
    if 'images' not in doc.metadata:
        continue
    images = doc.metadata['images']
    pages_with_images += 1
    print(f"Extracted {len(images)} images on this page.")

# Say so explicitly when nothing matched, rather than leaving the cell output
# empty and the reader unsure whether the check ran at all.
if pages_with_images == 0:
    print("No 'images' entry found in the metadata of any page.")

In [5]:
# Text Splitting

from langchain_text_splitters import RecursiveCharacterTextSplitter
# Cut the loaded pages into overlapping chunks, then preview the first five.
CHUNK_SIZE = 1000     # upper bound on the length of each chunk
CHUNK_OVERLAP = 200   # amount of text shared between neighbouring chunks

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
docs = text_splitter.split_documents(text_docs)
docs[:5]

[Document(metadata={'producer': '', 'creator': '', 'creationdate': '', 'source': '../Data/EU AI Act Doc (1) (3).docx', 'file_path': '../Data/EU AI Act Doc (1) (3).docx', 'total_pages': 10, 'format': 'Office document', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '', 'trapped': '', 'encryption': '', 'modDate': '', 'creationDate': '', 'page': 0}, page_content='High-level summary of the AI Act\n27 Feb, 2024\nUpdated on 30 May in accordance with the Corrigendum version of\nthe AI Act.\nIn this article we provide you with a high-level summary of the AI\nAct, selecting the parts which are most likely to be relevant to you\nregardless of who you are. We provide links to the original\ndocument where relevant so that you can always reference the Act\ntext.\nTo explore the full text of the AI Act yourself, use our\xa0AI Act\nExplorer. Alternatively, if you want to know which parts of the text\nare most relevant to you, use our\xa0Compliance Checker.\nFour-point summary\nT

In [6]:
# Vector Embeddings and Vectorstore

from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

# Embed every chunk with the OpenAI embedding model and index the resulting
# vectors in a Chroma vector store (no persist directory is passed).
embedding_model = OpenAIEmbeddings()
db = Chroma.from_documents(documents=docs, embedding=embedding_model)

In [7]:
# Sanity-check the vector store: search it with a sample question and print
# the text of the first chunk that comes back.
query = "What is the four point summary?"
retireved_results = db.similarity_search(query)
first_hit = retireved_results[0]
print(first_hit.page_content)

High-level summary of the AI Act
27 Feb, 2024
Updated on 30 May in accordance with the Corrigendum version of
the AI Act.
In this article we provide you with a high-level summary of the AI
Act, selecting the parts which are most likely to be relevant to you
regardless of who you are. We provide links to the original
document where relevant so that you can always reference the Act
text.
To explore the full text of the AI Act yourself, use our AI Act
Explorer. Alternatively, if you want to know which parts of the text
are most relevant to you, use our Compliance Checker.
Four-point summary
The AI Act classifies AI according to its risk:
Unacceptable risk is prohibited (e.g. social scoring systems and
manipulative AI).
Most of the text addresses high-risk AI systems, which are
regulated.
A smaller section handles limited risk AI systems, subject to lighter
transparency obligations: developers and deployers must ensure
that end-users are aware that they are interacting with AI (chatbots


In [8]:
from langchain_openai import ChatOpenAI
# Chat model used to generate answers; printing it shows its resolved settings.
MODEL_NAME = "gpt-4o"
llm = ChatOpenAI(model=MODEL_NAME)
print(llm)

client=<openai.resources.chat.completions.completions.Completions object at 0x0000021DE6D97B50> async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x0000021DE7B3B690> root_client=<openai.OpenAI object at 0x0000021DE6E0C890> root_async_client=<openai.AsyncOpenAI object at 0x0000021DE6A6E290> model_name='gpt-4o' model_kwargs={} openai_api_key=SecretStr('**********') stream_usage=True


In [9]:
# Design ChatPrompt Template
from langchain_core.prompts import ChatPromptTemplate
# Prompt for the answering step. {context} is filled with the retrieved
# chunks and {input} with the user's question (the keys used when the
# retrieval chain is invoked later in this notebook).
prompt = ChatPromptTemplate.from_template("""
            Answer the following question based only on the context. 
            Think step by step before providing a detailed answer. 
            I will tip you $1000 if the user finds the answer helpful.
            Context : {context}
            Question: {input}""")

In [10]:
# Wrap the vector store in a retriever with no extra search options, then
# display it to confirm its configuration.
retriever = db.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x0000021DE5757290>, search_kwargs={})

In [11]:
from langchain_classic.chains.combine_documents import create_stuff_documents_chain

# Chain that places the supplied documents into the prompt's context slot and
# sends the filled prompt to the chat model.
document_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt,
)

In [12]:
from langchain_classic.chains import create_retrieval_chain

# End-to-end chain: the retriever fetches chunks for the incoming question and
# hands them to the document chain to produce the answer.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [13]:
# Run the full retrieval + answer chain for one question.
question = "What is the four point summary?"
response = retrieval_chain.invoke({"input": question})

In [14]:
# Display the raw chain result (the recorded output shows 'input' and 'context' keys; 'answer' is printed in the next cell).
response

{'input': 'What is the four point summary?',
 'context': [Document(metadata={'moddate': '', 'creationDate': '', 'subject': '', 'producer': '', 'creator': '', 'encryption': '', 'author': '', 'file_path': '../Data/EU AI Act Doc (1) (3).docx', 'format': 'Office document', 'source': '../Data/EU AI Act Doc (1) (3).docx', 'modDate': '', 'title': '', 'creationdate': '', 'trapped': '', 'keywords': '', 'page': 0, 'total_pages': 10}, page_content='High-level summary of the AI Act\n27 Feb, 2024\nUpdated on 30 May in accordance with the Corrigendum version of\nthe AI Act.\nIn this article we provide you with a high-level summary of the AI\nAct, selecting the parts which are most likely to be relevant to you\nregardless of who you are. We provide links to the original\ndocument where relevant so that you can always reference the Act\ntext.\nTo explore the full text of the AI Act yourself, use our\xa0AI Act\nExplorer. Alternatively, if you want to know which parts of the text\nare most relevant to y

In [15]:
# Show only the generated answer text from the chain result.
answer_text = response['answer']
print(answer_text)

To determine the four-point summary based on the provided context regarding the AI Act, we'll break down the relevant information step by step:

1. **Classification of AI Based on Risk:**
   - The AI Act classifies AI systems according to their risk level.
   - Systems deemed as "unacceptable risk," such as those involving social scoring and manipulative AI, are prohibited.

2. **Regulation of High-risk AI Systems:**
   - The majority of the AI Act's text is dedicated to addressing high-risk AI systems.
   - These systems are subjected to strict regulations to ensure safety and compliance.

3. **Handling of Limited Risk AI Systems:**
   - AI systems with a limited risk are subject to lighter transparency obligations.
   - Developers and deployers must ensure end-users are aware they are interacting with AI, for instance in the case of chatbots.

4. **Systemic Risk Notification and Obligations for GPAI Models:**
   - GPAI models are considered to have systemic risks when their training 

In [16]:
# Probe the vector store with a second question and print the text of the
# second chunk returned (index 1).
query = "Requirements for providers of high-risk AI systems"
retireved_results = db.similarity_search(query)
second_hit = retireved_results[1]
print(second_hit.page_content)

previously completed human assessment without proper human
review; or
performs a preparatory task to an assessment relevant for the
purpose of the use cases listed in Annex III.
AI systems are always considered high-risk if it profiles individuals,
i.e. automated processing of personal data to assess various aspects
of a person’s life, such as work performance, economic situation,
health, preferences, interests, reliability, behaviour, location or
movement.
Providers whose AI system falls under the use cases in Annex III but
believes it is not high-risk must document such an assessment
before placing it on the market or putting it into service.
Requirements for providers of high-risk AI systems (Art. 8–17)
High risk AI providers must:
Establish a risk management system throughout the high risk AI
system’s lifecycle;
Conduct data governance, ensuring that training, validation and
testing datasets are relevant, sufficiently representative and, to the


In [17]:
# Display every chunk returned by the most recent similarity search.
retireved_results

[Document(metadata={'source': '../Data/EU AI Act Doc (1) (3).docx', 'producer': '', 'author': '', 'page': 3, 'creator': '', 'modDate': '', 'format': 'Office document', 'creationdate': '', 'moddate': '', 'file_path': '../Data/EU AI Act Doc (1) (3).docx', 'keywords': '', 'total_pages': 10, 'subject': '', 'trapped': '', 'encryption': '', 'creationDate': '', 'title': ''}, page_content='greater political influence than judicial authorities (Hacker, 2024).\nHigh risk AI systems (Chapter III)\nSome AI systems are considered ‘High risk’ under the AI Act.\nProviders of those systems will be subject to additional\nrequirements.\nClassification rules for high-risk AI systems (Art. 6)\nHigh risk AI systems are those:\nused as a safety component or a product covered by EU laws\nin\xa0Annex I\xa0AND\xa0required to undergo a third-party conformity\nassessment under those\xa0Annex I\xa0laws;\xa0OR\nthose under\xa0Annex III\xa0use cases (below), except if:\nthe AI system performs a narrow procedural ta