In [1]:
%pip install -qU langgraph langchain langchain_openai langchain_experimental pymupdf tiktoken python-dotenv

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-huggingface 0.0.3 requires langchain-core<0.3,>=0.1.52, but you have langchain-core 0.3.1 which is incompatible.
langchain-qdrant 0.1.3 requires langchain-core<0.3,>=0.1.52, but you have langchain-core 0.3.1 which is incompatible.
Note: you may need to restart the kernel to use updated packages.


In [15]:
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
# from helper.file_loader import FileLoader

# Load environment variables via python-dotenv before reading the keys.
load_dotenv()

# Plain lookups: a variable that is not set yields None rather than raising.
open_ai_key = os.environ.get("OPENAI_API_KEY")
langchain_key = os.environ.get("LANGCHAIN_API_KEY")

# Chat model handle configured with the key read above.
llm = ChatOpenAI(
    model="gpt-4o",
    api_key=open_ai_key,
)

In [16]:
# Import the loader from langchain_community (already used elsewhere in this
# notebook); the `langchain.document_loaders` path is a deprecated re-export.
from langchain_community.document_loaders import PyMuPDFLoader

# Source 1: "Blueprint for an AI Bill of Rights" PDF, fetched by URL.
loader1 = PyMuPDFLoader("https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf")
document1 = loader1.load()

# Source 2: NIST AI 600-1 PDF, fetched by URL.
loader2 = PyMuPDFLoader("https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf")
document2 = loader2.load()

# Single combined corpus: documents from source 1 followed by source 2.
document = document1 + document2

In [17]:
import tiktoken
from langchain.text_splitter import RecursiveCharacterTextSplitter

def tiktoken_len(text):
    """Return the number of tokens in ``text``.

    The text is tokenised with the tiktoken encoding that
    ``tiktoken.encoding_for_model`` resolves for "gpt-4o-mini", and the
    length of the resulting token sequence is returned.
    """
    encoder = tiktoken.encoding_for_model("gpt-4o-mini")
    return len(encoder.encode(text))

# Chunk sizes are measured with tiktoken_len (tokens, not characters):
# at most 300 per chunk, with no overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    length_function=tiktoken_len,
    chunk_overlap=0,
    chunk_size=300,
)

# Split the combined document list into chunks for embedding.
split_chunks = text_splitter.split_documents(document)

In [19]:
from langchain_openai.embeddings import OpenAIEmbeddings

# Embedding model handle; passed to the vector store when the index is built.
embedding_model = OpenAIEmbeddings(
    model="text-embedding-3-small",
)

In [20]:
from langchain_community.vectorstores import Qdrant

# Build an in-memory Qdrant collection from the chunks and the embedding model.
# NOTE(review): the collection name looks carried over from another project;
# it is kept verbatim here — confirm whether it should be renamed.
qdrant_vectorstore = Qdrant.from_documents(
    split_chunks,
    embedding_model,
    collection_name="extending_context_window_llama_3",
    location=":memory:",
)

In [21]:
# Expose the vector store through its retriever interface. No arguments are
# passed, so whatever defaults as_retriever() applies are in effect.
qdrant_retriever = qdrant_vectorstore.as_retriever()

In [33]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt template text for the RAG chain. It has two placeholders, `context`
# and `question`; the instruction paragraph comes after both of them.
RAG_PROMPT = """
CONTEXT:
{context}

QUERY:
{question}

You are an expert of 30 years of experience that can answer only questions related to the context provided. You will submit comprehensive responses and if you can't answer the question, say you don't have enough context at the moment to answer the question.
"""

# Wrap the raw template string as a ChatPromptTemplate for use in the chain.
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

In [45]:
from langchain_openai import ChatOpenAI

# Chat model used as the generation step of the RAG chain.
# NOTE(review): an earlier cell already builds `llm` with the same "gpt-4o"
# model name; the two could presumably be consolidated — confirm that no
# other cell relies on both names before doing so.
openai_chat_model = ChatOpenAI(model="gpt-4o")

In [46]:
from operator import itemgetter
# Import the parser from langchain_core (already used for the prompt class);
# the `langchain.schema.output_parser` path is a deprecated re-export.
from langchain_core.output_parsers import StrOutputParser

# RAG pipeline, given an input dict with a "question" key:
#   1. "context"  <- the question is sent to the retriever
#      "question" <- the question is passed through unchanged
#   2. both values fill the prompt template
#   3. the chat model generates a response
#   4. the response is reduced to a plain string
rag_chain = (
    {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
    | rag_prompt | openai_chat_model | StrOutputParser()
)

In [47]:
# Smoke-test the chain with one sample question and print the generated answer.
sample_question = "can you tell me what the document says about Human-AI Configuration"
sample_answer = rag_chain.invoke({"question": sample_question})
print(sample_answer)

The document discusses several aspects of Human-AI Configuration, highlighting its significance in AI deployment and operational settings. Here are the key points:

1. **Human-AI Configuration Tasks**: The document lists various tasks associated with Human-AI Configuration, including AI Deployment, Domain Experts, End-Users, Operation and Monitoring, and TEVV (Testing, Evaluation, Verification, and Validation) (page 42).

2. **Measures and Actions**:
   - **Monitoring and Tracking**: It suggests implementing systems to continually monitor and track the outcomes of human-GAI (Generative AI) configurations for future refinement and improvements (page 28).
   - **Involvement in Prototyping and Testing**: It emphasizes involving end-users, practitioners, and operators in the prototyping and testing activities of GAI systems. These tests should cover various scenarios, such as crisis situations or ethically sensitive contexts (page 28).

3. **Privacy and Risk Minimization**: Techniques such