In [1]:
import dotenv
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the OpenAI API key used by the
# embedding and chat-model cells below — confirm against the .env file.
dotenv.load_dotenv()

True

In [27]:
from langchain.document_loaders import TextLoader

# Debugging aid: a decorator that prints a function's name just before calling it.

def log_function(func):
    """Decorator that prints the wrapped function's name before each call.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that prints ``Calling function: <name>`` and then returns
        whatever ``func`` returns for the same positional and keyword
        arguments.
    """
    import functools  # local import so this cell does not rely on another cell

    # functools.wraps copies __name__, __doc__, etc. onto the wrapper, so
    # decorated functions do not all report themselves as "wrapper".
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        print(f"Calling function: {func.__name__}")
        return func(*args, **kwargs)
    return wrapper

# Load document with LangChain's TextLoader

@log_function
def load_doc():
    """Read the executive-order text file with LangChain's ``TextLoader``.

    Returns:
        Whatever ``TextLoader.load()`` returns for
        ``./AI_executive_order_oct_2023.txt``.
    """
    return TextLoader('./AI_executive_order_oct_2023.txt').load()

# Run the loader once; the chunking step reads this module-level name.
documents = load_doc()

# Chunk the document

@log_function
def chunking(docs=None, chunk_size=500, chunk_overlap=50):
    """Split documents into chunks with ``CharacterTextSplitter``.

    Args:
        docs: Documents to split. When omitted, the module-level
            ``documents`` is used, so a bare ``chunking()`` call keeps
            working as before.
        chunk_size: Forwarded to ``CharacterTextSplitter(chunk_size=...)``.
        chunk_overlap: Forwarded to
            ``CharacterTextSplitter(chunk_overlap=...)``.

    Returns:
        The result of ``CharacterTextSplitter.split_documents`` for ``docs``.
    """
    from langchain.text_splitter import CharacterTextSplitter

    if docs is None:
        # Backward-compatible fallback to the notebook-level variable.
        docs = documents
    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(docs)

# Pass the input explicitly so the cell's data dependency is visible.
chunks = chunking(documents)

Calling function: load_doc


AttributeError: 'TextLoader' object has no attribute 'loadX'

In [22]:
# Create embeddings and vector store

from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Weaviate
import weaviate
from weaviate.embedded import EmbeddedOptions

@log_function
def embed_and_vector():
    """Embed the module-level ``chunks`` and store them in Weaviate.

    Returns:
        The vector store produced by ``Weaviate.from_documents``.
    """
    # Client configured with EmbeddedOptions().
    weaviate_client = weaviate.Client(embedded_options=EmbeddedOptions())

    return Weaviate.from_documents(
        client=weaviate_client,
        documents=chunks,
        embedding=OpenAIEmbeddings(),
        by_text=False,
    )

vectorstore = embed_and_vector()

Calling function: embed_and_vector
embedded weaviate is already listening on port 6666


{"action":"hnsw_vector_cache_prefill","count":1000,"index_id":"langchain_e50711c8f7814d73bfa62fd902e164be_eQT15VTliFkA","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2023-12-20T10:19:56-05:00","took":34996}


In [23]:
# Retriever
# Expose the vector store through its retriever interface; the RAG chain
# uses this object to supply the prompt's {context}.

retriever = vectorstore.as_retriever()

# Augment

from langchain.prompts import ChatPromptTemplate

# Prompt text for the RAG chain; it has two placeholders, {question} and {context}.
template = """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you do not know.
Use three sentences maximum and keep the answer concise.
Question: {question}
Context: {context}
Answer:
"""
# Build a chat prompt from the template string.
prompt = ChatPromptTemplate.from_template(template)

# Print the prompt object's repr to check which input variables it has.
print(prompt)

input_variables=['context', 'question'] output_parser=None partial_variables={} messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], output_parser=None, partial_variables={}, template="You are an assistant for question-answering tasks.\nUse the following pieces of retrieved context to answer the question.\nIf you don't know the answer, just say that you do not know.\nUse three setences maximum and keep the answer concise.\nQuestion: {question}\nContext: {context}\nAnswer:\n", template_format='f-string', validate_template=True), additional_kwargs={})]


In [24]:
# Generate

from langchain.chat_models import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

# Chat model used for generation.
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")

# Pipeline: the incoming question goes to the retriever (as "context") and is
# passed through unchanged (as "question"); the resulting dict goes to the
# prompt, then the chat model, then a parser that returns a plain string.
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt | llm | StrOutputParser()
)

In [25]:
# Query[0]:
# The invoke call is the cell's last expression, so the notebook displays
# the chain's string answer directly.

query = "How will the Secretary of Homeland Security create an AI Safety Board?"
rag_chain.invoke(query)

# Output: "The Secretary of Homeland Security will establish an Artificial Intelligence
# Safety and Security Board as an advisory committee. The board will include AI experts
# from the private sector, academia, and government. Its purpose is to provide advice,
# information, or recommendations for improving security, resilience, and incident response
# related to AI usage in critical infrastructure."

'The Secretary of Homeland Security will establish an Artificial Intelligence Safety and Security Board as an advisory committee. The board will include AI experts from the private sector, academia, and government. Its purpose is to provide advice, information, or recommendations for improving security, resilience, and incident response related to AI usage in critical infrastructure.'

In [None]:
# Query[1]:
# Same chain, different question; rebinding `query` replaces the previous one.

query = "What does the AI Executive Order say about watermarking?"
rag_chain.invoke(query)

# Output: "The AI Executive Order defines watermarking as the act of
# embedding difficult-to-remove information into AI outputs for the
# purpose of verifying authenticity, identity, modifications, or conveyance."

In [None]:
# Query[2]:
# Checks retrieval of a specific numeric detail from the document.

query = "What is the theoretical maximum computing capacity cutoff for the technical conditions for AI models?"
rag_chain.invoke(query)

# Output: "The theoretical maximum computing capacity cutoff for the technical conditions for AI models is 1020
# integer or floating-point operations per second for training AI."
#
# Note: "1020" here means 10^20. The answer matches the source text file, which omits the '^' sign.

In [None]:
# Query[3]:
# Off-topic question: exercises the prompt's instruction to say
# "do not know" when the answer is not available.

query = "What is the easiest way to time travel?"
rag_chain.invoke(query)

# Output: "I do not know the answer."