In [1]:
import os
from bs4 import SoupStrainer, BeautifulSoup
import json
from langchain import hub
from langchain_chroma import Chroma
from langchain.docstore.document import Document
from langchain_community.document_loaders import WebBaseLoader,  WikipediaLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter

from dotenv import load_dotenv
load_dotenv()

os.environ["OPENAI_API_KEY"]
print("imported")

USER_AGENT environment variable not set, consider setting it to identify your requests.


imported


In [2]:
with open('documents.json', 'r') as f:
    loaded_docs = json.load(f)

print(loaded_docs)
json_docs = []

for doc in loaded_docs:
    text = doc["content"]
    metadata = {"title": doc["title"]}
    json_docs.extend([Document(metadata=metadata, page_content=text)])

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1300, chunk_overlap=200, add_start_index=True)
json_documents = text_splitter.split_documents(json_docs)

print(len(json_documents))
print(len(json_documents[0].page_content))
print(json_documents[0].page_content)

[{'title': 'Six (musical)', 'content': 'Six (stylised in all caps) is a British musical comedy in the style of a pop concert. Its music, book, and lyrics were written by Toby Marlow and Lucy Moss. It is a modern retelling of the lives of the six wives of Henry VIII, presented in the form of a singing competition. In the show, the wives (Catherine of Aragon, Anne Boleyn, Jane Seymour, Anna of Cleves, Katherine Howard, and Catherine Parr) take turns telling their story to determine who suffered the most from their shared husband, but ultimately seek to reclaim their individual identities and rewrite their stories.\nThe musical premiered at the Edinburgh Festival Fringe in 2017, where it was performed by students from Cambridge University. Six premiered in the West End at the Arts Theatre in January 2019, and has since embarked on a UK tour. It premiered in North America in May 2019 and on Broadway in February 2020. Following a hiatus due to the COVID-19 pandemic, it officially opened at 

In [3]:
class SingletonChroma:
    _instance = None
    
    @staticmethod
    def get_instance(documents=None):
        if SingletonChroma._instance is None:
            if documents is None:
                raise ValueError("Documents must be provided for the first initialization.")
            SingletonChroma(documents)
        return SingletonChroma._instance

    def __init__(self, documents):
        if SingletonChroma._instance is not None:
            raise Exception("This class is a singleton!")
        SingletonChroma._instance = Chroma.from_documents(
            documents=documents,
            embedding=OpenAIEmbeddings(model="text-embedding-ada-002")
        )

    
singleton_chroma = SingletonChroma.get_instance(documents=json_documents)
retriever = singleton_chroma.as_retriever(search_type="similarity", search_kwargs={"k": 6})

In [5]:
singleton_chroma = SingletonChroma.get_instance(documents=json_documents)
retriever = singleton_chroma.as_retriever(search_type="similarity", search_kwargs={"k": 6})

In [6]:
retrieved_docs = retriever.invoke("Anne Boleyn's relationship with Henry VIII")
print(retrieved_docs[0].metadata)
print(retrieved_docs[0].page_content)
print(retrieved_docs[1].metadata)
print(retrieved_docs[1].page_content)
print(retrieved_docs[2].metadata)
print(retrieved_docs[2].page_content)
print(retrieved_docs[3].metadata)
print(retrieved_docs[3].page_content)
print(retrieved_docs[4].metadata)
print(retrieved_docs[4].page_content)
print(retrieved_docs[5].metadata)
print(retrieved_docs[5].page_content)

{'start_index': 19321, 'title': 'Henry VIII'}
=== Marriage to Anne Boleyn ===
{'start_index': 0, 'title': 'Anne Boleyn'}
Anne Boleyn (; c. 1501 or 1507 – 19 May 1536) was Queen of England from 1533 to 1536, as the second wife of King Henry VIII. The circumstances of her marriage and execution, by beheading for treason, made her a key figure in the political and religious upheaval that marked the start of the English Reformation.
Anne was the daughter of Thomas Boleyn (later Earl of Wiltshire), and his wife, Elizabeth Howard, and was educated in the Netherlands and France. Anne returned to England in early 1522, to marry her cousin James Butler, 9th Earl of Ormond; the marriage plans were broken off, and instead, she secured a post at court as maid of honour to Henry VIII's wife, Catherine of Aragon. Early in 1523, Anne was secretly betrothed to Henry Percy, son of Henry Percy, 5th Earl of Northumberland, but the betrothal was broken off when the Earl refused to support it. Cardinal Tho

In [7]:
retrieved_docs = retriever.invoke("Who was Anne Boleyn?")
print(retrieved_docs[0].metadata)
print(retrieved_docs[0].page_content)
print(retrieved_docs[1].metadata)
print(retrieved_docs[1].page_content)
print(retrieved_docs[2].metadata)
print(retrieved_docs[2].page_content)
print(retrieved_docs[3].metadata)
print(retrieved_docs[3].page_content)
print(retrieved_docs[4].metadata)
print(retrieved_docs[4].page_content)
print(retrieved_docs[5].metadata)
print(retrieved_docs[5].page_content)

{'start_index': 0, 'title': 'Anne Boleyn'}
Anne Boleyn (; c. 1501 or 1507 – 19 May 1536) was Queen of England from 1533 to 1536, as the second wife of King Henry VIII. The circumstances of her marriage and execution, by beheading for treason, made her a key figure in the political and religious upheaval that marked the start of the English Reformation.
Anne was the daughter of Thomas Boleyn (later Earl of Wiltshire), and his wife, Elizabeth Howard, and was educated in the Netherlands and France. Anne returned to England in early 1522, to marry her cousin James Butler, 9th Earl of Ormond; the marriage plans were broken off, and instead, she secured a post at court as maid of honour to Henry VIII's wife, Catherine of Aragon. Early in 1523, Anne was secretly betrothed to Henry Percy, son of Henry Percy, 5th Earl of Northumberland, but the betrothal was broken off when the Earl refused to support it. Cardinal Thomas Wolsey refused the match in January 1524.
{'start_index': 19321, 'title': 

In [None]:
llm = ChatOpenAI(model_name="gpt-4o-mini")
prompt = "Hello, World!"

response = llm(messages=[{"role": "user", "content": prompt}], temperature=0)

print(response.content)

In [None]:
llm = ChatOpenAI(model_name="gpt-4o-mini")
prompt = hub.pull("rlm/rag-prompt")

example_messages = prompt.invoke(
    {"context": "filler context", "question": "filler question"}
).to_messages()

print(example_messages[0].content)

template = """
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: filler question 
Context: filler context 
Answer:
"""


'''
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)
rag_chain.invoke("What is Task Decomposition?")