In [1]:
import os
from dotenv import load_dotenv, find_dotenv
from langchain_community.document_loaders import TextLoader
from langchain_groq import ChatGroq 
from langchain_core.prompts import ChatPromptTemplate

_ = load_dotenv(find_dotenv())

In [2]:
llamaChatModel = ChatGroq(
    model="llama3-70b-8192"
)   

In [3]:
loader = TextLoader("../data/state_of_the_union.txt")

loaded_data = loader.load()

print(loaded_data)

[Document(metadata={'source': '../data/state_of_the_union.txt'}, page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans.  \n\nLast year COVID-19 kept us apart. This year we are finally together again. \n\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n\nWith a duty to one another to the American people to the Constitution. \n\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \n\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \n\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \n\nHe met the Ukrainian people. \n\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their 

In [4]:
chat_template = ChatPromptTemplate.from_messages(
    [
        ("human","Answer this {question}, here is some extrat {context}")
    ]
)

messages = chat_template.format_messages(
    name="JFK",
    question="where was JFK born",
    context=loaded_data
)

In [17]:
# response = llamaChatModel.invoke(messages)

In [6]:
# print(response.content)

In [121]:
# metadatas = [{"chunk": 0}, {"chunk": 1}]

# documents = text_splitter.create_documents(
#     [loaded_data[0].page_content], 
#     metadatas=metadatas
# )



[Document(metadata={'chunk': 0}, page_content='Be good'),
 Document(metadata={'chunk': 0}, page_content='April 2008(This essay is derived from a talk at the 2008 Startup School.)About a month after we started Y Combinator we came up with the\nphrase that became our motto: Make something people want.  We\'ve\nlearned a lot since then, but if I were choosing now that\'s still\nthe one I\'d pick.Another thing we tell founders is not to worry too much about the\nbusiness model, at least at first.  Not because making money is\nunimportant, but because it\'s so much easier than building something\ngreat.A couple weeks ago I realized that if you put those two ideas\ntogether, you get something surprising.  Make something people want.\nDon\'t worry too much about making money.  What you\'ve got is a\ndescription of a charity.When you get an unexpected result like this, it could either be a\nbug or a new discovery.  Either businesses aren\'t supposed to be\nlike charities, and we\'ve proven by 

In [22]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [23]:
second_recursive_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
    separators=["\n\n", "\n", "(?<=\. )", " ", ""]
)

In [24]:
texts_revers = second_recursive_splitter.split_documents(loaded_data)

In [25]:
from langchain.embeddings import HuggingFaceEmbeddings

In [26]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from tqdm.autonotebook import tqdm, trange


In [123]:
# embedding_result = embeddings.embed_documents(texts_revers)

In [16]:
from langchain_chroma import Chroma

In [64]:
vector_db = Chroma.from_documents(texts_revers, embeddings)
retriever = vector_db.as_retriever(search_kwargs={"k": 1})

response = retriever.invoke("what did he say about ketanji brown jackson?")
response

[Document(metadata={'source': '../data/state_of_the_union.txt'}, page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.')]

In [37]:
# question = "What did the president say about the John Lewis Voting Rights Act?"

# response = vector_db.similarity_search(question)

# print(response[0].page_content)

In [27]:
from langchain_community.vectorstores import FAISS

In [28]:
vector_db = FAISS.from_documents(texts_revers, embeddings)

In [32]:
retriever = vector_db.as_retriever(search_kwargs={"k": 1})

In [33]:
response = retriever.invoke("what did he say about ketanji brown jackson?")

In [34]:
response

[Document(metadata={'source': '../data/state_of_the_union.txt'}, page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.')]

In [47]:
# print(response[0].page_content)

In [48]:
# print(response[1].page_content)

In [43]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableParallel
template = """Answer the question based only on the following context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

def format_docs(docs):
    return "\n\n".join([d.page_content for d in docs])

chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llamaChatModel
    | StrOutputParser()
)

response = chain.invoke("what did he say about ketanji brown jackson?")

In [41]:
print(response)

According to the context, he said that Ketanji Brown Jackson is "one of our nation's top legal minds, who will continue Justice Breyer's legacy of excellence" and that he nominated her to the United States Supreme Court 4 days ago.
