In [26]:
from langchain_groq import ChatGroq
from langchain.vectorstores import Pinecone as pc
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema.runnable import RunnableMap, RunnableLambda
from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec
from langchain.prompts import ChatPromptTemplate
import time

# Pull settings from a local .env file into the process environment
# (presumably the API keys the clients below need — confirm against .env).
load_dotenv()

# Chat model configuration, kept in one place so it is easy to tweak.
# temperature=0 is used for repeatable answers; token count and request
# timeout are left unset (None), and failed calls are retried up to twice.
_GROQ_SETTINGS = dict(
    model="llama3-8b-8192",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

# The LLM that answers the final, context-augmented question.
llm = ChatGroq(**_GROQ_SETTINGS)

# Initialize the Pinecone client. No arguments are passed here, so the
# credentials are expected to come from the environment (presumably loaded
# by load_dotenv() above — confirm the variable name against your .env).
pinecone = Pinecone()

index_name = "default-index"

# Start from a clean slate: drop any existing index with this exact name.
# `list_indexes().names()` yields plain name strings, so the membership test
# is an exact match across *all* indexes and is safe when there are none
# (indexing `[0]` on an empty listing would raise, and `in` on a single
# name string would be a substring match).
if index_name in pinecone.list_indexes().names():
    pinecone.delete_index(index_name)

    # Wait until the name is gone from the listing before re-creating it;
    # comparing against names (not index description objects) is what makes
    # this loop actually wait.
    while index_name in pinecone.list_indexes().names():
        time.sleep(2)

# dimension=1536 must match the size of the vectors upserted into the index.
pinecone.create_index(
    index_name,
    dimension=1536,
    spec=ServerlessSpec(cloud='aws', region='us-east-1')
)

# Connect to the newly created index
index = pinecone.Index(index_name)

# Embedding client shared by ingestion (document chunks) and retrieval
# (user queries) so both sides live in the same vector space.
embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002",
)

def _read_text(path):
    """Return the full contents of the UTF-8 text file at *path*.

    The file is opened in a ``with`` block so the handle is closed as soon
    as the text has been read, instead of being left to the garbage
    collector.
    """
    with open(path, "r", encoding="utf-8") as handle:
        return handle.read()


# (file name, author) for every diary file to ingest. The file name doubles
# as the document id, so each path is written only once.
_DOCUMENT_SOURCES = [
    (
        r"2024 A Diary Entry (That Probably Should Have Stayed in My Head) - VITU na VITU.txt",
        "VITU na VITU",
    ),
    (
        r"Diary entries from Anhedonia - Keyukemi Ubi.txt",
        "Keyukemi Ubi",
    ),
]

# List of (doc_id, content, metadata) tuples consumed by the ingestion loop.
documents = [
    (path, _read_text(path), {"author": author})
    for path, author in _DOCUMENT_SOURCES
]

# Splitter used during ingestion: chunks are capped at 1000 with an overlap
# of 50 between neighbouring chunks, so text cut at a boundary still appears
# intact in one of them.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=1000)

# Ingest every document: split it into chunks, embed the chunks, and store
# one vector per chunk in Pinecone under the id "<doc_id>_<chunk index>".
for doc_id, content, metadata in documents:
    chunks = text_splitter.split_text(content)
    if not chunks:
        # Nothing to embed or upsert for an empty document.
        continue

    # Embed all chunks of the document in one batched call instead of one
    # request per chunk.
    chunk_vectors = embeddings.embed_documents(chunks)

    # Each record carries the chunk text plus the document metadata and the
    # chunk's position, so retrieval can return content and author directly.
    records = [
        (
            f"{doc_id}_{i}",
            vector,
            {"content": chunk, **metadata, "chunk_index": i},
        )
        for i, (chunk, vector) in enumerate(zip(chunks, chunk_vectors))
    ]

    # Upsert into Pinecone in batches; batch_size keeps each request small
    # even for long documents.
    index.upsert(vectors=records, batch_size=100)

# 4.a. Query + Embedding
def _pass_through_query(payload):
    """Return the user's question text unchanged."""
    return payload["query"]


def _embed_user_query(payload):
    """Return the embedding vector for the user's question."""
    return embeddings.embed_query(payload["query"])


# Fans the incoming {"query": ...} dict out into the raw text and its
# embedding, producing {"raw_query": ..., "query_embedding": ...}.
create_query_and_embedding = RunnableMap({
    "raw_query": RunnableLambda(_pass_through_query),
    "query_embedding": RunnableLambda(_embed_user_query),
})

# 4.b. Pinecone Retrieval
def _retrieve_from_pinecone(payload):
    """Look up the 4 nearest stored chunks for the embedded query.

    Expects the "query_embedding" and "raw_query" keys produced by the
    previous step. Returns a dict with "retrieved_contents", a list of
    (author, content) tuples taken from each match's metadata, and "query",
    the original question text.
    """
    response = index.query(
        vector=payload["query_embedding"],
        top_k=4,
        include_metadata=True,
    )

    author_and_text = []
    for hit in response["matches"]:
        meta = hit["metadata"]
        author_and_text.append((meta["author"], meta["content"]))

    return {
        "retrieved_contents": author_and_text,
        "query": payload["raw_query"],
    }


retrieve_docs = RunnableLambda(_retrieve_from_pinecone)

# 4.c. Format Prompt
# The prompt uses real template placeholders that are filled from the
# {"retrieved_contents": ..., "query": ...} dict produced by the retrieval
# step. Values substituted this way are inserted verbatim, so curly braces
# inside a retrieved document or the user's question are no longer
# misread as template variables (which happened when the text was
# f-string-interpolated into the template string itself).
make_prompt_for_llm = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant with access to relevant documents."),
    (
        "human",
        "The following documents may help:\n\n{retrieved_contents}.\n\nNow, {query}"
    ),
])

# 4.d. Chain all steps using LCEL
# Pipeline order: embed the question -> fetch similar chunks -> build the
# prompt -> ask the LLM.
rag_chain = create_query_and_embedding.pipe(
    retrieve_docs,
    make_prompt_for_llm,
    llm,
)

# Run one sample question through the whole pipeline.
question = "What happened in 2024"
result = rag_chain.invoke({"query": question})

# Header line followed by the model's answer text.
print(">>> AI Response:", result.content, sep="\n")


>>> AI Response:
Based on the provided documents, here's a summary of what happened in 2024:

* The author had a difficult year, with memories being "fried and deleted" due to trauma.
* They had a singular goal to change jobs and earn more, which they achieved.
* They worked on a project with a friend, Aidovhioghie (Oghie), which is expected to be completed in Q2 2025.
* The author took "leaps of faith" in their career and personal life, which they are proud of.
* They experienced a range of emotions, including depression, stagnation, and happiness.
* They felt like they were in an "Emotional Olympics" and didn't win a medal, but they are still aiming to thrive despite the challenges.

Overall, 2024 was a year of significant change and growth for the author, with both positive and negative experiences.
