In [None]:
# Core LangChain packages.
# NOTE(review): every install below uses -U with no version pin, so a re-run may pull
# different releases than the ones this notebook was developed against — consider pinning.
# NOTE(review): pandas is imported later in the notebook but is not installed here;
# presumably it is already present in the kernel environment — confirm.
%pip install -qU langchain langchain_community

# Local vector store via Chroma
%pip install -qU langchain_chroma

# Local inference and embeddings via Ollama
%pip install -qU langchain_ollama

# Web Loader
%pip install -qU beautifulsoup4

In [67]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter used to cut the compiled notes into chunks before embedding:
# chunks are capped at 512 units of length with 20 units shared between neighbours
# (presumably characters, the splitter's default length measure — confirm).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=512,
)

Compile all entries into a single text

In [68]:
import pandas as pd

# Load the pre-processed entries; later cells read the full_date, mood and note columns.
entries = pd.read_csv(filepath_or_buffer="processed.csv")

In [69]:
# Columns available in processed.csv:
# full_date,date,weekday,time,mood,activities,note_title,note

# Text emitted for each entry. The leading newline and the four-space indentation
# (including the whitespace-only separator line and the trailing indent) are part of
# the compiled text, so they are spelled out explicitly here.
ENTRY_TEMPLATE = "\n    Entry on {full_date}\n    My mood {mood}\n    \n    Entry: {note}\n    "


def _as_text(value):
    """Return ``value`` as a string, mapping missing cells (NaN/None) to ''.

    Without this, an empty CSV cell would be written into the corpus as the
    literal text "nan" and later embedded as if it were part of a note.
    """
    return "" if pd.isna(value) else str(value)


# Build one text block per entry and join once at the end instead of growing a
# string with += inside iterrows(); only the three columns that are used are read.
entry_blocks = [
    ENTRY_TEMPLATE.format(
        full_date=_as_text(full_date).strip(),
        mood=_as_text(mood).strip(),
        note=_as_text(note),  # note text is kept verbatim (not stripped)
    )
    for full_date, mood, note in zip(
        entries["full_date"], entries["mood"], entries["note"]
    )
]

notes_compiled = "".join(entry_blocks)
    

In [70]:
# Replace the HTML line-break and bold markers found in the notes with a period,
# one tag at a time and in this fixed order.
# NOTE(review): other spellings such as "<br/>" are not covered — confirm they
# do not occur in the data.
for html_tag in ("<br>", "</br>", "<b>", "</b>"):
    notes_compiled = notes_compiled.replace(html_tag, ".")


In [71]:
# Dump the compiled text to disk for inspection.
# The encoding is set explicitly: the notes are free text and may contain non-ASCII
# characters, which would fail or be mis-encoded under a non-UTF-8 platform default.
with open("Output.txt", "w", encoding="utf-8") as text_file:
    text_file.write(notes_compiled)

In [72]:
# Cut the compiled notes into chunks using the splitter configured earlier.
all_splits = text_splitter.split_text(text=notes_compiled)

In [76]:
# Number of chunks produced by the splitter (each one is embedded in the next step).
len(all_splits)

5690

In [74]:
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings

# Embed every chunk with the Ollama "nomic-embed-text" model and index the vectors in Chroma.
# NOTE(review): no persist_directory is passed, so presumably the index is not saved
# to disk and must be rebuilt after a kernel restart — confirm.
local_embeddings = OllamaEmbeddings(model="nomic-embed-text")
vectorstore = Chroma.from_texts(all_splits, embedding=local_embeddings)

In [None]:
# Retrieval probe: fetch the 25 chunks most similar to the question, then show how
# many came back and the chunk at index 21.
question = "is there a specific person that i struggle to communicate with?"
docs = vectorstore.similarity_search(query=question, k=25)
print(len(docs), docs[21], sep="\n")

In [38]:
from langchain_ollama import ChatOllama

# Chat model used for answering: llama3.1 (8b) through ChatOllama.
model = ChatOllama(model="llama3.1:8b")

In [39]:
from IPython.display import Markdown, display

# Smoke-test the chat model on its own with a simple prompt.
response_message = model.invoke("how many letter rs are in strawberry")

# The reply text lives on the message's .content attribute.
markdown_output = response_message.content

# Render the reply as Markdown in the cell output.
display(Markdown(markdown_output))

There are 2 "R"s and 1 "S" in the word "strawberry".

In [None]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

def format_docs(docs):
    """Join the ``page_content`` of each document into one string.

    Documents are separated by a blank line; an empty iterable yields ''.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

# Prompt template for the question-answering chain. It has two placeholders:
#   {context}  - the retrieved text, wrapped in <context> tags
#   {question} - the question to answer
# NOTE(review): the instructions say "your state of mind" while the questions in this
# notebook are asked in the first person ("i") — confirm the intended perspective.
RAG_TEMPLATE = """
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. These are some entries at different dates that give insight into your state of mind, memories and moods. Use direct references when necessary
Be as specific and thorough as possible
<context>
{context}
</context>

Answer the following question:

{question}"""

# Turn the raw template string into a chat prompt.
rag_prompt = ChatPromptTemplate.from_template(RAG_TEMPLATE)

# First step: replace the list under "context" with one formatted string,
# leaving the other input keys untouched.
format_context_step = RunnablePassthrough.assign(
    context=lambda inputs: format_docs(inputs["context"])
)

# Full pipeline: format context -> fill prompt -> call chat model -> plain string.
chain = format_context_step | rag_prompt | model | StrOutputParser()

# Ask a question end to end: retrieve the 25 most similar chunks, show them,
# then let the chain answer from that context.
question = "who do i struggle in group projects with?"
docs = vectorstore.similarity_search(query=question, k=25)
print(docs)

# The chain expects the retrieved documents under "context" and the question text
# under "question"; it returns the answer as a string.
markdown_output = chain.invoke({"context": docs, "question": question})

# Render the answer as Markdown in the cell output.
display(Markdown(markdown_output))
