In [1]:
from langchain_community.document_loaders import PyPDFLoader

# https://python.langchain.com/docs/integrations/document_loaders/pypdfloader/#add-a-custom-pages_delimiter-to-identify-where-are-ends-of-pages-in-single-mode
def pdf_loader(pdf_path: str):
    return PyPDFLoader(
        pdf_path,
        mode="single",
        pages_delimiter="\n-------END OF PAGE-------\n",
    )

dream_dictionary_docs = pdf_loader("assets/data/The_Dreamers_Dictionary.pdf").load()
jung_archetypes_docs = pdf_loader("assets/data/The_Archetypes_of_the_Collective_Unconscious_C.Jung.pdf").load()
jung_interpretations_docs = pdf_loader("assets/data/Symbols_and_the_Interpretation_of_Dreams_by_Carl_Jung.pdf").load()

In [2]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,                # chunk size (characters)
    chunk_overlap=200,              # chunk overlap (characters)
    add_start_index=True,           # track index in original document
)
dream_dictionary_splits = text_splitter.split_documents(dream_dictionary_docs)
jung_archetypes_splits = text_splitter.split_documents(jung_archetypes_docs)
jung_interpretations_splits = text_splitter.split_documents(jung_interpretations_docs)

In [3]:
from langchain_huggingface.embeddings import HuggingFaceEmbeddings  # local
from langchain_core.vectorstores import InMemoryVectorStore

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs={"device": "cuda"},
)

# Create In-Memory Vector Stores
dream_dictionary_store = InMemoryVectorStore(embeddings)
jung_archetypes_store = InMemoryVectorStore(embeddings)
jung_interpretations_store = InMemoryVectorStore(embeddings)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
print("\nattempting embed...\n")
dream_dictionary_store.aadd_documents(documents=dream_dictionary_splits)
jung_archetypes_store.aadd_documents(documents=jung_archetypes_splits)
jung_interpretations_store.aadd_documents(documents=jung_interpretations_splits)
print("done")


attempting embed...

done


  dream_dictionary_store.aadd_documents(documents=dream_dictionary_splits)
  jung_archetypes_store.aadd_documents(documents=jung_archetypes_splits)
  jung_interpretations_store.aadd_documents(documents=jung_interpretations_splits)


In [5]:
import pickle

with open(
    "assets/pickles/dream_dictionary_store.dat", mode="wb"
) as f_dream_dictionary_store, open(
    "assets/pickles/jung_archetypes_store.dat", mode="wb"
) as f_jung_archetypes_store, open(
    "assets/pickles/jung_interpretations_store.dat", mode="wb"
) as f_jung_interpretations_store:
    pickle.dump(dream_dictionary_store, f_dream_dictionary_store)
    pickle.dump(jung_archetypes_store, f_jung_archetypes_store)
    pickle.dump(jung_interpretations_store, f_jung_interpretations_store)
    print("done storing")

done storing


In [1]:
import pickle

with open(
    "assets/pickles/dream_dictionary_store.dat", mode="rb"
) as f_dream_dictionary_store, open(
    "assets/pickles/jung_archetypes_store.dat", mode="rb"
) as f_jung_archetypes_store, open(
    "assets/pickles/jung_interpretations_store.dat", mode="rb"
) as f_jung_interpretations_store:
    dream_dictionary_store = pickle.load(f_dream_dictionary_store)
    jung_archetypes_store = pickle.load(f_jung_archetypes_store)
    jung_interpretations_store = pickle.load(f_jung_interpretations_store)
    print("done loading")

  from .autonotebook import tqdm as notebook_tqdm


done loading


In [2]:
from typing import Optional
from pydantic import BaseModel, Field
from langchain_ollama import ChatOllama


llm = ChatOllama(
    model="llama3.2:3b",
    temperature=0,
    # other params...
)

dream_text = "I fell off a bridge"

dream_dictionary_results = dream_dictionary_store.similarity_search(dream_text)
jung_archetypes_results = jung_archetypes_store.similarity_search(dream_text)
jung_interpretations_results = jung_interpretations_store.similarity_search(dream_text)

def parse(result):
    return "\n\n".join(doc.page_content for doc in result)

dream_dictionary_context = parse(dream_dictionary_results)
jung_archetypes_context = parse(jung_archetypes_results)
jung_interpretations_context = parse(jung_interpretations_results)


# Pydantic
# class strllm(BaseModel):
#     """Dream interpretation"""

#     dream: str = Field(description="Dream entered by the user")
#     interpretation: str = Field(description="Interpretation of dream")
#     stat: Optional[str] = Field(
#         default=None, description="What life issues the user may be experiencing"
#     )

# llm = llm.with_structured_output(strllm)
# answer = llm.invoke(dream_text)
# print(answer)

