# Memory project - Database vectorization

## Secrets

In [1]:
import os
from openai import OpenAI
from dotenv import load_dotenv
from pathlib import Path

# Load path from the environment variable
env_ih1 = os.getenv("ENV_IH1")

dotenv_path = Path(env_ih1)
load_dotenv(dotenv_path=dotenv_path)
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY= os.getenv('PINECONE_KEY')
SERPAPI_API_KEY = os.getenv('SERPAPI_API_KEY')
STEAMSHIP_API_KEY = os.getenv('STEAMSHIP_API_KEY')
LANGSMITH_API_KEY = os.getenv('LANGSMITH_API_KEY')
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
GEMINI_KEY = os.getenv('GEMINI_KEY')

os.environ['PATH'] += os.pathsep + '/usr/bin'

## Libraries

In [2]:
import os
import json
import pandas as pd
import shutil
import openai
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from pinecone import Index  # Import Index for Pinecone operations
import pandas as pd
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.vectorstores import Pinecone
from langchain.embeddings import OpenAIEmbeddings
import pinecone

# Install missing packages
# %pip install sentence-transformers

from sentence_transformers import SentenceTransformer





## Family safe 

## Summarizer local


In [3]:
from huggingface_hub import login

# Use your token here
login(token=HUGGINGFACEHUB_API_TOKEN)

## Read json + create local df/csv

In [4]:
# V2

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

def process_json_files(data_dir=r'.\Family safe', processed_dir=r'.\Processed JSONs', output_file=r'.\combined_data.csv'):
    """
    Processes all JSON files in the specified directory, adds new data to an existing DataFrame, 
    and ensures no duplicates.

    Args:
        data_dir: Directory containing JSON files.
        processed_dir: Directory where processed files will be moved.
        output_file: Filepath for the saved combined DataFrame.

    Returns:
        A DataFrame containing the updated processed data.
    """
    all_data = []

    # Ensure the processed directory exists
    os.makedirs(processed_dir, exist_ok=True)

    # Process each JSON file
    for filename in os.listdir(data_dir):
        if filename.endswith(".json"):
            filepath = os.path.join(data_dir, filename)
            with open(filepath, 'r', encoding='utf-8') as f:
                json_data = json.load(f)

            # Extract document-level metadata
            name = json_data.get("Name", "Unknown")
            doc_type = json_data.get("Type", "Unknown")
            author = json_data.get("Author", "Unknown")
            date = json_data.get("Date", "Unknown")

            # Initialize a page counter
            page_counter = 1

            # Process each page
            for page in json_data.get("Pages", []):
                page_number = page.get("Page Number", None)
                
                # Assign a sequential number if Page Number is NaN
                if page_number is None:
                    page_number = page_counter
                    page_counter += 1  # Increment the counter

                page_data = {
                    "Doc name": name,
                    "Type": doc_type,
                    "Author": author,
                    "Date": date,
                    "Text": page.get("Extracted Text", ""),
                    "Page_number": page_number,
                }
                all_data.append(page_data)


            # Move the processed file
            processed_filepath = os.path.join(processed_dir, filename)
            os.rename(filepath, processed_filepath)

    # Create a DataFrame from new data
    new_df = pd.DataFrame(all_data)

    # Add summaries to the new DataFrame
    new_df = populate_summary_column(new_df)

    # Reorder columns
    new_df = new_df[["Doc name", "Type", "Author", "Date", "Text", "Page_number", "Summary"]]

    # Check if an existing DataFrame exists
    if os.path.exists(output_file):
        existing_df = pd.read_csv(output_file)
        combined_df = pd.concat([existing_df, new_df], ignore_index=True).drop_duplicates()
    else:
        combined_df = new_df

    # Save the updated DataFrame
    combined_df.to_csv(output_file, index=False)
    print(f"Updated DataFrame saved to {output_file}")

    return combined_df

def populate_summary_column(df):
    """
    Populates the 'Summary' column in the DataFrame using generate_text_summaries.
    
    Args:
        df: The DataFrame containing the text data.

    Returns:
        The DataFrame with the 'Summary' column populated.
    """
    # Extract texts from the DataFrame
    texts = df["Text"].tolist()

    # Generate summaries for the texts
    text_summaries, _ = generate_text_summaries(texts, tables=None, summarize_texts=True)

    # Assign the summaries back to the DataFrame
    df["Summary"] = text_summaries

    return df

def generate_text_summaries(texts, tables=None, summarize_texts=False):
    """
    Summarize text elements
    texts: List of str
    tables: List of str
    summarize_texts: Bool to summarize texts
    """

    # Prompt
    prompt_text = """You are an assistant tasked with summarizing tables and text for retrieval. \
    These summaries will be embedded and used to retrieve the raw text or table elements. \
    Give a concise summary of the table or text that is well optimized for retrieval. Table or text: {element} 
    Do not include "this is a summary" at the begining of the summary.
    The summary must be in French. """
    prompt = ChatPromptTemplate.from_template(prompt_text)

    # Text summary chain
    model = ChatOpenAI(temperature=0, model="gpt-4o-mini")
    summarize_chain = {"element": lambda x: x} | prompt | model | StrOutputParser()

    # Initialize empty summaries
    text_summaries = []
    table_summaries = []

    # Apply to text if texts are provided and summarization is requested
    if texts and summarize_texts:
        text_summaries = summarize_chain.batch(texts, {"max_concurrency": 5})
    elif texts:
        text_summaries = texts

    # Apply to tables if tables are provided
    if tables:
        table_summaries = summarize_chain.batch(tables, {"max_concurrency": 5})

    return text_summaries, table_summaries


In [5]:
df=process_json_files(data_dir=r'data\Family safe', processed_dir=r'data\Family safe')

Updated DataFrame saved to .\combined_data.csv


In [6]:
# del df
df.head(100)

Unnamed: 0,Doc name,Type,Author,Date,Text,Page_number,Summary
0,Pdf img,scan,John Doe,2025-01-23 15:43:26,"C'est le dernier, Isaac, dit Hovel, le plus so...",118,"Isaac, souvent appelé Louis, est l'ancêtre dir..."
1,Pour la mémoire familiale 1-50,Unknown,Jean Lambert,2025-01-28 16:01:49,POUR LA MÉMOIRE\nFAMILIALE\n\nFAMILLE HISTOIRE...,1,"Mémoires familiales de Jean-Georges Lambert, V..."
2,Pour la mémoire familiale 1-50,Unknown,Jean Lambert,2025-01-28 16:01:49,J'ai entrepris ce travail pour mes fils qui ti...,2,"L'auteur réalise ce travail pour ses fils, qui..."
3,Pour la mémoire familiale 1-50,Unknown,Jean Lambert,2025-01-28 16:01:49,VOLUME 1\n\nPage\n6- Préambule.\n\nTABLE DES M...,3,VOLUME 1\n\nCe volume comprend un préambule et...
4,Pour la mémoire familiale 1-50,Unknown,Jean Lambert,2025-01-28 16:01:49,"-Les sentiments de Bonaparte, puis de Napoléon...",4,Les sentiments de Bonaparte et de Napoléon env...
...,...,...,...,...,...,...,...
95,Pour la mémoire familiale 1-50,Unknown,Jean Lambert,2025-01-28 16:01:49,VOLUME 1\n\nPage\n6- Préambule.\n\nTABLE DES M...,3,VOLUME 1\n\nCe volume comprend un préambule et...
96,Pour la mémoire familiale 1-50,Unknown,Jean Lambert,2025-01-28 16:01:49,"-Les sentiments de Bonaparte, puis de Napoléon...",4,Les sentiments de Bonaparte et de Napoléon env...
97,Pour la mémoire familiale 1-50,Unknown,Jean Lambert,2025-01-28 16:01:49,PREAMBULLE\n\nLa mort de ma mère m'a profondém...,6,La narrateur évoque la profonde douleur ressen...
98,Pour la mémoire familiale 1-50,Unknown,Jean Lambert,2025-01-28 16:01:49,"vieillots, et quand je les entendais d'une ore...",7,L'auteur évoque sa prise de conscience tardive...


In [16]:
# Define LangChain Recursive Character Text Splitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=600,  # Max length of a chunk
    chunk_overlap=120  # 20% overlap to maintain context
)

def split_text_dataframe(df):
    """Splits a DataFrame’s ‘Text’ column into chunks using LangChain’s text splitter."""
    documents = []

    for _, row in df.iterrows():
        text_chunks = text_splitter.split_text(row["Text"])  # Smart text splitting
        total_chunks = len(text_chunks)  # Ensure total chunk count is stored as an integer

        for i, chunk in enumerate(text_chunks):
            # Copy row data and create new chunk
            new_row = row.drop(labels=["word_count"], errors="ignore").copy()
            new_row["Text"] = chunk

            # 🔧 Ensure Chunk ID uses only integers
            new_row["Chunk_ID"] = f"{row['Doc name']}_Chunk{int(i+1)}"  
            new_row["Total_Chunks"] = int(total_chunks)  # Ensure integer storage

            # Convert into LangChain Document format
            documents.append(
                Document(
                    page_content=chunk,
                    metadata={
                        "Doc name": row["Doc name"],
                        "Author": row["Author"],
                        "Page_number": row["Page_number"],
                        "Chunk_ID": new_row["Chunk_ID"],
                        "Total_Chunks": new_row["Total_Chunks"],  # Ensure integer
                    }
                )
            )

    return documents


# Process DataFrame to Generate Chunks
docs = split_text_dataframe(df)


In [10]:
print(docs)

[Document(metadata={'Doc name': 'Pdf img', 'Author': 'John Doe', 'Page_number': 118, 'Chunk_ID': 'Pdf img_Chunk1', 'Total_Chunks': 8}, page_content="C'est le dernier, Isaac, dit Hovel, le plus souvent appelé Louis qui est notre ancêtre direct à la génération suivante. Il est né en 1766 à Froeningen, et s'est marié avec Rachel (ou Reiche ou Rosalie ou Thérèse) Gugenheim, née également à Froeningen en 1772, et décédée dans le même village en 1843, à 81 ans.\n\nDu ménage Louis et Rachel, je connais six enfants: Charlotte ex Judele, Jacques ex Jacob, marchand de bestiaux, Alexandre ex Samuel, marchand de bestiaux, Lehmann ou Clément, Marx, marchand de bétail, et Julie ex Sara, tous nés à Froeningen entre 1795 et 1802."), Document(metadata={'Doc name': 'Pdf img', 'Author': 'John Doe', 'Page_number': 118, 'Chunk_ID': 'Pdf img_Chunk2', 'Total_Chunks': 8}, page_content="Parmi eux, notre ancêtre direct est Lehmann, sur son prénom je note qu'en 1808 il devient Clément, mais que seul le prénom de

In [20]:
from langchain.text_splitter import NLTKTextSplitter
from langchain.schema import Document
import nltk

# Ensure NLTK tokenizer is available
nltk.download("punkt")

# Define a word-based text splitter with a max of 600 words per chunk
text_splitter = NLTKTextSplitter(chunk_size=600, chunk_overlap=120)

def split_text_dataframe(df):
    """Splits a DataFrame’s ‘Text’ column into chunks based on **words** (not characters)."""
    documents = []

    for _, row in df.iterrows():
        text_chunks = text_splitter.split_text(row["Text"])  # Word-based text splitting
        total_chunks = len(text_chunks)  # Store total chunk count as an integer

        for i, chunk in enumerate(text_chunks):
            # Copy row data and create new chunk
            new_row = row.drop(labels=["word_count"], errors="ignore").copy()
            new_row["Text"] = chunk

            # 🔧 Ensure Chunk ID uses only integers
            new_row["Chunk_ID"] = f"{row['Doc name']}_Chunk{int(i+1)}"
            new_row["Total_Chunks"] = int(total_chunks)  # Ensure integer storage

            # Convert into LangChain Document format
            documents.append(
                Document(
                    page_content=chunk,
                    metadata={
                        "Doc name": row["Doc name"],
                        "Author": row["Author"],
                        "Page_number": row["Page_number"],
                        "Chunk_ID": new_row["Chunk_ID"],
                        "Total_Chunks": new_row["Total_Chunks"],  # Ensure integer
                    }
                )
            )

    return documents

# Process DataFrame to Generate Chunks
docs2 = split_text_dataframe(df)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\aurel\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Created a chunk of size 927, which is longer than the specified 600
Created a chunk of size 616, which is longer than the specified 600
Created a chunk of size 689, which is longer than the specified 600
Created a chunk of size 616, which is longer than the specified 600
Created a chunk of size 689, which is longer than the specified 600
Created a chunk of size 927, which is longer than the specified 600
Created a chunk of size 616, which is longer than the specified 600
Created a chunk of size 689, which is longer than the specified 600
Created a chunk of size 927, which is longer than the specified 600
Created a chunk of size 616, which is longer than the specified 600
Created a chunk of size 689, which is longer than the specified 600
Created a chunk of size 616, which is longer than the specified 600
Created a chunk of size 689, whi

## Creating Pinecone DB

In [11]:
from pinecone import Pinecone

In [17]:
import pinecone as pc
from pinecone import Pinecone, ServerlessSpec

spec = ServerlessSpec(
    cloud="aws", region="us-east-1"
)

# connect to pinecone environment
pc = Pinecone(
    api_key = PINECONE_API_KEY,
    environment='us-east-1'  # find next to API key in console
)

In [21]:
import time

index_name = "memory-project3"
existing_indexes = [
    index_info["name"] for index_info in pc.list_indexes()
]

# check if index already exists (it shouldn't if this is first time)
if index_name not in existing_indexes:
    # if does not exist, create index
    pc.create_index(
        index_name,
        dimension=1536,  # dimensionality of ada 002
        metric='dotproduct',
        spec=spec
    )
    # wait for index to be initialized
    while not pc.describe_index(index_name).status['ready']:
        time.sleep(1)

# connect to index
index = pc.Index(index_name)
time.sleep(1)
# view index stats
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [None]:
# Install missing packages
%pip install langchain

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [14]:
import os
from getpass import getpass
import torch

from langchain.embeddings.openai import OpenAIEmbeddings

# get API key from top-right dropdown on OpenAI website
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") or getpass("Enter your OpenAI API key: ")
model_name = 'text-embedding-3-small'

# set device to GPU if available
device = 'cuda' if torch.cuda.is_available() else 'cpu'

embed = OpenAIEmbeddings(
    model=model_name,
    openai_api_key=OPENAI_API_KEY,
    # device=device -> Not compatible with OpenAI embeddings
)

  embed = OpenAIEmbeddings(


In [22]:
# Indexing v1

from langchain.vectorstores import Pinecone as PineconeVectorStore

vectorstore = PineconeVectorStore.from_documents(
    docs2,
    embed,  # Your embedding function
    index_name=index_name
)


## Querying the Vector Store

In [15]:
# # Install the langgraph package
# %pip install langgraph

In [None]:
query = "Qui est Jean?"

vectorstore.similarity_search(query, k=5) 

[Document(metadata={'Author': 'Jean Lambert', 'Chunk_ID': 'Pour la mémoire familiale 1-50_Page12_Chunk3', 'Doc name': 'Pour la mémoire familiale 1-50', 'Page_number': 12.0}, page_content="des juifs en France depuis le début de l'ère chrétienne.\nTrès ignorant de toutes ces questions, j'ai lu pendant longtemps pour ma seule information.\nMais accumulant jour après jour beaucoup d'éléments intéressants, dont certains remettaient en cause des\nidées communément admises, je ne posais de plus en plus la question de décider si je devais ajouter un\nrésumé de tout cela à mon projet. J'avais le souci de situer le cadre de l'histoire de notre famille, et un\nrésumé de ses lectures ne pouvait qu'y participer."),
 Document(metadata={'Author': 'Jean Lambert', 'Chunk_ID': 'Pour la mémoire familiale 1-50_Page12_Chunk3', 'Doc name': 'Pour la mémoire familiale 1-50', 'Page_number': 12.0}, page_content="des juifs en France depuis le début de l'ère chrétienne.\nTrès ignorant de toutes ces questions, j'a

In [17]:
from langchain.vectorstores import Pinecone

text_field = "Summary"  # the metadata field that contains our text

# initialize the vector store object
vectorstore = Pinecone(
    index, embed.embed_query, text_field
)

  vectorstore = Pinecone(


In [None]:
query = "What happened in 1963?"

vectorstore.similarity_search(
    query,  # our search query
    k=3  # return 3 most relevant docs
)

Found document with no `Summary` key. Skipping.
Found document with no `Summary` key. Skipping.
Found document with no `Summary` key. Skipping.


[]

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.chains import RetrievalQA

# chat completion llm
llm = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model_name='gpt-4o-mini',
    temperature=0.0
)
# conversational memory
conversational_memory = ConversationBufferWindowMemory(
    memory_key='chat_history',
    k=5,
    return_messages=True
)
# retrieval qa chain
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever()
)

  llm = ChatOpenAI(
  conversational_memory = ConversationBufferWindowMemory(


In [None]:
qa.run(query)

  qa.run(query)
Found document with no `Summary` key. Skipping.
Found document with no `Summary` key. Skipping.
Found document with no `Summary` key. Skipping.
Found document with no `Summary` key. Skipping.


'Many significant events occurred in 1963, including:\n\n1. The assassination of U.S. President John F. Kennedy on November 22 in Dallas, Texas.\n2. The March on Washington for Jobs and Freedom on August 28, where Martin Luther King Jr. delivered his famous "I Have a Dream" speech.\n3. The establishment of the Partial Nuclear Test Ban Treaty, which was signed by the United States, the United Kingdom, and the Soviet Union in August.\n4. The publication of "The Feminine Mystique" by Betty Friedan, which is often credited with sparking the second wave of feminism in the United States.\n\nThese are just a few highlights from that year. If you have a specific event or topic in mind, feel free to ask!'

## Tool query to be edited

In [21]:
from langchain.agents import Tool

tools = [
    Tool(
        name='Knowledge Base',
        func=qa.run,
        description=(
            'use this tool when answering general knowledge queries to get '
            'more information about the topic'
        )
    )
]

In [22]:
from langchain.agents import initialize_agent

agent = initialize_agent(
    agent='chat-conversational-react-description',
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    early_stopping_method='generate',
    memory=conversational_memory
)

  agent = initialize_agent(


In [None]:
agent("Who si Rachel Gugenheim?")

  agent("Who si Rachel Gugenheim?")




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Knowledge Base",
    "action_input": "Rachel Gugenheim"
}
```[0m

Found document with no `Summary` key. Skipping.
Found document with no `Summary` key. Skipping.
Found document with no `Summary` key. Skipping.
Found document with no `Summary` key. Skipping.



Observation: [36;1m[1;3mI'm sorry, but I don't have any information on Rachel Gugenheim.[0m
Thought:[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": "I'm sorry, but I don't have any information on Rachel Gugenheim."
}
```[0m

[1m> Finished chain.[0m


{'input': 'Who si Rachel Gugenheim?',
 'chat_history': [],
 'output': "I'm sorry, but I don't have any information on Rachel Gugenheim."}

In [None]:
agent("What can you tell me about her husband?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Knowledge Base",
    "action_input": "Rachel Gugenheim husband"
}
```[0m

Found document with no `Summary` key. Skipping.
Found document with no `Summary` key. Skipping.
Found document with no `Summary` key. Skipping.
Found document with no `Summary` key. Skipping.



Observation: [36;1m[1;3mI don't know.[0m
Thought:[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": "I don't have any information about Rachel Gugenheim's husband."
}
```[0m

[1m> Finished chain.[0m


{'input': 'What can you tell me about her husband?',
 'chat_history': [HumanMessage(content='Who si Rachel Gugenheim?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="I'm sorry, but I don't have any information on Rachel Gugenheim.", additional_kwargs={}, response_metadata={})],
 'output': "I don't have any information about Rachel Gugenheim's husband."}

In [None]:
agent("What's the common point between Jacques Dreyfus and Sleeping Beauty?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Knowledge Base",
    "action_input": "common point between Jacques Dreyfus and Sleeping Beauty"
}
```[0m

Found document with no `Summary` key. Skipping.
Found document with no `Summary` key. Skipping.
Found document with no `Summary` key. Skipping.
Found document with no `Summary` key. Skipping.



Observation: [36;1m[1;3mI don't know.[0m
Thought:[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": "I don't have information on a common point between Jacques Dreyfus and Sleeping Beauty."
}
```[0m

[1m> Finished chain.[0m


{'input': "What's the common point between Jacques Dreyfus and Sleeping Beauty?",
 'chat_history': [HumanMessage(content='Who si Rachel Gugenheim?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="I'm sorry, but I don't have any information on Rachel Gugenheim.", additional_kwargs={}, response_metadata={}),
  HumanMessage(content='What can you tell me about her husband?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="I don't have any information about Rachel Gugenheim's husband.", additional_kwargs={}, response_metadata={})],
 'output': "I don't have information on a common point between Jacques Dreyfus and Sleeping Beauty."}

In [None]:
agent("Who died in 1963?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Knowledge Base",
    "action_input": "notable deaths in 1963"
}
```[0m

Found document with no `Summary` key. Skipping.
Found document with no `Summary` key. Skipping.
Found document with no `Summary` key. Skipping.
Found document with no `Summary` key. Skipping.



Observation: [36;1m[1;3mSome notable deaths in 1963 include:

1. **Pablo Picasso** (April 8, 1881 – April 8, 1973) - The renowned Spanish painter and sculptor passed away on April 8, 1973, but he is often associated with the cultural shifts of the early 1960s.
2. **Alfred Hitchcock** (August 13, 1899 – April 29, 1980) - The famous film director known for his suspenseful movies was still active in the 1960s.
3. **C.S. Lewis** (November 29, 1898 – November 22, 1963) - The British writer and theologian, best known for "The Chronicles of Narnia," died on November 22, 1963.
4. **John F. Kennedy** (May 29, 1917 – November 22, 1963) - The 35th President of the United States was assassinated on November 22, 1963, in Dallas, Texas.
5. **Robert Frost** (March 26, 1874 – January 29, 1963) - The American poet passed away on January 29, 1963.

These individuals had significant impacts in their respective fields.[0m
Thought:[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input":

{'input': 'Who died in 1963?',
 'chat_history': [HumanMessage(content='Who si Rachel Gugenheim?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="I'm sorry, but I don't have any information on Rachel Gugenheim.", additional_kwargs={}, response_metadata={}),
  HumanMessage(content='What can you tell me about her husband?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="I don't have any information about Rachel Gugenheim's husband.", additional_kwargs={}, response_metadata={}),
  HumanMessage(content="What's the common point between Jacques Dreyfus and Sleeping Beauty?", additional_kwargs={}, response_metadata={}),
  AIMessage(content="I don't have information on a common point between Jacques Dreyfus and Sleeping Beauty.", additional_kwargs={}, response_metadata={})],
 'output': 'Notable deaths in 1963 include C.S. Lewis, who died on November 22, and John F. Kennedy, the 35th President of the United States, who was assassinated on the same day. Addit

In [None]:
# Convert vector store to retriever
retriever = vector_store.as_retriever()


# Configure Retrieval QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff"  # Use "stuff" or "map_reduce" depending on complexity
)

# Query the chain
query = "Why is my agent not behaving like I want it? I thought coding was easy and magic!"
response = qa_chain.run(query)
print("Response:", response)

NameError: name 'vector_store' is not defined