## InMemory VectorStore
### In-Memory Vector store implementation
### Uses dictionary and computes cosine similarity for search using numpy


In [12]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

from langchain.chat_models import init_chat_model
llm = init_chat_model("openai:gpt-4o-mini")
llm

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x130bf4690>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x130bf4e10>, root_client=<openai.OpenAI object at 0x130bf4b90>, root_async_client=<openai.AsyncOpenAI object at 0x130bf4a50>, model_name='gpt-4o-mini', model_kwargs={}, openai_api_key=SecretStr('**********'))

In [13]:
from langchain_openai import OpenAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
vector_store = InMemoryVectorStore(embedding=OpenAIEmbeddings())

In [14]:
from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]

In [15]:
documents

[Document(metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.'),
 Document(metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(metadata={'source': 'tweet'}, page_content="Wow! That was an amazing movie. I can't wait to see it again."),
 Document(metadata={'source': 'website'}, page_content='Is the new iPhone worth the price? Read this review to find out.'),
 Document(metadata={'source': 'website'}, page_content='The top 10 soccer players in the world right now.'),
 Document(metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic application

In [16]:
vector_store.add_documents(documents=documents)

['d0d11829-8127-4c67-8eb3-7d8a456f40a8',
 'b748f7cc-4f8f-4e0f-853b-f0e3fe19eac2',
 '7a4aa942-2359-402c-8445-b22adc5f66bc',
 '3288e0d7-fd61-427f-a66c-64826505bcb6',
 '1b8af252-877a-404c-be81-2fe97daa2b1b',
 'bcd00a59-af12-4d3b-a5c7-6ac0d456e8fe',
 '313f6a64-9919-4664-8222-2543e68f921d',
 '687a56d9-dfa4-4897-83be-8f8022d19607',
 '6a198b84-6a7a-4593-8914-bda533c3ac5d',
 '1a1db1a1-7ddf-4318-8ccc-4c55d945890a']

In [17]:
vector_store.similarity_search("Hows the weather forecast ", k=2)

[Document(id='b748f7cc-4f8f-4e0f-853b-f0e3fe19eac2', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(id='6a198b84-6a7a-4593-8914-bda533c3ac5d', metadata={'source': 'news'}, page_content='The stock market is down 500 points today due to fears of a recession.')]

In [21]:
### Vectorstore to retriever
retriever = vector_store.as_retriever(search_kwargs={"k":2})

retriever

VectorStoreRetriever(tags=['InMemoryVectorStore', 'OpenAIEmbeddings'], vectorstore=<langchain_core.vectorstores.in_memory.InMemoryVectorStore object at 0x130bf5e50>, search_kwargs={'k': 2})

In [22]:
## Invoke
retriever.invoke("hows the wweather forecast")

[Document(id='b748f7cc-4f8f-4e0f-853b-f0e3fe19eac2', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(id='6a198b84-6a7a-4593-8914-bda533c3ac5d', metadata={'source': 'news'}, page_content='The stock market is down 500 points today due to fears of a recession.')]