### InMemoryVectorStore

`InMemoryVectorStore` is LangChain's in-memory vector store implementation.

It keeps the embedded documents in a Python dictionary and uses NumPy to compute cosine similarity at search time.

In [6]:
import os

from dotenv import load_dotenv
from langchain.chat_models import init_chat_model

# Read a local .env file (if present) and load its variables into os.environ.
load_dotenv()

# Fail fast with a readable message when the key is missing. Assigning the
# result of os.getenv() back into os.environ would raise an opaque
# "TypeError: str expected, not NoneType" in that case, and is a no-op when
# the key is already present.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

# Chat model handle; the trailing bare name renders its repr as the cell output.
llm = init_chat_model("openai:gpt-4o-mini")
llm


ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x000002096EC63140>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000002096E3572F0>, root_client=<openai.OpenAI object at 0x000002096E2450D0>, root_async_client=<openai.AsyncOpenAI object at 0x000002096E21A480>, model_name='gpt-4o-mini', model_kwargs={}, openai_api_key=SecretStr('**********'), stream_usage=True)

In [7]:
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings

# Embedding model created with its default settings; the store uses it to
# embed documents and queries.
openai_embeddings = OpenAIEmbeddings()

# Empty in-memory store; documents are added in a later cell.
vector_store = InMemoryVectorStore(embedding=openai_embeddings)

In [8]:
from langchain_core.documents import Document

# (page_content, source) pairs, listed in the order the documents are numbered.
_DOCUMENT_SPECS = [
    ("I had chocolate chip pancakes and scrambled eggs for breakfast this morning.", "tweet"),
    ("The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.", "news"),
    ("Building an exciting new project with LangChain - come check it out!", "tweet"),
    ("Robbers broke into the city bank and stole $1 million in cash.", "news"),
    ("Wow! That was an amazing movie. I can't wait to see it again.", "tweet"),
    ("Is the new iPhone worth the price? Read this review to find out.", "website"),
    ("The top 10 soccer players in the world right now.", "website"),
    ("LangGraph is the best framework for building stateful, agentic applications!", "tweet"),
    ("The stock market is down 500 points today due to fears of a recession.", "news"),
    ("I have a bad feeling I am going to get deleted :(", "tweet"),
]

# One Document per pair; every document gets its own metadata dict.
documents = [
    Document(page_content=text, metadata={"source": source})
    for text, source in _DOCUMENT_SPECS
]

# Keep the individual names bound so each sample can still be referenced
# on its own.
(
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
) = documents

In [9]:
# Echo the assembled list so its repr is rendered as the cell output.
documents

[Document(metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.'),
 Document(metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(metadata={'source': 'tweet'}, page_content="Wow! That was an amazing movie. I can't wait to see it again."),
 Document(metadata={'source': 'website'}, page_content='Is the new iPhone worth the price? Read this review to find out.'),
 Document(metadata={'source': 'website'}, page_content='The top 10 soccer players in the world right now.'),
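 Document(metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!'),
 Document(metadata={'source': 'news'}, page_content='The stock market is down 500 points today due to fears of a recession.'),
 Document(metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :(')]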

In [10]:
# Embed and store every sample document; the call returns one id per document
# (shown as the cell output).
# NOTE(review): no explicit ids are passed, so re-running this cell presumably
# stores the documents again under fresh ids - confirm before re-executing.
vector_store.add_documents(documents)

['fa51ef6d-fa41-4695-aefc-6679eac2a2ed',
 '454896d5-bf3d-4501-8d66-060e886bcbd1',
 'f7cabb0b-8ca9-4cf4-9ee7-1d99444d77dc',
 '052fc3a9-c61e-4ba2-8be3-a3e80d32bbf4',
 '9bbed83c-751c-47fb-bbc4-c9ef6cd4090f',
 '559aaff6-5381-4ea0-9351-eb9f227ee093',
 '87c2a859-5e2d-4750-8b5d-233a8f0cfdc8',
 'c6531385-2968-464e-855c-79ef3b1b7198',
 'f710f0bc-47f2-46f8-b339-70a4a07de7d6',
 '07262d38-30e6-465b-acb1-b2ddd15f258e']

In [11]:
# Similarity search with no explicit k: the library default applies
# (four documents come back in the output below).
vector_store.similarity_search(query="hows weather forecast")

[Document(id='454896d5-bf3d-4501-8d66-060e886bcbd1', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(id='f710f0bc-47f2-46f8-b339-70a4a07de7d6', metadata={'source': 'news'}, page_content='The stock market is down 500 points today due to fears of a recession.'),
 Document(id='07262d38-30e6-465b-acb1-b2ddd15f258e', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :('),
 Document(id='87c2a859-5e2d-4750-8b5d-233a8f0cfdc8', metadata={'source': 'website'}, page_content='The top 10 soccer players in the world right now.')]

In [12]:
# Same query, but limit the result to the two closest documents.
vector_store.similarity_search(query="hows weather forecast", k=2)

[Document(id='454896d5-bf3d-4501-8d66-060e886bcbd1', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(id='f710f0bc-47f2-46f8-b339-70a4a07de7d6', metadata={'source': 'news'}, page_content='The stock market is down 500 points today due to fears of a recession.')]

In [13]:
# Expose the store through the retriever interface; search_kwargs is configured
# with k=2 for the underlying search.
retriever = vector_store.as_retriever(search_kwargs=dict(k=2))

In [14]:
# Run the query through the retriever; with k=2 configured above it yields the
# two closest documents.
retriever.invoke(input="hows weather forecast")

[Document(id='454896d5-bf3d-4501-8d66-060e886bcbd1', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(id='f710f0bc-47f2-46f8-b339-70a4a07de7d6', metadata={'source': 'news'}, page_content='The stock market is down 500 points today due to fears of a recession.')]