In [None]:
import os 
from dotenv import load_dotenv, find_dotenv
# Locate the nearest .env file and merge its entries into the process environment.
# The boolean result is deliberately discarded.
_ = load_dotenv(find_dotenv())

# Read the key back from the environment. In the cells visible here it is not
# passed explicitly to any client.
openai_api_key = os.environ.get('OPENAI_API_KEY')

In [1]:
from langchain_openai import ChatOpenAI

# Chat model handle for the notebook; the model name is pinned explicitly.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
)

## LangChain Documents

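A `Document` has two attributes:

- `page_content`: the text of the document
- `metadata`: a dictionary of extra information about the document, such as its source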

In [2]:
from langchain_core.documents import Document

# Sample corpus: each (text, source) pair becomes one Document whose
# page_content is the text and whose metadata records where it came from.
documents = [
    Document(page_content=text, metadata={"source": source})
    for text, source in (
        (
            "John F. Kennedy served as the 35th president of the United States from 1961 until his assassination in 1963.",
            "us-presidents-doc",
        ),
        (
            "Robert F. Kennedy was a key political figure and served as the U.S. Attorney General; he was also assassinated.",
            "us-politics-doc",
        ),
        (
            "The Kennedy family is known for their significant influence in American politics and their extensive public service.",
            "kennedy-family-doc",
        ),
        (
            "Edward M. Kennedy, often known as Ted Kennedy, was a U.S. Senator who played a major role in American politics.",
            "us-senators-doc",
        ),
        (
            "Jacqueline Kennedy Onassis, wife of John F. Kennedy, was an iconic First Lady known for her fashion and cultural influence.",
            "first-lady-doc",
        ),
    )
]

## VectorStore vs Retrievers

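A VectorStore is a specialized storage space where information is kept in a very specific format: each document is stored together with its embedding vector so that it can be searched by similarity.

Retrievers are more about actively finding information: given a query, a retriever returns the documents that are relevant to it.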


In [3]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
# Embed every sample document with OpenAI embeddings and index the result in Chroma.
vectorstore = Chroma.from_documents(documents=documents, embedding=OpenAIEmbeddings())

## The `similarity_search()` function returns documents based on their similarity to a string query


In [4]:
# No result count is passed, so the store's default applies; the output below
# lists four of the five indexed documents, closest match first.
vectorstore.similarity_search(query="John")

[Document(id='60ac6a2a-664a-4306-9e12-ff04f81fd670', metadata={'source': 'us-presidents-doc'}, page_content='John F. Kennedy served as the 35th president of the United States from 1961 until his assassination in 1963.'),
 Document(id='c42f552e-b78d-43cb-90b3-1a8b4132ce62', metadata={'source': 'us-senators-doc'}, page_content='Edward M. Kennedy, often known as Ted Kennedy, was a U.S. Senator who played a major role in American politics.'),
 Document(id='2f05af02-b155-42cd-9d83-bc4c72ef401d', metadata={'source': 'us-politics-doc'}, page_content='Robert F. Kennedy was a key political figure and served as the U.S. Attorney General; he was also assassinated.'),
 Document(id='8c814c06-6cc6-427c-8bd2-7d4e8fa847fe', metadata={'source': 'first-lady-doc'}, page_content='Jacqueline Kennedy Onassis, wife of John F. Kennedy, was an iconic First Lady known for her fashion and cultural influence.')]

In [6]:
# Same search, but each hit comes back as a (document, score) pair. In the output
# below the best match carries the smallest score, so read it as a distance
# (lower = closer).
vectorstore.similarity_search_with_score(query="John")

[(Document(id='60ac6a2a-664a-4306-9e12-ff04f81fd670', metadata={'source': 'us-presidents-doc'}, page_content='John F. Kennedy served as the 35th president of the United States from 1961 until his assassination in 1963.'),
  0.4501720666885376),
 (Document(id='c42f552e-b78d-43cb-90b3-1a8b4132ce62', metadata={'source': 'us-senators-doc'}, page_content='Edward M. Kennedy, often known as Ted Kennedy, was a U.S. Senator who played a major role in American politics.'),
  0.46143385767936707),
 (Document(id='2f05af02-b155-42cd-9d83-bc4c72ef401d', metadata={'source': 'us-politics-doc'}, page_content='Robert F. Kennedy was a key political figure and served as the U.S. Attorney General; he was also assassinated.'),
  0.47088050842285156),
 (Document(id='8c814c06-6cc6-427c-8bd2-7d4e8fa847fe', metadata={'source': 'first-lady-doc'}, page_content='Jacqueline Kennedy Onassis, wife of John F. Kennedy, was an iconic First Lady known for her fashion and cultural influence.'),
  0.4750681221485138)]

## Retrievers 

In [10]:
from typing import List
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda

# Wrap the store's search method in a runnable and pre-bind k=1 so that every
# query yields only its single closest document.
retriever = (
    RunnableLambda(vectorstore.similarity_search)
    .bind(k=1)
)

# Run both queries in one call; the result holds one list of documents per query.
retriever.batch(inputs=["John", "Robert"])

[[Document(id='60ac6a2a-664a-4306-9e12-ff04f81fd670', metadata={'source': 'us-presidents-doc'}, page_content='John F. Kennedy served as the 35th president of the United States from 1961 until his assassination in 1963.')],
 [Document(id='2f05af02-b155-42cd-9d83-bc4c72ef401d', metadata={'source': 'us-politics-doc'}, page_content='Robert F. Kennedy was a key political figure and served as the U.S. Attorney General; he was also assassinated.')]]

In [11]:
# Build a retriever directly from the vector store.
# The keyword must be spelled `search_type`: the earlier `serach_type` spelling
# was never applied as the search type, and the cell only behaved as intended
# because "similarity" is also the default.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 1},  # return only the single best match per query
)

# Run both queries in one call; the result holds one list of documents per query.
retriever.batch(["John", "Robert"])

[[Document(id='60ac6a2a-664a-4306-9e12-ff04f81fd670', metadata={'source': 'us-presidents-doc'}, page_content='John F. Kennedy served as the 35th president of the United States from 1961 until his assassination in 1963.')],
 [Document(id='2f05af02-b155-42cd-9d83-bc4c72ef401d', metadata={'source': 'us-politics-doc'}, page_content='Robert F. Kennedy was a key political figure and served as the U.S. Attorney General; he was also assassinated.')]]

## Retrievers with Runnables

LangChain VectorStore objects are not runnables, so they cannot be integrated directly into LangChain Expression Language (LCEL) chains. LangChain Retrievers, on the contrary, are runnables.

In [13]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate

# Prompt template with two placeholders: the user's question and the retrieved context.
message = """
Answer this question based on the context below.

{question}

Context:
{context}
"""
prompt = ChatPromptTemplate.from_messages([("human", message)])

# The leading dict sends the chain input to both keys: the retriever turns it
# into context documents, while RunnablePassthrough forwards it unchanged as
# the question. The filled prompt then goes to the chat model.
chain = (
    {
        "context": retriever,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm  # reuse the chat model configured earlier instead of building a second, unconfigured client
)


In [14]:
# Run the whole chain for a single question: retrieve context, fill the prompt, call the model.
response = chain.invoke(input="tell me about John")

# Show only the text of the model's reply.
print(response.content)

John F. Kennedy served as the 35th president of the United States from 1961 until his assassination in 1963.
