In [2]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
groq_api_key = os.getenv("GROQ_API_KEY")

In [4]:
from langchain_groq import ChatGroq
model = ChatGroq(model_name="qwen-2.5-32b")

In [5]:
from langchain_core.messages import HumanMessage
model.invoke([HumanMessage(content="Assalamualikum, i'm Al almin and i'm student of North south university and major on AI")]).content

"Wa Alaikum Assalam, Al Almin! It's great to hear that you are a student at North South University pursuing a major in Artificial Intelligence. AI is a fascinating and rapidly growing field with a lot of potential for innovation and solving real-world problems. How are you finding your studies so far? Is there any particular area within AI that interests you the most?"

In [6]:
from langchain_core.documents import Document

documents = [
    Document(
        page_content="Dogs are great companions, known for their loyalty and friendliness.",
        metadata={"source": "mammal-pets-doc"},
    ),
    Document(
        page_content="Cats are independent pets that often enjoy their own space.",
        metadata={"source": "mammal-pets-doc"},
    ),
    Document(
        page_content="Goldfish are popular pets for beginners, requiring relatively simple care.",
        metadata={"source": "fish-pets-doc"},
    ),
    Document(
        page_content="Parrots are intelligent birds capable of mimicking human speech.",
        metadata={"source": "bird-pets-doc"},
    ),
    Document(
        page_content="Rabbits are social animals that need plenty of space to hop around.",
        metadata={"source": "mammal-pets-doc"},
    ),
    Document(
        page_content="Hey there i'm alamin and i'm student of North South University and majoring in CSE.",
        metadata={"source": "nsu-database"}
    )
]

In [9]:
type(documents,)

list

In [10]:
from langchain_chroma import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")

In [12]:
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [13]:
vectorStore = Chroma.from_documents(documents=documents, embedding=embeddings)
vectorStore

  attn_output = torch.nn.functional.scaled_dot_product_attention(


<langchain_chroma.vectorstores.Chroma at 0x17059723380>

In [14]:
retriever = vectorStore.as_retriever()

In [17]:
retriever.invoke("Who is alamin?")

[Document(metadata={'source': 'nsu-database'}, page_content="Hey there i'm alamin and i'm student of North South University and majoring in CSE."),
 Document(metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.')]

In [18]:
vectorStore.similarity_search_with_relevance_scores("who is alamin")

  vectorStore.similarity_search_with_relevance_scores("who is alamin")


[(Document(metadata={'source': 'nsu-database'}, page_content="Hey there i'm alamin and i'm student of North South University and majoring in CSE."),
  0.1734990362959128),
 (Document(metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.'),
  -0.29094886616940574),
 (Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
  -0.3090863409575839),
 (Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
  -0.33724515620989437)]

## Why retriever
- vectorstore can not be converted into the chain so dynamic query its need to converted into dynamic query supported so retriever play this role and help us

In [19]:
retriever = vectorStore.as_retriever(
    search_type="similarity",
    search_kwargs={"k":1}
)

retriever.batch("alamin")

[[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.')],
 [Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.')],
 [Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.')],
 [Document(metadata={'source': 'nsu-database'}, page_content="Hey there i'm alamin and i'm student of North South University and majoring in CSE.")],
 [Document(metadata={'source': 'nsu-database'}, page_content="Hey there i'm alamin and i'm student of North South University and majoring in CSE.")],
 [Document(metadata={'source': 'nsu-database'}, page_content="Hey there i'm alamin and i'm student of North South University and majoring in CSE.")]]

In [20]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

message = """
Answer this question using the provided context only.

{question}

Context:
{context}
"""

prompt = ChatPromptTemplate.from_messages([("human", message)])
rag_chain = {"context": retriever, "question": RunnablePassthrough()}|prompt|model

In [23]:
response = rag_chain.invoke("tell me about alamin")
response.content

'Alamin is a student at North South University, majoring in Computer Science and Engineering (CSE).'