In [1]:
from dotenv import load_dotenv
import os
load_dotenv()
from langchain_groq import ChatGroq

# load_dotenv() above is expected to have placed the .env entries in the process
# environment, so these keys only need validating. Assigning os.getenv(...)
# straight back raises an opaque "TypeError: str expected, not NoneType" when a
# key is missing; fail with an actionable message instead.
for _required_key in ("HF_TOKEN", "GROQ_API_KEY"):
    _value = os.getenv(_required_key)
    if _value is None:
        raise RuntimeError(
            f"{_required_key} is not set; add it to your .env file or export it "
            "before running this notebook."
        )
    os.environ[_required_key] = _value

from langchain_huggingface import HuggingFaceEmbeddings

# Sentence-embedding model shared by the vector stores built in the cells below.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

# Chat model served through Groq.
llm = ChatGroq(
    model="meta-llama/llama-4-scout-17b-16e-instruct",
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Embed a short probe string and display the vector length, i.e. the dimension
# the FAISS indexes below are created with.
len(embeddings.embed_query("Hello sir"))

768

In [3]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain_community.vectorstores.utils import DistanceStrategy

In [4]:
# Size the index from the embedding model itself rather than hard-coding 768,
# so the index always matches the model's output dimension.
embedding_dim = len(embeddings.embed_query("Hello sir"))

# Flat L2 (Euclidean) index. For cosine similarity, use faiss.IndexFlatIP over
# L2-normalised vectors instead.
index = faiss.IndexFlatL2(embedding_dim)


In [5]:
# Build an empty FAISS-backed vector store: `index` holds the vectors, the
# in-memory docstore holds the texts, and index_to_docstore_id maps index
# positions to docstore ids.
vectoStore = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

In [6]:
# Add three sample sentences to the store; the displayed list holds the ids
# assigned to them.
vectoStore.add_texts(
    [
        "Agentic ai is the future",
        "Agentic ai is most powerful",
        "Dhaka is the capital of BD",
    ]
)

['3683e169-2519-4584-854a-b4003d1f398a',
 '42c280ae-5bf0-449f-af3e-9d469b42b634',
 'b0ef36ef-eea2-4cb1-b534-1a03b4d7d147']

In [7]:
# Inspect the mapping from index position to docstore id.
vectoStore.index_to_docstore_id

{0: '3683e169-2519-4584-854a-b4003d1f398a',
 1: '42c280ae-5bf0-449f-af3e-9d469b42b634',
 2: 'b0ef36ef-eea2-4cb1-b534-1a03b4d7d147'}

In [8]:
# Fetch the two stored texts closest to the query.
vectoStore.similarity_search("tell me about agentic ai.", k=2)

[Document(id='3683e169-2519-4584-854a-b4003d1f398a', metadata={}, page_content='Agentic ai is the future'),
 Document(id='42c280ae-5bf0-449f-af3e-9d469b42b634', metadata={}, page_content='Agentic ai is most powerful')]

In [9]:
# Wrap the store as a retriever with no search_kwargs, i.e. default settings.
retriever = vectoStore.as_retriever()

In [10]:
# No k was configured and the store only holds three texts, so all of them
# come back.
retriever.invoke("Tell me about agentic ai")

[Document(id='3683e169-2519-4584-854a-b4003d1f398a', metadata={}, page_content='Agentic ai is the future'),
 Document(id='42c280ae-5bf0-449f-af3e-9d469b42b634', metadata={}, page_content='Agentic ai is most powerful'),
 Document(id='b0ef36ef-eea2-4cb1-b534-1a03b4d7d147', metadata={}, page_content='Dhaka is the capital of BD')]

In [11]:
from langchain.schema import Document

# (page_content, source) pairs for the sample corpus, in document order.
_document_specs = [
    ("AI tools are changing how students learn by adapting to their individual needs.", "news"),
    ("Just tested an AI tutor that gives instant feedback—crazy how far tech has come!", "tweet"),
    ("Personalized learning paths powered by LLMs can boost student performance.", "blog"),
    ("AI-based systems can detect when students are struggling and offer support.", "article"),
    ("LangChain’s new educational framework makes building AI tutors easier.", "tweet"),
    ("Every failure brings you one step closer to success.", "tweet"),
    ("OpenAI's latest model showcases improved reasoning and instruction-following.", "research"),
    ("Studying late-night boosts retention more than cramming in the morning.", "study"),
    ("Started using LangGraph to manage agent memory. Game changer!", "tweet"),
    ("Python remains the most popular language in data science.", "article"),
]

# Each Document records its origin under metadata["source"], which later
# searches can filter on.
documents = [
    Document(page_content=content, metadata={"source": origin})
    for content, origin in _document_specs
]

# Keep the individual document_N names bound alongside the list.
(
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
) = documents

## Now create the Vector Store

In [12]:
# Inner-product index sized from the embedding model instead of a hard-coded 768.
embedding_dim = len(embeddings.embed_query("Hello sir"))
index = faiss.IndexFlatIP(embedding_dim)

vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
    # The wrapper defaults to DistanceStrategy.EUCLIDEAN_DISTANCE, where smaller
    # scores are better. IndexFlatIP returns inner products (larger is better),
    # so the strategy must be stated for score thresholds and relevance scores
    # to be interpreted in the right direction.
    distance_strategy=DistanceStrategy.MAX_INNER_PRODUCT,
)

In [13]:
# Add the ten sample documents to the store; the displayed list holds their ids.
vector_store.add_documents(documents=documents)

['d13f993c-5059-4969-911e-db9d19aca132',
 '7170ebbd-3063-4dba-aaa1-4263f0d9dc09',
 '1d3b2379-ba9e-400e-83aa-028c453c1c70',
 '153a243c-3483-43a6-b2f1-2b04098019af',
 '0a548e0a-7bf3-43c5-bea6-d097a296a392',
 'db8c89c7-d76a-4e8c-b210-6de39f6705ba',
 'b564229e-add2-4c47-87d9-ebbacdb19055',
 '0ec73072-1360-4761-bbd5-e485393ca907',
 '6ed019b8-13c2-4420-b142-8616be6a750f',
 '39b46754-047d-411a-bf3c-13255067173c']

In [17]:
# Top-2 matches restricted to documents whose metadata source is "tweet";
# each hit is returned together with its score.
vector_store.similarity_search_with_score(
    "How to boost student performance in study?",
    k=2,
    filter={"source": "tweet"},
)

[(Document(id='7170ebbd-3063-4dba-aaa1-4263f0d9dc09', metadata={'source': 'tweet'}, page_content='Just tested an AI tutor that gives instant feedback—crazy how far tech has come!'),
  0.1969367),
 (Document(id='0a548e0a-7bf3-43c5-bea6-d097a296a392', metadata={'source': 'tweet'}, page_content='LangChain’s new educational framework makes building AI tutors easier.'),
  0.19558191)]

In [18]:
# Rebind `retriever` to the document store, capped at three results per query.
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

In [19]:
# Same question as the filtered search above, this time with no metadata filter.
retriever.invoke("How to boost student performance in study?")

[Document(id='0ec73072-1360-4761-bbd5-e485393ca907', metadata={'source': 'study'}, page_content='Studying late-night boosts retention more than cramming in the morning.'),
 Document(id='153a243c-3483-43a6-b2f1-2b04098019af', metadata={'source': 'article'}, page_content='AI-based systems can detect when students are struggling and offer support.'),
 Document(id='1d3b2379-ba9e-400e-83aa-028c453c1c70', metadata={'source': 'blog'}, page_content='Personalized learning paths powered by LLMs can boost student performance.')]

## Save the vector DB

In [20]:
# Persist the store under the relative path "Local-faissDB".
vector_store.save_local("Local-faissDB")

In [25]:
# Reload the persisted store from disk. NOTE: despite its name, local_retriever
# is a FAISS vector store; a retriever is derived from it with .as_retriever().
local_retriever = FAISS.load_local(
    "Local-faissDB",
    embeddings,
    # SECURITY: loading unpickles the saved docstore, which can execute
    # arbitrary code. Keep this flag only for files this notebook wrote itself.
    allow_dangerous_deserialization=True,
    # The distance strategy is not stored on disk; restate it so the reloaded
    # IndexFlatIP-backed store is not treated as Euclidean.
    distance_strategy=DistanceStrategy.MAX_INNER_PRODUCT,
)

In [26]:
# No search_kwargs are passed, so the default of 4 results per query applies.
local_new_retriever = local_retriever.as_retriever()


In [27]:
# Ask the reloaded store the same question; four documents come back.
local_new_retriever.invoke("How to boost student performance in study?")

[Document(id='0ec73072-1360-4761-bbd5-e485393ca907', metadata={'source': 'study'}, page_content='Studying late-night boosts retention more than cramming in the morning.'),
 Document(id='153a243c-3483-43a6-b2f1-2b04098019af', metadata={'source': 'article'}, page_content='AI-based systems can detect when students are struggling and offer support.'),
 Document(id='1d3b2379-ba9e-400e-83aa-028c453c1c70', metadata={'source': 'blog'}, page_content='Personalized learning paths powered by LLMs can boost student performance.'),
 Document(id='d13f993c-5059-4969-911e-db9d19aca132', metadata={'source': 'news'}, page_content='AI tools are changing how students learn by adapting to their individual needs.')]