In [44]:
from langchain_groq import ChatGroq
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import Chroma, FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
import os
load_dotenv()
groq_key = os.getenv("GROQ")
groq_key

'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'

In [2]:
groq_model = ChatGroq(
    model="llama3-70b-8192",
    groq_api_key=groq_key)

In [3]:
messages = [
    ("system", "You are a helpful assistant."),
    ("human", "Tell me about my pet dog")
]

In [4]:
groq_model.invoke(messages).content

"I'm happy to help! However, I have to admit that I don't have any information about your pet dog. You see, I'm a large language model, I don't have the ability to know about your personal life or your pets unless you tell me about them.\n\nBut I'd love to learn more about your furry friend! Can you tell me a bit about your dog? What's their name, breed, age, and personality like? Do they have any fun quirks or habits?"

### Addind context from a txt file

In [15]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter

In [6]:
loader = TextLoader(
    "D:/training/ml_algorithms/my_pet.txt"
)
text_file = loader.load()
text_file

[Document(metadata={'source': 'D:/training/ml_algorithms/my_pet.txt'}, page_content="Micky, my German Shepherd, is the perfect mix of calm and playful energy. He has \na friendly nature that makes him a joy to be around, always greeting everyone \nwith a wagging tail. Despite his large and strong build, he is incredibly gentle \nand loves to spend time playing, whether it's chasing a ball or running around \nthe yard. His intelligence and loyalty make him not just a pet but a true \ncompanion. Whether he's lounging by my side or eagerly waiting for our next \ngame, Micky brings happiness and warmth to every moment.\n\nMicky also has a curious and playful side that makes every day exciting. He loves exploring new places, sniffing around for hidden treasures, and learning new tricks. His boundless energy comes alive during playtime, especially when he’s chasing after his favorite toy or running in the park. But after all the fun, he enjoys curling up beside me, showing his affectionate a

In [16]:
text_splitter_txt = CharacterTextSplitter(
    separator="\n",
    chunk_size=25,
    chunk_overlap=2,
    length_function=len,
    is_separator_regex=False
)
texts_txt = text_splitter_txt.split_text(text_file[0].page_content)
texts_txt

Created a chunk of size 81, which is longer than the specified 25
Created a chunk of size 78, which is longer than the specified 25
Created a chunk of size 81, which is longer than the specified 25
Created a chunk of size 79, which is longer than the specified 25
Created a chunk of size 74, which is longer than the specified 25
Created a chunk of size 76, which is longer than the specified 25
Created a chunk of size 56, which is longer than the specified 25


['Micky, my German Shepherd, is the perfect mix of calm and playful energy. He has',
 'a friendly nature that makes him a joy to be around, always greeting everyone',
 'with a wagging tail. Despite his large and strong build, he is incredibly gentle',
 "and loves to spend time playing, whether it's chasing a ball or running around",
 'the yard. His intelligence and loyalty make him not just a pet but a true',
 "companion. Whether he's lounging by my side or eagerly waiting for our next",
 'game, Micky brings happiness and warmth to every moment.',
 'Micky also has a curious and playful side that makes every day exciting. He loves exploring new places, sniffing around for hidden treasures, and learning new tricks. His boundless energy comes alive during playtime, especially when he’s chasing after his favorite toy or running in the park. But after all the fun, he enjoys curling up beside me, showing his affectionate and loyal nature. His expressive eyes and wagging tail always brighten 

In [27]:
reccursive_text_splitter_txt = RecursiveCharacterTextSplitter(
    chunk_size=25,
    chunk_overlap=2
)
texts_txt_reccursive = reccursive_text_splitter_txt.split_text(text_file[0].page_content)
texts_txt_reccursive
texts_txt_reccursive_chunks = reccursive_text_splitter_txt.split_documents(text_file)


In [33]:
# Load embedding model
embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")

  embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")


In [35]:
# Create ChromaDB vector store
vector_db_reccursive = Chroma.from_documents(
    documents=texts_txt_reccursive_chunks,
    embedding=embedding_model  # This now uses the correct embedding class
)

In [47]:
question_txt = "What is the nature of my pet dog?"
response_txt_similarity_search = vector_db_reccursive.similarity_search(
    question_txt,
    k=2
)

response_txt_ = vector_db_reccursive.similarity_search(
    question_txt,
    k=2
)
print(response_txt_similarity_search)

[Document(id='46436470-3ba1-49bb-a591-6ac62f924761', metadata={'source': 'D:/training/ml_algorithms/my_pet.txt'}, page_content='a friendly nature that'), Document(id='7c35ce78-5ab9-417b-ae26-6e90f5b9fbce', metadata={'source': 'D:/training/ml_algorithms/my_pet.txt'}, page_content='him not just a pet but a')]


In [49]:

# Create ChromaDB vector store using FAISS
vector_db_reccursive = FAISS.from_documents(
    documents=texts_txt_reccursive_chunks,
    embedding=embedding_model  # This now uses the correct embedding class
)
retriver = vector_db_reccursive.as_retriever(search_kwargs={"k": 2})
print(retriver.invoke(question_txt))

[Document(id='f18de685-fae9-495c-a5c5-8d8af4f2896a', metadata={'source': 'D:/training/ml_algorithms/my_pet.txt'}, page_content='a friendly nature that'), Document(id='823807d0-0fa3-4a5f-a91c-4b16ee7ee7f5', metadata={'source': 'D:/training/ml_algorithms/my_pet.txt'}, page_content='him not just a pet but a')]


## Adding context from a csv file

In [11]:
from langchain_community.document_loaders import CSVLoader

In [13]:
csv_loader = CSVLoader(
    "D:/training/ml_algorithms/people.csv"
)
csv_data = csv_loader.load()
csv_data

[Document(metadata={'source': 'D:/training/ml_algorithms/people.csv', 'row': 0}, page_content='Index: 1\nUser Id: 88F7B33d2bcf9f5\nFirst Name: Shelby\nLast Name: Terrell\nSex: Male\nEmail: elijah57@example.net\nPhone: 001-084-906-7849x73518\nDate of birth: 1945-10-26\nJob Title: Games developer'),
 Document(metadata={'source': 'D:/training/ml_algorithms/people.csv', 'row': 1}, page_content='Index: 2\nUser Id: f90cD3E76f1A9b9\nFirst Name: Phillip\nLast Name: Summers\nSex: Female\nEmail: bethany14@example.com\nPhone: 214.112.6044x4913\nDate of birth: 1910-03-24\nJob Title: Phytotherapist'),
 Document(metadata={'source': 'D:/training/ml_algorithms/people.csv', 'row': 2}, page_content='Index: 3\nUser Id: DbeAb8CcdfeFC2c\nFirst Name: Kristine\nLast Name: Travis\nSex: Male\nEmail: bthompson@example.com\nPhone: 277.609.7938\nDate of birth: 1992-07-02\nJob Title: Homeopath'),
 Document(metadata={'source': 'D:/training/ml_algorithms/people.csv', 'row': 3}, page_content='Index: 4\nUser Id: A31Be

## Adding context from a HTML file

In [14]:
from langchain_community.document_loaders import UnstructuredHTMLLoader

In [15]:
html_loader = UnstructuredHTMLLoader(
    "D:/training/ml_algorithms/pricing.html"
)
html_data = html_loader.load()
html_data

[Document(metadata={'source': 'D:/training/ml_algorithms/pricing.html'}, page_content="Pricing Page\n\nPricing\n\nOur Pricing Plans\n\nThere are many variations of passages of Lorem Ipsum available but the majority have suffered alteration in some form.\n\nSTARTING FROM\n\n$ 19.99/mo\n\n5 User\n\nAll UI components\n\nLifetime access\n\nFree updates\n\nUse on 1 (one) project\n\n4 Months support\n\nPurchase Now\n\nPOPULAR\n\nSTARTING FROM\n\n$ 30.99/mo\n\n5 User\n\nAll UI components\n\nLifetime access\n\nFree updates\n\nUse on 1 (one) project\n\n4 Months support\n\nPurchase Now\n\nSTARTING FROM\n\n$ 70.99/mo\n\n5 User\n\nAll UI components\n\nLifetime access\n\nFree updates\n\nUse on 1 (one) project\n\n4 Months support\n\nPurchase Now\n\nshape\n\nFAQ\n\nAny Questions? Answered\n\nThere are many variations of passages of Lorem Ipsum available but the majority have suffered alteration in some form.\n\nLorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has

## Adding context from a pdf file

In [16]:
from langchain_community.document_loaders import PyPDFLoader

In [18]:
pdf_loader = PyPDFLoader(
    "D:/training/ml_algorithms/my_pet.pdf"
)
pdf_data = pdf_loader.load()
pdf_data

[Document(metadata={'producer': 'LibreOffice 24.8.1.2 (X86_64) / LibreOffice Community', 'creator': 'Writer', 'creationdate': '2025-03-15T16:30:21+00:00', 'source': 'D:/training/ml_algorithms/my_pet.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content="Micky, my German Shepherd, is the perfect mix of calm and playful energy. He has \na friendly nature that makes him a joy to be around, always greeting everyone \nwith a wagging tail. Despite his large and strong build, he is incredibly gentle \nand loves to spend time playing, whether it's chasing a ball or running around \nthe yard. His intelligence and loyalty make him not just a pet but a true \ncompanion. Whether he's lounging by my side or eagerly waiting for our next \ngame, Micky brings happiness and warmth to every moment.")]

## Adding context through wikipedia

In [19]:
from langchain_community.document_loaders import WikipediaLoader

In [20]:
wiki_loader = WikipediaLoader(
    query="Tesla",
    load_max_docs=1
)
wiki_data = wiki_loader.load()
wiki_data

[Document(metadata={'title': 'Tesla, Inc.', 'summary': 'Tesla, Inc. (  TESS-lə or  TEZ-lə) is an American multinational automotive and clean energy company. Headquartered in Austin, Texas, it designs, manufactures and sells battery electric vehicles (BEVs), stationary battery energy storage devices from home to grid-scale, solar panels and solar shingles, and related products and services.\nTesla was incorporated in July 2003 by Martin Eberhard and Marc Tarpenning as Tesla Motors. Its name is a tribute to inventor and electrical engineer Nikola Tesla. In February 2004, Elon Musk led Tesla\'s first funding round and became the company\'s chairman; in 2008, he was named chief executive officer. In 2008, the company began production of its first car model, the Roadster sports car, followed by the Model S sedan in 2012, the Model X SUV in 2015, the Model 3 sedan in 2017, the Model Y crossover in 2020, the Tesla Semi truck in 2022 and the Cybertruck pickup truck in 2023.\nTesla is one of th