In [1]:
#!pip install langchain langchain-community langchain-google-genai langchain-core langchain-text-splitters chromadb python-dotenv

In [2]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from dotenv import load_dotenv
import os


True

In [3]:

# Step 1: Load laptop information from the text file
document_loader = TextLoader("laptops_info.txt")
raw_documents = document_loader.load()



In [None]:
# Step 2: Split the raw documents into smaller chunks
doc_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
split_docs = doc_splitter.split_documents(raw_documents)
print(f"Total number of document chunks: {len(split_docs)}")

print("--- Sample Document Chunk ---")
print(split_docs[2])



Total number of documents:  5
printing one of the document........
page_content='3. ASUS TUF Gaming F15
- Price: â‚¹79,990
- CPU: Intel i5 11400H
- GPU: NVIDIA RTX 3050 (4GB)
- RAM: 16GB DDR4
- Storage: 512GB SSD
- Good for: Deep learning models, parallel processing
- Comments: Rugged build, best performance for the price

4. Acer Aspire 7
- Price: â‚¹62,990
- CPU: AMD Ryzen 5 5500U
- GPU: NVIDIA GTX 1650
- RAM: 8GB
- Storage: 512GB SSD
- Good for: Intro ML, data science
- Comments: Value for money, limited by RAM/GPU' metadata={'source': 'laptops_info.txt'}


In [10]:
# Step 3: Set up the Google Generative AI embedding model
# Note: Ensure you have a GOOGLE_API_KEY in your .env file.
# You can obtain a key from https://ai.google.dev/gemini-api/docs/api-key

load_dotenv()

google_embedding_model = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=os.getenv("GOOGLE_API_KEY")
)

# Example of embedding a query
sample_embedding = google_embedding_model.embed_query("This is a sample query.")

print("--- Sample Embedding ---")
print(sample_embedding[:5])
print(f"Embedding dimension: {len(sample_embedding)}")

example embeddings........
[0.05636945366859436, 0.004828543867915869, -0.07625909894704819, -0.023642510175704956, 0.053293220698833466]


768

In [12]:
# Step 4: Create a Chroma vector store from the document chunks
chroma_vectorstore = Chroma.from_documents(documents=split_docs, embedding=google_embedding_model)

In [15]:
# Step 5: Create a retriever from the vector store
document_retriever = chroma_vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 2})

In [16]:

# Step 6: Perform a similarity search to retrieve relevant documents
search_query = "Which laptop is the most suitable for machine learning tasks under ₹80,000?"]
retrieved_documents = document_retriever.invoke(search_query)
print(f"Number of retrieved documents: {len(retrieved_documents)}")


2


In [17]:
# Step 7: Print retrieved chunks
print("\nTop Retrieved Chunks:")
for i, doc in enumerate(retrieved_docs):
    print(f"\nChunk {i+1}:\n{doc.page_content}")


Top Retrieved Chunks:

Chunk 1:
# Laptop Buying Tips for AI/ML (2024)
- Prefer 16GB RAM or more
- Look for NVIDIA GPUs like GTX 1650, RTX 3050 or better
- Avoid integrated graphics for training models
- SSD preferred for fast data access
- Ryzen 5, i5 H-series or better recommended

Chunk 2:
# Laptop Buying Tips for AI/ML (2024)
- Prefer 16GB RAM or more
- Look for NVIDIA GPUs like GTX 1650, RTX 3050 or better
- Avoid integrated graphics for training models
- SSD preferred for fast data access
- Ryzen 5, i5 H-series or better recommended
