In [1]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file into os.environ (existing values win).
load_dotenv()

# The Groq chat client picks up its credentials from the GROQ_API_KEY
# environment variable. load_dotenv() has already exported it under that exact
# name, so nothing needs to be copied; just fail fast with a clear message if
# it is missing instead of hitting an opaque error later in the notebook.
if not os.getenv('GROQ_API_KEY'):
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

In [5]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time

def scrape_website(url, wait_seconds=2):
    """Return the visible text of a web page rendered in headless Chrome.

    Args:
        url: Address of the page to load.
        wait_seconds: Fixed delay, in seconds, to let JavaScript-rendered
            content appear before the text is read. Defaults to 2; values
            less than or equal to zero skip the wait.

    Returns:
        The ``.text`` of the page's ``<body>`` element as a string.

    Raises:
        Any exception raised by Selenium while starting the browser, loading
        the page, or locating the body element. The browser is always shut
        down before the exception propagates.
    """
    # Set up headless Chrome
    options = Options()
    options.add_argument("--headless")  # run in background
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")

    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)

        # Wait a bit for JS content to load
        if wait_seconds > 0:
            time.sleep(wait_seconds)

        # Get the full visible text of the page
        return driver.find_element("tag name", "body").text
    finally:
        # Always release the browser process, even if loading or lookup failed.
        driver.quit()

# Example usage: scrape one page and keep its text in `web_text` for the cells below
url = "https://en.wikipedia.org/wiki/Pakistan"
web_text = scrape_website(url)
print(web_text[:10])  # sanity-check preview: first 10 characters only


Jump to co


In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking parameters: target size of each chunk and how much neighbouring
# chunks overlap, as measured by the splitter's length function.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50

text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

# Break the scraped page text into overlapping pieces for embedding.
chunks = text_splitter.split_text(web_text)
print(f"Number of chunks: {len(chunks)}")


Number of chunks: 673


In [7]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model used to vectorise the text chunks.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

In [8]:
from langchain_community.vectorstores import FAISS  

# Embed every chunk with `embeddings` and index the resulting vectors in FAISS
vector_store = FAISS.from_texts(texts=chunks, embedding=embeddings)

In [9]:
from langchain_classic.chains import RetrievalQA
from langchain.chat_models import init_chat_model

# Chat model that writes the final answer (Groq-hosted Llama 3.1 8B);
# temperature=0 requests the least random sampling.
llm = init_chat_model(
    model="llama-3.1-8b-instant",
    model_provider='groq',
    temperature=0,
)

# Retrieval-based QA: fetch relevant chunks from the vector store, then let the
# LLM answer from them. The "stuff" chain type combines the retrieved chunks
# into one context.
retriever = vector_store.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)



In [10]:
query = "Who is the first PM of Pakistan?"

# The chain is invoked with the question under the "query" key and hands back
# a dict; the generated answer text is read from its "result" key.
qa_inputs = {"query": query}
answer = qa_chain.invoke(qa_inputs)
print(answer["result"])

Liaquat Ali Khan was elected the first Prime Minister of Pakistan.
