Context Re-Ranking
Scrape the Techify website and build a vector store from its content. Then create an agent in AGNO that acts as a Q&A bot over the scraped website data.

Web Scraper for Techify Website

In [25]:
import os
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document

from groq import Groq

# Load environment variables (python-dotenv) before reading the API key below.
load_dotenv()
# Groq credentials come from the environment, never from a literal in the notebook.
groq_api_key = os.getenv("GROQ_API_KEY")
client = Groq(api_key=groq_api_key)

# Shared embedding model used when building the FAISS index.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# Module-level vector store: assigned by embed_and_store(), read by retrieve_context().
# Stays None until a page has been embedded.
faiss_index = None

def scrape_website(base_url, timeout=30):
    """Download a single page and return the text of its ``<p>`` elements.

    Only the page at ``base_url`` is fetched; links are not followed.

    Args:
        base_url: URL of the page to download.
        timeout: Seconds to wait for the server, forwarded to ``requests.get``.
            Without it a stalled connection would block the notebook forever.

    Returns:
        The text of every ``<p>`` element, joined with newlines. Empty string
        when the page contains no paragraphs.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: Connection failure or timeout.
    """
    response = requests.get(base_url, timeout=timeout)
    # Fail fast on error responses so an error page is never embedded as site content.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")
    return "\n".join(p.get_text() for p in soup.find_all("p"))

Splitting and Embedding the Content into a Vector Store

In [26]:
def split_texts(text, chunk_size=500, chunk_overlap=50):
    """Break ``text`` into overlapping chunks.

    Args:
        text: The string to split.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as ``chunk_size``.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as ``chunk_overlap``.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` returns for ``text``.
    """
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
    }
    chunker = RecursiveCharacterTextSplitter(**splitter_options)
    return chunker.split_text(text)

def embed_and_store(text_chunks):
    """Embed ``text_chunks`` and replace the module-level FAISS index with the result.

    Args:
        text_chunks: Non-empty sequence of strings to embed.

    Raises:
        ValueError: ``text_chunks`` is empty. Any previously built index is
            left untouched in that case.

    Side effects:
        Rebinds the global ``faiss_index`` and prints how many chunks were stored.
    """
    global faiss_index
    # Guard up front: an empty list would otherwise fail deep inside the
    # vector-store constructor with an error that does not name the real cause.
    if not text_chunks:
        raise ValueError("text_chunks is empty; nothing to embed")
    faiss_index = FAISS.from_texts(text_chunks, embedding_model)
    print(f"Stored {len(text_chunks)} chunks into FAISS index")

In [27]:
def retrieve_context(query, k=4):
    """Return the page content of the ``k`` chunks most similar to ``query``.

    Args:
        query: Text to search the vector store with.
        k: Number of results requested from ``similarity_search``.

    Returns:
        The ``page_content`` of each hit, joined with newlines.

    Raises:
        RuntimeError: No index has been built yet (``faiss_index`` is ``None``).
    """
    # Without this check the failure is an AttributeError on NoneType, which
    # does not tell the user that data simply has not been loaded.
    if faiss_index is None:
        raise RuntimeError(
            "Vector store is not initialised; call embed_and_store() before retrieve_context()"
        )
    results = faiss_index.similarity_search(query, k=k)
    return "\n".join(doc.page_content for doc in results)

In [28]:
def query_groq_llama3(question, context, model="llama3-70b-8192", temperature=0.3):
    """Ask a Groq-hosted chat model to answer ``question`` using ``context``.

    Args:
        question: The user's question.
        context: Retrieved text placed ahead of the question in the user message.
        model: Groq model ID. Defaults to the ID this notebook was written with.
            NOTE(review): hosted model IDs get retired over time; confirm this
            one is still served, or pass a current ID.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The message content of the first choice in the completion response.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant answering questions based on provided context."},
        {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}"},
    ]
    response = client.chat.completions.create(
        model=model,
        temperature=temperature,
        messages=messages,
    )
    return response.choices[0].message.content


In [29]:
class TechifyAgent:
    """Facade that chains the module-level scrape, split, embed, retrieve and answer helpers.

    The class holds no state of its own; the vector store lives in the
    module-level ``faiss_index`` that ``embed_and_store`` assigns.
    """

    def load_data(self, url: str):
        """Scrape ``url``, chunk the text, embed it, and return a status message."""
        embed_and_store(split_texts(scrape_website(url)))
        return f"Loaded and embedded data from {url}"

    def ask(self, question: str):
        """Print the question, retrieve matching context, and return the model's answer."""
        print(f"Answering: {question}")
        return query_groq_llama3(question, retrieve_context(question))

In [30]:
# Interactive driver: load the Techify site once, then answer questions until
# the user types 'exit' (or closes/interrupts the prompt).
if __name__ == "__main__":
    agent = TechifyAgent()
    techify_url = "https://techifysolutions.com/"

    print("Initializing Techify Agent...")
    print(agent.load_data(techify_url))

    while True:
        try:
            # Strip so that " exit " and accidental trailing spaces behave as expected.
            q = input("\nAsk a question about Techify (or type 'exit'): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted prompt ends the session without a traceback.
            break
        if q.lower() == "exit":
            break
        if not q:
            # Blank input: re-prompt instead of spending an LLM call on an empty question.
            continue
        ans = agent.ask(q)
        print("\nAnswer:")
        print(ans)

Initializing Techify Agent...
Stored 18 chunks into FAISS index
Loaded and embedded data from https://techifysolutions.com/
Answering: What do they offer?

Answer:
Based on the provided context, Techify offers customized digital solutions, including:

1. Building special digital solutions to grow, manage, and channelize businesses.
2. Developing tailor-made solutions that meet specific requirements.
3. Providing customized and scalable financial solutions for secure and faster processes.

They also provide services related to Cloud, LaunchPad, Bizio, Walkins CRM, and AI CardVault.
