## Read Web URLs

In [4]:
from typing import Optional
import requests

def fetch_url_content(url: str) -> Optional[str]:
    """
    Fetches content from a URL by performing an HTTP GET request.

    Parameters:
        url (str): The endpoint or URL to fetch content from.

    Returns:
        Optional[str]: The content retrieved from the URL as a string,
                       or None if the request fails.
    """
    prefix_url: str = "https://r.jina.ai/"
    full_url: str = prefix_url + url  # Concatenate the prefix URL with the provided URL

    try:
        response = requests.get(full_url)  # Perform a GET request
        if response.status_code == 200:
            return response.content.decode('utf-8')  # Return the content of the response as a string
        else:
            print(f"Error: HTTP GET request failed with status code {response.status_code}")
            return None
    except requests.RequestException as e:
        print(f"Error: Failed to fetch URL {full_url}. Exception: {e}")
        return None

In [26]:
# Replace this with the specific endpoint or URL you want to fetch
url: str = "https://www.analyticsvidhya.com/blog/2021/09/complete-guide-to-feature-engineering-zero-to-hero/"
content: Optional[str] = fetch_url_content(url)


if content is not None:
    print("Content retrieved successfully:")
else:
    print("Failed to retrieve content from the specified URL.")

Content retrieved successfully:


## Split the texts

In [27]:
from langchain_text_splitters import MarkdownHeaderTextSplitter
from langchain_text_splitters import RecursiveCharacterTextSplitter
from litellm import completion

In [28]:
token_size = 150
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            model_name="gpt-4",
            chunk_size=token_size,
            chunk_overlap=0,
        )

In [29]:
import re

def clean_text(text):
    # Remove all newline characters
    text = text.replace('\n', ' ').replace('\r', ' ')

    # Replace multiple spaces with a single space
    text = re.sub(r'\s+', ' ', text)

    # Strip leading and trailing spaces
    text = text.strip()

    return text

In [30]:
text_chunks = text_splitter.split_text(content)
print(f"Total chunks: {len(text_chunks)}")

Total chunks: 48


In [31]:
text_chunks[0]

'Title: Complete Guide to Feature Engineering: Zero to Hero\n\nURL Source: https://www.analyticsvidhya.com/blog/2021/09/complete-guide-to-feature-engineering-zero-to-hero/\n\nPublished Time: 2021-09-21T12:31:05+00:00\n\nMarkdown Content:\n****This article was published as a part of the\xa0[Data Science Blogathon](https://datahack.analyticsvidhya.com/contest/data-science-blogathon-12/)****\n\n### Introduction'

In [32]:
import torch
from sentence_transformers import SentenceTransformer

def get_embeddings(texts, model_name="BAAI/bge-large-en-v1.5"):
    """
    Generate embeddings for a list of texts using Sentence Transformers.

    Args:
        texts (list): List of text strings to embed
        model_name (str): Name of the sentence-transformers model to use
                         Default is 'all-MiniLM-L6-v2' which is a good balance of speed and quality

    Returns:
        list: List of embeddings as numpy arrays
    """
    # Load the model (first time will download it)
    model = SentenceTransformer(model_name)

    # Generate embeddings
    embeddings = model.encode(texts, convert_to_tensor=True)

    # Convert to numpy arrays and return as list
    return embeddings.cpu().numpy().tolist()

In [33]:
# Using your text_chunks variable:
embeddings = get_embeddings(text_chunks)
assert len(embeddings) == len(text_chunks)

In [34]:
print(f"Embedding dimension: {len(embeddings[0])}")

Embedding dimension: 1024


In [35]:
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance

# Initialize Qdrant client with in-memory storage (best for Colab)
client = QdrantClient(":memory:")

In [36]:
collection_name = "agent_rag_index"
VECTOR_SIZE = 1024

client.delete_collection(collection_name)

client.create_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE),

)

True

In [37]:
ids = []
payload = []

for id, text in enumerate(text_chunks):
    ids.append(id)
    payload.append({"ul": url, "content": text})

payload[0]

{'ul': 'https://www.analyticsvidhya.com/blog/2021/09/complete-guide-to-feature-engineering-zero-to-hero/',
 'content': 'Title: Complete Guide to Feature Engineering: Zero to Hero\n\nURL Source: https://www.analyticsvidhya.com/blog/2021/09/complete-guide-to-feature-engineering-zero-to-hero/\n\nPublished Time: 2021-09-21T12:31:05+00:00\n\nMarkdown Content:\n****This article was published as a part of the\xa0[Data Science Blogathon](https://datahack.analyticsvidhya.com/contest/data-science-blogathon-12/)****\n\n### Introduction'}

In [38]:
client.upload_collection(
    collection_name=collection_name,
    vectors=embeddings,
    payload=payload,
    ids=ids,
    batch_size=256,
)

In [39]:
client.count(collection_name)

CountResult(count=48)

In [40]:
def search(text: str, top_k: int):
    """
    Search for similar documents using sentence-transformer embeddings

    Args:
        text (str): Query text to search for
        top_k (int): Number of results to return

    Returns:
        search_result: Qdrant search results
    """
    # Get query embedding using the same model as documents
    query_embedding = get_embeddings([text])[0]  # Note: get_embeddings expects a list

    search_result = client.search(
        collection_name=collection_name,
        query_vector=query_embedding,
        query_filter=None,
        limit=top_k
    )
    return search_result

def format_docs(docs):
    """
    Format search results into a single string

    Args:
        docs: List of search results from Qdrant

    Returns:
        str: Formatted string of document contents
    """
    return "\n\n".join(doc.payload["content"] for doc in docs)

# Prompts

1. First prompt will check to see if the *retrieved context* can answer the user question.
2. Second prompt will get the context and question and generates the response.

## First Prompt

In [41]:
decision_system_prompt = """Your job is decide if a given question can be answered with a given context.
If context can answer the question return 1.
If not return 0.

Do not return anything else except for 0 or 1.

Context: {context}
"""

user_prompt = """
Question: {question}

Answer:"""

## Second Prompt

In [42]:
system_prompt = """You are an expert for answering questions. Answer the question according only to the given context.
If question cannot be answered using the context, simply say I don't know. Do not make stuff up.
Your answer MUST be informative, concise, and action driven. Your response must be in Markdown.

Context: {context}
"""

user_prompt = """
Question: {question}

Answer:"""

## Ask questions

In [47]:
question = "what is openai 4o"
results = search(question, top_k=3)
context = format_docs(results)

In [48]:
from sklearn.metrics.pairwise import cosine_similarity

def relevance_decision(context, question, threshold=0.75):
    """
    Determine if the question is relevant to the context based on cosine similarity.

    Args:
        context (str): The context text (could be aggregated from multiple sources).
        question (str): The question text.
        threshold (float): The similarity threshold to decide relevance.

    Returns:
        int: 1 if relevant, 0 otherwise.
    """
    # Preprocessing for consistency
    context = context.strip().lower()
    question = question.strip().lower()

    # Generate embeddings
    context_embedding = get_embeddings([context])[0]
    question_embedding = get_embeddings([question])[0]

    # Compute cosine similarity
    similarity = cosine_similarity([context_embedding], [question_embedding])[0][0]

    # Debugging output
    print(f"Question: {question}")
    print(f"Context: {context[:100]}...")  # Truncate for display
    print(f"Similarity Score: {similarity}")

    # Return 1 if similarity exceeds threshold, else 0
    return 1 if similarity >= threshold else 0

# Example Usage
has_answer = relevance_decision(context, question, threshold=0.7)
print(f"Has Answer: {has_answer}")


Question: what is openai 4o
Context: title: complete guide to feature engineering: zero to hero

url source: https://www.analyticsvidhya....
Similarity Score: 0.4848825694040557
Has Answer: 0


# Check to see if retrieved context can answer the question or not

In [50]:
from IPython.display import Markdown, display  # Only import from here
import nltk
# nltk.download('punkt')      download while running for the first time
# nltk.download('punkt_tab')  download while running for the first time
from transformers import pipeline
from nltk.tokenize import sent_tokenize
from duckduckgo_search import DDGS
import warnings

# Suppress warnings globally
warnings.filterwarnings("ignore")

# Initialize the text generation pipeline
text_generator = pipeline(
    "text-generation",
    model="gpt2",
    tokenizer="gpt2",
    truncation=True,
    pad_token_id=50256
)

# Function to clean and limit the response
def clean_and_limit_response(response_text, max_words=220):
    """
    Clean and limit the response text to a specified number of words, ensuring it ends on a complete sentence.
    Args:
        response_text (str): The raw text generated by the model.
        max_words (int): Maximum number of words to include in the final response.

    Returns:
        str: Cleaned and trimmed response text.
    """
    # Remove unwanted parts
    cleaned_text = response_text.split("Context:")[-1].strip()
    
    # Tokenize into sentences
    sentences = sent_tokenize(cleaned_text)
    final_text = ""
    current_word_count = 0
    
    # Add sentences until the word limit is reached
    for sentence in sentences:
        word_count = len(sentence.split())
        if current_word_count + word_count > max_words:
            break
        final_text += sentence + " "
        current_word_count += word_count

    return final_text.strip()

# Function to generate a response
def generate_response(context, question, max_words=220):
    """
    Generate a response based on context and question using a local text generation model.
    Args:
        context (str): Context text to guide the response.
        question (str): User's question.
        max_words (int): Maximum number of words for the response.

    Returns:
        str: Generated response in Markdown format, cleaned and limited.
    """
    prompt = f"{system_prompt.format(context=context)}\n\n{user_prompt.format(question=question)}"
    approx_tokens = int(max_words * 1.33)  # Estimate tokens based on word count
    generated = text_generator(prompt, max_new_tokens=approx_tokens, num_return_sequences=1)
    response = clean_and_limit_response(generated[0]['generated_text'], max_words)
    return response

# Function to format DuckDuckGo search results
def format_search_results(results):
    """
    Format search results from DuckDuckGo for context.
    Args:
        results (list): List of search results.

    Returns:
        str: Formatted context string.
    """
    return "\n\n".join(result["body"] for result in results)

# Main logic

print(f"Question: {question}")
if has_answer == 1:  # Ensure the integer type is used for the relevance check
    print("Context can answer the question")
    answer = generate_response(context, question, max_words=220)  # Set word limit
    print(f"Answer: {type(answer)}")

    display(Markdown(answer))  # Display Markdown formatted answer
else:
    print("Context is NOT relevant. Searching online...")
    results = list(DDGS().text(question, max_results=5))  # Retrieve DuckDuckGo results
    context = format_search_results(results)
    print("Found online sources. Generating the response...")
    answer = generate_response(context, question, max_words=220)  # Set word limit
    print(f"Answer: {type(answer)}")

    display(Markdown(answer))  # Display Markdown formatted answer

Device set to use cpu


Question: what is openai 4o
Context is NOT relevant. Searching online...
Found online sources. Generating the response...
Answer: <class 'str'>


Prior to GPT-4o, you could use Voice Mode ⁠ to talk to ChatGPT with latencies of 2.8 seconds (GPT-3.5) and 5.4 seconds (GPT-4) on average. To achieve this, Voice Mode is a pipeline of three separate models: one simple model transcribes audio to text, GPT-3.5 or GPT-4 takes in text and outputs text, and a third simple model converts that text back to audio. GPT-4o ⁠ is our newest flagship model that provides GPT-4-level intelligence but is much faster and improves on its capabilities across text, voice, and vision. Today, GPT-4o is much better than any existing model at understanding and discussing the images you share. For example, you can now take a picture of a menu in a different language and talk to GPT-4o to translate it, learn about the ... The OpenAI o1 model represents a significant leap in AI's ability to handle complex reasoning, outperforming GPT-4o in tasks that require deep thought and multi-step problem-solving. However ...

GPT-4o mini is OpenAI's fastest model and offers applications at a lower cost. GPT-4o mini is smarter than GPT-3.5 Turbo and is 60% cheaper. The training data goes through October 2023. GPT-4o mini is available in text and vision models for developers through Assistants API, Chat Completions API and Batch API.

In [25]:
print(results)

[ScoredPoint(id=36, version=0, score=0.7602269910524094, payload={'ul': 'https://em360tech.com/tech-article/what-is-llama-3', 'content': "In this article, we will explore **Meta Llama 3, how to use it** and the differences between Llama 2 and Llama 3.\n\nWhat is Meta Llama 3?\n---------------------\n\nMeta Llama 3 is a [large language model (LLM)](https://em360tech.com/tech-article/large-language-model) developed by Meta that's trained on a massive amount of text data.\n\nThis allows it to understand and respond to language in a comprehensive way, making it suitable for tasks like writing different kinds of creative content, translating languages, and answering your questions in an informative way."}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=38, version=0, score=0.7496102422717222, payload={'ul': 'https://em360tech.com/tech-article/what-is-llama-3', 'content': "Compared to previous versions like Llama 2, Llama 3 boasts better reasoning abilities, code generation, 