In [4]:
!pip install langchain langchain-google-genai faiss-cpu python-dotenv pydantic langchain_community


Collecting langchain_community
  Downloading langchain_community-0.3.13-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain
  Downloading langchain-0.3.13-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.4.0,>=0.3.25 (from langchain)
  Downloading langchain_core-0.3.28-py3-none-any.whl.metadata (6.3 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.7.0-py3-none-any.whl.metadata (3.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.23.2-py3-none-any.whl.metadata (7.1 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading

In [8]:
import os
from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.text_splitter import CharacterTextSplitter
from langchain_core.retrievers import BaseRetriever
from typing import List
from langchain.docstore.document import Document
from pydantic import BaseModel, Field

from google.colab import userdata
api_key = userdata.get('GOOGLE_API_KEY')

# Load environment variables from a .env file
load_dotenv()
os.environ["GOOGLE_API_KEY"] = api_key

In [9]:
class relevant_score(BaseModel):
    score: float = Field(description="The relevance score of the document to the query", example=8.0)

In [10]:
class FactualRetrievalStrategy:
    def __init__(self, texts):
        self.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        text_splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=0)
        self.documents = text_splitter.create_documents(texts)
        self.db = FAISS.from_documents(self.documents, self.embeddings)
        self.llm = ChatGoogleGenerativeAI(temperature=0, model="gemini-1.5-pro", max_tokens=4000)

    def retrieve(self, query, k=4):
        print("retrieving factual")
        # Use LLM to enhance the query
        enhanced_query_prompt = PromptTemplate(
            input_variables=["query"],
            template="Enhance this factual query for better information retrieval: {query}"
        )
        query_chain = enhanced_query_prompt | self.llm
        enhanced_query = query_chain.invoke(query).content
        print(f'enhanced query: {enhanced_query}')

        # Retrieve documents using the enhanced query
        docs = self.db.similarity_search(enhanced_query, k=k*2)

        # Use LLM to rank the relevance of retrieved documents
        ranking_prompt = PromptTemplate(
            input_variables=["query", "doc"],
            template="On a scale of 1-10, how relevant is this document to the query: '{query}'?\nDocument: {doc}\nRelevance score:"
        )
        ranking_chain = ranking_prompt | self.llm.with_structured_output(relevant_score)

        ranked_docs = []
        print("ranking docs")
        for doc in docs:
            input_data = {"query": enhanced_query, "doc": doc.page_content}
            score = float(ranking_chain.invoke(input_data).score)
            ranked_docs.append((doc, score))

        # Sort by relevance score and return top k
        ranked_docs.sort(key=lambda x: x[1], reverse=True)
        return [doc for doc, _ in ranked_docs[:k]]

In [11]:
class AdaptiveRAG:
    def __init__(self, texts: List[str]):
        self.retriever = FactualRetrievalStrategy(texts)
        self.llm = ChatGoogleGenerativeAI(temperature=0, model="gemini-1.5-pro", max_tokens=4000)

        # Create a custom prompt
        prompt_template = """Use the following pieces of context to answer the question at the end.
        If you don't know the answer, just say that you don't know, don't try to make up an answer.

        {context}

        Question: {question}
        Answer:"""
        prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

        # Create the LLM chain
        self.llm_chain = prompt | self.llm

    def answer(self, query: str) -> str:
        docs = self.retriever.retrieve(query)
        input_data = {"context": "\n".join([doc.page_content for doc in docs]), "question": query}
        return self.llm_chain.invoke(input_data)


In [12]:
texts = [
    '''Earth's Distance from the Sun:

"The average distance between the Earth and the Sun is about 93 million miles (150 million kilometers), which is also known as one astronomical unit (AU). The Earth's distance from the Sun varies throughout the year due to its elliptical orbit, ranging from about 91.4 million miles (147.1 million kilometers) at perihelion (closest point) to about 94.5 million miles (152.1 million kilometers) at aphelion (farthest point) 12."
Earth's Orbit:

"Earth orbits the Sun in an elliptical path with an eccentricity of 0.0167. This means the orbit is nearly circular but slightly elongated. The Earth completes one orbit around the Sun in approximately 365.25 days, which defines a year. The average orbital speed of Earth is about 29.78 kilometers per second (18.5 miles per second) 34."
Astronomical Units and Light Years:

"An astronomical unit (AU) is a unit of distance used to describe the average distance between the Earth and the Sun, approximately 150 million kilometers (93 million miles). In terms of light travel, the Sun is about 8.3 light minutes away from Earth, meaning light from the Sun takes about 8.3 minutes to reach us. This distance is also equivalent to 0.00001581 light-years 12."
Orbital Parameters:

"The Earth's orbit has several key parameters: the semi-major axis is about 149.6 million kilometers (1 AU), the orbital eccentricity is 0.0167, and the inclination of the orbit relative to the plane of the ecliptic is about 0.00005 degrees. The perihelion occurs around January 3rd each year, and the aphelion occurs around July 4th 34."
Ephemerides and Orbital Elements:

"Ephemerides are tables that provide the positions of astronomical objects in the sky at given times. They are essential for tracking the orbits of planets, moons, and other celestial bodies. Orbital elements describe the shape and orientation of an orbit, including parameters such as the semi-major axis, eccentricity, inclination, and the longitude of the ascending node
''']

In [7]:
rag_system = AdaptiveRAG(texts)
factual_result = rag_system.answer("What is the distance between the Earth and the Sun?").content
print(f"Answer: {factual_result}")

retrieving factual
enhanced query: Here are some enhanced versions of the query, depending on the specific information you're looking for:

**For a simple, average distance:**

* What is the average distance between the Earth and the Sun in kilometers/miles?  (Specifying units helps)
* What is an astronomical unit (AU)? (Since 1 AU is defined as the average Earth-Sun distance)

**For more precise information:**

* What is the Earth's perihelion and aphelion distance from the Sun? (This gives the closest and furthest points in Earth's orbit)
* What is the current distance between the Earth and the Sun? (This requires a real-time source, as the distance constantly changes)
* How does the Earth-Sun distance vary throughout the year?

**For even more detail:**

* What factors influence the variation in Earth-Sun distance?
* How is the Earth-Sun distance measured?
* What is the semi-major axis of Earth's orbit?


By being more specific in your query, you'll get more relevant and accurate re



Answer: The average distance between the Earth and the Sun is about 93 million miles (150 million kilometers), or one astronomical unit (AU).  However, this distance varies throughout the year due to Earth's elliptical orbit.  It ranges from about 91.4 million miles (147.1 million kilometers) at perihelion (closest point) to about 94.5 million miles (152.1 million kilometers) at aphelion (farthest point).

