In [30]:
import pandas as pd
import os

In [31]:
from google import genai
from dotenv import load_dotenv

# Read variables from a local .env file into the process environment so the
# API key does not have to be written into the notebook.
load_dotenv()

# os.getenv returns None when GEMINI_API_KEY is not set; the client is
# constructed with whatever value comes back.
api_key = os.getenv("GEMINI_API_KEY")
client = genai.Client(api_key=api_key)

In [32]:
# Load the power-plant table; the relative path is resolved against the
# kernel's current working directory.
df = pd.read_csv("data.csv")

In [33]:
import minsearch

In [34]:
# List the column names that are available for indexing and prompting.
df.columns

Index(['country_long', 'name', 'primary_fuel', 'capacity_mw',
       'commissioning_year', 'passage'],
      dtype='object')

In [36]:
# Preview the table; pandas shows only the first and last rows of a long frame.
df

Unnamed: 0,country_long,name,primary_fuel,capacity_mw,commissioning_year,passage
0,United Kingdom,14 Tullywiggan Road,Biomass,1.000,,The 14 Tullywiggan Road power plant in United ...
1,United Kingdom,289 Finvoy Road,Solar,5.900,,The 289 Finvoy Road power plant in United King...
2,United Kingdom,30 Acres,Solar,4.965,,The 30 Acres power plant in United Kingdom. It...
3,United Kingdom,A C Shropshire (Farm AD),Biomass,2.500,,The A C Shropshire (Farm AD) power plant in Un...
4,United Kingdom,A'Chruach,Wind,42.600,,The A'Chruach power plant in United Kingdom. I...
...,...,...,...,...,...,...
12579,United States of America,Zion Landfill Gas to Energy Facility,Waste,7.000,2003,The Zion Landfill Gas to Energy Facility power...
12580,United States of America,Zorn,Gas,18.000,1969,The Zorn power plant in United States of Ameri...
12581,United States of America,Zotos International WPGF,Wind,3.400,2012,The Zotos International WPGF power plant in Un...
12582,United States of America,Zumbro Community Solar Garden,Solar,1.000,2018,The Zumbro Community Solar Garden power plant ...


In [37]:
# One dict per row, keyed by column name -- the record format handed to index.fit.
documents = df.to_dict(orient='records')

In [38]:
# Text search over the descriptive columns only; no keyword fields are
# registered, so searches in this notebook run without keyword filters.
index = minsearch.Index(
    text_fields=['country_long', 'name', 'passage'],
    keyword_fields=[]
)

In [39]:
# Build the index from the records; fit returns the index itself, which is
# why the cell output is the Index repr.
index.fit(documents)

<minsearch.minsearch.Index at 0x7f18edcf8100>

In [40]:
# Sample question reused by the prompt-building and RAG cells below.
query = "Drax Power Station capacity in MW"

In [41]:
def search(query, num_results=1):
    """Look up the indexed documents that best match a free-text query.

    Args:
        query: Question or keywords to match against the indexed text fields.
        num_results: Maximum number of documents to request from the index.
            Defaults to 1, the value that was previously hard-coded, so
            existing ``search(query)`` calls behave as before.

    Returns:
        Whatever ``index.search`` returns for the query. ``build_prompt``
        iterates over it and unpacks every item as a document dict.
    """
    # Empty filter_dict: no keyword filtering is requested.
    return index.search(
        query=query,
        filter_dict={},
        num_results=num_results,
    )

In [42]:
# Inspect a single record to see the keys and value types each document carries.
documents[0]

{'country_long': 'United Kingdom',
 'name': '14 Tullywiggan Road',
 'primary_fuel': 'Biomass',
 'capacity_mw': 1.0,
 'commissioning_year': None,
 'passage': 'The 14 Tullywiggan Road power plant in United Kingdom. It is a Biomass facility with a capacity of 1 MW. Located at latitude 55 and longitude -7.'}

In [44]:

# Outer prompt: instructions for the model, then the user's question and the
# retrieved context. Stripped so it carries no leading/trailing blank lines.
prompt_template = """
You're an Energy plants assistant. Answer the QUESTION based on the CONTEXT from the GPPD.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()


# One retrieved document rendered as "field: value" lines; each placeholder
# must be present as a key in the document dict.
entry_template = """
country_long: {country_long},
name: {name},
primary_fuel: {primary_fuel},
capacity_mw: {capacity_mw},
commissioning_year: {commissioning_year},
passage: {passage}
""".strip()


def build_prompt(query, search_results):
    """Render the final LLM prompt for a question and its retrieved documents.

    Args:
        query: The user's question; fills ``{question}`` in ``prompt_template``.
        search_results: Iterable of document dicts. Each one must provide
            every key referenced by ``entry_template`` (a missing key raises
            ``KeyError``); extra keys are ignored.

    Returns:
        ``prompt_template`` with the question and the rendered documents
        filled in, with surrounding whitespace stripped.
    """
    # Every rendered entry ends with a newline so consecutive documents
    # start on their own line inside the context section.
    rendered_entries = [
        entry_template.format(**record) + "\n" for record in search_results
    ]
    filled = prompt_template.format(
        question=query,
        context="".join(rendered_entries),
    )
    return filled.strip()


In [45]:
# Dry run of retrieval and prompt construction; printing the prompt shows
# the exact text that would be sent to the model.
search_results = search(query)
prompt = build_prompt(query, search_results)
print(prompt)

You're an Energy plants assistant. Answer the QUESTION based on the CONTEXT from the GPPD.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: Drax Power Station capacity in MW

CONTEXT: 
country_long: United Kingdom,
name: Drax,
primary_fuel: Coal,
capacity_mw: 1980.0,
commissioning_year: None,
passage: The Drax power plant in United Kingdom. It is a Coal facility with a capacity of 1980 MW. Located at latitude 54 and longitude -1.


In [57]:
def llm(prompt, model="gemini-2.5-flash"):
    """Send a prompt to a Gemini model and return the text of its reply.

    Args:
        prompt: Full prompt text, passed as ``contents`` of the request.
        model: Name of the model to call. Defaults to "gemini-2.5-flash",
            the model that was previously hard-coded, so existing
            ``llm(prompt)`` calls behave as before.

    Returns:
        The ``text`` attribute of the response object.
    """
    response = client.models.generate_content(
        model=model,
        contents=prompt,
    )
    return response.text

In [58]:
def rag(query):
    """Answer a question with retrieval-augmented generation.

    Retrieves matching documents with ``search``, renders them into a prompt
    with ``build_prompt`` and returns what ``llm`` produces for that prompt.
    """
    retrieved_docs = search(query)
    return llm(build_prompt(query, retrieved_docs))

In [59]:
# End-to-end check: retrieve, build the prompt and query the model.
answer = rag(query)
print(answer)

Drax Power Station has a capacity of 1980 MW.
