# aiXplain Agent RAG System

In [1]:
from aixplain_api import api_key
import os

# Publish the team API key through the process environment under TEAM_API_KEY.
# NOTE(review): presumably the aixplain imports in later cells read this variable,
# which is why it is set before any of them run -- confirm against the SDK docs.
os.environ.update({"TEAM_API_KEY": api_key})

## Get the data from Kaggle

In [128]:
# Install dependencies as needed:
# pip install kagglehub[pandas-datasets]
import kagglehub
from kagglehub import KaggleDatasetAdapter

# Path of the file to load, relative to the root of the Kaggle dataset
file_path = "world-data-2023.csv"

# Load the latest version of the dataset as a pandas DataFrame.
# `dataset_load` is the replacement for the deprecated `load_dataset`, which
# emits a DeprecationWarning on every call; the arguments are unchanged.
df = kagglehub.dataset_load(
    KaggleDatasetAdapter.PANDAS,
    "nelgiriyewithana/countries-of-the-world-2023",
    file_path,
)

# Preview the first rows to confirm the load worked
df.head()

  df = kagglehub.load_dataset(


Unnamed: 0,Country,Density\n(P/Km2),Abbreviation,Agricultural Land( %),Land Area(Km2),Armed Forces size,Birth Rate,Calling Code,Capital/Major City,Co2-Emissions,...,Out of pocket health expenditure,Physicians per thousand,Population,Population: Labor force participation (%),Tax revenue (%),Total tax rate,Unemployment rate,Urban_population,Latitude,Longitude
0,Afghanistan,60,AF,58.10%,652230,323000.0,32.49,93.0,Kabul,8672,...,78.40%,0.28,38041754,48.90%,9.30%,71.40%,11.12%,9797273,33.93911,67.709953
1,Albania,105,AL,43.10%,28748,9000.0,11.78,355.0,Tirana,4536,...,56.90%,1.2,2854191,55.70%,18.60%,36.60%,12.33%,1747593,41.153332,20.168331
2,Algeria,18,DZ,17.40%,2381741,317000.0,24.28,213.0,Algiers,150006,...,28.10%,1.72,43053054,41.20%,37.20%,66.10%,11.70%,31510100,28.033886,1.659626
3,Andorra,164,AD,40.00%,468,,7.2,376.0,Andorra la Vella,469,...,36.40%,3.33,77142,,,,,67873,42.506285,1.521801
4,Angola,26,AO,47.50%,1246700,117000.0,40.73,244.0,Luanda,34693,...,33.40%,0.21,31825295,77.50%,9.20%,49.10%,6.89%,21061025,-11.202692,17.873887


## Indexing the CSV file

In [None]:
from aixplain.factories import IndexFactory
from aixplain.modules.model.record import Record

# Start clean: delete every index returned by a search for this name
stale_indexes = IndexFactory.list(query="country-info-index")["results"]
for stale_index in stale_indexes:
    stale_index.delete()

# Create the index that will hold the country data
index = IndexFactory.create("country-info-index", "Index for country information. ")

# One Record per DataFrame row, with the row serialised as a JSON string
records = [Record(value=row.to_json()) for _, row in df.iterrows()]

# Send all records to the index in a single call
index.upsert(records)


## Indexing scraped web content

In [None]:
from aixplain.factories import IndexFactory, ModelFactory
from aixplain.modules.model.record import Record

# Start clean: delete every index returned by a search for this name
for stale_index in IndexFactory.list(query="Regulatory and Guidance Information for Air")["results"]:
    stale_index.delete()

# Create a new index for Regulatory and Guidance Information for Air
Regulatory_index = IndexFactory.create(
    "Regulatory and Guidance Information for Air",
    "Index for Regulatory and Guidance Information by Topic: Air",
)

# Run the hosted model on the page URL.
# NOTE(review): presumably a web-scraping utility, given that the URL is passed
# as "text" and the result is indexed as page content -- confirm the model id.
utility = ModelFactory.get("66f423426eb563fa213a3531")
website_result = utility.run(
    {"text": "https://www.epa.gov/regulatory-information-topic/regulatory-and-guidance-information-topic-air"}
)
# Alternative source that was tried:
# website_result = utility.run({"text": "https://dga.gov.sa/en/regulatory_framework"})

# The whole result is stored as a single record
records = [Record(value=website_result.data)]
Regulatory_index.upsert(records)


## Create the SQL/CSV tool

In [None]:
from aixplain.factories import AgentFactory
# The CSV is expected to sit next to the notebook, i.e. in the kernel's working directory
current_dir = os.getcwd()
policies_csv = f"{current_dir}/company_policies.csv"

# SQL tool over the company-policies CSV; commits are disabled via enable_commit=False
csv_tool = AgentFactory.create_sql_tool(
    name="company_policies DB",
    description="Database about company policies.",
    source=policies_csv,
    source_type="csv",
    enable_commit=False,
)

## Utility for flight information

In [None]:
def get_flight_info(airline:str):
    """
    Get flight information based on the airline name.

    Queries the aviationstack ``/v1/flights`` endpoint and returns one sentence
    per flight whose airline name contains ``airline`` (case-insensitive).

    Args:
        airline: Airline name, or part of it, to filter on. An empty value
            disables the filter, so every flight in the response is reported.

    Returns:
        A list of strings, one per matching flight. The list is empty when
        nothing matches or the response carries no flight data. Fields missing
        from a flight are rendered as ``None`` in the sentence.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    # Imported inside the function so the function is self-contained when it is
    # passed as `code` to ModelFactory.create_utility_model.
    import requests

    # Define the API endpoint and parameters.
    # The access key is left blank in this notebook; a real aviationstack key
    # has to be filled in before the utility is deployed.
    params = {
        'access_key': ''
    }

    # A timeout keeps a stalled connection from hanging the tool call forever.
    api_result = requests.get('https://api.aviationstack.com/v1/flights', params=params, timeout=30)
    # Surface HTTP failures instead of silently reporting "no flights".
    api_result.raise_for_status()

    api_response = api_result.json()

    wanted = airline.lower() if airline else ''

    results = []
    # `or []` / `or {}` also cover keys that are present but null in the JSON,
    # which `.get(key, default)` alone does not.
    for flight in api_response.get('data') or []:
        airline_info = flight.get('airline') or {}
        flight_info = flight.get('flight') or {}
        departure = flight.get('departure') or {}
        arrival = flight.get('arrival') or {}

        airline_name = airline_info.get('name')
        if wanted and (not airline_name or wanted not in airline_name.lower()):
            continue

        results.append(u'%s flight %s from %s (%s) to %s (%s) is in the air.' % (
            airline_name,
            flight_info.get('iata'),
            departure.get('airport'),
            departure.get('iata'),
            arrival.get('airport'),
            arrival.get('iata')))
    return results


# Import factories
from aixplain.factories import ModelFactory

# Start clean: delete every utility returned by a search for this name
for stale_utility in ModelFactory.list(query="Get flight information")["results"]:
    stale_utility.delete()

# Step 1: register the Python function as a utility model
utility = ModelFactory.create_utility_model(
    code=get_flight_info,
    name="Get flight information",
    description="Get flight information.",
)

# Step 2: deploy the model to make it reusable
utility.deploy()

# Show the model id so it can be referenced later
print(utility.id)

## The Agent 

In [None]:
from aixplain.factories import AgentFactory

# Create an agent that uses the two indexes, the SQL tool and the flight utility.
# The instructions act as a routing guide: each bullet names a tool and the kind
# of question it should be used for. The flight bullet mentions only the airline
# name because `get_flight_info` accepts a single `airline` argument and cannot
# filter by destination.
agent = AgentFactory.create(
    name="Knowledge Assistant for countries information",
    description="Answers based on the provided index",
    instructions=(
        """
        Your task is to handle natural language queries by searching and retrieving relevant information 
        using the appropriate tool from the list below. Understand the user's intent, select the best tool
        accordingly, apply filters when mentioned, and return concise and accurate results.
        Tool Guide:
        - Use `country-info-index` for country-specific questions.
        - Use `Regulatory and Guidance Information for Air` for air-related regulations.
        - Use `company_policies DB` for company policy queries.
        - Use `Get flight information` for flight details based on airline name.
        """
    ),
    tools=[
        AgentFactory.create_model_tool(model=index.id),
        AgentFactory.create_model_tool(model=Regulatory_index.id),
        csv_tool,
        AgentFactory.create_model_tool(model=utility.id),

    ],#llm_id="669a63646eb56306647e1091" # GPT-4o Mini
)

# Smoke-test the agent with a question the country index should answer
agent_response = agent.run("What is the birth rate in Saudi Arabia?")

# Show the per-step trace (tool chosen, tool input and tool output)
agent_response.data['intermediate_steps']


## Results

In [117]:
# Ask a country question and inspect the steps the agent took to answer it
country_query = "What is the birth rate in Saudi Arabia?"
agent_response = agent.run(country_query)

agent_response.data["intermediate_steps"]

INFO:root:Polling for Model: Start polling for model_process


[{'agent': 'Knowledge Assistant for countries information',
  'input': "{'input': 'What is the birth rate in Saudi Arabia?', 'chat_history': [], 'outputFormat': 'text'}",
  'output': 'The birth rate in Saudi Arabia is 17.8.',
  'tool_steps': [{'tool': 'search-aixplain-country-info-index',
    'input': "{'text': 'birth rate in Saudi Arabia'}",
    'output': '[\'{"Country":"Saudi Arabia","Density\\\\n(P\\\\/Km2)":"16","Abbreviation":"SA","Agricultural Land( %)":"80.80%","Land Area(Km2)":"2,149,690","Armed Forces size":"252,000","Birth Rate":17.8,"Calling Code":966.0,"Capital\\\\/Major City":"Riyadh","Co2-Emissions":"563,449","CPI":"118.4","CPI Change (%)":"-1.20%","Currency-Code":"SAR","Fertility Rate":2.32,"Forested Area (%)":"0.50%","Gasoline Price":"$0.24 ","GDP":"$792,966,838,162 ","Gross primary education enrollment (%)":"99.80%","Gross tertiary education enrollment (%)":"68.00%","Infant mortality":6.0,"Largest city":"Riyadh","Life expectancy":75.0,"Maternal mortality ratio":17.0,"M

In [118]:
# Display only the final answer text from the most recent agent run,
# without the intermediate tool steps
agent_response.data.output

'The birth rate in Saudi Arabia is 17.8.'

In [119]:
# Ask a regulation question and inspect the steps the agent took to answer it
regulation_query = "is there are EPA regulations or standards?"
agent_response = agent.run(regulation_query)

agent_response.data["intermediate_steps"]

INFO:root:Polling for Model: Start polling for model_process


[{'agent': 'Knowledge Assistant for countries information',
  'input': "{'input': 'is there are EPA regulations or standards?', 'chat_history': [], 'outputFormat': 'text'}",
  'output': "The EPA sets limits on certain air pollutants under the Clean Air Act (CAA). This includes regulations on emissions from sources like chemical plants and utilities. Key areas of focus include greenhouse gases, criteria air pollutants, and toxic air pollutants. For more detailed information, you can refer to the EPA's official resources on air quality standards and regulations.",
  'tool_steps': [{'tool': 'search-aixplain-regulatory_and_guidance_information_for_air',
    'input': "{'text': 'EPA regulations or standards'}",
    'output': '[\'Regulatory and Guidance Information by Topic: Air | US EPA\\nSkip to main content\\nAn official website of the United States government\\nHere’s how you know\\nHere’s how you know\\nOfficial websites use .gov\\n A .gov website belongs to an official government organi

In [120]:
# Final answer text for the most recent agent run (the EPA regulations question)
agent_response.data.output

"The EPA sets limits on certain air pollutants under the Clean Air Act (CAA). This includes regulations on emissions from sources like chemical plants and utilities. Key areas of focus include greenhouse gases, criteria air pollutants, and toxic air pollutants. For more detailed information, you can refer to the EPA's official resources on air quality standards and regulations."

In [125]:
# Ask a flight question and inspect the steps the agent took to answer it
flight_query = "what is the latest flight information for Emirates?"
agent_response = agent.run(flight_query)

agent_response.data["intermediate_steps"]

INFO:root:Polling for Model: Start polling for model_process


[{'agent': 'Knowledge Assistant for countries information',
  'input': "{'input': 'what is the latest flight information for Emirates?', 'chat_history': [], 'outputFormat': 'text'}",
  'output': 'The latest flight information for Emirates includes:\n- Emirates flight EK5453 from Perth International (PER) to Karratha (KTA) is in the air.\n- Emirates flight EK5718 from Perth International (PER) to Canberra (CBR) is in the air.',
  'tool_steps': [{'tool': 'utilities-aixplain-get_flight_information',
    'input': "{'airline': 'Emirates'}",
    'output': "['Emirates flight EK5453 from Perth International (PER) to Karratha (KTA) is in the air.',\n 'Emirates flight EK5718 from Perth International (PER) to Canberra (CBR) is in the air.']"}],
  'thought': None,
  'runTime': 5.344,
  'usedCredits': 0.0003993,
  'apiCalls': 2,
  'task': None}]

In [127]:
# print() is used here so the newlines in the answer are rendered as separate
# lines instead of being shown as "\n" escapes in the string repr
print(agent_response.data.output)

The latest flight information for Emirates includes:
- Emirates flight EK5453 from Perth International (PER) to Karratha (KTA) is in the air.
- Emirates flight EK5718 from Perth International (PER) to Canberra (CBR) is in the air.
