In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2


In [2]:
from dotenv import load_dotenv

In [3]:
# Load environment variables via python-dotenv before anything reads them.
# NOTE(review): presumably this supplies the LLM API credentials used by the
# cells below -- confirm which variables the project expects.
load_dotenv()

True

In [4]:
# Build the async engine and the session factory for the demo database.

from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker

# Database dialect; also handed to the query processor in later cells.
db_type = "sqlite"

# The dialect is combined with the aiosqlite driver to form the engine URL.
database_url = f"{db_type}+aiosqlite:///demo_databases/tn_covid_cases_11_may.sqlite"
aengine = create_async_engine(url=database_url)

# Factory producing AsyncSession objects bound to the engine above.
async_session = sessionmaker(bind=aengine, class_=AsyncSession, expire_on_commit=False)

In [5]:
# Parameters
# Every value in this cell is passed to LLMQueryProcessor in the cells below.

# Identifier for the metric database.
metric_db_id = "test"

# Model used to answer the question, and model used for the guardrail checks.
llm = "gpt-4o"
guardrails_llm = "gpt-4o"

# System message describing who is asking and what help they need.
sys_message = "Government and health officials in Tamil Nadu, India will ask you questions. You need to help them manage COVID cases and the availablity of beds in health facilities."

# One bullet per table, one bullet per line. Adjacent string literals are
# joined with an explicit "\n": a backslash continuation inside a single
# literal removes the line break and fuses the bullets together.
db_description = (
    "- bed_vacancies_clinics_11_may: Each row identifies a district and the beds earmarked, occupied and available for COVID cases in the district clinics.\n"
    "- bed_vacancies_health_centers_and_district_hospitals_11_may: Each row identifies a district and the beds earmarked, occupied and available, with and without oxygen supply, and with and without ICU support, for COVID cases in the disctrict health centers and hospitals.\n"
    "- covid_cases_11_may: Each row identifies a district and the number of people who received treatment, were discharged and died due to COVID."
)

num_common_values = 10

# Use a comma-delimited string when there is more than one indicator variable.
indicator_vars = "district_name"

# Your question
query = {
    "query_text": "How many beds are there in chennai??",
    "query_metadata": {}
}

In [6]:
# Ask a question: run the query defined in the parameters cell through the
# query processor and print its natural-language answer.
from askametric.query_processor.query_processor import LLMQueryProcessor

# The session is opened only for the duration of this block.
async with async_session() as session:
    # The leading arguments are positional, so their order must match the
    # LLMQueryProcessor signature (not visible in this notebook).
    qp = LLMQueryProcessor(
        query,
        session,
        metric_db_id,
        db_type,
        llm,
        guardrails_llm,
        sys_message,
        db_description,
        column_description="",
        num_common_values=num_common_values,
        indicator_vars=indicator_vars
    )
    # final_answer is read only after process_query() has been awaited.
    await qp.process_query()
    print(qp.final_answer)

In Chennai, there are a total of 20,334 beds available. This total is derived from two sources: 7,179 beds from clinics and 13,155 beds from health centers and district hospitals. This information is based on the latest data collected on May 11.


In [7]:
# Validate a question
from askametric.validation.validation_processor import QueryEvaluator

validation_question = {
    "correct_language": "English",
    "correct_script": "Latin",
    "question": "How many active COVID cases are there in Chennai?",
    "correct_answer": "There are 37713 active COVID cases in Chennai as of May 11.",
    "correct_best_tables": ["covid_cases_11_may"],
    "correct_best_columns": {"covid_cases_11_may": ["num_persons_under_treatment_on_11_may",
                     "district_name"]},
    "tests_to_run": ["Relevancy", "Accuracy", "Guardrails", "Instructions"],
    "instructions": sys_message
}

response = LLMQueryProcessor(
    {"query_text": validation_question["question"], "query_metadata": {}},
    session,
    metric_db_id,
    db_type,
    llm,
    guardrails_llm,
    sys_message,
    db_description,
    column_description="",
    num_common_values=num_common_values,
    indicator_vars=indicator_vars
)
await response.process_query()

llm_response = {
    "llm_response": response.final_answer,
    "llm_ided_script": response.query_script,
    "llm_ided_language": response.query_language,
    "guardrails_status": response.guardrails.guardrails_status,
    "llm_ided_best_tables": response.best_tables,
    "llm_ided_best_columns": response.best_columns,
    }

In [8]:
query_evaluator = QueryEvaluator(llm="gpt-3.5-turbo")
result = await query_evaluator.get_eval_results(
    validation_question,
    llm_response)

In [9]:
# Display the per-test scores and reasons returned by the evaluator.
result

{'relevancy_score': 1,
 'relevancy_reason': 'The answer directly addresses the key element of the question by providing the specific number of active COVID cases in Chennai as of 11th May.',
 'accuracy_score': 1,
 'accuracy_reason': "The 'Answer' provided is similar in meaning to the 'Correct Answer' as both convey the same information about the number of active COVID cases in Chennai on May 11.",
 'accuracy_is_correct_language': 1.0,
 'accuracy_is_correct_script': 1.0,
 'accuracy_has_best_tables': 1.0,
 'accuracy_has_best_columns': 1.0,
 'instructions_score': 1,
 'instructions_reason': 'The answer provides the specific number of active COVID cases in Chennai as of the given date, which aligns with the instructions to help manage COVID cases and availability of beds in health facilities.',
 'guardrails_score': 0.0,
 'guardrails_reason': 'All guardrails passed when they should not have'}