# Table selection

In [1]:
%load_ext dotenv
%dotenv

from src.utils.table_selection.table_selector import *
from src.utils.table_selection.table_details import *
from src.utils.api_data_request.api_generator import *
from src.utils.data_analysis.data_analysis import *

In [4]:
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
inference_server_url = "http://34.28.195.178:8000/v1"

chat = ChatOpenAI(
    model="TheBloke/toxicqa-Llama2-7B-AWQ",
    openai_api_key="EMPTY",
    openai_api_base= inference_server_url,
    max_tokens=5,
    temperature=0,
)
messages = [
    SystemMessage(
        content="You are a helpful assistant that translates English to Italian."
    ),
    HumanMessage(
        content="Translate the following sentence from English to Italian: I love programming."
    ),
]
chat.invoke(messages)

AIMessage(content='\n\n[INST]', response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 47, 'total_tokens': 52}, 'model_name': 'TheBloke/toxicqa-Llama2-7B-AWQ', 'system_fingerprint': None, 'finish_reason': 'length', 'logprobs': None}, id='run-de7f427b-bda7-4bf0-a9c7-cacac1109449-0')

In [2]:
TABLES_PATH = getenv('TABLES_PATH')
manager = TableManager(TABLES_PATH)

In [3]:
query = 'what was the CPI of electronics in March of 2014?'

#### Getting table from LLM (OpenAI's API)

In [4]:
table_llm = get_relevant_tables_from_lm(query, manager)



ChatGPT response: The 'Consumer Price Index - CPI' table is the most relevant because it contains the Consumer Price Index (CPI) for various product categories, including electronics, for different periods, including March 2014.

```
{
"table": "Consumer Price Index - CPI"
}
```

Tables: {
"table": "Consumer Price Index - CPI"
}


#### Getting table from DB (with embeddings)

In [5]:
table_db = get_relevant_tables_from_database(query)

#### Getting table with hybrid approach 
- Gets top k matches from DB, then asks the LLM to choose the most relevant

In [6]:
table_db_llm = request_tables_to_lm_from_db(query, manager)


ChatGPT response: The 'Consumer Price Index - CPI' table is the most relevant for this query as it contains the Consumer Price Index (CPI) for various product categories, including electronics, for different periods including March 2014.

```
{
  "table": "Consumer Price Index - CPI"
}
```

Tables: {
  "table": "Consumer Price Index - CPI"
}


# API parameter generator

In [7]:
v, m, c = get_api_params_from_lm(query, table_db_llm, model = 'gpt-4-1106-preview')


ChatGPT response: The chosen variables are "Year", "Month and Year", and "Level 1.1" because we need to specify the time period (March 2014) and the product category (Electronics). The measure is "Consumer Price Index" as we want to know the CPI. The filter is applied to the variables to specify the exact time and product category.

```
{
    "variables": ["Year", "Month and Year", "Level 1.1"],
    "measures": ["Consumer Price Index"],
    "filters": ["Year = 2014", "Month and Year = 201403", "Level 1.1 = Electronics"]
}
```

Parameters: {
    "variables": ["Year", "Month and Year", "Level 1.1"],
    "measures": ["Consumer Price Index"],
    "filters": ["Year = 2014", "Month and Year = 201403", "Level 1.1 = Electronics"]
}


In [8]:
print("Variables:\n\n", v, "\n", "\nMeasures:\n\n", m, "\n", "\nCuts:\n\n", c)

Variables:

 ['Year', 'Month and Year', 'Level 1.1'] 
 
Measures:

 ['Consumer Price Index'] 
 
Cuts:

 ['Year = 2014', 'Month and Year = 201403', 'Level 1.1 = Electronics']


In [9]:
api_url = api_build(table = table_db_llm, table_manager = manager, drilldowns = v, measures = m, cuts = c)

In [10]:
api_url

'https://api-dev.datausa.io/tesseract/data.jsonrecords?cube=Consumer Price Index - CPI&drilldowns=Year,Month and Year,Level 4.4&measures=Consumer Price Index&Year=2014&Month and Year=201403&Level 4.4=403051619'

# API Request

In [11]:
data, df, _ = api_request(api_url)

In [12]:
data

[{'Consumer Price Index': -3.299999952316284,
  'Level 4.4 ID': 403051619,
  'Level 4.4': 'Appliances',
  'Year': 2014,
  'Month and Year ID': 201403,
  'Month and Year': 'March, 2014'}]

In [13]:
df

Unnamed: 0,Consumer Price Index,Level 4.4 ID,Level 4.4,Year,Month and Year ID,Month and Year
0,-3.3,403051619,Appliances,2014,201403,"March, 2014"


# Data Analysis

In [14]:
agent_answer(df, query)



[1m> Entering new  chain...[0m
[32;1m[1;3mThought: To answer the user's question, I need to check if there is data related to electronics for March 2014 in the dataframe. Since the dataframe might use different terminology, I should look for a category that could represent electronics, such as "Appliances" or a similar term. I will need to filter the dataframe based on the "Month and Year" and the "Level 4.4" or a similar column that might contain the category "electronics".

Action: I will first check the unique values in the "Level 4.4" column to see if there is a category that could represent electronics.
Action Input: df['Level 4.4'].unique()[0m
Observation: I will first check the unique values in the "Level 4.4" column to see if there is a category that could represent electronics. is not a valid tool, try another one.
Thought:[32;1m[1;3mI need to use the correct tool to execute the Python command to check the unique values in the "Level 4.4" column.

Action: python_repl_

'The Consumer Price Index (CPI) for "Appliances" in March of 2014 was -3.3. Please note that the dataset does not specifically categorize "electronics," so "Appliances" is the closest available category that may encompass electronics.'

In [15]:
response = agent_answer(df, query)
print(response)



[1m> Entering new  chain...[0m
[32;1m[1;3mThought: To answer the user's question, I need to check if there is data related to electronics for March 2014 in the dataframe. Since the dataframe might use different terminology, I should look for any entries that could correspond to electronics, such as "Appliances" or similar terms, and check for the specific month and year.

Action: I will first check the unique values in the 'Level 4.4' column to see if there is a category that could correspond to electronics.

Action Input: df['Level 4.4'].unique()[0m
Observation: I will first check the unique values in the 'Level 4.4' column to see if there is a category that could correspond to electronics. is not a valid tool, try another one.
Thought:[32;1m[1;3mI need to use the correct tool to execute the Python command to check the unique values in the 'Level 4.4' column.

Action: python_repl_ast
Action Input: df['Level 4.4'].unique()[0m
Observation: [36;1m[1;3m['Appliances'][0m
Thoug

In [16]:
response

'I cannot provide the exact Consumer Price Index (CPI) for electronics in March of 2014 with the available data, as the dataframe only contains information for the category "Appliances." The CPI for appliances in March 2014 was -3.3. If "Appliances" includes electronics in this context, this would be the closest available information. If you need the CPI specifically for electronics, additional data would be required.'