# Test the components individually

In [1]:
import warnings
# Silence every warning for the rest of the session so the captured cell
# outputs stay readable. NOTE(review): this is a blanket filter -- it also
# hides deprecation warnings raised by the libraries the nodes rely on.
warnings.simplefilter("ignore")

In [2]:
import os
import sys

# Resolve the directory one level above the notebook's working directory and
# expose it on the import path, so that `src.*` modules can be imported below.
current_path = os.getcwd()  # Current working directory
root_path = os.path.abspath(os.path.join(current_path, os.pardir))

# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if root_path not in sys.path:
    sys.path.append(root_path)

In [3]:
# Path to the housing CSV, located at <root_path>/data/housing.csv.
# Every node state built later in this notebook reuses this path.
data_path = os.path.join(root_path, "data", "housing.csv")

---

## Summarization node

In [4]:
from src.nodes.summarization_node import summarization_node

In [5]:
# Start from a minimal state: only the CSV location and an empty metadata string.
state = dict(data_path=data_path, metadata="")

# The node is called for its effect on `state`; its return value is not used here.
summarization_node(state)

# Keep the generated summary so later cells can build fresh states from it.
summary = state["data_summary"]



In [6]:
# Show the summary produced by the summarization node. print() is used so the
# text renders with real line breaks (assuming the summary is a string) instead
# of an escaped repr.
print(state["data_summary"])

# House Price Dataset

- **Number of Rows**: 545

## Column: `price`
- **Data Type**: int64
- **Range**: 1750000 to 13300000
- **Null values**: 0
- **Example Values**: [13300000, 12250000, 12250000, 12215000, 11410000]
- **Description**: Price of the house in some currency.


## Column: `area`
- **Data Type**: int64
- **Range**: 1650 to 16200
- **Null values**: 0
- **Example Values**: [7420, 8960, 9960, 7500, 7420]
- **Description**: Area of the house in some unit.


## Column: `bedrooms`
- **Data Type**: int64
- **Range**: 1 to 6
- **Null values**: 0
- **Example Values**: [4, 4, 3, 4, 4]
- **Description**: Number of bedrooms in the house.


## Column: `bathrooms`
- **Data Type**: int64
- **Range**: 1 to 4
- **Null values**: 0
- **Example Values**: [2, 4, 2, 2, 1]
- **Description**: Number of bathrooms in the house.


## Column: `stories`
- **Data Type**: int64
- **Range**: 1 to 4
- **Null values**: 0
- **Example Values**: [3, 4, 2, 2, 2]
- **Description**: Number of stories in the hou

---

## Grading user query relevance to the dataset

In [7]:
from src.nodes.grade_user_query import grade_query

In [8]:
# A question about house prices: expected to be graded as related to the dataset.
query = "Which is the most expensive housing area?"
state["query"] = query

# The grader records its verdict on the state under "related_to_data".
grade_query(state)
print(state["related_to_data"])

yes


In [9]:
# A question about airports: expected to be graded as unrelated to the dataset.
query = "Which is the most expensive airport?"
state["query"] = query

# Same state object as before; only the query has been replaced.
grade_query(state)
print(state["related_to_data"])

no


---

## Grading whether the user query requires a visualization

In [10]:
from src.nodes.grade_visualization_requirement import grade_visualization_requirement

In [11]:
# A lookup-style question: no chart should be needed to answer it.
query = "Which is the most expensive housing area?"

# Fresh state holding only what this grader is given here: summary and query.
state = dict(data_summary=summary, query=query)

grade_visualization_requirement(state)
print(state["requires_visualization"])

no


In [12]:
# A question about a distribution: a chart is the natural way to answer it.
query = "What is the distribution of the housing prices?"

# Fresh state holding only what this grader is given here: summary and query.
state = dict(data_summary=summary, query=query)

grade_visualization_requirement(state)
print(state["requires_visualization"])

yes


---

## Code Agent

The code agent answers the user query using code, without producing any visualizations.

In [25]:
from src.nodes.data_query_node import data_query_node

In [26]:
# Inputs handed to the code agent: the dataset summary, the user's question,
# and the location of the CSV file.
state = dict(
    data_summary=summary,
    query="Which is the most expensive housing area?",
    data_path=data_path,
)

In [27]:
# Run the code agent on the prepared state. The call is left as the cell's last
# expression so the notebook displays the dict returned by the node.
data_query_node(state)

Generation:
{'answer': "Based on the average house price, the most expensive area is indicated by 'yes' in the 'prefarea' column.", 'charts': []}


{'generation': {'answer': "Based on the average house price, the most expensive area is indicated by 'yes' in the 'prefarea' column.",
  'charts': []}}

In [28]:
# Confirm that the answer is also available on the state under "generation"
# after the node has run.
print(state["generation"])

{'answer': "Based on the average house price, the most expensive area is indicated by 'yes' in the 'prefarea' column.", 'charts': []}


---

## Chart Node

In [17]:
from src.nodes.chart_node import chart_node

In [18]:
# Inputs handed to the chart node: the dataset summary, a question that calls
# for visual analysis, and the location of the CSV file.
state = dict(
    data_summary=summary,
    query="What is the relation price and other features?",
    data_path=data_path,
)

In [19]:
# Run the chart node on the prepared state. The call is left as the cell's last
# expression so the notebook displays the dict returned by the node.
chart_node(state)



{'generation': {'answer': "Analysis of House Price vs. Features:\n\nBased on the generated visualizations:\n\n**Numerical Features:**\n- **Area:** A strong positive correlation is observed between price and area. As the area increases, the price tends to increase.\n- **Bedrooms:** A positive correlation exists, but it's not as strong as the area. More bedrooms generally mean higher prices, but the relationship is less linear.\n- **Bathrooms:** Similar to bedrooms, a positive correlation is seen, indicating that more bathrooms are associated with higher prices.\n- **Stories:**  A positive correlation is present; houses with more stories tend to be more expensive.\n- **Parking:** A positive correlation is observed; more parking spaces are associated with higher prices.\n\n**Categorical Features:**\nThe box plots show the price distribution for each category.  Significant differences in median prices across categories suggest that these features also influence house prices.  For example, 

In [20]:
# Confirm that the chart node's answer is also available on the state under
# "generation" after the node has run.
print(state["generation"])

{'answer': "Analysis of House Price vs. Features:\n\nBased on the generated visualizations:\n\n**Numerical Features:**\n- **Area:** A strong positive correlation is observed between price and area. As the area increases, the price tends to increase.\n- **Bedrooms:** A positive correlation exists, but it's not as strong as the area. More bedrooms generally mean higher prices, but the relationship is less linear.\n- **Bathrooms:** Similar to bedrooms, a positive correlation is seen, indicating that more bathrooms are associated with higher prices.\n- **Stories:**  A positive correlation is present; houses with more stories tend to be more expensive.\n- **Parking:** A positive correlation is observed; more parking spaces are associated with higher prices.\n\n**Categorical Features:**\nThe box plots show the price distribution for each category.  Significant differences in median prices across categories suggest that these features also influence house prices.  For example, houses on the m

---

## Transform query

In [21]:
from src.nodes.transform_query import transform_query

In [22]:
# NOTE: the misspelling in this query ("huse") appears to be deliberate test
# input -- the node's output below shows it rewritten to "housing".
state = dict(query="Which is the most expensive huse area?")

In [23]:
# Rewrite the query held in `state`. The call is left as the cell's last
# expression so the notebook displays the dict returned by the node.
transform_query(state)



{'query': 'What is the most expensive housing area?'}

---