# Baseline Naive Agent

In [221]:
import pandas as pd
import json
import os 
from datetime import datetime

from dotenv import load_dotenv
from openai import OpenAI
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, mean_absolute_percentage_error
import numpy as np
from tqdm import tqdm
from pathlib import Path
import time

import duckdb

In [2]:
!pip show openai

Name: openai
Version: 2.14.0
Summary: The official Python library for the openai API
Home-page: https://github.com/openai/openai-python
Author: 
Author-email: OpenAI <support@openai.com>
License: Apache-2.0
Location: /opt/anaconda3/lib/python3.12/site-packages
Requires: anyio, distro, httpx, jiter, pydantic, sniffio, tqdm, typing-extensions
Required-by: 


In [3]:
!pip install --upgrade openai



In [65]:
# Loading the stratified features 
X_train = pd.read_parquet('data_output/prod_features/X_train.parquet')
y_train = pd.read_parquet('data_output/prod_features/y_train.parquet')
X_test = pd.read_parquet('data_output/prod_features/X_test.parquet')
y_test = pd.read_parquet('data_output/prod_features/y_test.parquet')

In [132]:
# Partitioning data into chunks of 100 records (to monitor cost efficiency)
output_dir = 'data_output/prod_features/json_test_files/'
partition_size = 100

X_test = X_test.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)

df_to_chunk ={'X_test':X_test, 'y_test':y_test}

for name, file in df_to_chunk.items():

    for chunk_id, chunk_df in iter(file.groupby(np.arange(len(file)) // partition_size)):
        file_path = f'{output_dir}{name}_part_{chunk_id}.jsonl'
        chunk_df.to_json(file_path,orient='records', lines=True)

    print(f'Partioning Complete: {chunk_id} Partitions Created')


Partioning Complete: 87 Partitions Created
Partioning Complete: 87 Partitions Created


In [None]:
class NaiveAgent:
    def __init__(self, system_prompt = None, model = 'gpt-5-nano' ):
        load_dotenv()
        
        api_key = os.getenv('OPENAI_API_KEY')
        if not api_key:
            raise ValueError('OpenAI API Key not found in .env')
        
        self.client = OpenAI(api_key=api_key)
        
        self.model = model
        self.system_prompt = system_prompt
        self.user_prompt = None
        self.base_data_path = Path('data_output/prod_features/json_test_files/')
        self.output_dir = Path('data_output/agentic_outputs')
        self.input_data = None
    
    def JSON_read(self, file_name, output_dir='data_output'):
        try:
            data = []
            self.file_path = self.base_data_path / file_name
            with self.file_path.open('r', encoding='utf-8') as f:
                for line in f:
                    data.append(json.loads(line.strip()))
            return data
        except Exception as e:
            print(f'Error Loading JSON: \n{e}')
            return None
    
    def call_agent(self):   
        str_user_prompt = str(self.user_prompt)
        str_sys_prompt = str(self.system_prompt)
        
        agent_response = self.client.responses.create(
            model = self.model,
            input = [
                {
                    "role":"system",
                    "content":str_sys_prompt
                },
                {
                    "role":"user",
                    "content":str_user_prompt
                }
            ],
            text = {
                "format":{
                    "type":"json_schema",
                    "name":"agent_output",
                    "strict":True,
                    "schema": {
                        "type":"object",
                        "properties":{
                                        "price":{"type":"number"}
                                    },
                                    "required":["price"],
                                    "additionalProperties": False
                                }
                            }
                        }
    )
        output = json.loads(agent_response.output_text)
        return output
    
    def batch_predict(self, file_name, output_file='preds.jsonl'):
        data = self.JSON_read(file_name)
        if not data:
            return
        results = []
        
        for i, record in enumerate(tqdm(data, desc="Processing Test Data Predictions")):
            self.user_prompt = f'Predict the price for this item:\n{json.dumps(record,indent=2)}'
            prediction = self.call_agent()
        
            if prediction:
                result = {
                    'predicted_price': prediction['price'],
                    'pred_id':i
                }
                results.append(result)
        
        # Saving results as JSONL
        
        output_dir = Path('data_output/agentic_preds/') 
        start = Path(file_name).stem #before the .jsonl
        end = Path(file_name).suffix
        output_file_name = f'{start}_preds{end}'
        output_path = output_dir / output_file_name
        
        with output_path.open('w', encoding='utf-8') as f:
            for result in results:
                f.write(json.dumps(result) + '\n')
        
        print(f'Saved {len(results)} predictions at {str(output_path)}')

In [None]:
"""
start_time = time.time()

agent_naive = NaiveAgent(
    system_prompt=(
    'Predict the house price in GBP based on what you know and the information provided.'
    )
    )
chunk1_response = agent_naive.batch_predict(file_name='X_test_part_0.jsonl')

end_time = time.time()
duration = end_time - start_time
print(f'Predictions were completed in {duration}')
"""

Processing Test Data Predictions: 100%|██████████| 100/100 [17:38<00:00, 10.58s/it]

Saved 100 predictions at data_output/agentic_preds/X_test_part_0_preds.jsonl
Predictions were completed in 1058.4592680931091





In [234]:
# Evaluating the Naive Model
y_preds_fp = 'data_output/agentic_preds/X_test_part_0_preds.jsonl'
y_actuals_fp = 'data_output/prod_features/json_test_files/y_test_part_0.jsonl'

y_preds_AI = pd.read_json(y_preds_fp, lines=True)
y_preds_AI = y_preds_AI['predicted_price'] # I only need the predicted price column.
y_actuals= pd.read_json(y_actuals_fp, lines=True, chunksize=None)

test_rmse_AI = np.sqrt(mean_squared_error(y_actuals, y_preds_AI))
test_mae_AI = mean_absolute_error(y_actuals,y_preds_AI)
test_mape_AI = round(mean_absolute_percentage_error(y_actuals,y_preds_AI)*100,2)


print(f'Test RMSE: {test_rmse_AI} \nTest MAE: {test_mae_AI} \nTest MAPE: {test_mape_AI}%')


Test RMSE: 311629.8876407717 
Test MAE: 177110.9 
Test MAPE: 26.2%


In [249]:
# Iterating over all available the chunks for the predicitons
number_of_partitions = 1 # Only 1 partition was predicted using openAI
y_actuals_list = []
y_preds_list = []

for n in range(number_of_partitions): 
    y_actuals_file = f'y_test_part_{str(n)}.jsonl'
    y_preds_file = f'X_test_part_{str(n)}_preds.jsonl'
    
    y_actuals_list.append(y_actuals_file)
    y_preds_list.append(y_preds_file)

print(y_preds_list)
print(y_actuals_list)


['X_test_part_0_preds.jsonl']
['y_test_part_0.jsonl']


In [251]:
# Evaluating all available chunks:
rmse_AI = []
mae_AI = []
mape_AI = []

for actual,pred in zip(y_actuals_list, y_preds_list):
    y_preds_fp = Path('data_output/agentic_preds/') / pred
    y_actuals_fp = Path('data_output/prod_features/json_test_files/') / actual

    y_preds_AI = pd.read_json(y_preds_fp, lines=True)
    y_preds_AI = y_preds_AI['predicted_price'] # I only need the predicted price column.
    y_actuals= pd.read_json(y_actuals_fp, lines=True, chunksize=None)

    test_rmse_AI = np.sqrt(mean_squared_error(y_actuals, y_preds_AI))
    rmse_AI.append(test_rmse_AI)
    test_mae_AI = mean_absolute_error(y_actuals,y_preds_AI)
    mae_AI.append(test_mae_AI)
    test_mape_AI = round(mean_absolute_percentage_error(y_actuals,y_preds_AI)*100,2) 
    mape_AI.append(test_mape_AI)

print(f'Test RMSE: {test_rmse_AI} \nTest MAE: {test_mae_AI} \nTest MAPE: {test_mape_AI}%')


Test RMSE: 311629.8876407717 
Test MAE: 177110.9 
Test MAPE: 26.2%


## 2. Multi-Agent Models

In [141]:
X_train

results = duckdb.sql('SELECT * FROM X_train LIMIT 10').df()

results.head()

Unnamed: 0,1HE_district,1HE_CURRENT_ENERGY_RATING,1HE_POTENTIAL_ENERGY_RATING,1HE_PROPERTY_TYPE,1HE_BUILT_FORM,1HE_ENERGY_TARIFF,1HE_MAINS_GAS_FLAG,1HE_GLAZED_AREA,1HE_HOT_WATER_ENERGY_EFF,1HE_HOT_WATER_ENV_EFF,...,CORE_HEATING_COST_POTENTIAL,CORE_HOT_WATER_COST_CURRENT,CORE_HOT_WATER_COST_POTENTIAL,CORE_TOTAL_FLOOR_AREA,CORE_MULTI_GLAZE_PROPORTION,CORE_EXTENSION_COUNT,CORE_NUMBER_HABITABLE_ROOMS,CORE_NUMBER_HEATED_ROOMS,CORE_LOW_ENERGY_LIGHTING,CORE_NUMBER_OPEN_FIREPLACES
0,BUCKINGHAMSHIRE,D,C,House,End-Terrace,Unknown,Not Available,Normal,Good,Good,...,327.0,118.0,103.0,88.24,100.0,0.0,5.0,5.0,0.0,0.0
1,BUCKINGHAMSHIRE,C,C,House,Mid-Terrace,Single,Y,Normal,Good,Good,...,282.0,85.0,74.0,48.794,100.0,0.0,3.0,3.0,0.0,0.0
2,SLOUGH,E,C,House,Semi-Detached,Single,Y,Normal,Good,Good,...,511.0,85.0,54.0,73.0,13.0,2.0,5.0,5.0,83.0,1.0
3,BUCKINGHAMSHIRE,D,D,House,Mid-Terrace,Single,Y,Normal,Good,Good,...,589.0,93.0,82.0,72.7,0.0,0.0,4.0,4.0,78.0,1.0
4,WINDSOR AND MAIDENHEAD,F,C,House,Mid-Terrace,Single,Y,Normal,Average,Average,...,648.0,102.0,81.0,84.0,5.0,2.0,4.0,2.0,0.0,1.0


In [213]:
agent_list = {
    "AnalystAgent":
        """
        Research average prices for houses given the main features:
        1HE_district
        1HE_PROPERTY TYPE 
        1HE_BUILT_FORM 
        CORE_TOTAL_FLOOR_AREA 
        CORE_EXTENSION_COUNT 
        CORE_NUMBER_OF_HABITABLE_ROOMS
        
        Consider the other features if necessary and provide a short summary for the Prediction agent to predict the house price for this particular record. Limit your summary to 100 words! 
        """,
    "PredictionAgent":
        """
        Using data obtained from the Analyst Agent, make a prediction for the house price in GBP for the JSON input. 
        Focus on and consider the following core features from the JSON input to make your prediction but you can use the other columns should you deem it necessary:
        1HE_district
        1HE_PROPERTY TYPE 
        1HE_BUILT_FORM 
        CORE_TOTAL_FLOOR_AREA 
        CORE_EXTENSION_COUNT 
        CORE_NUMBER_OF_HABITABLE_ROOMS
        """
}

In [214]:
test_data = str({"1HE_district":"SLOUGH","1HE_CURRENT_ENERGY_RATING":"D","1HE_POTENTIAL_ENERGY_RATING":"B","1HE_PROPERTY_TYPE":"House","1HE_BUILT_FORM":"Semi-Detached","1HE_ENERGY_TARIFF":"Single","1HE_MAINS_GAS_FLAG":"Y","1HE_GLAZED_AREA":"Normal","1HE_HOT_WATER_ENERGY_EFF":"Good","1HE_HOT_WATER_ENV_EFF":"Good","1HE_WINDOWS_ENERGY_EFF":"Average","1HE_WINDOWS_ENV_EFF":"Average","1HE_WALLS_ENERGY_EFF":"Average","1HE_WALLS_ENV_EFF":"Average","1HE_ROOF_ENERGY_EFF":"Poor","1HE_MAINHEAT_ENERGY_EFF":"Good","1HE_MAINHEATC_ENERGY_EFF":"Good","1HE_LIGHTING_ENERGY_EFF":"Very Good","1HE_MECHANICAL_VENTILATION":"natural","1HE_TENURE":"Owner Occupied","CORE_CURRENT_ENERGY_EFFICIENCY":67.0,"CORE_POTENTIAL_ENERGY_EFFICIENCY":83.0,"CORE_ENVIRONMENT_IMPACT_CURRENT":63.0,"CORE_ENVIRONMENT_IMPACT_POTENTIAL":80.0,"CORE_ENERGY_CONSUMPTION_CURRENT":219.0,"CORE_ENERGY_CONSUMPTION_POTENTIAL":105.0,"CORE_CO2_EMISSIONS_CURRENT":3.2,"CORE_CO2_EMISS_CURR_PER_FLOOR_AREA":39.0,"CORE_CO2_EMISSIONS_POTENTIAL":1.6,"CORE_LIGHTING_COST_CURRENT":72.0,"CORE_LIGHTING_COST_POTENTIAL":72.0,"CORE_HEATING_COST_CURRENT":540.0,"CORE_HEATING_COST_POTENTIAL":461.0,"CORE_HOT_WATER_COST_CURRENT":94.0,"CORE_HOT_WATER_COST_POTENTIAL":66.0,"CORE_TOTAL_FLOOR_AREA":82.0,"CORE_MULTI_GLAZE_PROPORTION":100.0,"CORE_EXTENSION_COUNT":0.0,"CORE_NUMBER_HABITABLE_ROOMS":5.0,"CORE_NUMBER_HEATED_ROOMS":5.0,"CORE_LOW_ENERGY_LIGHTING":100.0,"CORE_NUMBER_OPEN_FIREPLACES":0.0})

In [None]:
class MultiAgent(NaiveAgent):
    
    def __init__(self, agent_list,max_agents = 5, model='gpt-5-nano'):
        super().__init__(model) # Initialising the parent class. 
        self.memory = []
        self.max_agents = max_agents
        self.agent_list = agent_list
        self.analyst_sys_prompt = self.agent_list['AnalystAgent']
        self.predictor_sys_prompt = self.agent_list['PredictionAgent']
        self.rag_errors = 0
        
        #Initialising RAG database in-memory - descroped as SQL prompts were highly erroneous. 
        #self.conn = duckdb.connect(':memory:')
        #self.conn.execute(f"CREATE TABLE masterRAG AS SELECT * FROM '{'data_output/RAGdb/RAGdb.parquet'}'")
        #load_count = self.conn.execute("SELECT COUNT(*) FROM X_train").fetchone()[0]
        #print(f'Loaded {load_count} records into memory.')
        
        if len(agent_list) > self.max_agents: # Validation to prevent high costs
            raise ValueError(f"You have more than {self.max_agents} agents, please reduce n_agents or increase max_agents.")
        
    def initialise_Analyst(self, start_prompt):
        self.memory = []
        self.memory.append(start_prompt)
        
        # Formatting the start prompt as a str (JSON file read in)
        if isinstance(start_prompt, (dict, pd.Series)):
            prompt_text = "\n".join([f'{k}:{v}' for k, v in start_prompt.items()])
        else: 
            prompt_text = str(start_prompt)
            
        self.analyst_response = self.client.responses.create(
        model = self.model,
        input = [
            {
                "role":"system",
                "content":self.analyst_sys_prompt
            },
            {
                "role":"user",
                "content": prompt_text
            }
        ]
        )
        #self.memory.append(self.analyst_response.output_text)
        #augmentation = self.rag(sql= self.analyst_response.output_text)
        
        self.memory.append(
                f'Data To Support Predicton:\n{self.analyst_response.output_text}'
            )
        
        """ 
        # Removed due to poor quality SQL scripts from AnalystAgent.
        def rag(self, sql):
        print(f'Starting RAG with the following SQL Query: \n{self.analyst_response.output_text}')
        try:
            results = self.conn.execute(sql).fetch_df()
            json_results = results.to_json(orient='records', indent=2)
            return json_results
            
        except:
            self.rag_errors += 1
            print('Something went wrong generating the SQL RAG Output. RAG Failed for this records.')
            return 'No augmentation data available.'
        """

    def initialise_Predictor(self):
        
        context = '\n'.join([str(item) for item in self.memory])
        self.prediction_context = context
        
        predictor_prompt = f'Context From Previous Analysis:\n{context}\nMake your prediction based on this data.'
        
        predictor_response = self.client.responses.create(
            model = self.model,
            input = [
                {
                    "role":"system",
                    "content":self.predictor_sys_prompt
                },
                {
                    "role":"user",
                    "content": predictor_prompt
                }
            ],
            text = {
                "format":{
                    "type":"json_schema",
                    "name":"agent_output",
                    "strict":True,
                    "schema": {
                        "type":"object",
                        "properties":{
                                        "price":{"type":"number"}
                                    },
                                    "required":["price"],
                                    "additionalProperties": False
                                }
                            }
                        }
        )
        self.batch_output = json.loads(predictor_response.output_text)
        self.memory.append(self.batch_output)
        #print(f'Prediction: {self.batch_output}') #Used for testing purpose
        #print(f'Context used:{predictor_prompt} ') #Used for testing purpose
        
    def main(self,start_prompt):
        self.initialise_Analyst(start_prompt=start_prompt)
        self.initialise_Predictor()
    
    def batch_predict_advanced(self, file_name, output_file='preds.jsonl'):
        
        data = self.JSON_read(file_name)
        batch_results = []
        
        for i, record in enumerate(tqdm(data, desc="Processing Test Data Predictions")):
            self.main(start_prompt=record)
        
            if self.batch_output:
                result = {
                    'predicted_price': self.batch_output['price'],
                    'pred_id':i,
                    'context':self.prediction_context
                }
                batch_results.append(result)
        
        # Saving results as JSONL
        
        output_dir = Path('data_output/advanced_agentic_preds/') 
        start = Path(file_name).stem #before the .jsonl
        end = Path(file_name).suffix
        output_file_name = f'{start}_preds{end}'
        output_path = output_dir / output_file_name
        
        with output_path.open('w', encoding='utf-8') as f:
            for result in batch_results:
                f.write(json.dumps(result) + '\n')
        
        print(f'Saved {len(batch_results)} predictions at {str(output_path)}')


In [None]:
start_time = time.time()

agent_advanced = MultiAgent(agent_list=agent_list)
chunk1_response = agent_advanced.batch_predict_advanced(file_name='X_test_part_0.jsonl')

end_time = time.time()
duration = end_time - start_time
print(f'Predictions were completed in {(duration/60):.2f} minutes.')

Processing Test Data Predictions: 100%|██████████| 3/3 [01:18<00:00, 26.16s/it]

Saved 3 predictions at data_output/advanced_agentic_preds/test_small_preds.jsonl
Predictions were completed in 1.31 minutes.



